diff options
Diffstat (limited to 'kernel')
50 files changed, 2783 insertions, 1725 deletions
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index bf987b95b35..24e7cb0ba26 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -35,6 +35,7 @@ config PREEMPT_VOLUNTARY config PREEMPT bool "Preemptible Kernel (Low-Latency Desktop)" + select PREEMPT_COUNT help This option reduces the latency of the kernel by making all kernel code (that is not executing in a critical section) @@ -52,3 +53,5 @@ config PREEMPT endchoice +config PREEMPT_COUNT + bool
\ No newline at end of file diff --git a/kernel/async.c b/kernel/async.c index cd9dbb913c7..d5fe7af0de2 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -49,12 +49,13 @@ asynchronous and synchronous parts of the kernel. */ #include <linux/async.h> +#include <linux/atomic.h> +#include <linux/ktime.h> #include <linux/module.h> #include <linux/wait.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/workqueue.h> -#include <asm/atomic.h> static async_cookie_t next_cookie = 1; @@ -128,7 +129,8 @@ static void async_run_entry_fn(struct work_struct *work) /* 2) run (and print duration) */ if (initcall_debug && system_state == SYSTEM_BOOTING) { - printk("calling %lli_%pF @ %i\n", (long long)entry->cookie, + printk(KERN_DEBUG "calling %lli_%pF @ %i\n", + (long long)entry->cookie, entry->func, task_pid_nr(current)); calltime = ktime_get(); } @@ -136,7 +138,7 @@ static void async_run_entry_fn(struct work_struct *work) if (initcall_debug && system_state == SYSTEM_BOOTING) { rettime = ktime_get(); delta = ktime_sub(rettime, calltime); - printk("initcall %lli_%pF returned 0 after %lld usecs\n", + printk(KERN_DEBUG "initcall %lli_%pF returned 0 after %lld usecs\n", (long long)entry->cookie, entry->func, (long long)ktime_to_ns(delta) >> 10); @@ -270,7 +272,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, ktime_t starttime, delta, endtime; if (initcall_debug && system_state == SYSTEM_BOOTING) { - printk("async_waiting @ %i\n", task_pid_nr(current)); + printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current)); starttime = ktime_get(); } @@ -280,7 +282,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, endtime = ktime_get(); delta = ktime_sub(endtime, starttime); - printk("async_continuing @ %i after %lli usec\n", + printk(KERN_DEBUG "async_continuing @ %i after %lli usec\n", task_pid_nr(current), (long long)ktime_to_ns(delta) >> 10); } diff --git a/kernel/audit.c b/kernel/audit.c index 93950031706..52501b5d490 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -55,6 +55,9 @@ #include <net/sock.h> #include <net/netlink.h> #include <linux/skbuff.h> +#ifdef CONFIG_SECURITY +#include <linux/security.h> +#endif #include <linux/netlink.h> #include <linux/freezer.h> #include <linux/tty.h> @@ -1502,6 +1505,32 @@ void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type, } } +#ifdef CONFIG_SECURITY +/** + * audit_log_secctx - Converts and logs SELinux context + * @ab: audit_buffer + * @secid: security number + * + * This is a helper function that calls security_secid_to_secctx to convert + * secid to secctx and then adds the (converted) SELinux context to the audit + * log by calling audit_log_format, thus also preventing leak of internal secid + * to userspace. If secid cannot be converted audit_panic is called. + */ +void audit_log_secctx(struct audit_buffer *ab, u32 secid) +{ + u32 len; + char *secctx; + + if (security_secid_to_secctx(secid, &secctx, &len)) { + audit_panic("Cannot convert secid to context"); + } else { + audit_log_format(ab, " obj=%s", secctx); + security_release_secctx(secctx, len); + } +} +EXPORT_SYMBOL(audit_log_secctx); +#endif + EXPORT_SYMBOL(audit_log_start); EXPORT_SYMBOL(audit_log_end); EXPORT_SYMBOL(audit_log_format); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index e99dda04b12..5bf0790497e 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -93,16 +93,10 @@ static inline void get_tree(struct audit_tree *tree) atomic_inc(&tree->count); } -static void __put_tree(struct rcu_head *rcu) -{ - struct audit_tree *tree = container_of(rcu, struct audit_tree, head); - kfree(tree); -} - static inline void put_tree(struct audit_tree *tree) { if (atomic_dec_and_test(&tree->count)) - call_rcu(&tree->head, __put_tree); + kfree_rcu(tree, head); } /* to avoid bringing the entire thing in audit.h */ diff --git a/kernel/events/Makefile b/kernel/events/Makefile index 1ce23d3d839..89e5e8aa4c3 100644 --- a/kernel/events/Makefile +++ b/kernel/events/Makefile @@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_core.o = -pg endif -obj-y := core.o +obj-y := core.o ring_buffer.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o diff --git a/kernel/events/core.c b/kernel/events/core.c index 9efe7108cca..b8785e26ee1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -36,6 +36,8 @@ #include <linux/ftrace_event.h> #include <linux/hw_breakpoint.h> +#include "internal.h" + #include <asm/irq_regs.h> struct remote_function_call { @@ -200,6 +202,22 @@ __get_cpu_context(struct perf_event_context *ctx) return this_cpu_ptr(ctx->pmu->pmu_cpu_context); } +static void perf_ctx_lock(struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx) +{ + raw_spin_lock(&cpuctx->ctx.lock); + if (ctx) + raw_spin_lock(&ctx->lock); +} + +static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx) +{ + if (ctx) + raw_spin_unlock(&ctx->lock); + raw_spin_unlock(&cpuctx->ctx.lock); +} + #ifdef CONFIG_CGROUP_PERF /* @@ -340,11 +358,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode) rcu_read_lock(); list_for_each_entry_rcu(pmu, &pmus, entry) { - cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - perf_pmu_disable(cpuctx->ctx.pmu); - /* * perf_cgroup_events says at least one * context on this CPU has cgroup events. @@ -353,6 +368,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode) * events for a context. */ if (cpuctx->ctx.nr_cgroups > 0) { + perf_ctx_lock(cpuctx, cpuctx->task_ctx); + perf_pmu_disable(cpuctx->ctx.pmu); if (mode & PERF_CGROUP_SWOUT) { cpu_ctx_sched_out(cpuctx, EVENT_ALL); @@ -372,9 +389,9 @@ void perf_cgroup_switch(struct task_struct *task, int mode) cpuctx->cgrp = perf_cgroup_from_task(task); cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); } + perf_pmu_enable(cpuctx->ctx.pmu); + perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } - - perf_pmu_enable(cpuctx->ctx.pmu); } rcu_read_unlock(); @@ -731,6 +748,7 @@ static u64 perf_event_time(struct perf_event *event) /* * Update the total_time_enabled and total_time_running fields for a event. + * The caller of this function needs to hold the ctx->lock. */ static void update_event_times(struct perf_event *event) { @@ -1105,6 +1123,10 @@ static int __perf_remove_from_context(void *info) raw_spin_lock(&ctx->lock); event_sched_out(event, cpuctx, ctx); list_del_event(event, ctx); + if (!ctx->nr_events && cpuctx->task_ctx == ctx) { + ctx->is_active = 0; + cpuctx->task_ctx = NULL; + } raw_spin_unlock(&ctx->lock); return 0; @@ -1454,8 +1476,24 @@ static void add_event_to_ctx(struct perf_event *event, event->tstamp_stopped = tstamp; } -static void perf_event_context_sched_in(struct perf_event_context *ctx, - struct task_struct *tsk); +static void task_ctx_sched_out(struct perf_event_context *ctx); +static void +ctx_sched_in(struct perf_event_context *ctx, + struct perf_cpu_context *cpuctx, + enum event_type_t event_type, + struct task_struct *task); + +static void perf_event_sched_in(struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, + struct task_struct *task) +{ + cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task); + if (ctx) + ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); + cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); + if (ctx) + ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); +} /* * Cross CPU call to install and enable a performance event @@ -1466,20 +1504,37 @@ static int __perf_install_in_context(void *info) { struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; - struct perf_event *leader = event->group_leader; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); - int err; + struct perf_event_context *task_ctx = cpuctx->task_ctx; + struct task_struct *task = current; + + perf_ctx_lock(cpuctx, task_ctx); + perf_pmu_disable(cpuctx->ctx.pmu); /* - * In case we're installing a new context to an already running task, - * could also happen before perf_event_task_sched_in() on architectures - * which do context switches with IRQs enabled. + * If there was an active task_ctx schedule it out. */ - if (ctx->task && !cpuctx->task_ctx) - perf_event_context_sched_in(ctx, ctx->task); + if (task_ctx) + task_ctx_sched_out(task_ctx); + + /* + * If the context we're installing events in is not the + * active task_ctx, flip them. + */ + if (ctx->task && task_ctx != ctx) { + if (task_ctx) + raw_spin_unlock(&task_ctx->lock); + raw_spin_lock(&ctx->lock); + task_ctx = ctx; + } + + if (task_ctx) { + cpuctx->task_ctx = task_ctx; + task = task_ctx->task; + } + + cpu_ctx_sched_out(cpuctx, EVENT_ALL); - raw_spin_lock(&ctx->lock); - ctx->is_active = 1; update_context_time(ctx); /* * update cgrp time only if current cgrp @@ -1490,43 +1545,13 @@ static int __perf_install_in_context(void *info) add_event_to_ctx(event, ctx); - if (!event_filter_match(event)) - goto unlock; - - /* - * Don't put the event on if it is disabled or if - * it is in a group and the group isn't on. - */ - if (event->state != PERF_EVENT_STATE_INACTIVE || - (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)) - goto unlock; - /* - * An exclusive event can't go on if there are already active - * hardware events, and no hardware event can go on if there - * is already an exclusive event on. + * Schedule everything back in */ - if (!group_can_go_on(event, cpuctx, 1)) - err = -EEXIST; - else - err = event_sched_in(event, cpuctx, ctx); - - if (err) { - /* - * This event couldn't go on. If it is in a group - * then we have to pull the whole group off. - * If the event group is pinned then put it in error state. - */ - if (leader != event) - group_sched_out(leader, cpuctx, ctx); - if (leader->attr.pinned) { - update_group_times(leader); - leader->state = PERF_EVENT_STATE_ERROR; - } - } + perf_event_sched_in(cpuctx, task_ctx, task); -unlock: - raw_spin_unlock(&ctx->lock); + perf_pmu_enable(cpuctx->ctx.pmu); + perf_ctx_unlock(cpuctx, task_ctx); return 0; } @@ -1739,7 +1764,7 @@ out: raw_spin_unlock_irq(&ctx->lock); } -static int perf_event_refresh(struct perf_event *event, int refresh) +int perf_event_refresh(struct perf_event *event, int refresh) { /* * not supported on inherited events @@ -1752,36 +1777,35 @@ static int perf_event_refresh(struct perf_event *event, int refresh) return 0; } +EXPORT_SYMBOL_GPL(perf_event_refresh); static void ctx_sched_out(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, enum event_type_t event_type) { struct perf_event *event; + int is_active = ctx->is_active; - raw_spin_lock(&ctx->lock); - perf_pmu_disable(ctx->pmu); - ctx->is_active = 0; + ctx->is_active &= ~event_type; if (likely(!ctx->nr_events)) - goto out; + return; + update_context_time(ctx); update_cgrp_time_from_cpuctx(cpuctx); - if (!ctx->nr_active) - goto out; + return; - if (event_type & EVENT_PINNED) { + perf_pmu_disable(ctx->pmu); + if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) { list_for_each_entry(event, &ctx->pinned_groups, group_entry) group_sched_out(event, cpuctx, ctx); } - if (event_type & EVENT_FLEXIBLE) { + if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) { list_for_each_entry(event, &ctx->flexible_groups, group_entry) group_sched_out(event, cpuctx, ctx); } -out: perf_pmu_enable(ctx->pmu); - raw_spin_unlock(&ctx->lock); } /* @@ -1929,8 +1953,10 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, rcu_read_unlock(); if (do_switch) { + raw_spin_lock(&ctx->lock); ctx_sched_out(ctx, cpuctx, EVENT_ALL); cpuctx->task_ctx = NULL; + raw_spin_unlock(&ctx->lock); } } @@ -1965,8 +1991,7 @@ void __perf_event_task_sched_out(struct task_struct *task, perf_cgroup_sched_out(task); } -static void task_ctx_sched_out(struct perf_event_context *ctx, - enum event_type_t event_type) +static void task_ctx_sched_out(struct perf_event_context *ctx) { struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); @@ -1976,7 +2001,7 @@ static void task_ctx_sched_out(struct perf_event_context *ctx, if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) return; - ctx_sched_out(ctx, cpuctx, event_type); + ctx_sched_out(ctx, cpuctx, EVENT_ALL); cpuctx->task_ctx = NULL; } @@ -2055,11 +2080,11 @@ ctx_sched_in(struct perf_event_context *ctx, struct task_struct *task) { u64 now; + int is_active = ctx->is_active; - raw_spin_lock(&ctx->lock); - ctx->is_active = 1; + ctx->is_active |= event_type; if (likely(!ctx->nr_events)) - goto out; + return; now = perf_clock(); ctx->timestamp = now; @@ -2068,15 +2093,12 @@ ctx_sched_in(struct perf_event_context *ctx, * First go through the list and put on any pinned groups * in order to give them the best chance of going on. */ - if (event_type & EVENT_PINNED) + if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) ctx_pinned_sched_in(ctx, cpuctx); /* Then walk through the lower prio flexible groups */ - if (event_type & EVENT_FLEXIBLE) + if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) ctx_flexible_sched_in(ctx, cpuctx); - -out: - raw_spin_unlock(&ctx->lock); } static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, @@ -2088,19 +2110,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, ctx_sched_in(ctx, cpuctx, event_type, task); } -static void task_ctx_sched_in(struct perf_event_context *ctx, - enum event_type_t event_type) -{ - struct perf_cpu_context *cpuctx; - - cpuctx = __get_cpu_context(ctx); - if (cpuctx->task_ctx == ctx) - return; - - ctx_sched_in(ctx, cpuctx, event_type, NULL); - cpuctx->task_ctx = ctx; -} - static void perf_event_context_sched_in(struct perf_event_context *ctx, struct task_struct *task) { @@ -2110,6 +2119,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, if (cpuctx->task_ctx == ctx) return; + perf_ctx_lock(cpuctx, ctx); perf_pmu_disable(ctx->pmu); /* * We want to keep the following priority order: @@ -2118,18 +2128,18 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, */ cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); - ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); - cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); - ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); + perf_event_sched_in(cpuctx, ctx, task); cpuctx->task_ctx = ctx; + perf_pmu_enable(ctx->pmu); + perf_ctx_unlock(cpuctx, ctx); + /* * Since these rotations are per-cpu, we need to ensure the * cpu-context we got scheduled on is actually rotating. */ perf_pmu_rotate_start(ctx->pmu); - perf_pmu_enable(ctx->pmu); } /* @@ -2269,7 +2279,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) u64 interrupts, now; s64 delta; - raw_spin_lock(&ctx->lock); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->state != PERF_EVENT_STATE_ACTIVE) continue; @@ -2301,7 +2310,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) if (delta > 0) perf_adjust_period(event, period, delta); } - raw_spin_unlock(&ctx->lock); } /* @@ -2309,16 +2317,12 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) */ static void rotate_ctx(struct perf_event_context *ctx) { - raw_spin_lock(&ctx->lock); - /* * Rotate the first entry last of non-pinned groups. Rotation might be * disabled by the inheritance code. */ if (!ctx->rotate_disable) list_rotate_left(&ctx->flexible_groups); - - raw_spin_unlock(&ctx->lock); } /* @@ -2345,6 +2349,7 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) rotate = 1; } + perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(cpuctx->ctx.pmu); perf_ctx_adjust_freq(&cpuctx->ctx, interval); if (ctx) @@ -2355,21 +2360,20 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (ctx) - task_ctx_sched_out(ctx, EVENT_FLEXIBLE); + ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); rotate_ctx(&cpuctx->ctx); if (ctx) rotate_ctx(ctx); - cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current); - if (ctx) - task_ctx_sched_in(ctx, EVENT_FLEXIBLE); + perf_event_sched_in(cpuctx, ctx, current); done: if (remove) list_del_init(&cpuctx->rotation_list); perf_pmu_enable(cpuctx->ctx.pmu); + perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } void perf_event_task_tick(void) @@ -2424,9 +2428,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) * in. */ perf_cgroup_sched_out(current); - task_ctx_sched_out(ctx, EVENT_ALL); raw_spin_lock(&ctx->lock); + task_ctx_sched_out(ctx); list_for_each_entry(event, &ctx->pinned_groups, group_entry) { ret = event_enable_on_exec(event, ctx); @@ -2835,16 +2839,12 @@ retry: unclone_ctx(ctx); ++ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); - } - - if (!ctx) { + } else { ctx = alloc_perf_context(pmu, task); err = -ENOMEM; if (!ctx) goto errout; - get_ctx(ctx); - err = 0; mutex_lock(&task->perf_event_mutex); /* @@ -2856,14 +2856,14 @@ retry: else if (task->perf_event_ctxp[ctxn]) err = -EAGAIN; else { + get_ctx(ctx); ++ctx->pin_count; rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); } mutex_unlock(&task->perf_event_mutex); if (unlikely(err)) { - put_task_struct(task); - kfree(ctx); + put_ctx(ctx); if (err == -EAGAIN) goto retry; @@ -2890,7 +2890,7 @@ static void free_event_rcu(struct rcu_head *head) kfree(event); } -static void perf_buffer_put(struct perf_buffer *buffer); +static void ring_buffer_put(struct ring_buffer *rb); static void free_event(struct perf_event *event) { @@ -2913,9 +2913,9 @@ static void free_event(struct perf_event *event) } } - if (event->buffer) { - perf_buffer_put(event->buffer); - event->buffer = NULL; + if (event->rb) { + ring_buffer_put(event->rb); + event->rb = NULL; } if (is_cgroup_event(event)) @@ -2934,12 +2934,6 @@ int perf_event_release_kernel(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; - /* - * Remove from the PMU, can't get re-enabled since we got - * here because the last ref went. - */ - perf_event_disable(event); - WARN_ON_ONCE(ctx->parent_ctx); /* * There are two ways this annotation is useful: @@ -2956,8 +2950,8 @@ int perf_event_release_kernel(struct perf_event *event) mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); raw_spin_lock_irq(&ctx->lock); perf_group_detach(event); - list_del_event(event, ctx); raw_spin_unlock_irq(&ctx->lock); + perf_remove_from_context(event); mutex_unlock(&ctx->mutex); free_event(event); @@ -3149,13 +3143,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) static unsigned int perf_poll(struct file *file, poll_table *wait) { struct perf_event *event = file->private_data; - struct perf_buffer *buffer; + struct ring_buffer *rb; unsigned int events = POLL_HUP; rcu_read_lock(); - buffer = rcu_dereference(event->buffer); - if (buffer) - events = atomic_xchg(&buffer->poll, 0); + rb = rcu_dereference(event->rb); + if (rb) |