aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/fork.c47
-rw-r--r--kernel/futex.c28
-rw-r--r--kernel/futex_compat.c6
-rw-r--r--kernel/irq/manage.c27
-rw-r--r--kernel/irq/numa_migrate.c4
-rw-r--r--kernel/lockdep_proc.c3
-rw-r--r--kernel/module.c10
-rw-r--r--kernel/panic.c1
-rw-r--r--kernel/perf_counter.c677
-rw-r--r--kernel/posix-cpu-timers.c7
-rw-r--r--kernel/posix-timers.c7
-rw-r--r--kernel/rtmutex.c4
-rw-r--r--kernel/sched_cpupri.c15
-rw-r--r--kernel/sched_fair.c32
-rw-r--r--kernel/signal.c25
-rw-r--r--kernel/smp.c2
-rw-r--r--kernel/sysctl.c7
-rw-r--r--kernel/time/clockevents.c16
-rw-r--r--kernel/time/tick-broadcast.c7
-rw-r--r--kernel/time/timer_list.c2
-rw-r--r--kernel/trace/blktrace.c12
-rw-r--r--kernel/trace/ftrace.c21
-rw-r--r--kernel/trace/ring_buffer.c15
-rw-r--r--kernel/trace/trace.c25
-rw-r--r--kernel/trace/trace.h4
-rw-r--r--kernel/trace/trace_event_profile.c2
-rw-r--r--kernel/trace/trace_events.c4
-rw-r--r--kernel/trace/trace_events_filter.c20
-rw-r--r--kernel/trace/trace_functions_graph.c11
-rw-r--r--kernel/trace/trace_printk.c2
-rw-r--r--kernel/wait.c5
31 files changed, 679 insertions, 369 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 29b532e718f..e6c04d462ab 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -426,7 +426,6 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
init_rwsem(&mm->mmap_sem);
INIT_LIST_HEAD(&mm->mmlist);
mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
- mm->oom_adj = (current->mm) ? current->mm->oom_adj : 0;
mm->core_state = NULL;
mm->nr_ptes = 0;
set_mm_counter(mm, file_rss, 0);
@@ -568,18 +567,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
* the value intact in a core dump, and to save the unnecessary
* trouble otherwise. Userland only wants this done for a sys_exit.
*/
- if (tsk->clear_child_tid
- && !(tsk->flags & PF_SIGNALED)
- && atomic_read(&mm->mm_users) > 1) {
- u32 __user * tidptr = tsk->clear_child_tid;
+ if (tsk->clear_child_tid) {
+ if (!(tsk->flags & PF_SIGNALED) &&
+ atomic_read(&mm->mm_users) > 1) {
+ /*
+ * We don't check the error code - if userspace has
+ * not set up a proper pointer then tough luck.
+ */
+ put_user(0, tsk->clear_child_tid);
+ sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
+ 1, NULL, NULL, 0);
+ }
tsk->clear_child_tid = NULL;
-
- /*
- * We don't check the error code - if userspace has
- * not set up a proper pointer then tough luck.
- */
- put_user(0, tidptr);
- sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
}
}
@@ -816,11 +815,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
{
struct signal_struct *sig;
- if (clone_flags & CLONE_THREAD) {
- atomic_inc(&current->signal->count);
- atomic_inc(&current->signal->live);
+ if (clone_flags & CLONE_THREAD)
return 0;
- }
sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
tsk->signal = sig;
@@ -878,16 +874,6 @@ void __cleanup_signal(struct signal_struct *sig)
kmem_cache_free(signal_cachep, sig);
}
-static void cleanup_signal(struct task_struct *tsk)
-{
- struct signal_struct *sig = tsk->signal;
-
- atomic_dec(&sig->live);
-
- if (atomic_dec_and_test(&sig->count))
- __cleanup_signal(sig);
-}
-
static void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
unsigned long new_flags = p->flags;
@@ -1240,6 +1226,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
if (clone_flags & CLONE_THREAD) {
+ atomic_inc(&current->signal->count);
+ atomic_inc(&current->signal->live);
p->group_leader = current->group_leader;
list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
}
@@ -1269,6 +1257,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
write_unlock_irq(&tasklist_lock);
proc_fork_connector(p);
cgroup_post_fork(p);
+ perf_counter_fork(p);
return p;
bad_fork_free_pid:
@@ -1282,7 +1271,8 @@ bad_fork_cleanup_mm:
if (p->mm)
mmput(p->mm);
bad_fork_cleanup_signal:
- cleanup_signal(p);
+ if (!(clone_flags & CLONE_THREAD))
+ __cleanup_signal(p->signal);
bad_fork_cleanup_sighand:
__cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
@@ -1410,9 +1400,6 @@ long do_fork(unsigned long clone_flags,
init_completion(&vfork);
}
- if (!(clone_flags & CLONE_THREAD))
- perf_counter_fork(p);
-
audit_finish_fork(p);
tracehook_report_clone(regs, clone_flags, nr, p);
diff --git a/kernel/futex.c b/kernel/futex.c
index 0672ff88f15..e18cfbdc719 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1010,15 +1010,19 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
* requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
* q: the futex_q
* key: the key of the requeue target futex
+ * hb: the hash_bucket of the requeue target futex
*
* During futex_requeue, with requeue_pi=1, it is possible to acquire the
* target futex if it is uncontended or via a lock steal. Set the futex_q key
* to the requeue target futex so the waiter can detect the wakeup on the right
* futex, but remove it from the hb and NULL the rt_waiter so it can detect
- * atomic lock acquisition. Must be called with the q->lock_ptr held.
+ * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
+ * to protect access to the pi_state to fixup the owner later. Must be called
+ * with both q->lock_ptr and hb->lock held.
*/
static inline
-void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
+void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
+ struct futex_hash_bucket *hb)
{
drop_futex_key_refs(&q->key);
get_futex_key_refs(key);
@@ -1030,6 +1034,11 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
WARN_ON(!q->rt_waiter);
q->rt_waiter = NULL;
+ q->lock_ptr = &hb->lock;
+#ifdef CONFIG_DEBUG_PI_LIST
+ q->list.plist.lock = &hb->lock;
+#endif
+
wake_up_state(q->task, TASK_NORMAL);
}
@@ -1088,7 +1097,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
set_waiters);
if (ret == 1)
- requeue_pi_wake_futex(top_waiter, key2);
+ requeue_pi_wake_futex(top_waiter, key2, hb2);
return ret;
}
@@ -1247,8 +1256,15 @@ retry_private:
if (!match_futex(&this->key, &key1))
continue;
- WARN_ON(!requeue_pi && this->rt_waiter);
- WARN_ON(requeue_pi && !this->rt_waiter);
+ /*
+ * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
+ * be paired with each other and no other futex ops.
+ */
+ if ((requeue_pi && !this->rt_waiter) ||
+ (!requeue_pi && this->rt_waiter)) {
+ ret = -EINVAL;
+ break;
+ }
/*
* Wake nr_wake waiters. For requeue_pi, if we acquired the
@@ -1273,7 +1289,7 @@ retry_private:
this->task, 1);
if (ret == 1) {
/* We got the lock. */
- requeue_pi_wake_futex(this, &key2);
+ requeue_pi_wake_futex(this, &key2, hb2);
continue;
} else if (ret) {
/* -EDEADLK */
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d607a5b9ee2..235716556bf 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -180,7 +180,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
int cmd = op & FUTEX_CMD_MASK;
if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
- cmd == FUTEX_WAIT_BITSET)) {
+ cmd == FUTEX_WAIT_BITSET ||
+ cmd == FUTEX_WAIT_REQUEUE_PI)) {
if (get_compat_timespec(&ts, utime))
return -EFAULT;
if (!timespec_valid(&ts))
@@ -191,7 +192,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
t = ktime_add_safe(ktime_get(), t);
tp = &t;
}
- if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+ cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
val2 = (int) (unsigned long) utime;
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 61c679db468..0ec9ed83173 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -607,7 +607,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
*/
get_task_struct(t);
new->thread = t;
- wake_up_process(t);
}
/*
@@ -690,6 +689,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
(int)(new->flags & IRQF_TRIGGER_MASK));
}
+ new->irq = irq;
*old_ptr = new;
/* Reset broken irq detection when installing new handler */
@@ -707,7 +707,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
spin_unlock_irqrestore(&desc->lock, flags);
- new->irq = irq;
+ /*
+ * Strictly no need to wake it up, but hung_task complains
+ * when no hard interrupt wakes the thread up.
+ */
+ if (new->thread)
+ wake_up_process(new->thread);
+
register_irq_proc(irq, desc);
new->dir = NULL;
register_handler_proc(irq, new);
@@ -761,7 +767,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action, **action_ptr;
- struct task_struct *irqthread;
unsigned long flags;
WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -809,9 +814,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
desc->chip->disable(irq);
}
- irqthread = action->thread;
- action->thread = NULL;
-
spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
@@ -819,12 +821,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
/* Make sure it's not being used on another CPU: */
synchronize_irq(irq);
- if (irqthread) {
- if (!test_bit(IRQTF_DIED, &action->thread_flags))
- kthread_stop(irqthread);
- put_task_struct(irqthread);
- }
-
#ifdef CONFIG_DEBUG_SHIRQ
/*
* It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -840,6 +836,13 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
local_irq_restore(flags);
}
#endif
+
+ if (action->thread) {
+ if (!test_bit(IRQTF_DIED, &action->thread_flags))
+ kthread_stop(action->thread);
+ put_task_struct(action->thread);
+ }
+
return action;
}
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 2f69bee57bf..3fd30197da2 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -107,8 +107,8 @@ out_unlock:
struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
{
- /* those all static, do move them */
- if (desc->irq < NR_IRQS_LEGACY)
+ /* those static or target node is -1, do not move them */
+ if (desc->irq < NR_IRQS_LEGACY || node == -1)
return desc;
if (desc->node != node)
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index d7135aa2d2c..e94caa666db 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -758,7 +758,8 @@ static int __init lockdep_proc_init(void)
&proc_lockdep_stats_operations);
#ifdef CONFIG_LOCK_STAT
- proc_create("lock_stat", S_IRUSR, NULL, &proc_lock_stat_operations);
+ proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
+ &proc_lock_stat_operations);
#endif
return 0;
diff --git a/kernel/module.c b/kernel/module.c
index fd141140355..2d537186191 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -909,16 +909,18 @@ void __symbol_put(const char *symbol)
}
EXPORT_SYMBOL(__symbol_put);
+/* Note this assumes addr is a function, which it currently always is. */
void symbol_put_addr(void *addr)
{
struct module *modaddr;
+ unsigned long a = (unsigned long)dereference_function_descriptor(addr);
- if (core_kernel_text((unsigned long)addr))
+ if (core_kernel_text(a))
return;
/* module_text_address is safe here: we're supposed to have reference
* to module from symbol_get, so it can't go away. */
- modaddr = __module_text_address((unsigned long)addr);
+ modaddr = __module_text_address(a);
BUG_ON(!modaddr);
module_put(modaddr);
}
@@ -1272,6 +1274,10 @@ static void add_notes_attrs(struct module *mod, unsigned int nsect,
struct module_notes_attrs *notes_attrs;
struct bin_attribute *nattr;
+ /* failed to create section attributes, so can't create notes */
+ if (!mod->sect_attrs)
+ return;
+
/* Count notes sections and allocate structures. */
notes = 0;
for (i = 0; i < nsect; i++)
diff --git a/kernel/panic.c b/kernel/panic.c
index 984b3ecbd72..512ab73b0ca 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -301,6 +301,7 @@ int oops_may_print(void)
*/
void oops_enter(void)
{
+ tracing_off();
/* can't trust the integrity of the kernel anymore: */
debug_locks_off();
do_oops_enter_exit();
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 95093104195..f274e195988 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -42,6 +42,7 @@ static int perf_overcommit __read_mostly = 1;
static atomic_t nr_counters __read_mostly;
static atomic_t nr_mmap_counters __read_mostly;
static atomic_t nr_comm_counters __read_mostly;
+static atomic_t nr_task_counters __read_mostly;
/*
* perf counter paranoia level:
@@ -87,6 +88,7 @@ void __weak hw_perf_disable(void) { barrier(); }
void __weak hw_perf_enable(void) { barrier(); }
void __weak hw_perf_counter_setup(int cpu) { barrier(); }
+void __weak hw_perf_counter_setup_online(int cpu) { barrier(); }
int __weak
hw_perf_group_sched_in(struct perf_counter *group_leader,
@@ -305,6 +307,10 @@ counter_sched_out(struct perf_counter *counter,
return;
counter->state = PERF_COUNTER_STATE_INACTIVE;
+ if (counter->pending_disable) {
+ counter->pending_disable = 0;
+ counter->state = PERF_COUNTER_STATE_OFF;
+ }
counter->tstamp_stopped = ctx->time;
counter->pmu->disable(counter);
counter->oncpu = -1;
@@ -1103,7 +1109,7 @@ static void perf_counter_sync_stat(struct perf_counter_context *ctx,
__perf_counter_sync_stat(counter, next_counter);
counter = list_next_entry(counter, event_entry);
- next_counter = list_next_entry(counter, event_entry);
+ next_counter = list_next_entry(next_counter, event_entry);
}
}
@@ -1497,10 +1503,21 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
*/
static void __perf_counter_read(void *info)
{
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_counter *counter = info;
struct perf_counter_context *ctx = counter->ctx;
unsigned long flags;
+ /*
+ * If this is a task context, we need to check whether it is
+ * the current task context of this cpu. If not it has been
+ * scheduled out before the smp call arrived. In that case
+ * counter->count would have been updated to a recent sample
+ * when the counter was scheduled out.
+ */
+ if (ctx->task && cpuctx->task_ctx != ctx)
+ return;
+
local_irq_save(flags);
if (ctx->is_active)
update_context_time(ctx);
@@ -1654,6 +1671,8 @@ static void free_counter(struct perf_counter *counter)
atomic_dec(&nr_mmap_counters);
if (counter->attr.comm)
atomic_dec(&nr_comm_counters);
+ if (counter->attr.task)
+ atomic_dec(&nr_task_counters);
}
if (counter->destroy)
@@ -1688,14 +1707,133 @@ static int perf_release(struct inode *inode, struct file *file)
return 0;
}
+static int perf_counter_read_size(struct perf_counter *counter)
+{
+ int entry = sizeof(u64); /* value */
+ int size = 0;
+ int nr = 1;
+
+ if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ size += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ size += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_ID)
+ entry += sizeof(u64);
+
+ if (counter->attr.read_format & PERF_FORMAT_GROUP) {
+ nr += counter->group_leader->nr_siblings;
+ size += sizeof(u64);
+ }
+
+ size += entry * nr;
+
+ return size;
+}
+
+static u64 perf_counter_read_value(struct perf_counter *counter)
+{
+ struct perf_counter *child;
+ u64 total = 0;
+
+ total += perf_counter_read(counter);
+ list_for_each_entry(child, &counter->child_list, child_list)
+ total += perf_counter_read(child);
+
+ return total;
+}
+
+static int perf_counter_read_entry(struct perf_counter *counter,
+ u64 read_format, char __user *buf)
+{
+ int n = 0, count = 0;
+ u64 values[2];
+
+ values[n++] = perf_counter_read_value(counter);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(counter);
+
+ count = n * sizeof(u64);
+
+ if (copy_to_user(buf, values, count))
+ return -EFAULT;
+
+ return count;
+}
+
+static int perf_counter_read_group(struct perf_counter *counter,
+ u64 read_format, char __user *buf)
+{
+ struct perf_counter *leader = counter->group_leader, *sub;
+ int n = 0, size = 0, err = -EFAULT;
+ u64 values[3];
+
+ values[n++] = 1 + leader->nr_siblings;
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = leader->total_time_enabled +
+ atomic64_read(&leader->child_total_time_enabled);
+ }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = leader->total_time_running +
+ atomic64_read(&leader->child_total_time_running);
+ }
+
+ size = n * sizeof(u64);
+
+ if (copy_to_user(buf, values, size))
+ return -EFAULT;
+
+ err = perf_counter_read_entry(leader, read_format, buf + size);
+ if (err < 0)
+ return err;
+
+ size += err;
+
+ list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+ err = perf_counter_read_entry(sub, read_format,
+ buf + size);
+ if (err < 0)
+ return err;
+
+ size += err;
+ }
+
+ return size;
+}
+
+static int perf_counter_read_one(struct perf_counter *counter,
+ u64 read_format, char __user *buf)
+{
+ u64 values[4];
+ int n = 0;
+
+ values[n++] = perf_counter_read_value(counter);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = counter->total_time_enabled +
+ atomic64_read(&counter->child_total_time_enabled);
+ }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = counter->total_time_running +
+ atomic64_read(&counter->child_total_time_running);
+ }
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(counter);
+
+ if (copy_to_user(buf, values, n * sizeof(u64)))
+ return -EFAULT;
+
+ return n * sizeof(u64);
+}
+
/*
* Read the performance counter - simple non blocking version for now
*/
static ssize_t
perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
{
- u64 values[4];
- int n;
+ u64 read_format = counter->attr.read_format;
+ int ret;
/*
* Return end-of-file for a read on a counter that is in
@@ -1705,28 +1843,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
if (counter->state == PERF_COUNTER_STATE_ERROR)
return 0;
+ if (count < perf_counter_read_size(counter))
+ return -ENOSPC;
+
WARN_ON_ONCE(counter->ctx->parent_ctx);
mutex_lock(&counter->child_mutex);
- values[0] = perf_counter_read(counter);
- n = 1;
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
- values[n++] = counter->total_time_enabled +
- atomic64_read(&counter->child_total_time_enabled);
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
- values[n++] = counter->total_time_running +
- atomic64_read(&counter->child_total_time_running);
- if (counter->attr.read_format & PERF_FORMAT_ID)
- values[n++] = primary_counter_id(counter);
+ if (read_format & PERF_FORMAT_GROUP)
+ ret = perf_counter_read_group(counter, read_format, buf);
+ else
+ ret = perf_counter_read_one(counter, read_format, buf);
mutex_unlock(&counter->child_mutex);
- if (count < n * sizeof(u64))
- return -EINVAL;
- count = n * sizeof(u64);
-
- if (copy_to_user(buf, values, count))
- return -EFAULT;
-
- return count;
+ return ret;
}
static ssize_t
@@ -1891,6 +2019,10 @@ int perf_counter_task_disable(void)
return 0;
}
+#ifndef PERF_COUNTER_INDEX_OFFSET
+# define PERF_COUNTER_INDEX_OFFSET 0
+#endif
+
static int perf_counter_index(struct perf_counter *counter)
{
if (counter->state != PERF_COUNTER_STATE_ACTIVE)
@@ -2230,7 +2362,7 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
if (counter->pending_disable) {
counter->pending_disable = 0;
- perf_counter_disable(counter);
+ __perf_counter_disable(counter);
}
if (counter->pending_wakeup) {
@@ -2615,7 +2747,80 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
return task_pid_nr_ns(p, counter->ns);
}
-static void perf_counter_output(struct perf_counter *counter, int nmi,
+static void perf_output_read_one(struct perf_output_handle *handle,
+ struct perf_counter *counter)
+{
+ u64 read_format = counter->attr.read_format;
+ u64 values[4];
+ int n = 0;
+
+ values[n++] = atomic64_read(&counter->count);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ values[n++] = counter->total_time_enabled +
+ atomic64_read(&counter->child_total_time_enabled);
+ }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ values[n++] = counter->total_time_running +
+ atomic64_read(&counter->child_total_time_running);
+ }
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(counter);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+}
+
+/*
+ * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult.
+ */
+static void perf_output_read_group(struct perf_output_handle *handle,
+ struct perf_counter *counter)
+{
+ struct perf_counter *leader = counter->group_leader, *sub;
+ u64 read_format = counter->attr.read_format;
+ u64 values[5];
+ int n = 0;
+
+ values[n++] = 1 + leader->nr_siblings;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ values[n++] = leader->total_time_enabled;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ values[n++] = leader->total_time_running;
+
+ if (leader != counter)
+ leader->pmu->read(leader);
+
+ values[n++] = atomic64_read(&leader->count);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(leader);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+
+ list_for_each_entry(sub, &leader->sibling_list, list_entry) {
+ n = 0;
+
+ if (sub != counter)
+ sub->pmu->read(sub);
+
+ values[n++] = atomic64_read(&sub->count);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_counter_id(sub);
+
+ perf_output_copy(handle, values, n * sizeof(u64));
+ }
+}
+
+static void perf_output_read(struct perf_output_handle *handle,
+ struct perf_counter *counter)
+{
+ if (counter->attr.read_format & PERF_FORMAT_GROUP)
+ perf_output_read_group(handle, counter);
+ else
+ perf_output_read_one(handle, counter);
+}
+
+void perf_counter_output(struct perf_counter *counter, int nmi,
struct perf_sample_data *data)
{
int ret;
@@ -2626,10 +2831,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
struct {
u32 pid, tid;
} tid_entry;
- struct {
- u64 id;
- u64 counter;
- } group_entry;
struct perf_callchain_entry *callchain = NULL;
int callchain_size = 0;
u64 time;
@@ -2684,10 +2885,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (sample_type & PERF_SAMPLE_PERIOD)
header.size += sizeof(u64);
- if (sample_type & PERF_SAMPLE_GROUP) {
- header.size += sizeof(u64) +
- counter->nr_siblings * sizeof(group_entry);
- }
+ if (sample_type & PERF_SAMPLE_READ)
+ header.size += perf_counter_read_size(counter);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
callchain = perf_callchain(data->regs);
@@ -2699,6 +2898,18 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
header.size += sizeof(u64);
}
+ if (sample_type & PERF_SAMPLE_RAW) {
+ int size = sizeof(u32);
+
+ if (data->raw)
+ size += data->raw->size;
+ else
+ size += sizeof(u32);
+
+ WARN_ON_ONCE(size & (sizeof(u64)-1));
+ header.size += size;
+ }
+
ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
if (ret)
return;
@@ -2732,26 +2943,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
if (sample_type & PERF_SAMPLE_PERIOD)
perf_output_put(&handle, data->period);
- /*
- * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
- */
- if (sample_type & PERF_SAMPLE_GROUP) {
- struct perf_counter *leader, *sub;
- u64 nr = counter->nr_siblings;
-
- perf_output_put(&handle, nr);
-
- leader = counter->group_leader;
- list_for_each_entry(sub, &leader->sibling_list, list_entry) {
- if (sub != counter)
- sub->pmu->read(sub);
-
- group_entry.id = primary_counter_id(sub);
- group_entry.counter = atomic64_read(&sub->count);
-
- perf_output_put(&handle, group_entry);
- }
- }
+ if (sample_type & PERF_SAMPLE_READ)
+ perf_output_read(&handle, counter);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
if (callchain)
@@ -2762,6 +2955,22 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
}
}
+ if (sample_type & PERF_SAMPLE_RAW) {
+ if (data->raw) {
+ perf_output_put(&handle, data->raw->size);
+ perf_output_copy(&handle, data->raw->data, data->raw->size);
+ } else {
+ struct {
+ u32 size;
+ u32 data;
+ } raw = {
+ .size = sizeof(u32),
+ .data = 0,
+ };
+ perf_output_put(&handle, raw);
+ }
+ }
+
perf_output_end(&handle);
}
@@ -2774,8 +2983,6 @@ struct perf_read_event {
u32 pid;
u32 tid;
- u64 value;
- u64 format[3];
};
static void
@@ -2787,80 +2994,74 @@ perf_counter_read_event(struct perf_counter *counter,
.header = {
.type = PERF_EVENT_READ,
.misc = 0,
- .size = sizeof(event) - sizeof(event.format),
+ .size = sizeof(event) + perf_counter_read_size(counter),
},
.pid = perf_counter_pid(counter, task),
.tid = perf_counter_tid(counter, task),
- .value = atomic64_read(&counter->count),
};
- int ret, i = 0;
-
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
- event.header.size += sizeof(u64);
- event.format[i++] = counter->total_time_enabled;
- }
-
- if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
- event.header.size += sizeof(u64);
- event.format[i++] = counter->total_time_running;
- }
-
- if (counter->attr.read_format & PERF_FORMAT_ID) {
- event.header.size += sizeof(u64);
- event.format[i++] = primary_counter_id(counter);
- }
+ int ret;
ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
if (ret)
return;
- perf_output_copy(&handle, &event, event.header.size);
+ perf_output_put(&handle, event);
+ perf_output_read(&handle, counter);
+
perf_output_end(&handle);
}
/*
- * fork tracking
+ * task tracking -- fork/exit
+ *
+ * enabled by: attr.comm | attr.mmap | attr.task
*/
-struct perf_fork_event {
- struct task_struct *task;
+struct perf_task_event {
+ struct task_struct *task;
+ struct perf_counter_context *task_ctx;
struct {
struct perf_event_header header;
u32 pid;
u32 ppid;
+ u32 tid;
+ u32 ptid;
} event;
};
-static void perf_counter_fork_output(struct perf_counter *counter,
- struct perf_fork_event *fork_event)
+static void perf_counter_task_output(struct perf_counter *counter,
+ struct perf_task_event *task_event)
{
struct perf_output_handle handle;
- int size = fork_event->event.header.size;
- struct task_struct *task = fork_event->task;
+ int size = task_event->event.head