aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2009-11-18 22:19:03 -0800
committerDavid S. Miller <davem@davemloft.net>2009-11-18 22:19:03 -0800
commit3505d1a9fd65e2d3e00827857b6795d9d8983658 (patch)
tree941cfafdb57c427bb6b7ebf6354ee93b2a3693b5 /kernel
parentdfef948ed2ba69cf041840b5e860d6b4e16fa0b1 (diff)
parent66b00a7c93ec782d118d2c03bd599cfd041e80a1 (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
Conflicts: drivers/net/sfc/sfe4001.c drivers/net/wireless/libertas/cmd.c drivers/staging/Kconfig drivers/staging/Makefile drivers/staging/rtl8187se/Kconfig drivers/staging/rtl8192e/Kconfig
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c8
-rw-r--r--kernel/exit.c6
-rw-r--r--kernel/fork.c12
-rw-r--r--kernel/futex.c23
-rw-r--r--kernel/hrtimer.c2
-rw-r--r--kernel/irq/handle.c1
-rw-r--r--kernel/irq/spurious.c2
-rw-r--r--kernel/kthread.c23
-rw-r--r--kernel/lockdep.c20
-rw-r--r--kernel/mutex-debug.c1
-rw-r--r--kernel/panic.c3
-rw-r--r--kernel/params.c17
-rw-r--r--kernel/perf_event.c350
-rw-r--r--kernel/power/hibernate.c11
-rw-r--r--kernel/power/suspend_test.c5
-rw-r--r--kernel/power/swap.c43
-rw-r--r--kernel/rcupdate.c140
-rw-r--r--kernel/rcutorture.c4
-rw-r--r--kernel/rcutree.c390
-rw-r--r--kernel/rcutree.h97
-rw-r--r--kernel/rcutree_plugin.h149
-rw-r--r--kernel/rcutree_trace.c4
-rw-r--r--kernel/sched.c98
-rw-r--r--kernel/sched_fair.c74
-rw-r--r--kernel/sys.c25
-rw-r--r--kernel/sysctl_check.c2
-rw-r--r--kernel/time/tick-sched.c9
-rw-r--r--kernel/time/timekeeping.c1
-rw-r--r--kernel/trace/ftrace.c35
-rw-r--r--kernel/trace/kmemtrace.c2
-rw-r--r--kernel/trace/ring_buffer.c14
-rw-r--r--kernel/trace/trace.c10
-rw-r--r--kernel/trace/trace_branch.c8
-rw-r--r--kernel/trace/trace_event_profile.c15
-rw-r--r--kernel/trace/trace_events_filter.c3
-rw-r--r--kernel/trace/trace_hw_branches.c8
-rw-r--r--kernel/trace/trace_output.c23
-rw-r--r--kernel/trace/trace_syscalls.c4
-rw-r--r--kernel/user.c2
-rw-r--r--kernel/workqueue.c35
40 files changed, 1040 insertions, 639 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ca83b73fba1..0249f4be9b5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1710,14 +1710,13 @@ static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
return -EFAULT;
buffer[nbytes] = 0; /* nul-terminate */
- strstrip(buffer);
if (cft->write_u64) {
- u64 val = simple_strtoull(buffer, &end, 0);
+ u64 val = simple_strtoull(strstrip(buffer), &end, 0);
if (*end)
return -EINVAL;
retval = cft->write_u64(cgrp, cft, val);
} else {
- s64 val = simple_strtoll(buffer, &end, 0);
+ s64 val = simple_strtoll(strstrip(buffer), &end, 0);
if (*end)
return -EINVAL;
retval = cft->write_s64(cgrp, cft, val);
@@ -1753,8 +1752,7 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
}
buffer[nbytes] = 0; /* nul-terminate */
- strstrip(buffer);
- retval = cft->write_string(cgrp, cft, buffer);
+ retval = cft->write_string(cgrp, cft, strstrip(buffer));
if (!retval)
retval = nbytes;
out:
diff --git a/kernel/exit.c b/kernel/exit.c
index 5859f598c95..f7864ac2ecc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -359,10 +359,8 @@ void __set_special_pids(struct pid *pid)
{
struct task_struct *curr = current->group_leader;
- if (task_session(curr) != pid) {
+ if (task_session(curr) != pid)
change_pid(curr, PIDTYPE_SID, pid);
- proc_sid_connector(curr);
- }
if (task_pgrp(curr) != pid)
change_pid(curr, PIDTYPE_PGID, pid);
@@ -991,8 +989,6 @@ NORET_TYPE void do_exit(long code)
tsk->mempolicy = NULL;
#endif
#ifdef CONFIG_FUTEX
- if (unlikely(!list_empty(&tsk->pi_state_list)))
- exit_pi_state_list(tsk);
if (unlikely(current->pi_state_cache))
kfree(current->pi_state_cache);
#endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 266c6af6ef1..166b8c49257 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -91,7 +91,7 @@ int nr_processes(void)
int cpu;
int total = 0;
- for_each_online_cpu(cpu)
+ for_each_possible_cpu(cpu)
total += per_cpu(process_counts, cpu);
return total;
@@ -570,12 +570,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
/* Get rid of any futexes when releasing the mm */
#ifdef CONFIG_FUTEX
- if (unlikely(tsk->robust_list))
+ if (unlikely(tsk->robust_list)) {
exit_robust_list(tsk);
+ tsk->robust_list = NULL;
+ }
#ifdef CONFIG_COMPAT
- if (unlikely(tsk->compat_robust_list))
+ if (unlikely(tsk->compat_robust_list)) {
compat_exit_robust_list(tsk);
+ tsk->compat_robust_list = NULL;
+ }
#endif
+ if (unlikely(!list_empty(&tsk->pi_state_list)))
+ exit_pi_state_list(tsk);
#endif
/* Get rid of any cached register state */
diff --git a/kernel/futex.c b/kernel/futex.c
index b911adceb2c..fb65e822fc4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -150,7 +150,8 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key)
*/
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
- return (key1->both.word == key2->both.word
+ return (key1 && key2
+ && key1->both.word == key2->both.word
&& key1->both.ptr == key2->both.ptr
&& key1->both.offset == key2->both.offset);
}
@@ -916,8 +917,8 @@ retry:
hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2);
- double_lock_hb(hb1, hb2);
retry_private:
+ double_lock_hb(hb1, hb2);
op_ret = futex_atomic_op_inuser(op, uaddr2);
if (unlikely(op_ret < 0)) {
@@ -1028,7 +1029,6 @@ static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
struct futex_hash_bucket *hb)
{
- drop_futex_key_refs(&q->key);
get_futex_key_refs(key);
q->key = *key;
@@ -1226,6 +1226,7 @@ retry_private:
*/
if (ret == 1) {
WARN_ON(pi_state);
+ drop_count++;
task_count++;
ret = get_futex_value_locked(&curval2, uaddr2);
if (!ret)
@@ -1304,6 +1305,7 @@ retry_private:
if (ret == 1) {
/* We got the lock. */
requeue_pi_wake_futex(this, &key2, hb2);
+ drop_count++;
continue;
} else if (ret) {
/* -EDEADLK */
@@ -1791,6 +1793,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
current->timer_slack_ns);
}
+retry:
/* Prepare to wait on uaddr. */
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
if (ret)
@@ -1808,9 +1811,14 @@ static int futex_wait(u32 __user *uaddr, int fshared,
goto out_put_key;
/*
- * We expect signal_pending(current), but another thread may
- * have handled it for us already.
+ * We expect signal_pending(current), but we might be the
+ * victim of a spurious wakeup as well.
*/
+ if (!signal_pending(current)) {
+ put_futex_key(fshared, &q.key);
+ goto retry;
+ }
+
ret = -ERESTARTSYS;
if (!abs_time)
goto out_put_key;
@@ -2117,11 +2125,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
* Unqueue the futex_q and determine which it was.
*/
plist_del(&q->list, &q->list.plist);
- drop_futex_key_refs(&q->key);
+ /* Handle spurious wakeups gracefully */
+ ret = -EWOULDBLOCK;
if (timeout && !timeout->task)
ret = -ETIMEDOUT;
- else
+ else if (signal_pending(current))
ret = -ERESTARTNOINTR;
}
return ret;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 6d7020490f9..3e1c36e7998 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -726,8 +726,6 @@ static int hrtimer_switch_to_hres(void)
/* "Retrigger" the interrupt to get things going */
retrigger_next_event(NULL);
local_irq_restore(flags);
- printk(KERN_DEBUG "Switched to high resolution mode on CPU %d\n",
- smp_processor_id());
return 1;
}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index a81cf80554d..17c71bb565c 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -11,6 +11,7 @@
*/
#include <linux/irq.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/random.h>
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 114e704760f..bd7273e6282 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -121,7 +121,9 @@ static void poll_all_shared_irqs(void)
if (!(status & IRQ_SPURIOUS_DISABLED))
continue;
+ local_irq_disable();
try_one_irq(i, desc);
+ local_irq_enable();
}
}
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 5fe709982ca..ab7ae57773e 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -150,29 +150,6 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
EXPORT_SYMBOL(kthread_create);
/**
- * kthread_bind - bind a just-created kthread to a cpu.
- * @k: thread created by kthread_create().
- * @cpu: cpu (might not be online, must be possible) for @k to run on.
- *
- * Description: This function is equivalent to set_cpus_allowed(),
- * except that @cpu doesn't need to be online, and the thread must be
- * stopped (i.e., just returned from kthread_create()).
- */
-void kthread_bind(struct task_struct *k, unsigned int cpu)
-{
- /* Must have done schedule() in kthread() before we set_task_cpu */
- if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) {
- WARN_ON(1);
- return;
- }
- set_task_cpu(k, cpu);
- k->cpus_allowed = cpumask_of_cpu(cpu);
- k->rt.nr_cpus_allowed = 1;
- k->flags |= PF_THREAD_BOUND;
-}
-EXPORT_SYMBOL(kthread_bind);
-
-/**
* kthread_stop - stop a thread created by kthread_create().
* @k: thread created by kthread_create().
*
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 3815ac1d58b..9af56723c09 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -142,6 +142,11 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
#ifdef CONFIG_LOCK_STAT
static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
+static inline u64 lockstat_clock(void)
+{
+ return cpu_clock(smp_processor_id());
+}
+
static int lock_point(unsigned long points[], unsigned long ip)
{
int i;
@@ -158,7 +163,7 @@ static int lock_point(unsigned long points[], unsigned long ip)
return i;
}
-static void lock_time_inc(struct lock_time *lt, s64 time)
+static void lock_time_inc(struct lock_time *lt, u64 time)
{
if (time > lt->max)
lt->max = time;
@@ -234,12 +239,12 @@ static void put_lock_stats(struct lock_class_stats *stats)
static void lock_release_holdtime(struct held_lock *hlock)
{
struct lock_class_stats *stats;
- s64 holdtime;
+ u64 holdtime;
if (!lock_stat)
return;
- holdtime = sched_clock() - hlock->holdtime_stamp;
+ holdtime = lockstat_clock() - hlock->holdtime_stamp;
stats = get_lock_stats(hlock_class(hlock));
if (hlock->read)
@@ -2792,7 +2797,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
hlock->references = references;
#ifdef CONFIG_LOCK_STAT
hlock->waittime_stamp = 0;
- hlock->holdtime_stamp = sched_clock();
+ hlock->holdtime_stamp = lockstat_clock();
#endif
if (check == 2 && !mark_irqflags(curr, hlock))
@@ -3322,7 +3327,7 @@ found_it:
if (hlock->instance != lock)
return;
- hlock->waittime_stamp = sched_clock();
+ hlock->waittime_stamp = lockstat_clock();
contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
contending_point = lock_point(hlock_class(hlock)->contending_point,
@@ -3345,8 +3350,7 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
struct held_lock *hlock, *prev_hlock;
struct lock_class_stats *stats;
unsigned int depth;
- u64 now;
- s64 waittime = 0;
+ u64 now, waittime = 0;
int i, cpu;
depth = curr->lockdep_depth;
@@ -3374,7 +3378,7 @@ found_it:
cpu = smp_processor_id();
if (hlock->waittime_stamp) {
- now = sched_clock();
+ now = lockstat_clock();
waittime = now - hlock->waittime_stamp;
hlock->holdtime_stamp = now;
}
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index 50d022e5a56..ec815a960b5 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -16,6 +16,7 @@
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/poison.h>
+#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
diff --git a/kernel/panic.c b/kernel/panic.c
index bcdef26e333..96b45d0b4ba 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -90,6 +90,8 @@ NORET_TYPE void panic(const char * fmt, ...)
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
+ bust_spinlocks(0);
+
if (!panic_blink)
panic_blink = no_blink;
@@ -136,7 +138,6 @@ NORET_TYPE void panic(const char * fmt, ...)
mdelay(1);
i++;
}
- bust_spinlocks(0);
}
EXPORT_SYMBOL(panic);
diff --git a/kernel/params.c b/kernel/params.c
index 9da58eabdcb..d656c276508 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -218,15 +218,11 @@ int param_set_charp(const char *val, struct kernel_param *kp)
return -ENOSPC;
}
- if (kp->flags & KPARAM_KMALLOCED)
- kfree(*(char **)kp->arg);
-
/* This is a hack. We can't need to strdup in early boot, and we
* don't need to; this mangled commandline is preserved. */
if (slab_is_available()) {
- kp->flags |= KPARAM_KMALLOCED;
*(char **)kp->arg = kstrdup(val, GFP_KERNEL);
- if (!kp->arg)
+ if (!*(char **)kp->arg)
return -ENOMEM;
} else
*(const char **)kp->arg = val;
@@ -304,6 +300,7 @@ static int param_array(const char *name,
unsigned int min, unsigned int max,
void *elem, int elemsize,
int (*set)(const char *, struct kernel_param *kp),
+ u16 flags,
unsigned int *num)
{
int ret;
@@ -313,6 +310,7 @@ static int param_array(const char *name,
/* Get the name right for errors. */
kp.name = name;
kp.arg = elem;
+ kp.flags = flags;
/* No equals sign? */
if (!val) {
@@ -358,7 +356,8 @@ int param_array_set(const char *val, struct kernel_param *kp)
unsigned int temp_num;
return param_array(kp->name, val, 1, arr->max, arr->elem,
- arr->elemsize, arr->set, arr->num ?: &temp_num);
+ arr->elemsize, arr->set, kp->flags,
+ arr->num ?: &temp_num);
}
int param_array_get(char *buffer, struct kernel_param *kp)
@@ -605,11 +604,7 @@ void module_param_sysfs_remove(struct module *mod)
void destroy_params(const struct kernel_param *params, unsigned num)
{
- unsigned int i;
-
- for (i = 0; i < num; i++)
- if (params[i].flags & KPARAM_KMALLOCED)
- kfree(*(char **)params[i].arg);
+ /* FIXME: This should free kmalloced charp parameters. It doesn't. */
}
static void __init kernel_add_sysfs_param(const char *name,
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 0f86feb6db0..7f29643c898 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -20,6 +20,7 @@
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/vmstat.h>
+#include <linux/vmalloc.h>
#include <linux/hardirq.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
@@ -1030,14 +1031,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
update_context_time(ctx);
perf_disable();
- if (ctx->nr_active) {
- list_for_each_entry(event, &ctx->group_list, group_entry) {
- if (event != event->group_leader)
- event_sched_out(event, cpuctx, ctx);
- else
- group_sched_out(event, cpuctx, ctx);
- }
- }
+ if (ctx->nr_active)
+ list_for_each_entry(event, &ctx->group_list, group_entry)
+ group_sched_out(event, cpuctx, ctx);
+
perf_enable();
out:
spin_unlock(&ctx->lock);
@@ -1258,12 +1255,8 @@ __perf_event_sched_in(struct perf_event_context *ctx,
if (event->cpu != -1 && event->cpu != cpu)
continue;
- if (event != event->group_leader)
- event_sched_in(event, cpuctx, ctx, cpu);
- else {
- if (group_can_go_on(event, cpuctx, 1))
- group_sched_in(event, cpuctx, ctx, cpu);
- }
+ if (group_can_go_on(event, cpuctx, 1))
+ group_sched_in(event, cpuctx, ctx, cpu);
/*
* If this pinned group hasn't been scheduled,
@@ -1291,15 +1284,9 @@ __perf_event_sched_in(struct perf_event_context *ctx,
if (event->cpu != -1 && event->cpu != cpu)
continue;
- if (event != event->group_leader) {
- if (event_sched_in(event, cpuctx, ctx, cpu))
+ if (group_can_go_on(event, cpuctx, can_add_hw))
+ if (group_sched_in(event, cpuctx, ctx, cpu))
can_add_hw = 0;
- } else {
- if (group_can_go_on(event, cpuctx, can_add_hw)) {
- if (group_sched_in(event, cpuctx, ctx, cpu))
- can_add_hw = 0;
- }
- }
}
perf_enable();
out:
@@ -1368,7 +1355,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
u64 interrupts, freq;
spin_lock(&ctx->lock);
- list_for_each_entry(event, &ctx->group_list, group_entry) {
+ list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (event->state != PERF_EVENT_STATE_ACTIVE)
continue;
@@ -2105,49 +2092,31 @@ unlock:
rcu_read_unlock();
}
-static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static unsigned long perf_data_size(struct perf_mmap_data *data)
{
- struct perf_event *event = vma->vm_file->private_data;
- struct perf_mmap_data *data;
- int ret = VM_FAULT_SIGBUS;
-
- if (vmf->flags & FAULT_FLAG_MKWRITE) {
- if (vmf->pgoff == 0)
- ret = 0;
- return ret;
- }
-
- rcu_read_lock();
- data = rcu_dereference(event->data);
- if (!data)
- goto unlock;
-
- if (vmf->pgoff == 0) {
- vmf->page = virt_to_page(data->user_page);
- } else {
- int nr = vmf->pgoff - 1;
-
- if ((unsigned)nr > data->nr_pages)
- goto unlock;
+ return data->nr_pages << (PAGE_SHIFT + data->data_order);
+}
- if (vmf->flags & FAULT_FLAG_WRITE)
- goto unlock;
+#ifndef CONFIG_PERF_USE_VMALLOC
- vmf->page = virt_to_page(data->data_pages[nr]);
- }
+/*
+ * Back perf_mmap() with regular GFP_KERNEL-0 pages.
+ */
- get_page(vmf->page);
- vmf->page->mapping = vma->vm_file->f_mapping;
- vmf->page->index = vmf->pgoff;
+static struct page *
+perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+{
+ if (pgoff > data->nr_pages)
+ return NULL;
- ret = 0;
-unlock:
- rcu_read_unlock();
+ if (pgoff == 0)
+ return virt_to_page(data->user_page);
- return ret;
+ return virt_to_page(data->data_pages[pgoff - 1]);
}
-static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_mmap_data *
+perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
{
struct perf_mmap_data *data;
unsigned long size;
@@ -2172,19 +2141,10 @@ static int perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
goto fail_data_pages;
}
+ data->data_order = 0;
data->nr_pages = nr_pages;
- atomic_set(&data->lock, -1);
-
- if (event->attr.watermark) {
- data->watermark = min_t(long, PAGE_SIZE * nr_pages,
- event->attr.wakeup_watermark);
- }
- if (!data->watermark)
- data->watermark = max(PAGE_SIZE, PAGE_SIZE * nr_pages / 4);
-
- rcu_assign_pointer(event->data, data);
- return 0;
+ return data;
fail_data_pages:
for (i--; i >= 0; i--)
@@ -2196,7 +2156,7 @@ fail_user_page:
kfree(data);
fail:
- return -ENOMEM;
+ return NULL;
}
static void perf_mmap_free_page(unsigned long addr)
@@ -2207,28 +2167,169 @@ static void perf_mmap_free_page(unsigned long addr)
__free_page(page);
}
-static void __perf_mmap_data_free(struct rcu_head *rcu_head)
+static void perf_mmap_data_free(struct perf_mmap_data *data)
{
- struct perf_mmap_data *data;
int i;
- data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
-
perf_mmap_free_page((unsigned long)data->user_page);
for (i = 0; i < data->nr_pages; i++)
perf_mmap_free_page((unsigned long)data->data_pages[i]);
+}
+
+#else
+
+/*
+ * Back perf_mmap() with vmalloc memory.
+ *
+ * Required for architectures that have d-cache aliasing issues.
+ */
+
+static struct page *
+perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+{
+ if (pgoff > (1UL << data->data_order))
+ return NULL;
+
+ return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
+}
+
+static void perf_mmap_unmark_page(void *addr)
+{
+ struct page *page = vmalloc_to_page(addr);
+
+ page->mapping = NULL;
+}
+
+static void perf_mmap_data_free_work(struct work_struct *work)
+{
+ struct perf_mmap_data *data;
+ void *base;
+ int i, nr;
+
+ data = container_of(work, struct perf_mmap_data, work);
+ nr = 1 << data->data_order;
+ base = data->user_page;
+ for (i = 0; i < nr + 1; i++)
+ perf_mmap_unmark_page(base + (i * PAGE_SIZE));
+
+ vfree(base);
+}
+
+static void perf_mmap_data_free(struct perf_mmap_data *data)
+{
+ schedule_work(&data->work);
+}
+
+static struct perf_mmap_data *
+perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+{
+ struct perf_mmap_data *data;
+ unsigned long size;
+ void *all_buf;
+
+ WARN_ON(atomic_read(&event->mmap_count));
+
+ size = sizeof(struct perf_mmap_data);
+ size += sizeof(void *);
+
+ data = kzalloc(size, GFP_KERNEL);
+ if (!data)
+ goto fail;
+
+ INIT_WORK(&data->work, perf_mmap_data_free_work);
+
+ all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
+ if (!all_buf)
+ goto fail_all_buf;
+
+ data->user_page = all_buf;
+ data->data_pages[0] = all_buf + PAGE_SIZE;
+ data->data_order = ilog2(nr_pages);
+ data->nr_pages = 1;
+
+ return data;
+
+fail_all_buf:
kfree(data);
+
+fail:
+ return NULL;
}
-static void perf_mmap_data_free(struct perf_event *event)
+#endif
+
+static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct perf_event *event = vma->vm_file->private_data;
+ struct perf_mmap_data *data;
+ int ret = VM_FAULT_SIGBUS;
+
+ if (vmf->flags & FAULT_FLAG_MKWRITE) {
+ if (vmf->pgoff == 0)
+ ret = 0;
+ return ret;
+ }
+
+ rcu_read_lock();
+ data = rcu_dereference(event->data);
+ if (!data)
+ goto unlock;
+
+ if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
+ goto unlock;
+
+ vmf->page = perf_mmap_to_page(data, vmf->pgoff);
+ if (!vmf->page)
+ goto unlock;
+
+ get_page(vmf->page);
+ vmf->page->mapping = vma->vm_file->f_mapping;
+ vmf->page->index = vmf->pgoff;
+
+ ret = 0;
+unlock:
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static void
+perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
+{
+ long max_size = perf_data_size(data);
+
+ atomic_set(&data->lock, -1);
+
+ if (event->attr.watermark) {
+ data->watermark = min_t(long, max_size,
+ event->attr.wakeup_watermark);
+ }
+
+ if (!data->watermark)
+ data->watermark = max_t(long, PAGE_SIZE, max_size / 2);
+
+
+ rcu_assign_pointer(event->data, data);
+}
+
+static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
+{
+ struct perf_mmap_data *data;
+
+ data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
+ perf_mmap_data_free(data);
+ kfree(data);
+}
+
+static void perf_mmap_data_release(struct perf_event *event)
{
struct perf_mmap_data *data = event->data;
WARN_ON(atomic_read(&event->mmap_count));
rcu_assign_pointer(event->data, NULL);
- call_rcu(&data->rcu_head, __perf_mmap_data_free);
+ call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
}
static void perf_mmap_open(struct vm_area_struct *vma)
@@ -2244,11 +2345,12 @@ static void perf_mmap_close(struct vm_area_struct *vma)
WARN_ON_ONCE(event->ctx->parent_ctx);
if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
+ unsigned long size = perf_data_size(event->data);
struct user_struct *user = current_user();
- atomic_long_sub(event->data->nr_pages + 1, &user->locked_vm);
+ atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
vma->vm_mm->locked_vm -= event->data->nr_locked;
- perf_mmap_data_free(event);
+ perf_mmap_data_release(event);
mutex_unlock(&event->mmap_mutex);
}
}
@@ -2266,6 +2368,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
unsigned long user_locked, user_lock_limit;
struct user_struct *user = current_user();
unsigned long locked, lock_limit;
+ struct perf_mmap_data *data;
unsigned long vma_size;
unsigned long nr_pages;
long user_extra, extra;
@@ -2328,10 +2431,15 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
}
WARN_ON(event->data);
- ret = perf_mmap_data_alloc(event, nr_pages);
- if (ret)
+
+ data = perf_mmap_data_alloc(event, nr_pages);
+ ret = -ENOMEM;
+ if (!data)
goto unlock;
+ ret = 0;
+ perf_mmap_data_init(even