diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-11-17 10:16:43 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-11-17 10:17:47 +0100 |
commit | a7b63425a41cd6a8d50f76fef0660c5110f97e91 (patch) | |
tree | be17ee121f1c8814d8d39c9f3e0205d9397fab54 /kernel | |
parent | 35039eb6b199749943547c8572be6604edf00229 (diff) | |
parent | 3726cc75e581c157202da93bb2333cce25c15c98 (diff) |
Merge branch 'perf/core' into perf/probes
Resolved merge conflict in tools/perf/Makefile
Merge reason: we want to queue up a dependent patch.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 8 | ||||
-rw-r--r-- | kernel/exit.c | 4 | ||||
-rw-r--r-- | kernel/fork.c | 2 | ||||
-rw-r--r-- | kernel/futex.c | 20 | ||||
-rw-r--r-- | kernel/irq/spurious.c | 2 | ||||
-rw-r--r-- | kernel/kthread.c | 23 | ||||
-rw-r--r-- | kernel/lockdep.c | 2 | ||||
-rw-r--r-- | kernel/params.c | 17 | ||||
-rw-r--r-- | kernel/perf_event.c | 89 | ||||
-rw-r--r-- | kernel/power/hibernate.c | 11 | ||||
-rw-r--r-- | kernel/power/suspend_test.c | 5 | ||||
-rw-r--r-- | kernel/power/swap.c | 43 | ||||
-rw-r--r-- | kernel/rcutree.c | 60 | ||||
-rw-r--r-- | kernel/rcutree.h | 17 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 46 | ||||
-rw-r--r-- | kernel/sched.c | 65 | ||||
-rw-r--r-- | kernel/sched_fair.c | 74 | ||||
-rw-r--r-- | kernel/sys.c | 25 | ||||
-rw-r--r-- | kernel/sysctl_check.c | 2 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 8 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 14 | ||||
-rw-r--r-- | kernel/trace/trace.c | 8 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 5 | ||||
-rw-r--r-- | kernel/user.c | 2 | ||||
-rw-r--r-- | kernel/workqueue.c | 21 |
26 files changed, 375 insertions, 200 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ca83b73fba1..0249f4be9b5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1710,14 +1710,13 @@ static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, return -EFAULT; buffer[nbytes] = 0; /* nul-terminate */ - strstrip(buffer); if (cft->write_u64) { - u64 val = simple_strtoull(buffer, &end, 0); + u64 val = simple_strtoull(strstrip(buffer), &end, 0); if (*end) return -EINVAL; retval = cft->write_u64(cgrp, cft, val); } else { - s64 val = simple_strtoll(buffer, &end, 0); + s64 val = simple_strtoll(strstrip(buffer), &end, 0); if (*end) return -EINVAL; retval = cft->write_s64(cgrp, cft, val); @@ -1753,8 +1752,7 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft, } buffer[nbytes] = 0; /* nul-terminate */ - strstrip(buffer); - retval = cft->write_string(cgrp, cft, buffer); + retval = cft->write_string(cgrp, cft, strstrip(buffer)); if (!retval) retval = nbytes; out: diff --git a/kernel/exit.c b/kernel/exit.c index e61891f8012..f7864ac2ecc 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -359,10 +359,8 @@ void __set_special_pids(struct pid *pid) { struct task_struct *curr = current->group_leader; - if (task_session(curr) != pid) { + if (task_session(curr) != pid) change_pid(curr, PIDTYPE_SID, pid); - proc_sid_connector(curr); - } if (task_pgrp(curr) != pid) change_pid(curr, PIDTYPE_PGID, pid); diff --git a/kernel/fork.c b/kernel/fork.c index 4c20fff8c13..166b8c49257 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -91,7 +91,7 @@ int nr_processes(void) int cpu; int total = 0; - for_each_online_cpu(cpu) + for_each_possible_cpu(cpu) total += per_cpu(process_counts, cpu); return total; diff --git a/kernel/futex.c b/kernel/futex.c index 4949d336d88..fb65e822fc4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -150,7 +150,8 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key) */ static inline int match_futex(union futex_key *key1, union futex_key *key2) { - return (key1->both.word == key2->both.word + return (key1 && key2 + && key1->both.word == key2->both.word && key1->both.ptr == key2->both.ptr && key1->both.offset == key2->both.offset); } @@ -1028,7 +1029,6 @@ static inline void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, struct futex_hash_bucket *hb) { - drop_futex_key_refs(&q->key); get_futex_key_refs(key); q->key = *key; @@ -1226,6 +1226,7 @@ retry_private: */ if (ret == 1) { WARN_ON(pi_state); + drop_count++; task_count++; ret = get_futex_value_locked(&curval2, uaddr2); if (!ret) @@ -1304,6 +1305,7 @@ retry_private: if (ret == 1) { /* We got the lock. */ requeue_pi_wake_futex(this, &key2, hb2); + drop_count++; continue; } else if (ret) { /* -EDEADLK */ @@ -1791,6 +1793,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, current->timer_slack_ns); } +retry: /* Prepare to wait on uaddr. */ ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); if (ret) @@ -1808,9 +1811,14 @@ static int futex_wait(u32 __user *uaddr, int fshared, goto out_put_key; /* - * We expect signal_pending(current), but another thread may - * have handled it for us already. + * We expect signal_pending(current), but we might be the + * victim of a spurious wakeup as well. */ + if (!signal_pending(current)) { + put_futex_key(fshared, &q.key); + goto retry; + } + ret = -ERESTARTSYS; if (!abs_time) goto out_put_key; @@ -2118,9 +2126,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, */ plist_del(&q->list, &q->list.plist); + /* Handle spurious wakeups gracefully */ + ret = -EWOULDBLOCK; if (timeout && !timeout->task) ret = -ETIMEDOUT; - else + else if (signal_pending(current)) ret = -ERESTARTNOINTR; } return ret; diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 114e704760f..bd7273e6282 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -121,7 +121,9 @@ static void poll_all_shared_irqs(void) if (!(status & IRQ_SPURIOUS_DISABLED)) continue; + local_irq_disable(); try_one_irq(i, desc); + local_irq_enable(); } } diff --git a/kernel/kthread.c b/kernel/kthread.c index 5fe709982ca..ab7ae57773e 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -150,29 +150,6 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), EXPORT_SYMBOL(kthread_create); /** - * kthread_bind - bind a just-created kthread to a cpu. - * @k: thread created by kthread_create(). - * @cpu: cpu (might not be online, must be possible) for @k to run on. - * - * Description: This function is equivalent to set_cpus_allowed(), - * except that @cpu doesn't need to be online, and the thread must be - * stopped (i.e., just returned from kthread_create()). - */ -void kthread_bind(struct task_struct *k, unsigned int cpu) -{ - /* Must have done schedule() in kthread() before we set_task_cpu */ - if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) { - WARN_ON(1); - return; - } - set_task_cpu(k, cpu); - k->cpus_allowed = cpumask_of_cpu(cpu); - k->rt.nr_cpus_allowed = 1; - k->flags |= PF_THREAD_BOUND; -} -EXPORT_SYMBOL(kthread_bind); - -/** * kthread_stop - stop a thread created by kthread_create(). * @k: thread created by kthread_create(). * diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 9af56723c09..f5dcd36d315 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -49,7 +49,7 @@ #include "lockdep_internals.h" #define CREATE_TRACE_POINTS -#include <trace/events/lockdep.h> +#include <trace/events/lock.h> #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; diff --git a/kernel/params.c b/kernel/params.c index 9da58eabdcb..d656c276508 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -218,15 +218,11 @@ int param_set_charp(const char *val, struct kernel_param *kp) return -ENOSPC; } - if (kp->flags & KPARAM_KMALLOCED) - kfree(*(char **)kp->arg); - /* This is a hack. We can't need to strdup in early boot, and we * don't need to; this mangled commandline is preserved. */ if (slab_is_available()) { - kp->flags |= KPARAM_KMALLOCED; *(char **)kp->arg = kstrdup(val, GFP_KERNEL); - if (!kp->arg) + if (!*(char **)kp->arg) return -ENOMEM; } else *(const char **)kp->arg = val; @@ -304,6 +300,7 @@ static int param_array(const char *name, unsigned int min, unsigned int max, void *elem, int elemsize, int (*set)(const char *, struct kernel_param *kp), + u16 flags, unsigned int *num) { int ret; @@ -313,6 +310,7 @@ static int param_array(const char *name, /* Get the name right for errors. */ kp.name = name; kp.arg = elem; + kp.flags = flags; /* No equals sign? */ if (!val) { @@ -358,7 +356,8 @@ int param_array_set(const char *val, struct kernel_param *kp) unsigned int temp_num; return param_array(kp->name, val, 1, arr->max, arr->elem, - arr->elemsize, arr->set, arr->num ?: &temp_num); + arr->elemsize, arr->set, kp->flags, + arr->num ?: &temp_num); } int param_array_get(char *buffer, struct kernel_param *kp) @@ -605,11 +604,7 @@ void module_param_sysfs_remove(struct module *mod) void destroy_params(const struct kernel_param *params, unsigned num) { - unsigned int i; - - for (i = 0; i < num; i++) - if (params[i].flags & KPARAM_KMALLOCED) - kfree(*(char **)params[i].arg); + /* FIXME: This should free kmalloced charp parameters. It doesn't. */ } static void __init kernel_add_sysfs_param(const char *name, diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9ecaa45ab6b..3256e36ad25 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2674,20 +2674,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle) static void perf_output_lock(struct perf_output_handle *handle) { struct perf_mmap_data *data = handle->data; - int cpu; + int cur, cpu = get_cpu(); handle->locked = 0; - local_irq_save(handle->flags); - cpu = smp_processor_id(); - - if (in_nmi() && atomic_read(&data->lock) == cpu) - return; + for (;;) { + cur = atomic_cmpxchg(&data->lock, -1, cpu); + if (cur == -1) { + handle->locked = 1; + break; + } + if (cur == cpu) + break; - while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) cpu_relax(); - - handle->locked = 1; + } } static void perf_output_unlock(struct perf_output_handle *handle) @@ -2733,7 +2734,7 @@ again: if (atomic_xchg(&data->wakeup, 0)) perf_output_wakeup(handle); out: - local_irq_restore(handle->flags); + put_cpu(); } void perf_output_copy(struct perf_output_handle *handle, @@ -3976,8 +3977,9 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) regs = task_pt_regs(current); if (regs) { - if (perf_event_overflow(event, 0, &data, regs)) - ret = HRTIMER_NORESTART; + if (!(event->attr.exclude_idle && current->pid == 0)) + if (perf_event_overflow(event, 0, &data, regs)) + ret = HRTIMER_NORESTART; } period = max_t(u64, 10000, event->hw.sample_period); @@ -3986,6 +3988,42 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) return ret; } +static void perf_swevent_start_hrtimer(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swevent_hrtimer; + if (hwc->sample_period) { + u64 period; + + if (hwc->remaining) { + if (hwc->remaining < 0) + period = 10000; + else + period = hwc->remaining; + hwc->remaining = 0; + } else { + period = max_t(u64, 10000, hwc->sample_period); + } + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(period), 0, + HRTIMER_MODE_REL, 0); + } +} + +static void perf_swevent_cancel_hrtimer(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->sample_period) { + ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); + hwc->remaining = ktime_to_ns(remaining); + + hrtimer_cancel(&hwc->hrtimer); + } +} + /* * Software event: cpu wall time clock */ @@ -4008,22 +4046,14 @@ static int cpu_clock_perf_event_enable(struct perf_event *event) int cpu = raw_smp_processor_id(); atomic64_set(&hwc->prev_count, cpu_clock(cpu)); - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swevent_hrtimer; - if (hwc->sample_period) { - u64 period = max_t(u64, 10000, hwc->sample_period); - __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); - } + perf_swevent_start_hrtimer(event); return 0; } static void cpu_clock_perf_event_disable(struct perf_event *event) { - if (event->hw.sample_period) - hrtimer_cancel(&event->hw.hrtimer); + perf_swevent_cancel_hrtimer(event); cpu_clock_perf_event_update(event); } @@ -4060,22 +4090,15 @@ static int task_clock_perf_event_enable(struct perf_event *event) now = event->ctx->time; atomic64_set(&hwc->prev_count, now); - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swevent_hrtimer; - if (hwc->sample_period) { - u64 period = max_t(u64, 10000, hwc->sample_period); - __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); - } + + perf_swevent_start_hrtimer(event); return 0; } static void task_clock_perf_event_disable(struct perf_event *event) { - if (event->hw.sample_period) - hrtimer_cancel(&event->hw.hrtimer); + perf_swevent_cancel_hrtimer(event); task_clock_perf_event_update(event, event->ctx->time); } @@ -4252,6 +4275,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event) case PERF_COUNT_SW_PAGE_FAULTS_MAJ: case PERF_COUNT_SW_CONTEXT_SWITCHES: case PERF_COUNT_SW_CPU_MIGRATIONS: + case PERF_COUNT_SW_ALIGNMENT_FAULTS: + case PERF_COUNT_SW_EMULATION_FAULTS: if (!event->parent) { atomic_inc(&perf_swevent_enabled[event_id]); event->destroy = sw_perf_event_destroy; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 04b3a83d686..04a9e90d248 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -693,21 +693,22 @@ static int software_resume(void) /* The snapshot device should not be opened while we're running */ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { error = -EBUSY; + swsusp_close(FMODE_READ); goto Unlock; } pm_prepare_console(); error = pm_notifier_call_chain(PM_RESTORE_PREPARE); if (error) - goto Finish; + goto close_finish; error = usermodehelper_disable(); if (error) - goto Finish; + goto close_finish; error = create_basic_memory_bitmaps(); if (error) - goto Finish; + goto close_finish; pr_debug("PM: Preparing processes for restore.\n"); error = prepare_processes(); @@ -719,6 +720,7 @@ static int software_resume(void) pr_debug("PM: Reading hibernation image.\n"); error = swsusp_read(&flags); + swsusp_close(FMODE_READ); if (!error) hibernation_restore(flags & SF_PLATFORM_MODE); @@ -737,6 +739,9 @@ static int software_resume(void) mutex_unlock(&pm_mutex); pr_debug("PM: Resume from disk failed.\n"); return error; +close_finish: + swsusp_close(FMODE_READ); + goto Finish; } late_initcall(software_resume); diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c index 17d8bb1acf9..25596e450ac 100644 --- a/kernel/power/suspend_test.c +++ b/kernel/power/suspend_test.c @@ -19,7 +19,7 @@ * The time it takes is system-specific though, so when we test this * during system bootup we allow a LOT of time. */ -#define TEST_SUSPEND_SECONDS 5 +#define TEST_SUSPEND_SECONDS 10 static unsigned long suspend_test_start_time; @@ -49,7 +49,8 @@ void suspend_test_finish(const char *label) * has some performance issues. The stack dump of a WARN_ON * is more likely to get the right attention than a printk... */ - WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label); + WARN(msec > (TEST_SUSPEND_SECONDS * 1000), + "Component: %s, time: %u\n", label, msec); } /* diff --git a/kernel/power/swap.c b/kernel/power/swap.c index b101cdc4df3..890f6b11b1d 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -314,7 +314,6 @@ static int save_image(struct swap_map_handle *handle, { unsigned int m; int ret; - int error = 0; int nr_pages; int err2; struct bio *bio; @@ -329,26 +328,27 @@ static int save_image(struct swap_map_handle *handle, nr_pages = 0; bio = NULL; do_gettimeofday(&start); - do { + while (1) { ret = snapshot_read_next(snapshot, PAGE_SIZE); - if (ret > 0) { - error = swap_write_page(handle, data_of(*snapshot), - &bio); - if (error) - break; - if (!(nr_pages % m)) - printk("\b\b\b\b%3d%%", nr_pages / m); - nr_pages++; - } - } while (ret > 0); + if (ret <= 0) + break; + ret = swap_write_page(handle, data_of(*snapshot), &bio); + if (ret) + break; + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + } err2 = wait_on_bio_chain(&bio); do_gettimeofday(&stop); - if (!error) - error = err2; - if (!error) + if (!ret) + ret = err2; + if (!ret) printk("\b\b\b\bdone\n"); + else + printk("\n"); swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); - return error; + return ret; } /** @@ -536,7 +536,8 @@ static int load_image(struct swap_map_handle *handle, snapshot_write_finalize(snapshot); if (!snapshot_image_loaded(snapshot)) error = -ENODATA; - } + } else + printk("\n"); swsusp_show_speed(&start, &stop, nr_to_read, "Read"); return error; } @@ -572,8 +573,6 @@ int swsusp_read(unsigned int *flags_p) error = load_image(&handle, &snapshot, header->pages - 1); release_swap_reader(&handle); - blkdev_put(resume_bdev, FMODE_READ); - if (!error) pr_debug("PM: Image successfully loaded\n"); else @@ -596,7 +595,7 @@ int swsusp_check(void) error = bio_read_page(swsusp_resume_block, swsusp_header, NULL); if (error) - return error; + goto put; if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) { memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); @@ -604,8 +603,10 @@ int swsusp_check(void) error = bio_write_page(swsusp_resume_block, swsusp_header, NULL); } else { - return -EINVAL; + error = -EINVAL; } + +put: if (error) blkdev_put(resume_bdev, FMODE_READ); else diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 705f02ac743..f3077c0ab18 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -59,7 +59,7 @@ NUM_RCU_LVL_2, \ NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \ }, \ - .signaled = RCU_SIGNAL_INIT, \ + .signaled = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ .onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \ @@ -657,14 +657,17 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) * irqs disabled. */ rcu_for_each_node_breadth_first(rsp, rnp) { - spin_lock(&rnp->lock); /* irqs already disabled. */ + spin_lock(&rnp->lock); /* irqs already disabled. */ rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; - spin_unlock(&rnp->lock); /* irqs already disabled. */ + spin_unlock(&rnp->lock); /* irqs remain disabled. */ } + rnp = rcu_get_root(rsp); + spin_lock(&rnp->lock); /* irqs already disabled. */ rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ + spin_unlock(&rnp->lock); /* irqs remain disabled. */ spin_unlock_irqrestore(&rsp->onofflock, flags); } @@ -706,6 +709,7 @@ static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) { WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); rsp->completed = rsp->gpnum; + rsp->signaled = RCU_GP_IDLE; rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ } @@ -913,7 +917,20 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) spin_unlock(&rnp->lock); /* irqs remain disabled. */ break; } - rcu_preempt_offline_tasks(rsp, rnp, rdp); + + /* + * If there was a task blocking the current grace period, + * and if all CPUs have checked in, we need to propagate + * the quiescent state up the rcu_node hierarchy. But that + * is inconvenient at the moment due to deadlock issues if + * this should end the current grace period. So set the + * offlined CPU's bit in ->qsmask in order to force the + * next force_quiescent_state() invocation to clean up this + * mess in a deadlock-free manner. + */ + if (rcu_preempt_offline_tasks(rsp, rnp, rdp) && !rnp->qsmask) + rnp->qsmask |= mask; + mask = rnp->grpmask; spin_unlock(&rnp->lock); /* irqs remain disabled. */ rnp = rnp->parent; @@ -958,7 +975,7 @@ static void rcu_offline_cpu(int cpu) * Invoke any RCU callbacks that have made it to the end of their grace * period. Thottle as specified by rdp->blimit. */ -static void rcu_do_batch(struct rcu_data *rdp) +static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) { unsigned long flags; struct rcu_head *next, *list, **tail; @@ -1011,6 +1028,13 @@ static void rcu_do_batch(struct rcu_data *rdp) if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) rdp->blimit = blimit; + /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ + if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { + rdp->qlen_last_fqs_check = 0; + rdp->n_force_qs_snap = rsp->n_force_qs; + } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) + rdp->qlen_last_fqs_check = rdp->qlen; + local_irq_restore(flags); /* Re-raise the RCU softirq if there are callbacks remaining. */ @@ -1142,9 +1166,10 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) } spin_unlock(&rnp->lock); switch (signaled) { + case RCU_GP_IDLE: case RCU_GP_INIT: - break; /* grace period still initializing, ignore. */ + break; /* grace period idle or initializing, ignore. */ case RCU_SAVE_DYNTICK: @@ -1158,7 +1183,8 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Update state, record completion counter. */ spin_lock(&rnp->lock); - if (lastcomp == rsp->completed) { + if (lastcomp == rsp->completed && + rsp->signaled == RCU_SAVE_DYNTICK) { rsp->signaled = RCU_FORCE_QS; dyntick_record_completed(rsp, lastcomp); } @@ -1224,7 +1250,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) } /* If there are callbacks ready, invoke them. */ - rcu_do_batch(rdp); + rcu_do_batch(rsp, rdp); } /* @@ -1288,10 +1314,20 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ } - /* Force the grace period if too many callbacks or too long waiting. */ - if (unlikely(++rdp->qlen > qhimark)) { + /* + * Force the grace period if too many callbacks or too long waiting. + * Enforce hysteresis, and don't invoke force_quiescent_state() + * if some other CPU has recently done so. Also, don't bother + * invoking force_quiescent_state() if the newly enqueued callback + * is the only one waiting for a grace period to complete. + */ + if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { rdp->blimit = LONG_MAX; - force_quiescent_state(rsp, 0); + if (rsp->n_force_qs == rdp->n_force_qs_snap && + *rdp->nxttail[RCU_DONE_TAIL] != head) + force_quiescent_state(rsp, 0); + rdp->n_force_qs_snap = rsp->n_force_qs; + rdp->qlen_last_fqs_check = rdp->qlen; } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) force_quiescent_state(rsp, 1); local_irq_restore(flags); @@ -1523,6 +1559,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) rdp->beenonline = 1; /* We have now been online. */ rdp->preemptable = preemptable; rdp->passed_quiesc_completed = lastcomp - 1; + rdp->qlen_last_fqs_check = 0; + rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; spin_unlock(&rnp->lock); /* irqs remain disabled. */ diff --git a/kernel/rcutree.h b/kernel/rcutree.h index b40ac570604..1899023b096 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -167,6 +167,10 @@ struct rcu_data { struct rcu_head *nxtlist; struct rcu_head **nxttail[RCU_NEXT_SIZE]; long qlen; /* # of queued callbacks */ + long qlen_last_fqs_check; + /* qlen at last check for QS forcing */ + unsigned long n_force_qs_snap; + /* did other CPU force QS recently? */ long blimit; /* Upper limit on a processed batch */ #ifdef CONFIG_NO_HZ @@ -197,9 +201,10 @@ struct rcu_data { }; /* Values for signaled field in struct rcu_state. */ -#define RCU_GP_INIT 0 /* Grace period being initialized. */ -#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */ -#define RCU_FORCE_QS 2 /* Need to force quiescent state. */ +#define RCU_GP_IDLE 0 /* No grace period in progress. */ +#define RCU_GP_INIT 1 /* Grace period being initialized. */ +#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ +#define RCU_FORCE_QS 3 /* Need to force quiescent state. */ #ifdef CONFIG_NO_HZ #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK #else /* #ifdef CONFIG_NO_HZ */ @@ -302,9 +307,9 @@ static void rcu_print_task_stall(struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU -static void rcu_preempt_offline_tasks(struct rcu_state *rsp, - struct rcu_node *rnp, - struct rcu_data *rdp); +static int rcu_preempt_offline_tasks(struct rcu_state *rsp, + struct rcu_node *rnp, + struct rcu_data *rdp); static void rcu_preempt_offline_cpu(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_preempt_check_callbacks(int cpu); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c0cb783aa16..ef2a58c2b9d 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -304,21 +304,25 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) * parent is to remove the need for rcu_read_unlock_special() to * make more than two attempts to acquire the target rcu_node's lock. * + * Returns 1 if there was previously a task blocking the current grace + * period on the specified rcu_node structure. + * * The caller must hold rnp->lock with irqs disabled. */ -static void rcu_preempt_offline_tasks(struct rcu_state *rsp, - struct rcu_node *rnp, - struct rcu_data *rdp) +static int rcu_preempt_offline_tasks(struct rcu_state *rsp, + struct rcu_node *rnp, + struct rcu_data *rdp) { int i; struct list_head *lp; struct list_head *lp_root; + int retval = rcu_preempted_readers(rnp); struct rcu_node *rnp_root = rcu_get_root(rsp); struct task_struct *tp; if (rnp == rnp_root) { WARN_ONCE(1, "Last CPU thought to be offlined?"); - return; /* Shouldn't happen: at least one CPU online. */ + return 0; /* Shouldn't happen: at least one CPU online. */ } WARN_ON_ONCE(rnp != rdp->mynode && (!list_empty(&rnp->blocked_tasks[0]) || @@ -342,6 +346,8 @@ static void rcu_preempt_offline_tasks(struct rcu_state *rsp, spin_unlock(&rnp_root->lock); /* irqs remain disabled */ } } + + return retval; } /* @@ -393,6 +399,17 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) EXPORT_SYMBOL_GPL(call_rcu); /* + * Wait for an rcu-preempt grace period. We are supposed to expedite the + * grace period, but this is the crude slow compatability hack, so just + * invoke synchronize_rcu(). + */ +void synchronize_rcu_expedited(void) +{ + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); + +/* * Check to see if there is any immediate preemptable-RCU-related work * to be done. */ @@ -521,12 +538,15 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) /* * Because preemptable RCU does not exist, it never needs to migrate - * tasks that were blocked within RCU read-side critical sections. + * tasks that were blocked within RCU read-side critical sections, and + * such non-existent tasks cannot possibly have been blocking the current + * grace period. */ -static void rcu_preempt_offline_tasks(struct rcu_state *rsp, - struct rcu_node *rnp, - struct rcu_data *rdp) +sta |