aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2008-10-16 16:51:32 +0200
committer Ingo Molnar <mingo@elte.hu> 2008-10-16 16:51:32 +0200
commit 5fef06e8c8c52aa7170dbbb068aa996d83738d38 (patch)
tree f46a1eefd68863bdae57afa004e5281801a6b61e /kernel
parent 0c5d1eb77a8be917b638344a22afe1398236482b (diff)
parent 278429cff8809958d25415ba0ed32b59866ab1a8 (diff)
Merge branch 'linus' into genirq
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/acct.c 2
-rw-r--r-- kernel/auditsc.c 9
-rw-r--r-- kernel/cpu.c 24
-rw-r--r-- kernel/cpuset.c 2
-rw-r--r-- kernel/dma-coherent.c 2
-rw-r--r-- kernel/fork.c 5
-rw-r--r-- kernel/hrtimer.c 95
-rw-r--r-- kernel/irq/manage.c 9
-rw-r--r-- kernel/kgdb.c 3
-rw-r--r-- kernel/posix-timers.c 2
-rw-r--r-- kernel/printk.c 16
-rw-r--r-- kernel/rcuclassic.c 337
-rw-r--r-- kernel/rcupreempt.c 8
-rw-r--r-- kernel/rcupreempt_trace.c 7
-rw-r--r-- kernel/resource.c 68
-rw-r--r-- kernel/sched.c 381
-rw-r--r-- kernel/sched_debug.c 2
-rw-r--r-- kernel/sched_fair.c 234
-rw-r--r-- kernel/sched_features.h 1
-rw-r--r-- kernel/sched_idletask.c 6
-rw-r--r-- kernel/sched_rt.c 57
-rw-r--r-- kernel/sys.c 4
-rw-r--r-- kernel/sys_ni.c 1
-rw-r--r-- kernel/sysctl.c 25
-rw-r--r-- kernel/time/tick-broadcast.c 6
-rw-r--r-- kernel/time/tick-sched.c 13
-rw-r--r-- kernel/trace/trace_sysprof.c 2
-rw-r--r-- kernel/user.c 4
28 files changed, 839 insertions, 486 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index dd68b905941..f6006a60df5 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -548,7 +548,7 @@ static void do_acct_process(struct bsd_acct_struct *acct,
#endif
spin_lock_irq(&current->sighand->siglock);
- tty = current->signal->tty;
+ tty = current->signal->tty; /* Safe as we hold the siglock */
ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 59cedfb040e..cf5bc2f5f9c 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -246,8 +246,8 @@ static int audit_match_perm(struct audit_context *ctx, int mask)
unsigned n;
if (unlikely(!ctx))
return 0;
-
n = ctx->major;
+
switch (audit_classify_syscall(ctx->arch, n)) {
case 0: /* native */
if ((mask & AUDIT_PERM_WRITE) &&
@@ -1204,13 +1204,13 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
(context->return_valid==AUDITSC_SUCCESS)?"yes":"no",
context->return_code);
- mutex_lock(&tty_mutex);
- read_lock(&tasklist_lock);
+ spin_lock_irq(&tsk->sighand->siglock);
if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
tty = tsk->signal->tty->name;
else
tty = "(none)";
- read_unlock(&tasklist_lock);
+ spin_unlock_irq(&tsk->sighand->siglock);
+
audit_log_format(ab,
" a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
" ppid=%d pid=%d auid=%u uid=%u gid=%u"
@@ -1230,7 +1230,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
context->egid, context->sgid, context->fsgid, tty,
tsk->sessionid);
- mutex_unlock(&tty_mutex);
audit_log_task_info(ab, tsk);
if (context->filterkey) {
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f17e9854c24..86d49045dae 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -199,13 +199,14 @@ static int __ref take_cpu_down(void *_param)
struct take_cpu_down_param *param = _param;
int err;
- raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
- param->hcpu);
/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
if (err < 0)
return err;
+ raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+ param->hcpu);
+
/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
sched_idle_next();
@@ -453,6 +454,25 @@ out:
}
#endif /* CONFIG_PM_SLEEP_SMP */
+/**
+ * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
+ * @cpu: cpu that just started
+ *
+ * This function calls the cpu_chain notifiers with CPU_STARTING, or with
+ * CPU_STARTING_FROZEN when CONFIG_PM_SLEEP_SMP is set and @cpu is in
+ * frozen_cpus.
+ * It must be called by the arch code on the new cpu, before the new cpu
+ * enables interrupts and before the "boot" cpu returns from __cpu_up().
+ */
+void notify_cpu_starting(unsigned int cpu)
+{
+ unsigned long val = CPU_STARTING;
+
+#ifdef CONFIG_PM_SLEEP_SMP
+ if (cpu_isset(cpu, frozen_cpus))
+ val = CPU_STARTING_FROZEN;
+#endif /* CONFIG_PM_SLEEP_SMP */
+ /* The cpu number is handed to the notifiers through the void * argument. */
+ raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
+}
+
#endif /* CONFIG_SMP */
/*
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 827cd9adccb..eab7bd6628e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1921,7 +1921,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
* that has tasks along with an empty 'mems'. But if we did see such
* a cpuset, we'd handle it just like we do if its 'cpus' was empty.
*/
-static void scan_for_empty_cpusets(const struct cpuset *root)
+static void scan_for_empty_cpusets(struct cpuset *root)
{
LIST_HEAD(queue);
struct cpuset *cp; /* scans cpusets being updated */
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
index c1d4d5b4c61..f013a0c2e11 100644
--- a/kernel/dma-coherent.c
+++ b/kernel/dma-coherent.c
@@ -124,6 +124,7 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size,
}
return (mem != NULL);
}
+EXPORT_SYMBOL(dma_alloc_from_coherent);
/**
* dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool
@@ -151,3 +152,4 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
}
return 0;
}
+EXPORT_SYMBOL(dma_release_from_coherent);
diff --git a/kernel/fork.c b/kernel/fork.c
index 7ce2ebe8479..30de644a40c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -802,6 +802,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
sig->leader = 0; /* session leadership doesn't inherit */
sig->tty_old_pgrp = NULL;
+ sig->tty = NULL;
sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
sig->gtime = cputime_zero;
@@ -838,6 +839,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
void __cleanup_signal(struct signal_struct *sig)
{
exit_thread_group_keys(sig);
+ tty_kref_put(sig->tty);
kmem_cache_free(signal_cachep, sig);
}
@@ -1227,7 +1229,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->nsproxy->pid_ns->child_reaper = p;
p->signal->leader_pid = pid;
- p->signal->tty = current->signal->tty;
+ tty_kref_put(p->signal->tty);
+ p->signal->tty = tty_kref_get(current->signal->tty);
set_task_pgrp(p, task_pgrp_nr(current));
set_task_session(p, task_session_nr(current));
attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index b8e4dce80a7..cdec83e722f 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -672,13 +672,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
*/
BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
return 1;
- case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:
+ case HRTIMER_CB_IRQSAFE_PERCPU:
+ case HRTIMER_CB_IRQSAFE_UNLOCKED:
/*
* This is solely for the sched tick emulation with
* dynamic tick support to ensure that we do not
* restart the tick right on the edge and end up with
* the tick timer in the softirq ! The calling site
- * takes care of this.
+ * takes care of this. Also used for hrtimer sleeper !
*/
debug_hrtimer_deactivate(timer);
return 1;
@@ -1245,7 +1246,8 @@ static void __run_hrtimer(struct hrtimer *timer)
timer_stats_account_hrtimer(timer);
fn = timer->function;
- if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
+ if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
+ timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) {
/*
* Used for scheduler timers, avoid lock inversion with
* rq->lock and tasklist_lock.
@@ -1452,7 +1454,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
sl->timer.function = hrtimer_wakeup;
sl->task = task;
#ifdef CONFIG_HIGH_RES_TIMERS
- sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+ sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
#endif
}
@@ -1591,29 +1593,95 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
#ifdef CONFIG_HOTPLUG_CPU
-static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
- struct hrtimer_clock_base *new_base)
+static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
+ struct hrtimer_clock_base *new_base, int dcpu)
{
struct hrtimer *timer;
struct rb_node *node;
+ int raise = 0;
while ((node = rb_first(&old_base->active))) {
timer = rb_entry(node, struct hrtimer, node);
BUG_ON(hrtimer_callback_running(timer));
debug_hrtimer_deactivate(timer);
- __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0);
+
+ /*
+ * Should not happen. Per CPU timers should be
+ * canceled _before_ the migration code is called
+ */
+ if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) {
+ __remove_hrtimer(timer, old_base,
+ HRTIMER_STATE_INACTIVE, 0);
+ WARN(1, "hrtimer (%p %p)active but cpu %d dead\n",
+ timer, timer->function, dcpu);
+ continue;
+ }
+
+ /*
+ * Mark it as STATE_MIGRATE not INACTIVE otherwise the
+ * timer could be seen as !active and just vanish away
+ * under us on another CPU
+ */
+ __remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
timer->base = new_base;
/*
* Enqueue the timer. Allow reprogramming of the event device
*/
enqueue_hrtimer(timer, new_base, 1);
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+ /*
+ * Happens with high res enabled when the timer was
+ * already expired and the callback mode is
+ * HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The
+ * enqueue code does not move them to the soft irq
+ * pending list for performance/latency reasons, but
+ * in the migration state, we need to do that
+ * otherwise we end up with a stale timer.
+ */
+ if (timer->state == HRTIMER_STATE_MIGRATE) {
+ timer->state = HRTIMER_STATE_PENDING;
+ list_add_tail(&timer->cb_entry,
+ &new_base->cpu_base->cb_pending);
+ raise = 1;
+ }
+#endif
+ /* Clear the migration state bit */
+ timer->state &= ~HRTIMER_STATE_MIGRATE;
+ }
+ return raise;
+}
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
+ struct hrtimer_cpu_base *new_base)
+{
+ struct hrtimer *timer;
+ int raise = 0;
+
+ while (!list_empty(&old_base->cb_pending)) {
+ timer = list_entry(old_base->cb_pending.next,
+ struct hrtimer, cb_entry);
+
+ __remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0);
+ timer->base = &new_base->clock_base[timer->base->index];
+ list_add_tail(&timer->cb_entry, &new_base->cb_pending);
+ raise = 1;
}
+ return raise;
+}
+#else
+static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
+ struct hrtimer_cpu_base *new_base)
+{
+ return 0;
}
+#endif
static void migrate_hrtimers(int cpu)
{
struct hrtimer_cpu_base *old_base, *new_base;
- int i;
+ int i, raise = 0;
BUG_ON(cpu_online(cpu));
old_base = &per_cpu(hrtimer_bases, cpu);
@@ -1626,14 +1694,21 @@ static void migrate_hrtimers(int cpu)
spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
- migrate_hrtimer_list(&old_base->clock_base[i],
- &new_base->clock_base[i]);
+ if (migrate_hrtimer_list(&old_base->clock_base[i],
+ &new_base->clock_base[i], cpu))
+ raise = 1;
}
+ if (migrate_hrtimer_pending(old_base, new_base))
+ raise = 1;
+
spin_unlock(&old_base->lock);
spin_unlock(&new_base->lock);
local_irq_enable();
put_cpu_var(hrtimer_bases);
+
+ if (raise)
+ hrtimer_raise_softirq();
}
#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index e59157b591f..d363f32dba7 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -89,7 +89,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
set_balance_irq_affinity(irq, cpumask);
#ifdef CONFIG_GENERIC_PENDING_IRQ
- set_pending_irq(irq, cpumask);
+ if (desc->status & IRQ_MOVE_PCNTXT) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ desc->chip->set_affinity(irq, cpumask);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ } else
+ set_pending_irq(irq, cpumask);
#else
desc->affinity = cpumask;
desc->chip->set_affinity(irq, cpumask);
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 25d955dbb98..e4dcfb2272a 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -590,6 +590,7 @@ static void kgdb_wait(struct pt_regs *regs)
/* Signal the primary CPU that we are done: */
atomic_set(&cpu_in_kgdb[cpu], 0);
+ touch_softlockup_watchdog();
clocksource_touch_watchdog();
local_irq_restore(flags);
}
@@ -1432,6 +1433,7 @@ acquirelock:
atomic_read(&kgdb_cpu_doing_single_step) != cpu) {
atomic_set(&kgdb_active, -1);
+ touch_softlockup_watchdog();
clocksource_touch_watchdog();
local_irq_restore(flags);
@@ -1524,6 +1526,7 @@ acquirelock:
kgdb_restore:
/* Free kgdb_active */
atomic_set(&kgdb_active, -1);
+ touch_softlockup_watchdog();
clocksource_touch_watchdog();
local_irq_restore(flags);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index e36d5798cbf..5131e547116 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -441,7 +441,7 @@ static struct k_itimer * alloc_posix_timer(void)
return tmr;
if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
kmem_cache_free(posix_timers_cache, tmr);
- tmr = NULL;
+ return NULL;
}
memset(&tmr->sigq->info, 0, sizeof(siginfo_t));
return tmr;
diff --git a/kernel/printk.c b/kernel/printk.c
index b51b1567bb5..a430fd04008 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1291,22 +1291,6 @@ static int __init disable_boot_consoles(void)
}
late_initcall(disable_boot_consoles);
-/**
- * tty_write_message - write a message to a certain tty, not just the console.
- * @tty: the destination tty_struct
- * @msg: the message to write
- *
- * This is used for messages that need to be redirected to a specific tty.
- * We don't put it into the syslog queue right now maybe in the future if
- * really needed.
- */
-void tty_write_message(struct tty_struct *tty, char *msg)
-{
- if (tty && tty->ops->write)
- tty->ops->write(tty, msg, strlen(msg));
- return;
-}
-
#if defined CONFIG_PRINTK
/*
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index aad93cdc9f6..37f72e55154 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -47,6 +47,7 @@
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
+#include <linux/time.h>
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
@@ -60,12 +61,14 @@ EXPORT_SYMBOL_GPL(rcu_lock_map);
static struct rcu_ctrlblk rcu_ctrlblk = {
.cur = -300,
.completed = -300,
+ .pending = -300,
.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
.cpumask = CPU_MASK_NONE,
};
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
.cur = -300,
.completed = -300,
+ .pending = -300,
.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
.cpumask = CPU_MASK_NONE,
};
@@ -83,7 +86,10 @@ static void force_quiescent_state(struct rcu_data *rdp,
{
int cpu;
cpumask_t cpumask;
+ unsigned long flags;
+
set_need_resched();
+ spin_lock_irqsave(&rcp->lock, flags);
if (unlikely(!rcp->signaled)) {
rcp->signaled = 1;
/*
@@ -109,6 +115,7 @@ static void force_quiescent_state(struct rcu_data *rdp,
for_each_cpu_mask_nr(cpu, cpumask)
smp_send_reschedule(cpu);
}
+ spin_unlock_irqrestore(&rcp->lock, flags);
}
#else
static inline void force_quiescent_state(struct rcu_data *rdp,
@@ -118,6 +125,126 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
}
#endif
+/*
+ * Queue @head on @rdp's callback list for the RCU flavour described by
+ * @rcp.  Common backend of call_rcu() and call_rcu_bh(), which both call
+ * it between local_irq_save() and local_irq_restore().
+ *
+ * The callback is tagged with batch number rcp->cur + 1 and appended at
+ * rdp->nxttail[2].  If that batch is after rdp->batch, the nxttail[0] and
+ * nxttail[1] boundaries are shifted up first.
+ *
+ * When rdp->qlen exceeds qhimark, rdp->blimit is raised to INT_MAX and a
+ * quiescent state is forced on @rcp.
+ */
+static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
+ struct rcu_data *rdp)
+{
+ long batch;
+
+ head->next = NULL;
+ smp_mb(); /* Read of rcu->cur must happen after any change by caller. */
+
+ /*
+ * Determine the batch number of this callback.
+ *
+ * Using ACCESS_ONCE to avoid the following error when gcc eliminates
+ * local variable "batch" and emits codes like this:
+ * 1) rdp->batch = rcp->cur + 1 # gets old value
+ * ......
+ * 2)rcu_batch_after(rcp->cur + 1, rdp->batch) # gets new value
+ * then [*nxttail[0], *nxttail[1]) may contain callbacks
+ * that batch# = rdp->batch, see the comment of struct rcu_data.
+ */
+ batch = ACCESS_ONCE(rcp->cur) + 1;
+
+ if (rdp->nxtlist && rcu_batch_after(batch, rdp->batch)) {
+ /* process callbacks */
+ rdp->nxttail[0] = rdp->nxttail[1];
+ rdp->nxttail[1] = rdp->nxttail[2];
+ if (rcu_batch_after(batch - 1, rdp->batch))
+ rdp->nxttail[0] = rdp->nxttail[2];
+ }
+
+ rdp->batch = batch;
+ *rdp->nxttail[2] = head;
+ rdp->nxttail[2] = &head->next;
+
+ if (unlikely(++rdp->qlen > qhimark)) {
+ rdp->blimit = INT_MAX;
+ /*
+ * Use the control block we were handed rather than a
+ * hard-coded &rcu_ctrlblk: call_rcu_bh() queues against
+ * rcu_bh_ctrlblk and must force that flavour.
+ */
+ force_quiescent_state(rdp, rcp);
+ }
+}
+
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+
+/*
+ * Note the jiffies value at which a grace period starts (gp_start) and set
+ * the first stall deadline RCU_SECONDS_TILL_STALL_CHECK after it.
+ * rcu_start_batch() calls this right after incrementing rcp->cur.
+ */
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+ rcp->gp_start = jiffies;
+ rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
+}
+
+/*
+ * Report the CPUs still set in rcp->cpumask as stalling the current grace
+ * period.  check_cpu_stall() calls this when a grace period is in progress
+ * (cur != completed) and jiffies is at least 2 past rcp->jiffies_stall.
+ *
+ * The conditions are rechecked under rcp->lock and jiffies_stall is pushed
+ * forward by RCU_SECONDS_TILL_STALL_RECHECK before printing.
+ */
+static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+ int cpu;
+ long delta;
+ unsigned long flags;
+
+ /* Only let one CPU complain about others per time interval. */
+
+ spin_lock_irqsave(&rcp->lock, flags);
+ delta = jiffies - rcp->jiffies_stall;
+ /*
+ * Bail out if another CPU already moved jiffies_stall forward, or
+ * if no grace period is in progress any more (cur == completed).
+ * Testing cur != completed here would return in exactly the case
+ * the caller selects, so nothing would ever be reported.
+ */
+ if (delta < 2 || rcp->cur == rcp->completed) {
+ spin_unlock_irqrestore(&rcp->lock, flags);
+ return;
+ }
+ rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+ spin_unlock_irqrestore(&rcp->lock, flags);
+
+ /* OK, time to rat on our buddy... */
+
+ printk(KERN_ERR "RCU detected CPU stalls:");
+ for_each_possible_cpu(cpu) {
+ if (cpu_isset(cpu, rcp->cpumask))
+ printk(" %d", cpu);
+ }
+ printk(" (detected by %d, t=%ld jiffies)\n",
+ smp_processor_id(), (long)(jiffies - rcp->gp_start));
+}
+
+/*
+ * Report that the current CPU is stalling the grace period: print the
+ * current jiffies and the jiffies elapsed since rcp->gp_start, then dump
+ * the stack.  If the stall deadline has already passed it is moved
+ * RCU_SECONDS_TILL_STALL_RECHECK into the future, under rcp->lock.
+ */
+static void print_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+ unsigned long flags;
+
+ printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
+ smp_processor_id(), jiffies,
+ jiffies - rcp->gp_start);
+ dump_stack();
+ spin_lock_irqsave(&rcp->lock, flags);
+ /* Signed difference: true once jiffies has reached jiffies_stall. */
+ if ((long)(jiffies - rcp->jiffies_stall) >= 0)
+ rcp->jiffies_stall =
+ jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+ spin_unlock_irqrestore(&rcp->lock, flags);
+ set_need_resched(); /* kick ourselves to get things going. */
+}
+
+/*
+ * Stall check for the current CPU against @rcp.  delta is how far jiffies
+ * has moved past rcp->jiffies_stall (negative while the deadline is still
+ * ahead).  A CPU that is itself still set in rcp->cpumask reports on
+ * itself; otherwise, if a grace period is in progress, it reports on the
+ * others.
+ */
+static void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+ long delta;
+
+ delta = jiffies - rcp->jiffies_stall;
+ if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
+
+ /* We haven't checked in, so go dump stack. */
+ print_cpu_stall(rcp);
+
+ } else if (rcp->cur != rcp->completed && delta >= 2) {
+
+ /* They had two seconds to dump stack, so complain. */
+ /* NOTE(review): delta is in jiffies, so ">= 2" is two jiffies, not two seconds - confirm intent. */
+ print_other_cpu_stall(rcp);
+ }
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+/* No-op stub used when CONFIG_RCU_CPU_STALL_DETECTOR is not set. */
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+}
+
+/* No-op stub used when CONFIG_RCU_CPU_STALL_DETECTOR is not set. */
+static inline void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
/**
* call_rcu - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates.
@@ -133,18 +260,10 @@ void call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *rcu))
{
unsigned long flags;
- struct rcu_data *rdp;
head->func = func;
- head->next = NULL;
local_irq_save(flags);
- rdp = &__get_cpu_var(rcu_data);
- *rdp->nxttail = head;
- rdp->nxttail = &head->next;
- if (unlikely(++rdp->qlen > qhimark)) {
- rdp->blimit = INT_MAX;
- force_quiescent_state(rdp, &rcu_ctrlblk);
- }
+ __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data));
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(call_rcu);
@@ -169,20 +288,10 @@ void call_rcu_bh(struct rcu_head *head,
void (*func)(struct rcu_head *rcu))
{
unsigned long flags;
- struct rcu_data *rdp;
head->func = func;
- head->next = NULL;
local_irq_save(flags);
- rdp = &__get_cpu_var(rcu_bh_data);
- *rdp->nxttail = head;
- rdp->nxttail = &head->next;
-
- if (unlikely(++rdp->qlen > qhimark)) {
- rdp->blimit = INT_MAX;
- force_quiescent_state(rdp, &rcu_bh_ctrlblk);
- }
-
+ __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
@@ -211,12 +320,6 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
static inline void raise_rcu_softirq(void)
{
raise_softirq(RCU_SOFTIRQ);
- /*
- * The smp_mb() here is required to ensure that this cpu's
- * __rcu_process_callbacks() reads the most recently updated
- * value of rcu->cur.
- */
- smp_mb();
}
/*
@@ -225,6 +328,7 @@ static inline void raise_rcu_softirq(void)
*/
static void rcu_do_batch(struct rcu_data *rdp)
{
+ unsigned long flags;
struct rcu_head *next, *list;
int count = 0;
@@ -239,9 +343,9 @@ static void rcu_do_batch(struct rcu_data *rdp)
}
rdp->donelist = list;
- local_irq_disable();
+ local_irq_save(flags);
rdp->qlen -= count;
- local_irq_enable();
+ local_irq_restore(flags);
if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
rdp->blimit = blimit;
@@ -269,6 +373,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
* rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
* period (if necessary).
*/
+
/*
* Register a new batch of callbacks, and start it up if there is currently no
* active batch and the batch to be registered has not already occurred.
@@ -276,15 +381,10 @@ static void rcu_do_batch(struct rcu_data *rdp)
*/
static void rcu_start_batch(struct rcu_ctrlblk *rcp)
{
- if (rcp->next_pending &&
+ if (rcp->cur != rcp->pending &&
rcp->completed == rcp->cur) {
- rcp->next_pending = 0;
- /*
- * next_pending == 0 must be visible in
- * __rcu_process_callbacks() before it can see new value of cur.
- */
- smp_wmb();
rcp->cur++;
+ record_gp_stall_check_time(rcp);
/*
* Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -322,6 +422,8 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
struct rcu_data *rdp)
{
+ unsigned long flags;
+
if (rdp->quiescbatch != rcp->cur) {
/* start new grace period: */
rdp->qs_pending = 1;
@@ -345,7 +447,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
return;
rdp->qs_pending = 0;
- spin_lock(&rcp->lock);
+ spin_lock_irqsave(&rcp->lock, flags);
/*
* rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
* during cpu startup. Ignore the quiescent state.
@@ -353,7 +455,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
if (likely(rdp->quiescbatch == rcp->cur))
cpu_quiet(rdp->cpu, rcp);
- spin_unlock(&rcp->lock);
+ spin_unlock_irqrestore(&rcp->lock, flags);
}
@@ -364,33 +466,38 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
* which is dead and hence not processing interrupts.
*/
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
- struct rcu_head **tail)
+ struct rcu_head **tail, long batch)
{
- local_irq_disable();
- *this_rdp->nxttail = list;
- if (list)
- this_rdp->nxttail = tail;
- local_irq_enable();
+ unsigned long flags;
+
+ if (list) {
+ local_irq_save(flags);
+ this_rdp->batch = batch;
+ *this_rdp->nxttail[2] = list;
+ this_rdp->nxttail[2] = tail;
+ local_irq_restore(flags);
+ }
}
static void __rcu_offline_cpu(struct rcu_data *this_rdp,
struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
{
- /* if the cpu going offline owns the grace period
+ unsigned long flags;
+
+ /*
+ * if the cpu going offline owns the grace period
* we can block indefinitely waiting for it, so flush
* it here
*/
- spin_lock_bh(&rcp->lock);
+ spin_lock_irqsave(&rcp->lock, flags);
if (rcp->cur != rcp->completed)
cpu_quiet(rdp->cpu, rcp);
- spin_unlock_bh(&rcp->lock);
- rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
- rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
- rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
+ rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1);
+ rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1);
+ spin_unlock(&rcp->lock);
- local_irq_disable();
this_rdp->qlen += rdp->qlen;
- local_irq_enable();
+ local_irq_restore(flags);
}
static void rcu_offline_cpu(int cpu)
@@ -420,38 +527,52 @@ static void rcu_offline_cpu(int cpu)
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
struct rcu_data *rdp)
{
- if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
- *rdp->donetail = rdp->curlist;
- rdp->donetail = rdp->curtail;
- rdp->curlist = NULL;
- rdp->curtail = &rdp->curlist;
- }
+ unsigned long flags;
+ long completed_snap;
- if (rdp->nxtlist && !rdp->curlist) {
- local_irq_disable();
- rdp->curlist = rdp->nxtlist;
- rdp->curtail = rdp->nxttail;
- rdp->nxtlist = NULL;
- rdp->nxttail = &rdp->nxtlist;
- local_irq_enable();
+ if (rdp->nxtlist) {
+ local_irq_save(flags);
+ completed_snap = ACCESS_ONCE(rcp->completed);
/*
- * start the next batch of callbacks
+ * move the other grace-period-completed entries to
+ * [rdp->nxtlist, *rdp->nxttail[0]) temporarily
*/
+ if (!rcu_batch_before(completed_snap, rdp->batch))
+ rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2];
+ else if (!rcu_batch_before(completed_snap, rdp->batch - 1))
+ rdp->nxttail[0] = rdp->nxttail[1];
- /* determine batch number */
- rdp->batch = rcp->cur + 1;
- /* see the comment and corresponding wmb() in
- * the rcu_start_batch()
+ /*
+ * the grace period for entries in
+ * [rdp->nxtlist, *rdp->nxttail[0]) has completed and
+ * move these entries to donelist
*/
- smp_rmb();
+ if (rdp->nxttail[0] != &rdp->nxtlist) {
+ *rdp->donetail = rdp->nxtlist;
+ rdp->donetail = rdp->nxttail[0];
+ rdp->nxtlist = *rdp->nxttail[0];
+ *rdp->donetail = NULL;
+
+ if (rdp->nxttail[1] == rdp->nxttail[0])
+ rdp->nxttail[1] = &rdp->nxtlist;
+ if (rdp->nxttail[2] == rdp->nxttail[0])
+ rdp->nxttail[2] = &rdp->nxtlist;
+ rdp->nxttail[0] = &rdp->nxtlist;
+ }
+
+ local_irq_restore(flags);
+
+ if (rcu_batch_after(rdp->batch, rcp->pending)) {
+ unsigned long flags2;
- if (!rcp->next_pending) {
/* and start it/schedule start if it's a new batch */
- spin_lock(&rcp->lock);
- rcp->next_pending = 1;
- rcu_start_batch(rcp);
- spin_unlock(&rcp->lock);
+ spin_lock_irqsave(&rcp->lock, flags2);
+ if (rcu_batch_after(rdp->batch, rcp->pending)) {
+ rcp->pending = rdp->batch;
+ rcu_start_batch(rcp);
+ }
+ spin_unlock_irqrestore(&rcp->lock, flags2);
}
}
@@ -462,21 +583,53 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
static void rcu_process_callbacks(struct softirq_action *unused)
{
+ /*
+ * Memory references from any prior RCU read-side critical sections
+ * executed by the interrupted code must be seen before any RCU
+ * grace-period manipulations below.
+ */
+
+ smp_mb(); /* See above block comment. */
+
__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
+
+ /*