aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.hz2
-rw-r--r--kernel/Kconfig.preempt13
-rw-r--r--kernel/Makefile6
-rw-r--r--kernel/cpu.c164
-rw-r--r--kernel/cpuset.c14
-rw-r--r--kernel/fork.c11
-rw-r--r--kernel/hrtimer.c258
-rw-r--r--kernel/kmod.c13
-rw-r--r--kernel/ksysfs.c82
-rw-r--r--kernel/kthread.c12
-rw-r--r--kernel/latencytop.c239
-rw-r--r--kernel/lockdep.c31
-rw-r--r--kernel/module.c184
-rw-r--r--kernel/params.c44
-rw-r--r--kernel/posix-cpu-timers.c30
-rw-r--r--kernel/power/disk.c20
-rw-r--r--kernel/power/main.c26
-rw-r--r--kernel/power/pm.c4
-rw-r--r--kernel/power/power.h4
-rw-r--r--kernel/printk.c57
-rw-r--r--kernel/profile.c99
-rw-r--r--kernel/ptrace.c2
-rw-r--r--kernel/rcuclassic.c575
-rw-r--r--kernel/rcupdate.c576
-rw-r--r--kernel/rcupreempt.c953
-rw-r--r--kernel/rcupreempt_trace.c330
-rw-r--r--kernel/rcutorture.c6
-rw-r--r--kernel/rtmutex-tester.c2
-rw-r--r--kernel/sched.c1378
-rw-r--r--kernel/sched_debug.c5
-rw-r--r--kernel/sched_fair.c391
-rw-r--r--kernel/sched_idletask.c42
-rw-r--r--kernel/sched_rt.c1112
-rw-r--r--kernel/softlockup.c116
-rw-r--r--kernel/stop_machine.c4
-rw-r--r--kernel/sysctl.c77
-rw-r--r--kernel/time/clocksource.c2
-rw-r--r--kernel/time/tick-sched.c13
-rw-r--r--kernel/time/timekeeping.c2
-rw-r--r--kernel/timer.c7
-rw-r--r--kernel/user.c152
-rw-r--r--kernel/workqueue.c40
42 files changed, 5453 insertions, 1645 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 4af15802ccd..526128a2e62 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -54,3 +54,5 @@ config HZ
default 300 if HZ_300
default 1000 if HZ_1000
+config SCHED_HRTICK
+ def_bool HIGH_RES_TIMERS && X86
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index c64ce9c1420..0669b70fa6a 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -52,14 +52,13 @@ config PREEMPT
endchoice
-config PREEMPT_BKL
- bool "Preempt The Big Kernel Lock"
- depends on SMP || PREEMPT
+config RCU_TRACE
+ bool "Enable tracing for RCU - currently stats in debugfs"
+ select DEBUG_FS
default y
help
- This option reduces the latency of the kernel by making the
- big kernel lock preemptible.
+ This option provides tracing in RCU which presents stats
+ in debugfs for debugging RCU implementation.
- Say Y here if you are building a kernel for a desktop system.
+ Say Y here if you want to enable RCU tracing
Say N if you are unsure.
-
diff --git a/kernel/Makefile b/kernel/Makefile
index dfa96956dae..390d4214626 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -52,11 +52,17 @@ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
+obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o
+obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
+ifeq ($(CONFIG_PREEMPT_RCU),y)
+obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o
+endif
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_MARKERS) += marker.o
+obj-$(CONFIG_LATENCYTOP) += latencytop.o
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6b3a0c15144..e0d3a4f56ec 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -15,9 +15,8 @@
#include <linux/stop_machine.h>
#include <linux/mutex.h>
-/* This protects CPUs going up and down... */
+/* Serializes the updates to cpu_online_map, cpu_present_map */
static DEFINE_MUTEX(cpu_add_remove_lock);
-static DEFINE_MUTEX(cpu_bitmask_lock);
static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
@@ -26,52 +25,123 @@ static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
*/
static int cpu_hotplug_disabled;
-#ifdef CONFIG_HOTPLUG_CPU
+static struct {
+ struct task_struct *active_writer;
+ struct mutex lock; /* Synchronizes accesses to refcount, */
+ /*
+ * Also blocks the new readers during
+ * an ongoing cpu hotplug operation.
+ */
+ int refcount;
+ wait_queue_head_t writer_queue;
+} cpu_hotplug;
-/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
-static struct task_struct *recursive;
-static int recursive_depth;
+#define writer_exists() (cpu_hotplug.active_writer != NULL)
-void lock_cpu_hotplug(void)
+void __init cpu_hotplug_init(void)
{
- struct task_struct *tsk = current;
-
- if (tsk == recursive) {
- static int warnings = 10;
- if (warnings) {
- printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n");
- WARN_ON(1);
- warnings--;
- }
- recursive_depth++;
+ cpu_hotplug.active_writer = NULL;
+ mutex_init(&cpu_hotplug.lock);
+ cpu_hotplug.refcount = 0;
+ init_waitqueue_head(&cpu_hotplug.writer_queue);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+void get_online_cpus(void)
+{
+ might_sleep();
+ if (cpu_hotplug.active_writer == current)
return;
- }
- mutex_lock(&cpu_bitmask_lock);
- recursive = tsk;
+ mutex_lock(&cpu_hotplug.lock);
+ cpu_hotplug.refcount++;
+ mutex_unlock(&cpu_hotplug.lock);
+
}
-EXPORT_SYMBOL_GPL(lock_cpu_hotplug);
+EXPORT_SYMBOL_GPL(get_online_cpus);
-void unlock_cpu_hotplug(void)
+void put_online_cpus(void)
{
- WARN_ON(recursive != current);
- if (recursive_depth) {
- recursive_depth--;
+ if (cpu_hotplug.active_writer == current)
return;
- }
- recursive = NULL;
- mutex_unlock(&cpu_bitmask_lock);
+ mutex_lock(&cpu_hotplug.lock);
+ cpu_hotplug.refcount--;
+
+ if (unlikely(writer_exists()) && !cpu_hotplug.refcount)
+ wake_up(&cpu_hotplug.writer_queue);
+
+ mutex_unlock(&cpu_hotplug.lock);
+
}
-EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
+EXPORT_SYMBOL_GPL(put_online_cpus);
#endif /* CONFIG_HOTPLUG_CPU */
+/*
+ * The following two API's must be used when attempting
+ * to serialize the updates to cpu_online_map, cpu_present_map.
+ */
+void cpu_maps_update_begin(void)
+{
+ mutex_lock(&cpu_add_remove_lock);
+}
+
+void cpu_maps_update_done(void)
+{
+ mutex_unlock(&cpu_add_remove_lock);
+}
+
+/*
+ * This ensures that the hotplug operation can begin only when the
+ * refcount goes to zero.
+ *
+ * Note that during a cpu-hotplug operation, the new readers, if any,
+ * will be blocked by the cpu_hotplug.lock
+ *
+ * Since cpu_maps_update_begin is always called after invoking
+ * cpu_maps_update_begin, we can be sure that only one writer is active.
+ *
+ * Note that theoretically, there is a possibility of a livelock:
+ * - Refcount goes to zero, last reader wakes up the sleeping
+ * writer.
+ * - Last reader unlocks the cpu_hotplug.lock.
+ * - A new reader arrives at this moment, bumps up the refcount.
+ * - The writer acquires the cpu_hotplug.lock finds the refcount
+ * non zero and goes to sleep again.
+ *
+ * However, this is very difficult to achieve in practice since
+ * get_online_cpus() not an api which is called all that often.
+ *
+ */
+static void cpu_hotplug_begin(void)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ mutex_lock(&cpu_hotplug.lock);
+
+ cpu_hotplug.active_writer = current;
+ add_wait_queue_exclusive(&cpu_hotplug.writer_queue, &wait);
+ while (cpu_hotplug.refcount) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ mutex_unlock(&cpu_hotplug.lock);
+ schedule();
+ mutex_lock(&cpu_hotplug.lock);
+ }
+ remove_wait_queue_locked(&cpu_hotplug.writer_queue, &wait);
+}
+
+static void cpu_hotplug_done(void)
+{
+ cpu_hotplug.active_writer = NULL;
+ mutex_unlock(&cpu_hotplug.lock);
+}
/* Need to know about CPUs going up/down? */
int __cpuinit register_cpu_notifier(struct notifier_block *nb)
{
int ret;
- mutex_lock(&cpu_add_remove_lock);
+ cpu_maps_update_begin();
ret = raw_notifier_chain_register(&cpu_chain, nb);
- mutex_unlock(&cpu_add_remove_lock);
+ cpu_maps_update_done();
return ret;
}
@@ -81,9 +151,9 @@ EXPORT_SYMBOL(register_cpu_notifier);
void unregister_cpu_notifier(struct notifier_block *nb)
{
- mutex_lock(&cpu_add_remove_lock);
+ cpu_maps_update_begin();
raw_notifier_chain_unregister(&cpu_chain, nb);
- mutex_unlock(&cpu_add_remove_lock);
+ cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);
@@ -147,7 +217,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
if (!cpu_online(cpu))
return -EINVAL;
- raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
+ cpu_hotplug_begin();
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
hcpu, -1, &nr_calls);
if (err == NOTIFY_BAD) {
@@ -166,9 +236,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
cpu_clear(cpu, tmp);
set_cpus_allowed(current, tmp);
- mutex_lock(&cpu_bitmask_lock);
p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
- mutex_unlock(&cpu_bitmask_lock);
if (IS_ERR(p) || cpu_online(cpu)) {
/* CPU didn't die: tell everyone. Can't complain. */
@@ -202,7 +270,7 @@ out_thread:
out_allowed:
set_cpus_allowed(current, old_allowed);
out_release:
- raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
+ cpu_hotplug_done();
return err;
}
@@ -210,13 +278,13 @@ int cpu_down(unsigned int cpu)
{
int err = 0;
- mutex_lock(&cpu_add_remove_lock);
+ cpu_maps_update_begin();
if (cpu_hotplug_disabled)
err = -EBUSY;
else
err = _cpu_down(cpu, 0);
- mutex_unlock(&cpu_add_remove_lock);
+ cpu_maps_update_done();
return err;
}
#endif /*CONFIG_HOTPLUG_CPU*/
@@ -231,7 +299,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
if (cpu_online(cpu) || !cpu_present(cpu))
return -EINVAL;
- raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
+ cpu_hotplug_begin();
ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
-1, &nr_calls);
if (ret == NOTIFY_BAD) {
@@ -243,9 +311,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
}
/* Arch-specific enabling code. */
- mutex_lock(&cpu_bitmask_lock);
ret = __cpu_up(cpu);
- mutex_unlock(&cpu_bitmask_lock);
if (ret != 0)
goto out_notify;
BUG_ON(!cpu_online(cpu));
@@ -257,7 +323,7 @@ out_notify:
if (ret != 0)
__raw_notifier_call_chain(&cpu_chain,
CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
- raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
+ cpu_hotplug_done();
return ret;
}
@@ -275,13 +341,13 @@ int __cpuinit cpu_up(unsigned int cpu)
return -EINVAL;
}
- mutex_lock(&cpu_add_remove_lock);
+ cpu_maps_update_begin();
if (cpu_hotplug_disabled)
err = -EBUSY;
else
err = _cpu_up(cpu, 0);
- mutex_unlock(&cpu_add_remove_lock);
+ cpu_maps_update_done();
return err;
}
@@ -292,7 +358,7 @@ int disable_nonboot_cpus(void)
{
int cpu, first_cpu, error = 0;
- mutex_lock(&cpu_add_remove_lock);
+ cpu_maps_update_begin();
first_cpu = first_cpu(cpu_online_map);
/* We take down all of the non-boot CPUs in one shot to avoid races
* with the userspace trying to use the CPU hotplug at the same time
@@ -319,7 +385,7 @@ int disable_nonboot_cpus(void)
} else {
printk(KERN_ERR "Non-boot CPUs are not disabled\n");
}
- mutex_unlock(&cpu_add_remove_lock);
+ cpu_maps_update_done();
return error;
}
@@ -328,7 +394,7 @@ void enable_nonboot_cpus(void)
int cpu, error;
/* Allow everyone to use the CPU hotplug again */
- mutex_lock(&cpu_add_remove_lock);
+ cpu_maps_update_begin();
cpu_hotplug_disabled = 0;
if (cpus_empty(frozen_cpus))
goto out;
@@ -344,6 +410,6 @@ void enable_nonboot_cpus(void)
}
cpus_clear(frozen_cpus);
out:
- mutex_unlock(&cpu_add_remove_lock);
+ cpu_maps_update_done();
}
#endif /* CONFIG_PM_SLEEP_SMP */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 50f5dc46368..cfaf6419d81 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -537,10 +537,10 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
*
* Call with cgroup_mutex held. May take callback_mutex during
* call due to the kfifo_alloc() and kmalloc() calls. May nest
- * a call to the lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
+ * a call to the get_online_cpus()/put_online_cpus() pair.
* Must not be called holding callback_mutex, because we must not
- * call lock_cpu_hotplug() while holding callback_mutex. Elsewhere
- * the kernel nests callback_mutex inside lock_cpu_hotplug() calls.
+ * call get_online_cpus() while holding callback_mutex. Elsewhere
+ * the kernel nests callback_mutex inside get_online_cpus() calls.
* So the reverse nesting would risk an ABBA deadlock.
*
* The three key local variables below are:
@@ -691,9 +691,9 @@ restart:
rebuild:
/* Have scheduler rebuild sched domains */
- lock_cpu_hotplug();
+ get_online_cpus();
partition_sched_domains(ndoms, doms);
- unlock_cpu_hotplug();
+ put_online_cpus();
done:
if (q && !IS_ERR(q))
@@ -1617,10 +1617,10 @@ static struct cgroup_subsys_state *cpuset_create(
*
* If the cpuset being removed has its flag 'sched_load_balance'
* enabled, then simulate turning sched_load_balance off, which
- * will call rebuild_sched_domains(). The lock_cpu_hotplug()
+ * will call rebuild_sched_domains(). The get_online_cpus()
* call in rebuild_sched_domains() must not be made while holding
* callback_mutex. Elsewhere the kernel nests callback_mutex inside
- * lock_cpu_hotplug() calls. So the reverse nesting would risk an
+ * get_online_cpus() calls. So the reverse nesting would risk an
* ABBA deadlock.
*/
diff --git a/kernel/fork.c b/kernel/fork.c
index 8dd8ff28100..39d22b3357d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1045,6 +1045,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
copy_flags(clone_flags, p);
INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling);
+#ifdef CONFIG_PREEMPT_RCU
+ p->rcu_read_lock_nesting = 0;
+ p->rcu_flipctr_idx = 0;
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
p->vfork_done = NULL;
spin_lock_init(&p->alloc_lock);
@@ -1059,6 +1063,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->prev_utime = cputime_zero;
p->prev_stime = cputime_zero;
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+ p->last_switch_count = 0;
+ p->last_switch_timestamp = 0;
+#endif
+
#ifdef CONFIG_TASK_XACCT
p->rchar = 0; /* I/O counter: bytes read */
p->wchar = 0; /* I/O counter: bytes written */
@@ -1196,6 +1205,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifdef TIF_SYSCALL_EMU
clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
#endif
+ clear_all_latency_tracing(p);
/* Our parent execution domain becomes current domain
These must match for thread signalling to apply */
@@ -1237,6 +1247,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
* parent's CPU). This avoids alot of nasty races.
*/
p->cpus_allowed = current->cpus_allowed;
+ p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
!cpu_online(task_cpu(p))))
set_task_cpu(p, smp_processor_id());
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index e65dd0b47cd..bd5d6b5060b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -325,6 +325,22 @@ unsigned long ktime_divns(const ktime_t kt, s64 div)
}
#endif /* BITS_PER_LONG >= 64 */
+/*
+ * Check, whether the timer is on the callback pending list
+ */
+static inline int hrtimer_cb_pending(const struct hrtimer *timer)
+{
+ return timer->state & HRTIMER_STATE_PENDING;
+}
+
+/*
+ * Remove a timer from the callback pending list
+ */
+static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
+{
+ list_del_init(&timer->cb_entry);
+}
+
/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -494,29 +510,12 @@ void hres_timers_resume(void)
}
/*
- * Check, whether the timer is on the callback pending list
- */
-static inline int hrtimer_cb_pending(const struct hrtimer *timer)
-{
- return timer->state & HRTIMER_STATE_PENDING;
-}
-
-/*
- * Remove a timer from the callback pending list
- */
-static inline void hrtimer_remove_cb_pending(struct hrtimer *timer)
-{
- list_del_init(&timer->cb_entry);
-}
-
-/*
* Initialize the high resolution related parts of cpu_base
*/
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
{
base->expires_next.tv64 = KTIME_MAX;
base->hres_active = 0;
- INIT_LIST_HEAD(&base->cb_pending);
}
/*
@@ -524,7 +523,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
*/
static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
{
- INIT_LIST_HEAD(&timer->cb_entry);
}
/*
@@ -618,10 +616,13 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
{
return 0;
}
-static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; }
-static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { }
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
+static inline int hrtimer_reprogram(struct hrtimer *timer,
+ struct hrtimer_clock_base *base)
+{
+ return 0;
+}
#endif /* CONFIG_HIGH_RES_TIMERS */
@@ -1001,6 +1002,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
clock_id = CLOCK_MONOTONIC;
timer->base = &cpu_base->clock_base[clock_id];
+ INIT_LIST_HEAD(&timer->cb_entry);
hrtimer_init_timer_hres(timer);
#ifdef CONFIG_TIMER_STATS
@@ -1030,6 +1032,85 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
}
EXPORT_SYMBOL_GPL(hrtimer_get_res);
+static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
+{
+ spin_lock_irq(&cpu_base->lock);
+
+ while (!list_empty(&cpu_base->cb_pending)) {
+ enum hrtimer_restart (*fn)(struct hrtimer *);
+ struct hrtimer *timer;
+ int restart;
+
+ timer = list_entry(cpu_base->cb_pending.next,
+ struct hrtimer, cb_entry);
+
+ timer_stats_account_hrtimer(timer);
+
+ fn = timer->function;
+ __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
+ spin_unlock_irq(&cpu_base->lock);
+
+ restart = fn(timer);
+
+ spin_lock_irq(&cpu_base->lock);
+
+ timer->state &= ~HRTIMER_STATE_CALLBACK;
+ if (restart == HRTIMER_RESTART) {
+ BUG_ON(hrtimer_active(timer));
+ /*
+ * Enqueue the timer, allow reprogramming of the event
+ * device
+ */
+ enqueue_hrtimer(timer, timer->base, 1);
+ } else if (hrtimer_active(timer)) {
+ /*
+ * If the timer was rearmed on another CPU, reprogram
+ * the event device.
+ */
+ if (timer->base->first == &timer->node)
+ hrtimer_reprogram(timer, timer->base);
+ }
+ }
+ spin_unlock_irq(&cpu_base->lock);
+}
+
+static void __run_hrtimer(struct hrtimer *timer)
+{
+ struct hrtimer_clock_base *base = timer->base;
+ struct hrtimer_cpu_base *cpu_base = base->cpu_base;
+ enum hrtimer_restart (*fn)(struct hrtimer *);
+ int restart;
+
+ __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+ timer_stats_account_hrtimer(timer);
+
+ fn = timer->function;
+ if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
+ /*
+ * Used for scheduler timers, avoid lock inversion with
+ * rq->lock and tasklist_lock.
+ *
+ * These timers are required to deal with enqueue expiry
+ * themselves and are not allowed to migrate.
+ */
+ spin_unlock(&cpu_base->lock);
+ restart = fn(timer);
+ spin_lock(&cpu_base->lock);
+ } else
+ restart = fn(timer);
+
+ /*
+ * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
+ * reprogramming of the event hardware. This happens at the end of this
+ * function anyway.
+ */
+ if (restart != HRTIMER_NORESTART) {
+ BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
+ enqueue_hrtimer(timer, base, 0);
+ }
+ timer->state &= ~HRTIMER_STATE_CALLBACK;
+}
+
#ifdef CONFIG_HIGH_RES_TIMERS
/*
@@ -1087,21 +1168,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
continue;
}
- __remove_hrtimer(timer, base,
- HRTIMER_STATE_CALLBACK, 0);
- timer_stats_account_hrtimer(timer);
-
- /*
- * Note: We clear the CALLBACK bit after
- * enqueue_hrtimer to avoid reprogramming of
- * the event hardware. This happens at the end
- * of this function anyway.
- */
- if (timer->function(timer) != HRTIMER_NORESTART) {
- BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
- enqueue_hrtimer(timer, base, 0);
- }
- timer->state &= ~HRTIMER_STATE_CALLBACK;
+ __run_hrtimer(timer);
}
spin_unlock(&cpu_base->lock);
base++;
@@ -1122,52 +1189,41 @@ void hrtimer_interrupt(struct clock_event_device *dev)
static void run_hrtimer_softirq(struct softirq_action *h)
{
- struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
-
- spin_lock_irq(&cpu_base->lock);
-
- while (!list_empty(&cpu_base->cb_pending)) {
- enum hrtimer_restart (*fn)(struct hrtimer *);
- struct hrtimer *timer;
- int restart;
-
- timer = list_entry(cpu_base->cb_pending.next,
- struct hrtimer, cb_entry);
+ run_hrtimer_pending(&__get_cpu_var(hrtimer_bases));
+}
- timer_stats_account_hrtimer(timer);
+#endif /* CONFIG_HIGH_RES_TIMERS */
- fn = timer->function;
- __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
- spin_unlock_irq(&cpu_base->lock);
+/*
+ * Called from timer softirq every jiffy, expire hrtimers:
+ *
+ * For HRT its the fall back code to run the softirq in the timer
+ * softirq context in case the hrtimer initialization failed or has
+ * not been done yet.
+ */
+void hrtimer_run_pending(void)
+{
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
- restart = fn(timer);
+ if (hrtimer_hres_active())
+ return;
- spin_lock_irq(&cpu_base->lock);
+ /*
+ * This _is_ ugly: We have to check in the softirq context,
+ * whether we can switch to highres and / or nohz mode. The
+ * clocksource switch happens in the timer interrupt with
+ * xtime_lock held. Notification from there only sets the
+ * check bit in the tick_oneshot code, otherwise we might
+ * deadlock vs. xtime_lock.
+ */
+ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
+ hrtimer_switch_to_hres();
- timer->state &= ~HRTIMER_STATE_CALLBACK;
- if (restart == HRTIMER_RESTART) {
- BUG_ON(hrtimer_active(timer));
- /*
- * Enqueue the timer, allow reprogramming of the event
- * device
- */
- enqueue_hrtimer(timer, timer->base, 1);
- } else if (hrtimer_active(timer)) {
- /*
- * If the timer was rearmed on another CPU, reprogram
- * the event device.
- */
- if (timer->base->first == &timer->node)
- hrtimer_reprogram(timer, timer->base);
- }
- }
- spin_unlock_irq(&cpu_base->lock);
+ run_hrtimer_pending(cpu_base);
}
-#endif /* CONFIG_HIGH_RES_TIMERS */
-
/*
- * Expire the per base hrtimer-queue:
+ * Called from hardirq context every jiffy
*/
static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
int index)
@@ -1181,46 +1237,27 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
if (base->get_softirq_time)
base->softirq_time = base->get_softirq_time();
- spin_lock_irq(&cpu_base->lock);
+ spin_lock(&cpu_base->lock);
while ((node = base->first)) {
struct hrtimer *timer;
- enum hrtimer_restart (*fn)(struct hrtimer *);
- int restart;
timer = rb_entry(node, struct hrtimer, node);
if (base->softirq_time.tv64 <= timer->expires.tv64)
break;
-#ifdef CONFIG_HIGH_RES_TIMERS
- WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ);
-#endif
- timer_stats_account_hrtimer(timer);
-
- fn = timer->function;
- __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
- spin_unlock_irq(&cpu_base->lock);
-
- restart = fn(timer);
-
- spin_lock_irq(&cpu_base->lock);
-
- timer->state &= ~HRTIMER_STATE_CALLBACK;
- if (restart != HRTIMER_NORESTART) {
- BUG_ON(hrtimer_active(timer));
- enqueue_hrtimer(timer, base, 0);
+ if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) {
+ __remove_hrtimer(timer, base, HRTIMER_STATE_PENDING, 0);
+ list_add_tail(&timer->cb_entry,
+ &base->cpu_base->cb_pending);
+ continue;
}
+
+ __run_hrtimer(timer);
}
- spin_unlock_irq(&cpu_base->lock);
+ spin_unlock(&cpu_base->lock);
}
-/*
- * Called from timer softirq every jiffy