diff options
Diffstat (limited to 'kernel')
42 files changed, 5453 insertions, 1645 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index 4af15802ccd..526128a2e62 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -54,3 +54,5 @@ config HZ default 300 if HZ_300 default 1000 if HZ_1000 +config SCHED_HRTICK + def_bool HIGH_RES_TIMERS && X86 diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index c64ce9c1420..0669b70fa6a 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -52,14 +52,13 @@ config PREEMPT endchoice -config PREEMPT_BKL - bool "Preempt The Big Kernel Lock" - depends on SMP || PREEMPT +config RCU_TRACE + bool "Enable tracing for RCU - currently stats in debugfs" + select DEBUG_FS default y help - This option reduces the latency of the kernel by making the - big kernel lock preemptible. + This option provides tracing in RCU which presents stats + in debugfs for debugging RCU implementation. - Say Y here if you are building a kernel for a desktop system. + Say Y here if you want to enable RCU tracing Say N if you are unsure. - diff --git a/kernel/Makefile b/kernel/Makefile index dfa96956dae..390d4214626 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -52,11 +52,17 @@ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o +obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o +obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o +ifeq ($(CONFIG_PREEMPT_RCU),y) +obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o +endif obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_MARKERS) += marker.o +obj-$(CONFIG_LATENCYTOP) += latencytop.o ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is diff --git a/kernel/cpu.c b/kernel/cpu.c index 6b3a0c15144..e0d3a4f56ec 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -15,9 +15,8 
@@ #include <linux/stop_machine.h> #include <linux/mutex.h> -/* This protects CPUs going up and down... */ +/* Serializes the updates to cpu_online_map, cpu_present_map */ static DEFINE_MUTEX(cpu_add_remove_lock); -static DEFINE_MUTEX(cpu_bitmask_lock); static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); @@ -26,52 +25,123 @@ static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain); */ static int cpu_hotplug_disabled; -#ifdef CONFIG_HOTPLUG_CPU +static struct { + struct task_struct *active_writer; + struct mutex lock; /* Synchronizes accesses to refcount, */ + /* + * Also blocks the new readers during + * an ongoing cpu hotplug operation. + */ + int refcount; + wait_queue_head_t writer_queue; +} cpu_hotplug; -/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */ -static struct task_struct *recursive; -static int recursive_depth; +#define writer_exists() (cpu_hotplug.active_writer != NULL) -void lock_cpu_hotplug(void) +void __init cpu_hotplug_init(void) { - struct task_struct *tsk = current; - - if (tsk == recursive) { - static int warnings = 10; - if (warnings) { - printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n"); - WARN_ON(1); - warnings--; - } - recursive_depth++; + cpu_hotplug.active_writer = NULL; + mutex_init(&cpu_hotplug.lock); + cpu_hotplug.refcount = 0; + init_waitqueue_head(&cpu_hotplug.writer_queue); +} + +#ifdef CONFIG_HOTPLUG_CPU + +void get_online_cpus(void) +{ + might_sleep(); + if (cpu_hotplug.active_writer == current) return; - } - mutex_lock(&cpu_bitmask_lock); - recursive = tsk; + mutex_lock(&cpu_hotplug.lock); + cpu_hotplug.refcount++; + mutex_unlock(&cpu_hotplug.lock); + } -EXPORT_SYMBOL_GPL(lock_cpu_hotplug); +EXPORT_SYMBOL_GPL(get_online_cpus); -void unlock_cpu_hotplug(void) +void put_online_cpus(void) { - WARN_ON(recursive != current); - if (recursive_depth) { - recursive_depth--; + if (cpu_hotplug.active_writer == current) return; - } - recursive = NULL; - mutex_unlock(&cpu_bitmask_lock); + 
mutex_lock(&cpu_hotplug.lock); + cpu_hotplug.refcount--; + + if (unlikely(writer_exists()) && !cpu_hotplug.refcount) + wake_up(&cpu_hotplug.writer_queue); + + mutex_unlock(&cpu_hotplug.lock); + } -EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); +EXPORT_SYMBOL_GPL(put_online_cpus); #endif /* CONFIG_HOTPLUG_CPU */ +/* + * The following two API's must be used when attempting + * to serialize the updates to cpu_online_map, cpu_present_map. + */ +void cpu_maps_update_begin(void) +{ + mutex_lock(&cpu_add_remove_lock); +} + +void cpu_maps_update_done(void) +{ + mutex_unlock(&cpu_add_remove_lock); +} + +/* + * This ensures that the hotplug operation can begin only when the + * refcount goes to zero. + * + * Note that during a cpu-hotplug operation, the new readers, if any, + * will be blocked by the cpu_hotplug.lock + * + * Since cpu_hotplug_begin is always called after invoking + * cpu_maps_update_begin, we can be sure that only one writer is active. + * + * Note that theoretically, there is a possibility of a livelock: + * - Refcount goes to zero, last reader wakes up the sleeping + * writer. + * - Last reader unlocks the cpu_hotplug.lock. + * - A new reader arrives at this moment, bumps up the refcount. + * - The writer acquires the cpu_hotplug.lock finds the refcount + * non zero and goes to sleep again. + * + * However, this is very difficult to achieve in practice since + * get_online_cpus() is not an api which is called all that often. 
+ * + */ +static void cpu_hotplug_begin(void) +{ + DECLARE_WAITQUEUE(wait, current); + + mutex_lock(&cpu_hotplug.lock); + + cpu_hotplug.active_writer = current; + add_wait_queue_exclusive(&cpu_hotplug.writer_queue, &wait); + while (cpu_hotplug.refcount) { + set_current_state(TASK_UNINTERRUPTIBLE); + mutex_unlock(&cpu_hotplug.lock); + schedule(); + mutex_lock(&cpu_hotplug.lock); + } + remove_wait_queue_locked(&cpu_hotplug.writer_queue, &wait); +} + +static void cpu_hotplug_done(void) +{ + cpu_hotplug.active_writer = NULL; + mutex_unlock(&cpu_hotplug.lock); +} /* Need to know about CPUs going up/down? */ int __cpuinit register_cpu_notifier(struct notifier_block *nb) { int ret; - mutex_lock(&cpu_add_remove_lock); + cpu_maps_update_begin(); ret = raw_notifier_chain_register(&cpu_chain, nb); - mutex_unlock(&cpu_add_remove_lock); + cpu_maps_update_done(); return ret; } @@ -81,9 +151,9 @@ EXPORT_SYMBOL(register_cpu_notifier); void unregister_cpu_notifier(struct notifier_block *nb) { - mutex_lock(&cpu_add_remove_lock); + cpu_maps_update_begin(); raw_notifier_chain_unregister(&cpu_chain, nb); - mutex_unlock(&cpu_add_remove_lock); + cpu_maps_update_done(); } EXPORT_SYMBOL(unregister_cpu_notifier); @@ -147,7 +217,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) if (!cpu_online(cpu)) return -EINVAL; - raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu); + cpu_hotplug_begin(); err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); if (err == NOTIFY_BAD) { @@ -166,9 +236,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) cpu_clear(cpu, tmp); set_cpus_allowed(current, tmp); - mutex_lock(&cpu_bitmask_lock); p = __stop_machine_run(take_cpu_down, &tcd_param, cpu); - mutex_unlock(&cpu_bitmask_lock); if (IS_ERR(p) || cpu_online(cpu)) { /* CPU didn't die: tell everyone. Can't complain. 
*/ @@ -202,7 +270,7 @@ out_thread: out_allowed: set_cpus_allowed(current, old_allowed); out_release: - raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu); + cpu_hotplug_done(); return err; } @@ -210,13 +278,13 @@ int cpu_down(unsigned int cpu) { int err = 0; - mutex_lock(&cpu_add_remove_lock); + cpu_maps_update_begin(); if (cpu_hotplug_disabled) err = -EBUSY; else err = _cpu_down(cpu, 0); - mutex_unlock(&cpu_add_remove_lock); + cpu_maps_update_done(); return err; } #endif /*CONFIG_HOTPLUG_CPU*/ @@ -231,7 +299,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) if (cpu_online(cpu) || !cpu_present(cpu)) return -EINVAL; - raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu); + cpu_hotplug_begin(); ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); if (ret == NOTIFY_BAD) { @@ -243,9 +311,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) } /* Arch-specific enabling code. */ - mutex_lock(&cpu_bitmask_lock); ret = __cpu_up(cpu); - mutex_unlock(&cpu_bitmask_lock); if (ret != 0) goto out_notify; BUG_ON(!cpu_online(cpu)); @@ -257,7 +323,7 @@ out_notify: if (ret != 0) __raw_notifier_call_chain(&cpu_chain, CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL); - raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu); + cpu_hotplug_done(); return ret; } @@ -275,13 +341,13 @@ int __cpuinit cpu_up(unsigned int cpu) return -EINVAL; } - mutex_lock(&cpu_add_remove_lock); + cpu_maps_update_begin(); if (cpu_hotplug_disabled) err = -EBUSY; else err = _cpu_up(cpu, 0); - mutex_unlock(&cpu_add_remove_lock); + cpu_maps_update_done(); return err; } @@ -292,7 +358,7 @@ int disable_nonboot_cpus(void) { int cpu, first_cpu, error = 0; - mutex_lock(&cpu_add_remove_lock); + cpu_maps_update_begin(); first_cpu = first_cpu(cpu_online_map); /* We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time @@ -319,7 +385,7 @@ int 
disable_nonboot_cpus(void) } else { printk(KERN_ERR "Non-boot CPUs are not disabled\n"); } - mutex_unlock(&cpu_add_remove_lock); + cpu_maps_update_done(); return error; } @@ -328,7 +394,7 @@ void enable_nonboot_cpus(void) int cpu, error; /* Allow everyone to use the CPU hotplug again */ - mutex_lock(&cpu_add_remove_lock); + cpu_maps_update_begin(); cpu_hotplug_disabled = 0; if (cpus_empty(frozen_cpus)) goto out; @@ -344,6 +410,6 @@ void enable_nonboot_cpus(void) } cpus_clear(frozen_cpus); out: - mutex_unlock(&cpu_add_remove_lock); + cpu_maps_update_done(); } #endif /* CONFIG_PM_SLEEP_SMP */ diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 50f5dc46368..cfaf6419d81 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -537,10 +537,10 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b) * * Call with cgroup_mutex held. May take callback_mutex during * call due to the kfifo_alloc() and kmalloc() calls. May nest - * a call to the lock_cpu_hotplug()/unlock_cpu_hotplug() pair. + * a call to the get_online_cpus()/put_online_cpus() pair. * Must not be called holding callback_mutex, because we must not - * call lock_cpu_hotplug() while holding callback_mutex. Elsewhere - * the kernel nests callback_mutex inside lock_cpu_hotplug() calls. + * call get_online_cpus() while holding callback_mutex. Elsewhere + * the kernel nests callback_mutex inside get_online_cpus() calls. * So the reverse nesting would risk an ABBA deadlock. * * The three key local variables below are: @@ -691,9 +691,9 @@ restart: rebuild: /* Have scheduler rebuild sched domains */ - lock_cpu_hotplug(); + get_online_cpus(); partition_sched_domains(ndoms, doms); - unlock_cpu_hotplug(); + put_online_cpus(); done: if (q && !IS_ERR(q)) @@ -1617,10 +1617,10 @@ static struct cgroup_subsys_state *cpuset_create( * * If the cpuset being removed has its flag 'sched_load_balance' * enabled, then simulate turning sched_load_balance off, which - * will call rebuild_sched_domains(). 
The lock_cpu_hotplug() + * will call rebuild_sched_domains(). The get_online_cpus() * call in rebuild_sched_domains() must not be made while holding * callback_mutex. Elsewhere the kernel nests callback_mutex inside - * lock_cpu_hotplug() calls. So the reverse nesting would risk an + * get_online_cpus() calls. So the reverse nesting would risk an * ABBA deadlock. */ diff --git a/kernel/fork.c b/kernel/fork.c index 8dd8ff28100..39d22b3357d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1045,6 +1045,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); +#ifdef CONFIG_PREEMPT_RCU + p->rcu_read_lock_nesting = 0; + p->rcu_flipctr_idx = 0; +#endif /* #ifdef CONFIG_PREEMPT_RCU */ p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); @@ -1059,6 +1063,11 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; +#ifdef CONFIG_DETECT_SOFTLOCKUP + p->last_switch_count = 0; + p->last_switch_timestamp = 0; +#endif + #ifdef CONFIG_TASK_XACCT p->rchar = 0; /* I/O counter: bytes read */ p->wchar = 0; /* I/O counter: bytes written */ @@ -1196,6 +1205,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(p, TIF_SYSCALL_EMU); #endif + clear_all_latency_tracing(p); /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ @@ -1237,6 +1247,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, * parent's CPU). This avoids alot of nasty races. 
*/ p->cpus_allowed = current->cpus_allowed; + p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed; if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index e65dd0b47cd..bd5d6b5060b 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -325,6 +325,22 @@ unsigned long ktime_divns(const ktime_t kt, s64 div) } #endif /* BITS_PER_LONG >= 64 */ +/* + * Check, whether the timer is on the callback pending list + */ +static inline int hrtimer_cb_pending(const struct hrtimer *timer) +{ + return timer->state & HRTIMER_STATE_PENDING; +} + +/* + * Remove a timer from the callback pending list + */ +static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) +{ + list_del_init(&timer->cb_entry); +} + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -494,29 +510,12 @@ void hres_timers_resume(void) } /* - * Check, whether the timer is on the callback pending list - */ -static inline int hrtimer_cb_pending(const struct hrtimer *timer) -{ - return timer->state & HRTIMER_STATE_PENDING; -} - -/* - * Remove a timer from the callback pending list - */ -static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) -{ - list_del_init(&timer->cb_entry); -} - -/* * Initialize the high resolution related parts of cpu_base */ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { base->expires_next.tv64 = KTIME_MAX; base->hres_active = 0; - INIT_LIST_HEAD(&base->cb_pending); } /* @@ -524,7 +523,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) */ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { - INIT_LIST_HEAD(&timer->cb_entry); } /* @@ -618,10 +616,13 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, { return 0; } -static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; } -static inline void hrtimer_remove_cb_pending(struct hrtimer 
*timer) { } static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } +static inline int hrtimer_reprogram(struct hrtimer *timer, + struct hrtimer_clock_base *base) +{ + return 0; +} #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -1001,6 +1002,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, clock_id = CLOCK_MONOTONIC; timer->base = &cpu_base->clock_base[clock_id]; + INIT_LIST_HEAD(&timer->cb_entry); hrtimer_init_timer_hres(timer); #ifdef CONFIG_TIMER_STATS @@ -1030,6 +1032,85 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) } EXPORT_SYMBOL_GPL(hrtimer_get_res); +static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) +{ + spin_lock_irq(&cpu_base->lock); + + while (!list_empty(&cpu_base->cb_pending)) { + enum hrtimer_restart (*fn)(struct hrtimer *); + struct hrtimer *timer; + int restart; + + timer = list_entry(cpu_base->cb_pending.next, + struct hrtimer, cb_entry); + + timer_stats_account_hrtimer(timer); + + fn = timer->function; + __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); + spin_unlock_irq(&cpu_base->lock); + + restart = fn(timer); + + spin_lock_irq(&cpu_base->lock); + + timer->state &= ~HRTIMER_STATE_CALLBACK; + if (restart == HRTIMER_RESTART) { + BUG_ON(hrtimer_active(timer)); + /* + * Enqueue the timer, allow reprogramming of the event + * device + */ + enqueue_hrtimer(timer, timer->base, 1); + } else if (hrtimer_active(timer)) { + /* + * If the timer was rearmed on another CPU, reprogram + * the event device. 
+ */ + if (timer->base->first == &timer->node) + hrtimer_reprogram(timer, timer->base); + } + } + spin_unlock_irq(&cpu_base->lock); +} + +static void __run_hrtimer(struct hrtimer *timer) +{ + struct hrtimer_clock_base *base = timer->base; + struct hrtimer_cpu_base *cpu_base = base->cpu_base; + enum hrtimer_restart (*fn)(struct hrtimer *); + int restart; + + __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); + timer_stats_account_hrtimer(timer); + + fn = timer->function; + if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) { + /* + * Used for scheduler timers, avoid lock inversion with + * rq->lock and tasklist_lock. + * + * These timers are required to deal with enqueue expiry + * themselves and are not allowed to migrate. + */ + spin_unlock(&cpu_base->lock); + restart = fn(timer); + spin_lock(&cpu_base->lock); + } else + restart = fn(timer); + + /* + * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid + * reprogramming of the event hardware. This happens at the end of this + * function anyway. + */ + if (restart != HRTIMER_NORESTART) { + BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); + enqueue_hrtimer(timer, base, 0); + } + timer->state &= ~HRTIMER_STATE_CALLBACK; +} + #ifdef CONFIG_HIGH_RES_TIMERS /* @@ -1087,21 +1168,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) continue; } - __remove_hrtimer(timer, base, - HRTIMER_STATE_CALLBACK, 0); - timer_stats_account_hrtimer(timer); - - /* - * Note: We clear the CALLBACK bit after - * enqueue_hrtimer to avoid reprogramming of - * the event hardware. This happens at the end - * of this function anyway. 
- */ - if (timer->function(timer) != HRTIMER_NORESTART) { - BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); - enqueue_hrtimer(timer, base, 0); - } - timer->state &= ~HRTIMER_STATE_CALLBACK; + __run_hrtimer(timer); } spin_unlock(&cpu_base->lock); base++; @@ -1122,52 +1189,41 @@ void hrtimer_interrupt(struct clock_event_device *dev) static void run_hrtimer_softirq(struct softirq_action *h) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - - spin_lock_irq(&cpu_base->lock); - - while (!list_empty(&cpu_base->cb_pending)) { - enum hrtimer_restart (*fn)(struct hrtimer *); - struct hrtimer *timer; - int restart; - - timer = list_entry(cpu_base->cb_pending.next, - struct hrtimer, cb_entry); + run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); +} - timer_stats_account_hrtimer(timer); +#endif /* CONFIG_HIGH_RES_TIMERS */ - fn = timer->function; - __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); - spin_unlock_irq(&cpu_base->lock); +/* + * Called from timer softirq every jiffy, expire hrtimers: + * + * For HRT its the fall back code to run the softirq in the timer + * softirq context in case the hrtimer initialization failed or has + * not been done yet. + */ +void hrtimer_run_pending(void) +{ + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - restart = fn(timer); + if (hrtimer_hres_active()) + return; - spin_lock_irq(&cpu_base->lock); + /* + * This _is_ ugly: We have to check in the softirq context, + * whether we can switch to highres and / or nohz mode. The + * clocksource switch happens in the timer interrupt with + * xtime_lock held. Notification from there only sets the + * check bit in the tick_oneshot code, otherwise we might + * deadlock vs. xtime_lock. 
+ */ + if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) + hrtimer_switch_to_hres(); - timer->state &= ~HRTIMER_STATE_CALLBACK; - if (restart == HRTIMER_RESTART) { - BUG_ON(hrtimer_active(timer)); - /* - * Enqueue the timer, allow reprogramming of the event - * device - */ - enqueue_hrtimer(timer, timer->base, 1); - } else if (hrtimer_active(timer)) { - /* - * If the timer was rearmed on another CPU, reprogram - * the event device. - */ - if (timer->base->first == &timer->node) - hrtimer_reprogram(timer, timer->base); - } - } - spin_unlock_irq(&cpu_base->lock); + run_hrtimer_pending(cpu_base); } -#endif /* CONFIG_HIGH_RES_TIMERS */ - /* - * Expire the per base hrtimer-queue: + * Called from hardirq context every jiffy */ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base, int index) @@ -1181,46 +1237,27 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base, if (base->get_softirq_time) base->softirq_time = base->get_softirq_time(); - spin_lock_irq(&cpu_base->lock); + spin_lock(&cpu_base->lock); while ((node = base->first)) { struct hrtimer *timer; - enum hrtimer_restart (*fn)(struct hrtimer *); - int restart; timer = rb_entry(node, struct hrtimer, node); if (base->softirq_time.tv64 <= timer->expires.tv64) break; -#ifdef CONFIG_HIGH_RES_TIMERS - WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ); -#endif - timer_stats_account_hrtimer(timer); - - fn = timer->function; - __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); - spin_unlock_irq(&cpu_base->lock); - - restart = fn(timer); - - spin_lock_irq(&cpu_base->lock); - - timer->state &= ~HRTIMER_STATE_CALLBACK; - if (restart != HRTIMER_NORESTART) { - BUG_ON(hrtimer_active(timer)); - enqueue_hrtimer(timer, base, 0); + if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { + __remove_hrtimer(timer, base, HRTIMER_STATE_PENDING, 0); + list_add_tail(&timer->cb_entry, + &base->cpu_base->cb_pending); + continue; } + + __run_hrtimer(timer); } - 
spin_unlock_irq(&cpu_base->lock); + spin_unlock(&cpu_base->lock); } -/* - * Called from timer softirq every jiffy |