From 19f5f7364a1cc770b14692f609bb9b802fc334d5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 27 Jul 2011 17:29:28 +0200 Subject: nohz: Separate idle sleeping time accounting from nohz logic As we plan to be able to stop the tick outside the idle task, we need to prepare for separating nohz logic from idle. As a start, this pulls the idle sleeping time accounting out of the tick stop/restart API to the callers on idle entry/exit. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 77 ++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 35 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index da70c6db496..81409bba242 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -271,10 +271,10 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); -static void tick_nohz_stop_sched_tick(struct tick_sched *ts) +static void tick_nohz_stop_sched_tick(struct tick_sched *ts, ktime_t now) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; - ktime_t last_update, expires, now; + ktime_t last_update, expires; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; int cpu; @@ -282,8 +282,6 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) cpu = smp_processor_id(); ts = &per_cpu(tick_cpu_sched, cpu); - now = tick_nohz_start_idle(cpu, ts); - /* * If this cpu is offline and it is the one which updates * jiffies, then give up the assignment and let it be taken by @@ -444,6 +442,14 @@ out: ts->sleep_length = ktime_sub(dev->next_event, now); } +static void __tick_nohz_idle_enter(struct tick_sched *ts) +{ + ktime_t now; + + now = tick_nohz_start_idle(smp_processor_id(), ts); + tick_nohz_stop_sched_tick(ts, now); +} + /** * tick_nohz_idle_enter - stop the idle tick from the idle task * @@ -479,7 +485,7 @@ void tick_nohz_idle_enter(void) * update of the idle time accounting in tick_nohz_start_idle(). */ ts->inidle = 1; - tick_nohz_stop_sched_tick(ts); + __tick_nohz_idle_enter(ts); local_irq_enable(); } @@ -499,7 +505,7 @@ void tick_nohz_irq_exit(void) if (!ts->inidle) return; - tick_nohz_stop_sched_tick(ts); + __tick_nohz_idle_enter(ts); } /** @@ -540,39 +546,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) } } -/** - * tick_nohz_idle_exit - restart the idle tick from the idle task - * - * Restart the idle tick when the CPU is woken up from idle - * This also exit the RCU extended quiescent state. The CPU - * can use RCU again after this function is called. - */ -void tick_nohz_idle_exit(void) +static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) { - int cpu = smp_processor_id(); - struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); #ifndef CONFIG_VIRT_CPU_ACCOUNTING unsigned long ticks; #endif - ktime_t now; - - local_irq_disable(); - - WARN_ON_ONCE(!ts->inidle); - - ts->inidle = 0; - - if (ts->idle_active || ts->tick_stopped) - now = ktime_get(); - - if (ts->idle_active) - tick_nohz_stop_idle(cpu, now); - - if (!ts->tick_stopped) { - local_irq_enable(); - return; - } - /* Update jiffies first */ select_nohz_load_balancer(0); tick_do_update_jiffies64(now); @@ -600,6 +578,35 @@ void tick_nohz_idle_exit(void) ts->idle_exittime = now; tick_nohz_restart(ts, now); +} + +/** + * tick_nohz_idle_exit - restart the idle tick from the idle task + * + * Restart the idle tick when the CPU is woken up from idle + * This also exit the RCU extended quiescent state. The CPU + * can use RCU again after this function is called. + */ +void tick_nohz_idle_exit(void) +{ + int cpu = smp_processor_id(); + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); + ktime_t now; + + local_irq_disable(); + + WARN_ON_ONCE(!ts->inidle); + + ts->inidle = 0; + + if (ts->idle_active || ts->tick_stopped) + now = ktime_get(); + + if (ts->idle_active) + tick_nohz_stop_idle(cpu, now); + + if (ts->tick_stopped) + tick_nohz_restart_sched_tick(ts, now); local_irq_enable(); } -- cgit v1.2.3-70-g09d2 From 2ac0d98fd624ae50f5e6ae9c800977a9dbbfcde6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 28 Jul 2011 04:00:47 +0200 Subject: nohz: Make nohz API agnostic against idle ticks cputime accounting When the timer tick fires, it accounts the new jiffy as either part of system, user or idle time. This is how we record the cputime statistics. But when the tick is stopped from the idle task, we still need to record the number of jiffies spent tickless until we restart the tick and fall back to traditional tick-based cputime accounting. To do this, we take a snapshot of jiffies when the tick is stopped and compute the difference against the new value of jiffies when the tick is restarted. Then we account this whole difference to the idle cputime. However we are preparing to be able to stop the tick from other places than idle. So this idle time accounting needs to be performed from the callers of nohz APIs, not from the nohz APIs themselves because we now want them to be agnostic against places that stop/restart tick. Therefore, we pull the tickless idle time accounting out of generic nohz helpers up to idle entry/exit callers. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 81409bba242..911834b33b8 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -402,7 +402,6 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts, ktime_t now) ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; - ts->idle_jiffies = last_jiffies; } ts->idle_sleeps++; @@ -445,9 +444,13 @@ out: static void __tick_nohz_idle_enter(struct tick_sched *ts) { ktime_t now; + int was_stopped = ts->tick_stopped; now = tick_nohz_start_idle(smp_processor_id(), ts); tick_nohz_stop_sched_tick(ts, now); + + if (!was_stopped && ts->tick_stopped) + ts->idle_jiffies = ts->last_jiffies; } /** @@ -548,15 +551,25 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) { -#ifndef CONFIG_VIRT_CPU_ACCOUNTING - unsigned long ticks; -#endif /* Update jiffies first */ select_nohz_load_balancer(0); tick_do_update_jiffies64(now); update_cpu_load_nohz(); + touch_softlockup_watchdog(); + /* + * Cancel the scheduled timer and restore the tick + */ + ts->tick_stopped = 0; + ts->idle_exittime = now; + + tick_nohz_restart(ts, now); +} + +static void tick_nohz_account_idle_ticks(struct tick_sched *ts) +{ #ifndef CONFIG_VIRT_CPU_ACCOUNTING + unsigned long ticks; /* * We stopped the tick in idle. Update process times would miss the * time we slept as update_process_times does only a 1 tick @@ -569,15 +582,6 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) if (ticks && ticks < LONG_MAX) account_idle_ticks(ticks); #endif - - touch_softlockup_watchdog(); - /* - * Cancel the scheduled timer and restore the tick - */ - ts->tick_stopped = 0; - ts->idle_exittime = now; - - tick_nohz_restart(ts, now); } /** @@ -605,8 +609,10 @@ void tick_nohz_idle_exit(void) if (ts->idle_active) tick_nohz_stop_idle(cpu, now); - if (ts->tick_stopped) + if (ts->tick_stopped) { tick_nohz_restart_sched_tick(ts, now); + tick_nohz_account_idle_ticks(ts); + } local_irq_enable(); } @@ -811,7 +817,8 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) */ if (ts->tick_stopped) { touch_softlockup_watchdog(); - ts->idle_jiffies++; + if (idle_cpu(cpu)) + ts->idle_jiffies++; } update_process_times(user_mode(regs)); profile_tick(CPU_PROFILING); -- cgit v1.2.3-70-g09d2 From f5d411c91ede162240f34e05a233f2759412988e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 31 Jul 2011 17:44:12 +0200 Subject: nohz: Rename ts->idle_tick to ts->last_tick Now that idle and nohz logics are going to be independant each others, ts->idle_tick becomes too much a biased name to describe the field that saves the last scheduled tick on top of which we re-calculate the next tick to schedule when the timer is restarted. We want to reuse this even to stop the tick outside idle cases. So let's rename it to some more generic name: ts->last_tick. This changes a bit the timer list stat export so we need to increase its version. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner --- include/linux/tick.h | 8 ++++---- kernel/time/tick-sched.c | 4 ++-- kernel/time/timer_list.c | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/tick.h b/include/linux/tick.h index ab8be90b5cc..f37fceb69b7 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -31,10 +31,10 @@ enum tick_nohz_mode { * struct tick_sched - sched tick emulation and no idle tick control/stats * @sched_timer: hrtimer to schedule the periodic tick in high * resolution mode - * @idle_tick: Store the last idle tick expiry time when the tick - * timer is modified for idle sleeps. This is necessary + * @last_tick: Store the last tick expiry time when the tick + * timer is modified for nohz sleeps. This is necessary * to resume the tick timer operation in the timeline - * when the CPU returns from idle + * when the CPU returns from nohz sleep. * @tick_stopped: Indicator that the idle tick has been stopped * @idle_jiffies: jiffies at the entry to idle for idle time accounting * @idle_calls: Total number of idle calls @@ -51,7 +51,7 @@ struct tick_sched { struct hrtimer sched_timer; unsigned long check_clocks; enum tick_nohz_mode nohz_mode; - ktime_t idle_tick; + ktime_t last_tick; int inidle; int tick_stopped; unsigned long idle_jiffies; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 911834b33b8..73cc4901336 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -400,7 +400,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts, ktime_t now) if (!ts->tick_stopped) { select_nohz_load_balancer(1); - ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); + ts->last_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; } @@ -526,7 +526,7 @@ ktime_t tick_nohz_get_sleep_length(void) static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) { hrtimer_cancel(&ts->sched_timer); - hrtimer_set_expires(&ts->sched_timer, ts->idle_tick); + hrtimer_set_expires(&ts->sched_timer, ts->last_tick); while (1) { /* Forward the time to expire in the future */ diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 3258455549f..af5a7e9f164 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -167,7 +167,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) { struct tick_sched *ts = tick_get_tick_sched(cpu); P(nohz_mode); - P_ns(idle_tick); + P_ns(last_tick); P(tick_stopped); P(idle_jiffies); P(idle_calls); @@ -259,7 +259,7 @@ static int timer_list_show(struct seq_file *m, void *v) u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Timer List Version: v0.6\n"); + SEQ_printf(m, "Timer List Version: v0.7\n"); SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); -- cgit v1.2.3-70-g09d2 From 5b39939a40801f0c17e31adaf643d6e974227856 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 1 Aug 2011 00:06:10 +0200 Subject: nohz: Move ts->idle_calls incrementation into strict idle logic Since we want to prepare for making the nohz API to work further the idle case, we need to pull ts->idle_calls incrementation up to the callers in idle. To perform this, we split tick_nohz_stop_sched_tick() in two parts: a first one that checks if we can really stop the tick for idle, and another that actually stops it. Then from the callers in idle, we check if we can stop the tick and only then we increment idle_calls and finally relay to the nohz API that won't care about these details anymore. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 86 ++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 39 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 73cc4901336..430e1b6901c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -271,47 +271,15 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); -static void tick_nohz_stop_sched_tick(struct tick_sched *ts, ktime_t now) +static void tick_nohz_stop_sched_tick(struct tick_sched *ts, + ktime_t now, int cpu) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; ktime_t last_update, expires; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; - int cpu; - - cpu = smp_processor_id(); - ts = &per_cpu(tick_cpu_sched, cpu); - - /* - * If this cpu is offline and it is the one which updates - * jiffies, then give up the assignment and let it be taken by - * the cpu which runs the tick timer next. If we don't drop - * this here the jiffies might be stale and do_timer() never - * invoked. - */ - if (unlikely(!cpu_online(cpu))) { - if (cpu == tick_do_timer_cpu) - tick_do_timer_cpu = TICK_DO_TIMER_NONE; - } - - if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) - return; - - if (need_resched()) - return; - if (unlikely(local_softirq_pending() && cpu_online(cpu))) { - static int ratelimit; - - if (ratelimit < 10) { - printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", - (unsigned int) local_softirq_pending()); - ratelimit++; - } - return; - } - ts->idle_calls++; /* Read jiffies and the time when jiffies were updated last */ do { seq = read_seqbegin(&xtime_lock); @@ -441,16 +409,56 @@ out: ts->sleep_length = ktime_sub(dev->next_event, now); } +static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) +{ + /* + * If this cpu is offline and it is the one which updates + * jiffies, then give up the assignment and let it be taken by + * the cpu which runs the tick timer next. If we don't drop + * this here the jiffies might be stale and do_timer() never + * invoked. + */ + if (unlikely(!cpu_online(cpu))) { + if (cpu == tick_do_timer_cpu) + tick_do_timer_cpu = TICK_DO_TIMER_NONE; + } + + if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) + return false; + + if (need_resched()) + return false; + + if (unlikely(local_softirq_pending() && cpu_online(cpu))) { + static int ratelimit; + + if (ratelimit < 10) { + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", + (unsigned int) local_softirq_pending()); + ratelimit++; + } + return false; + } + + return true; +} + static void __tick_nohz_idle_enter(struct tick_sched *ts) { ktime_t now; - int was_stopped = ts->tick_stopped; + int cpu = smp_processor_id(); - now = tick_nohz_start_idle(smp_processor_id(), ts); - tick_nohz_stop_sched_tick(ts, now); + now = tick_nohz_start_idle(cpu, ts); - if (!was_stopped && ts->tick_stopped) - ts->idle_jiffies = ts->last_jiffies; + if (can_stop_idle_tick(cpu, ts)) { + int was_stopped = ts->tick_stopped; + + ts->idle_calls++; + tick_nohz_stop_sched_tick(ts, now, cpu); + + if (!was_stopped && ts->tick_stopped) + ts->idle_jiffies = ts->last_jiffies; + } } /** -- cgit v1.2.3-70-g09d2 From 84bf1bccc60cc64376125ea2eae05e4ba12f795b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 1 Aug 2011 01:25:38 +0200 Subject: nohz: Move next idle expiry time record into idle logic area The next idle expiry time record and idle sleeps tracking are statistics that only concern idle. Since we want the nohz APIs to become usable further idle context, let's pull up the handling of these statistics to the callers in idle. Signed-off-by: Frederic Weisbecker Cc: Alessio Igor Bogani Cc: Andrew Morton Cc: Avi Kivity Cc: Chris Metcalf Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Max Krasnyansky Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Sven-Thorsten Dietrich Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 430e1b6901c..60c9c60e910 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -271,11 +271,11 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) } EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); -static void tick_nohz_stop_sched_tick(struct tick_sched *ts, - ktime_t now, int cpu) +static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, + ktime_t now, int cpu) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; - ktime_t last_update, expires; + ktime_t last_update, expires, ret = { .tv64 = 0 }; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; @@ -358,6 +358,8 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts, if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) goto out; + ret = expires; + /* * nohz_stop_sched_tick can be called several times before * the nohz_restart_sched_tick is called. This happens when @@ -372,11 +374,6 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts, ts->tick_stopped = 1; } - ts->idle_sleeps++; - - /* Mark expires */ - ts->idle_expires = expires; - /* * If the expiration time == KTIME_MAX, then * in this case we simply stop the tick timer. @@ -407,6 +404,8 @@ out: ts->next_jiffies = next_jiffies; ts->last_jiffies = last_jiffies; ts->sleep_length = ktime_sub(dev->next_event, now); + + return ret; } static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) @@ -445,7 +444,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) static void __tick_nohz_idle_enter(struct tick_sched *ts) { - ktime_t now; + ktime_t now, expires; int cpu = smp_processor_id(); now = tick_nohz_start_idle(cpu, ts); @@ -454,7 +453,12 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts) int was_stopped = ts->tick_stopped; ts->idle_calls++; - tick_nohz_stop_sched_tick(ts, now, cpu); + + expires = tick_nohz_stop_sched_tick(ts, now, cpu); + if (expires.tv64 > 0LL) { + ts->idle_sleeps++; + ts->idle_expires = expires; + } if (!was_stopped && ts->tick_stopped) ts->idle_jiffies = ts->last_jiffies; -- cgit v1.2.3-70-g09d2 From 9d2ad24306f2fafc3612e5a216aab31f9e56e879 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 24 Jun 2012 10:15:02 -0700 Subject: rcu: Make RCU_FAST_NO_HZ respect nohz= boot parameter If the nohz= boot parameter disables nohz, then RCU_FAST_NO_HZ needs to also disable itself. This commit therefore checks for tick_nohz_enabled being zero, disabling rcu_prepare_for_idle() if so. This commit assumes that tick_nohz_enabled can change at runtime: If this is not the case, then a simpler approach suffices. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 15 +++++++++++++++ kernel/time/tick-sched.c | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 19b61ac1079..e978845c680 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -97,6 +97,7 @@ struct rcu_dynticks { /* # times non-lazy CBs posted to CPU. */ unsigned long nonlazy_posted_snap; /* idle-period nonlazy_posted snapshot. */ + int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ }; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c28d25522a8..3508000e077 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1971,6 +1971,8 @@ static void rcu_idle_count_callbacks_posted(void) #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ +extern int tick_nohz_enabled; + /* * Does the specified flavor of RCU have non-lazy callbacks pending on * the specified CPU? Both RCU flavor and CPU are specified by the @@ -2112,6 +2114,7 @@ static void rcu_cleanup_after_idle(int cpu) del_timer(&rdtp->idle_gp_timer); trace_rcu_prep_idle("Cleanup after idle"); + rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled); } /* @@ -2137,6 +2140,18 @@ static void rcu_prepare_for_idle(int cpu) { struct timer_list *tp; struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + int tne; + + /* Handle nohz enablement switches conservatively. */ + tne = ACCESS_ONCE(tick_nohz_enabled); + if (tne != rdtp->tick_nohz_enabled_snap) { + if (rcu_cpu_has_callbacks(cpu)) + invoke_rcu_core(); /* force nohz to see update. */ + rdtp->tick_nohz_enabled_snap = tne; + return; + } + if (!tne) + return; /* * If this is an idle re-entry, for example, due to use of diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 86999783392..66ff07f6184 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -105,7 +105,7 @@ static ktime_t tick_init_jiffy_update(void) /* * NO HZ enabled ? */ -static int tick_nohz_enabled __read_mostly = 1; +int tick_nohz_enabled __read_mostly = 1; /* * Enable / Disable tickless mode -- cgit v1.2.3-70-g09d2 From 5167e8d5417bf5c322a703d2927daec727ea40dd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jun 2012 15:52:09 +0200 Subject: sched/nohz: Rewrite and fix load-avg computation -- again Thanks to Charles Wang for spotting the defects in the current code: - If we go idle during the sample window -- after sampling, we get a negative bias because we can negate our own sample. - If we wake up during the sample window we get a positive bias because we push the sample to a known active period. So rewrite the entire nohz load-avg muck once again, now adding copious documentation to the code. Reported-and-tested-by: Doug Smythies Reported-and-tested-by: Charles Wang Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: stable@kernel.org Link: http://lkml.kernel.org/r/1340373782.18025.74.camel@twins [ minor edits ] Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++ kernel/sched/core.c | 275 ++++++++++++++++++++++++++++++++++------------- kernel/sched/idle_task.c | 1 - kernel/sched/sched.h | 2 - kernel/time/tick-sched.c | 2 + 5 files changed, 213 insertions(+), 75 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4059c0f33f0..20cb7497c59 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1909,6 +1909,14 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif +#ifdef CONFIG_NO_HZ +void calc_load_enter_idle(void); +void calc_load_exit_idle(void); +#else +static inline void calc_load_enter_idle(void) { } +static inline void calc_load_exit_idle(void) { } +#endif /* CONFIG_NO_HZ */ + #ifndef CONFIG_CPUMASK_OFFSTACK static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d5594a4268d..bb840405335 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2161,11 +2161,73 @@ unsigned long this_cpu_load(void) } +/* + * Global load-average calculations + * + * We take a distributed and async approach to calculating the global load-avg + * in order to minimize overhead. + * + * The global load average is an exponentially decaying average of nr_running + + * nr_uninterruptible. + * + * Once every LOAD_FREQ: + * + * nr_active = 0; + * for_each_possible_cpu(cpu) + * nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible; + * + * avenrun[n] = avenrun[0] * exp_n + nr_active * (1 - exp_n) + * + * Due to a number of reasons the above turns in the mess below: + * + * - for_each_possible_cpu() is prohibitively expensive on machines with + * serious number of cpus, therefore we need to take a distributed approach + * to calculating nr_active. + * + * \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0 + * = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) } + * + * So assuming nr_active := 0 when we start out -- true per definition, we + * can simply take per-cpu deltas and fold those into a global accumulate + * to obtain the same result. See calc_load_fold_active(). + * + * Furthermore, in order to avoid synchronizing all per-cpu delta folding + * across the machine, we assume 10 ticks is sufficient time for every + * cpu to have completed this task. + * + * This places an upper-bound on the IRQ-off latency of the machine. Then + * again, being late doesn't loose the delta, just wrecks the sample. + * + * - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because + * this would add another cross-cpu cacheline miss and atomic operation + * to the wakeup path. Instead we increment on whatever cpu the task ran + * when it went into uninterruptible state and decrement on whatever cpu + * did the wakeup. This means that only the sum of nr_uninterruptible over + * all cpus yields the correct result. + * + * This covers the NO_HZ=n code, for extra head-aches, see the comment below. + */ + /* Variables and functions for calc_load */ static atomic_long_t calc_load_tasks; static unsigned long calc_load_update; unsigned long avenrun[3]; -EXPORT_SYMBOL(avenrun); +EXPORT_SYMBOL(avenrun); /* should be removed */ + +/** + * get_avenrun - get the load average array + * @loads: pointer to dest load array + * @offset: offset to add + * @shift: shift count to shift the result left + * + * These values are estimates at best, so no need for locking. + */ +void get_avenrun(unsigned long *loads, unsigned long offset, int shift) +{ + loads[0] = (avenrun[0] + offset) << shift; + loads[1] = (avenrun[1] + offset) << shift; + loads[2] = (avenrun[2] + offset) << shift; +} static long calc_load_fold_active(struct rq *this_rq) { @@ -2182,6 +2244,9 @@ static long calc_load_fold_active(struct rq *this_rq) return delta; } +/* + * a1 = a0 * e + a * (1 - e) + */ static unsigned long calc_load(unsigned long load, unsigned long exp, unsigned long active) { @@ -2193,30 +2258,118 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) #ifdef CONFIG_NO_HZ /* - * For NO_HZ we delay the active fold to the next LOAD_FREQ update. + * Handle NO_HZ for the global load-average. + * + * Since the above described distributed algorithm to compute the global + * load-average relies on per-cpu sampling from the tick, it is affected by + * NO_HZ. + * + * The basic idea is to fold the nr_active delta into a global idle-delta upon + * entering NO_HZ state such that we can include this as an 'extra' cpu delta + * when we read the global state. + * + * Obviously reality has to ruin such a delightfully simple scheme: + * + * - When we go NO_HZ idle during the window, we can negate our sample + * contribution, causing under-accounting. + * + * We avoid this by keeping two idle-delta counters and flipping them + * when the window starts, thus separating old and new NO_HZ load. + * + * The only trick is the slight shift in index flip for read vs write. + * + * 0s 5s 10s 15s + * +10 +10 +10 +10 + * |-|-----------|-|-----------|-|-----------|-| + * r:0 0 1 1 0 0 1 1 0 + * w:0 1 1 0 0 1 1 0 0 + * + * This ensures we'll fold the old idle contribution in this window while + * accumlating the new one. + * + * - When we wake up from NO_HZ idle during the window, we push up our + * contribution, since we effectively move our sample point to a known + * busy state. + * + * This is solved by pushing the window forward, and thus skipping the + * sample, for this cpu (effectively using the idle-delta for this cpu which + * was in effect at the time the window opened). This also solves the issue + * of having to deal with a cpu having been in NOHZ idle for multiple + * LOAD_FREQ intervals. * * When making the ILB scale, we should try to pull this in as well. */ -static atomic_long_t calc_load_tasks_idle; +static atomic_long_t calc_load_idle[2]; +static int calc_load_idx; -void calc_load_account_idle(struct rq *this_rq) +static inline int calc_load_write_idx(void) { + int idx = calc_load_idx; + + /* + * See calc_global_nohz(), if we observe the new index, we also + * need to observe the new update time. + */ + smp_rmb(); + + /* + * If the folding window started, make sure we start writing in the + * next idle-delta. + */ + if (!time_before(jiffies, calc_load_update)) + idx++; + + return idx & 1; +} + +static inline int calc_load_read_idx(void) +{ + return calc_load_idx & 1; +} + +void calc_load_enter_idle(void) +{ + struct rq *this_rq = this_rq(); long delta; + /* + * We're going into NOHZ mode, if there's any pending delta, fold it + * into the pending idle delta. + */ delta = calc_load_fold_active(this_rq); - if (delta) - atomic_long_add(delta, &calc_load_tasks_idle); + if (delta) { + int idx = calc_load_write_idx(); + atomic_long_add(delta, &calc_load_idle[idx]); + } } -static long calc_load_fold_idle(void) +void calc_load_exit_idle(void) { - long delta = 0; + struct rq *this_rq = this_rq(); + + /* + * If we're still before the sample window, we're done. + */ + if (time_before(jiffies, this_rq->calc_load_update)) + return; /* - * Its got a race, we don't care... + * We woke inside or after the sample window, this means we're already + * accounted through the nohz accounting, so skip the entire deal and + * sync up for the next window. */ - if (atomic_long_read(&calc_load_tasks_idle)) - delta = atomic_long_xchg(&calc_load_tasks_idle, 0); + this_rq->calc_load_update = calc_load_update; + if (time_before(jiffies, this_rq->calc_load_update + 10)) + this_rq->calc_load_update += LOAD_FREQ; +} + +static long calc_load_fold_idle(void) +{ + int idx = calc_load_read_idx(); + long delta = 0; + + if (atomic_long_read(&calc_load_idle[idx])) + delta = atomic_long_xchg(&calc_load_idle[idx], 0); return delta; } @@ -2302,66 +2455,39 @@ static void calc_global_nohz(void) { long delta, active, n; - /* - * If we crossed a calc_load_update boundary, make sure to fold - * any pending idle changes, the respective CPUs might have - * missed the tick driven calc_load_account_active() update - * due to NO_HZ. - */ - delta = calc_load_fold_idle(); - if (delta) - atomic_long_add(delta, &calc_load_tasks); - - /* - * It could be the one fold was all it took, we done! - */ - if (time_before(jiffies, calc_load_update + 10)) - return; - - /* - * Catch-up, fold however many we are behind still - */ - delta = jiffies - calc_load_update - 10; - n = 1 + (delta / LOAD_FREQ); + if (!time_before(jiffies, calc_load_update + 10)) { + /* + * Catch-up, fold however many we are behind still + */ + delta = jiffies - calc_load_update - 10; + n = 1 + (delta / LOAD_FREQ); - active = atomic_long_read(&calc_load_tasks); - active = active > 0 ? active * FIXED_1 : 0; + active = atomic_long_read(&calc_load_tasks); + active = active > 0 ? active * FIXED_1 : 0; - avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); - avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); - avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); + avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); + avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); + avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); - calc_load_update += n * LOAD_FREQ; -} -#else -void calc_load_account_idle(struct rq *this_rq) -{ -} + calc_load_update += n * LOAD_FREQ; + } -static inline long calc_load_fold_idle(void) -{ - return 0; + /* + * Flip the idle index... + * + * Make sure we first write the new time then flip the index, so that + * calc_load_write_idx() will see the new time when it reads the new + * index, this avoids a double flip messing things up. + */ + smp_wmb(); + calc_load_idx++; } +#else /* !CONFIG_NO_HZ */ -static void calc_global_nohz(void) -{ -} -#endif +static inline long calc_load_fold_idle(void) { return 0; } +static inline void calc_global_nohz(void) { } -/** - * get_avenrun - get the load average array - * @loads: pointer to dest load array - * @offset: offset to add - * @shift: shift count to shift the result left - * - * These values are estimates at best, so no need for locking. - */ -void get_avenrun(unsigned long *loads, unsigned long offset, int shift) -{ - loads[0] = (avenrun[0] + offset) << shift; - loads[1] = (avenrun[1] + offset) << shift; - loads[2] = (avenrun[2] + offset) << shift; -} +#endif /* CONFIG_NO_HZ */ /* * calc_load - update the avenrun load estimates 10 ticks after the @@ -2369,11 +2495,18 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) */ void calc_global_load(unsigned long ticks) { - long active; + long active, delta; if (time_before(jiffies, calc_load_update + 10)) return; + /* + * Fold the 'old' idle-delta to include all NO_HZ cpus. + */ + delta = calc_load_fold_idle(); + if (delta) + atomic_long_add(delta, &calc_load_tasks); + active = atomic_long_read(&calc_load_tasks); active = active > 0 ? active * FIXED_1 : 0; @@ -2384,12 +2517,7 @@ void calc_global_load(unsigned long ticks) calc_load_update += LOAD_FREQ; /* - * Account one period with whatever state we found before - * folding in the nohz state and ageing the entire idle period. - * - * This avoids loosing a sample when we go idle between - * calc_load_account_active() (10 ticks ago) and now and thus - * under-accounting. + * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. */ calc_global_nohz(); } @@ -2406,13 +2534,16 @@ static void calc_load_account_active(struct rq *this_rq) return; delta = calc_load_fold_active(this_rq); - delta += calc_load_fold_idle(); if (delta) atomic_long_add(delta, &calc_load_tasks); this_rq->calc_load_update += LOAD_FREQ; } +/* + * End of global load-average stuff + */ + /* * The exact cpuload at various idx values, calculated at every tick would be * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index b44d604b35d..b6baf370cae 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -25,7 +25,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl static struct task_struct *pick_next_task_idle(struct rq *rq) { schedstat_inc(rq, sched_goidle); - calc_load_account_idle(rq); return rq->idle; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6d52cea7f33..55844f24435 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -942,8 +942,6 @@ static inline u64 sched_avg_period(void) return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2; } -void calc_load_account_idle(struct rq *this_rq); - #ifdef CONFIG_SCHED_HRTICK /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 86999783392..4a08472c3ca 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -406,6 +406,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) */ if (!ts->tick_stopped) { select_nohz_load_balancer(1); + calc_load_enter_idle(); ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; @@ -597,6 +598,7 @@ void tick_nohz_idle_exit(void) account_idle_ticks(ticks); #endif + calc_load_exit_idle(); touch_softlockup_watchdog(); /* * Cancel the scheduled timer and restore the tick -- cgit v1.2.3-70-g09d2 From 4873fa070ae84a4115f0b3c9dfabc224f1bc7c51 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 10 Jul 2012 18:43:20 -0400 Subject: timekeeping: Fix leapsecond triggered load spike issue The timekeeping code misses an update of the hrtimer subsystem after a leap second happened. Due to that timers based on CLOCK_REALTIME are either expiring a second early or late depending on whether a leap second has been inserted or deleted until an operation is initiated which causes that update. Unless the update happens by some other means this discrepancy between the timekeeping and the hrtimer data stays forever and timers are expired either early or late. The reported immediate workaround - $ data -s "`date`" - is causing a call to clock_was_set() which updates the hrtimer data structures. See: http://www.sheeri.com/content/mysql-and-leap-second-high-cpu-and-fix Add the missing clock_was_set() call to update_wall_time() in case of a leap second event. The actual update is deferred to softirq context as the necessary smp function call cannot be invoked from hard interrupt context. Signed-off-by: John Stultz Reported-by: Jan Engelhardt Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Prarit Bhargava Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1341960205-56738-3-git-send-email-johnstul@us.ibm.com Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 6f46a00a1e8..a413e5940e0 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -963,6 +963,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) leap = second_overflow(timekeeper.xtime.tv_sec); timekeeper.xtime.tv_sec += leap; timekeeper.wall_to_monotonic.tv_sec -= leap; + if (leap) + clock_was_set_delayed(); } /* Accumulate raw time */ @@ -1079,6 +1081,8 @@ static void update_wall_time(void) leap = second_overflow(timekeeper.xtime.tv_sec); timekeeper.xtime.tv_sec += leap; timekeeper.wall_to_monotonic.tv_sec -= leap; + if (leap) + clock_was_set_delayed(); } timekeeping_update(false); -- cgit v1.2.3-70-g09d2 From 5b9fe759a678e05be4937ddf03d50e950207c1c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 10 Jul 2012 18:43:21 -0400 Subject: timekeeping: Maintain ktime_t based offsets for hrtimers We need to update the hrtimer clock offsets from the hrtimer interrupt context. To avoid conversions from timespec to ktime_t maintain a ktime_t based representation of those offsets in the timekeeper. This puts the conversion overhead into the code which updates the underlying offsets and provides fast accessible values in the hrtimer interrupt. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Prarit Bhargava Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1341960205-56738-4-git-send-email-johnstul@us.ibm.com Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index a413e5940e0..1c038dac71a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -70,6 +70,12 @@ struct timekeeper { /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ struct timespec raw_time; + /* Offset clock monotonic -> clock realtime */ + ktime_t offs_real; + + /* Offset clock monotonic -> clock boottime */ + ktime_t offs_boot; + /* Seqlock for all timekeeper values */ seqlock_t lock; }; @@ -172,6 +178,14 @@ static inline s64 timekeeping_get_ns_raw(void) return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); } +static void update_rt_offset(void) +{ + struct timespec tmp, *wtm = &timekeeper.wall_to_monotonic; + + set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec); + timekeeper.offs_real = timespec_to_ktime(tmp); +} + /* must hold write on timekeeper.lock */ static void timekeeping_update(bool clearntp) { @@ -179,6 +193,7 @@ static void timekeeping_update(bool clearntp) timekeeper.ntp_error = 0; ntp_clear(); } + update_rt_offset(); update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic, timekeeper.clock, timekeeper.mult); } @@ -604,6 +619,7 @@ void __init timekeeping_init(void) } set_normalized_timespec(&timekeeper.wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); + update_rt_offset(); timekeeper.total_sleep_time.tv_sec = 0; timekeeper.total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -612,6 +628,12 @@ void __init timekeeping_init(void) /* time in seconds when suspend began */ static struct timespec timekeeping_suspend_time; +static void update_sleep_time(struct timespec t) +{ + timekeeper.total_sleep_time = t; + timekeeper.offs_boot = timespec_to_ktime(t); +} + /** * __timekeeping_inject_sleeptime - Internal function to add sleep interval * @delta: pointer to a timespec delta value @@ -630,8 +652,7 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta) timekeeper.xtime = timespec_add(timekeeper.xtime, *delta); timekeeper.wall_to_monotonic = timespec_sub(timekeeper.wall_to_monotonic, *delta); - timekeeper.total_sleep_time = timespec_add( - timekeeper.total_sleep_time, *delta); + update_sleep_time(timespec_add(timekeeper.total_sleep_time, *delta)); } -- cgit v1.2.3-70-g09d2 From f6c06abfb3972ad4914cef57d8348fcb2932bc3b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 10 Jul 2012 18:43:24 -0400 Subject: timekeeping: Provide hrtimer update function To finally fix the infamous leap second issue and other race windows caused by functions which change the offsets between the various time bases (CLOCK_MONOTONIC, CLOCK_REALTIME and CLOCK_BOOTTIME) we need a function which atomically gets the current monotonic time and updates the offsets of CLOCK_REALTIME and CLOCK_BOOTTIME with minimalistic overhead. The previous patch which provides ktime_t offsets allows us to make this function almost as cheap as ktime_get() which is going to be replaced in hrtimer_interrupt(). Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Prarit Bhargava Cc: stable@vger.kernel.org Signed-off-by: John Stultz Link: http://lkml.kernel.org/r/1341960205-56738-7-git-send-email-johnstul@us.ibm.com Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 1 + kernel/time/timekeeping.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'kernel/time') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index c9ec9400ee5..cc07d2777bb 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -327,6 +327,7 @@ extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); extern ktime_t ktime_get_monotonic_offset(void); +extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1c038dac71a..269b1fe5f2a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1271,6 +1271,40 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, } while (read_seqretry(&timekeeper.lock, seq)); } +#ifdef CONFIG_HIGH_RES_TIMERS +/** + * ktime_get_update_offsets - hrtimer helper + * @offs_real: pointer to storage for monotonic -> realtime offset + * @offs_boot: pointer to storage for monotonic -> boottime offset + * + * Returns current monotonic time and updates the offsets + * Called from hrtimer_interupt() or retrigger_next_event() + */ +ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) +{ + ktime_t now; + unsigned int seq; + u64 secs, nsecs; + + do { + seq = read_seqbegin(&timekeeper.lock); + + secs = timekeeper.xtime.tv_sec; + nsecs = timekeeper.xtime.tv_nsec; + nsecs += timekeeping_get_ns(); + /* If arch requires, add in gettimeoffset() */ + nsecs += arch_gettimeoffset(); + + *offs_real = timekeeper.offs_real; + *offs_boot = timekeeper.offs_boot; + } while (read_seqretry(&timekeeper.lock, seq)); + + now = ktime_add_ns(ktime_set(secs, 0), nsecs); + now = ktime_sub(now, *offs_real); + return now; +} +#endif + /** * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format */ -- cgit v1.2.3-70-g09d2 From 6b1859dba01c7d512b72d77e3fd7da8354235189 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:50 -0400 Subject: ntp: Fix STA_INS/DEL clearing bug In commit 6b43ae8a619d17c4935c3320d2ef9e92bdeed05d, I introduced a bug that kept the STA_INS or STA_DEL bit from being cleared from time_status via adjtimex() without forcing STA_PLL first. Usually once the STA_INS is set, it isn't cleared until the leap second is applied, so its unlikely this affected anyone. However during testing I noticed it took some effort to cancel a leap second once STA_INS was set. Signed-off-by: John Stultz Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava CC: stable@vger.kernel.org # 3.4 Link: http://lkml.kernel.org/r/1342156917-25092-2-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/ntp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 70b33abcc7b..b7fbadc5c97 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -409,7 +409,9 @@ int second_overflow(unsigned long secs) time_state = TIME_DEL; break; case TIME_INS: - if (secs % 86400 == 0) { + if (!(time_status & STA_INS)) + time_state = TIME_OK; + else if (secs % 86400 == 0) { leap = -1; time_state = TIME_OOP; time_tai++; @@ -418,7 +420,9 @@ int second_overflow(unsigned long secs) } break; case TIME_DEL: - if ((secs + 1) % 86400 == 0) { + if (!(time_status & STA_DEL)) + time_state = TIME_OK; + else if ((secs + 1) % 86400 == 0) { leap = 1; time_tai--; time_state = TIME_WAIT; -- cgit v1.2.3-70-g09d2 From 42e71e81f5bb5125ca7c194b5ccf1c93511ff8fb Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:51 -0400 Subject: time: Whitespace cleanups per Ingo%27s requests Ingo noted a number of places where there is inconsistent use of whitespace. This patch tries to address the main culprits. Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-3-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 269b1fe5f2a..c2f12aa87fc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -24,32 +24,31 @@ /* Structure holding internal timekeeping values. */ struct timekeeper { /* Current clocksource used for timekeeping. */ - struct clocksource *clock; + struct clocksource *clock; /* NTP adjusted clock multiplier */ - u32 mult; + u32 mult; /* The shift value of the current clocksource. */ - int shift; - + int shift; /* Number of clock cycles in one NTP interval. */ - cycle_t cycle_interval; + cycle_t cycle_interval; /* Number of clock shifted nano seconds in one NTP interval. */ - u64 xtime_interval; + u64 xtime_interval; /* shifted nano seconds left over when rounding cycle_interval */ - s64 xtime_remainder; + s64 xtime_remainder; /* Raw nano seconds accumulated per NTP interval. */ - u32 raw_interval; + u32 raw_interval; /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */ - u64 xtime_nsec; + u64 xtime_nsec; /* Difference between accumulated time and NTP time in ntp * shifted nano seconds. */ - s64 ntp_error; + s64 ntp_error; /* Shift conversion between clock shifted nano seconds and * ntp shifted nano seconds. */ - int ntp_error_shift; + int ntp_error_shift; /* The current time */ - struct timespec xtime; + struct timespec xtime; /* * wall_to_monotonic is what we need to add to xtime (or xtime corrected * for sub jiffie times) to get to monotonic time. Monotonic is pegged @@ -64,20 +63,17 @@ struct timekeeper { * - wall_to_monotonic is no longer the boot time, getboottime must be * used instead. */ - struct timespec wall_to_monotonic; + struct timespec wall_to_monotonic; /* time spent in suspend */ - struct timespec total_sleep_time; + struct timespec total_sleep_time; /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ - struct timespec raw_time; - + struct timespec raw_time; /* Offset clock monotonic -> clock realtime */ - ktime_t offs_real; - + ktime_t offs_real; /* Offset clock monotonic -> clock boottime */ - ktime_t offs_boot; - + ktime_t offs_boot; /* Seqlock for all timekeeper values */ - seqlock_t lock; + seqlock_t lock; }; static struct timekeeper timekeeper; @@ -547,6 +543,7 @@ u64 timekeeping_max_deferment(void) { unsigned long seq; u64 ret; + do { seq = read_seqbegin(&timekeeper.lock); -- cgit v1.2.3-70-g09d2 From fee84c43e6afc42295ae8058cbbef9ea5633926c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:52 -0400 Subject: time: Explicitly use u32 instead of int for shift values Ingo noted that using a u32 instead of int for shift values would be better to make sure the compiler doesn't unnecessarily use complex signed arithmetic. Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-4-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c2f12aa87fc..4fd83df0b14 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -28,7 +28,7 @@ struct timekeeper { /* NTP adjusted clock multiplier */ u32 mult; /* The shift value of the current clocksource. */ - int shift; + u32 shift; /* Number of clock cycles in one NTP interval. */ cycle_t cycle_interval; /* Number of clock shifted nano seconds in one NTP interval. */ @@ -45,7 +45,7 @@ struct timekeeper { s64 ntp_error; /* Shift conversion between clock shifted nano seconds and * ntp shifted nano seconds. */ - int ntp_error_shift; + u32 ntp_error_shift; /* The current time */ struct timespec xtime; @@ -960,7 +960,7 @@ static void timekeeping_adjust(s64 offset) * * Returns the unconsumed cycles. */ -static cycle_t logarithmic_accumulation(cycle_t offset, int shift) +static cycle_t logarithmic_accumulation(cycle_t offset, u32 shift) { u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; u64 raw_nsecs; -- cgit v1.2.3-70-g09d2 From 1e75fa8be9fb61e1af46b5b3b176347a4c958ca1 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:53 -0400 Subject: time: Condense timekeeper.xtime into xtime_sec The timekeeper struct has a xtime_nsec, which keeps the sub-nanosecond remainder. This ends up being somewhat duplicative of the timekeeper.xtime.tv_nsec value, and we have to do extra work to keep them apart, copying the full nsec portion out and back in over and over. This patch simplifies some of the logic by taking the timekeeper xtime value and splitting it into timekeeper.xtime_sec and reuses the timekeeper.xtime_nsec for the sub-second portion (stored in higher res shifted nanoseconds). This simplifies some of the accumulation logic. And will allow for more accurate timekeeping once the vsyscall code is updated to use the shifted nanosecond remainder. Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-5-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 181 ++++++++++++++++++++++++++++------------------ 1 file changed, 110 insertions(+), 71 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4fd83df0b14..b98d9bd73e5 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -38,8 +38,11 @@ struct timekeeper { /* Raw nano seconds accumulated per NTP interval. */ u32 raw_interval; - /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */ + /* Current CLOCK_REALTIME time in seconds */ + u64 xtime_sec; + /* Clock shifted nano seconds */ u64 xtime_nsec; + /* Difference between accumulated time and NTP time in ntp * shifted nano seconds. */ s64 ntp_error; @@ -47,8 +50,6 @@ struct timekeeper { * ntp shifted nano seconds. */ u32 ntp_error_shift; - /* The current time */ - struct timespec xtime; /* * wall_to_monotonic is what we need to add to xtime (or xtime corrected * for sub jiffie times) to get to monotonic time. Monotonic is pegged @@ -84,11 +85,37 @@ static struct timekeeper timekeeper; */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); - /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; +static inline void tk_normalize_xtime(struct timekeeper *tk) +{ + while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) { + tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift; + tk->xtime_sec++; + } +} + +static struct timespec tk_xtime(struct timekeeper *tk) +{ + struct timespec ts; + + ts.tv_sec = tk->xtime_sec; + ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); + return ts; +} +static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) +{ + tk->xtime_sec = ts->tv_sec; + tk->xtime_nsec = ts->tv_nsec << tk->shift; +} + +static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) +{ + tk->xtime_sec += ts->tv_sec; + tk->xtime_nsec += ts->tv_nsec << tk->shift; +} /** * timekeeper_setup_internals - Set up internals to use clocksource clock. @@ -104,7 +131,9 @@ static void timekeeper_setup_internals(struct clocksource *clock) { cycle_t interval; u64 tmp, ntpinterval; + struct clocksource *old_clock; + old_clock = timekeeper.clock; timekeeper.clock = clock; clock->cycle_last = clock->read(clock); @@ -126,7 +155,14 @@ static void timekeeper_setup_internals(struct clocksource *clock) timekeeper.raw_interval = ((u64) interval * clock->mult) >> clock->shift; - timekeeper.xtime_nsec = 0; + /* if changing clocks, convert xtime_nsec shift units */ + if (old_clock) { + int shift_change = clock->shift - old_clock->shift; + if (shift_change < 0) + timekeeper.xtime_nsec >>= -shift_change; + else + timekeeper.xtime_nsec <<= shift_change; + } timekeeper.shift = clock->shift; timekeeper.ntp_error = 0; @@ -145,6 +181,7 @@ static inline s64 timekeeping_get_ns(void) { cycle_t cycle_now, cycle_delta; struct clocksource *clock; + s64 nsec; /* read clocksource: */ clock = timekeeper.clock; @@ -153,9 +190,8 @@ static inline s64 timekeeping_get_ns(void) /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; - /* return delta convert to nanoseconds using ntp adjusted mult. */ - return clocksource_cyc2ns(cycle_delta, timekeeper.mult, - timekeeper.shift); + nsec = cycle_delta * timekeeper.mult + timekeeper.xtime_nsec; + return nsec >> timekeeper.shift; } static inline s64 timekeeping_get_ns_raw(void) @@ -185,12 +221,15 @@ static void update_rt_offset(void) /* must hold write on timekeeper.lock */ static void timekeeping_update(bool clearntp) { + struct timespec xt; + if (clearntp) { timekeeper.ntp_error = 0; ntp_clear(); } update_rt_offset(); - update_vsyscall(&timekeeper.xtime, &timekeeper.wall_to_monotonic, + xt = tk_xtime(&timekeeper); + update_vsyscall(&xt, &timekeeper.wall_to_monotonic, timekeeper.clock, timekeeper.mult); } @@ -213,13 +252,12 @@ static void timekeeping_forward_now(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult, - timekeeper.shift); + timekeeper.xtime_nsec += cycle_delta * timekeeper.mult; /* If arch requires, add in gettimeoffset() */ - nsec += arch_gettimeoffset(); + timekeeper.xtime_nsec += arch_gettimeoffset() << timekeeper.shift; - timespec_add_ns(&timekeeper.xtime, nsec); + tk_normalize_xtime(&timekeeper); nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); timespec_add_ns(&timekeeper.raw_time, nsec); @@ -234,15 +272,15 @@ static void timekeeping_forward_now(void) void getnstimeofday(struct timespec *ts) { unsigned long seq; - s64 nsecs; + s64 nsecs = 0; WARN_ON(timekeeping_suspended); do { seq = read_seqbegin(&timekeeper.lock); - *ts = timekeeper.xtime; - nsecs = timekeeping_get_ns(); + ts->tv_sec = timekeeper.xtime_sec; + ts->tv_nsec = timekeeping_get_ns(); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -262,11 +300,10 @@ ktime_t ktime_get(void) do { seq = read_seqbegin(&timekeeper.lock); - secs = timekeeper.xtime.tv_sec + + secs = timekeeper.xtime_sec + timekeeper.wall_to_monotonic.tv_sec; - nsecs = timekeeper.xtime.tv_nsec + + nsecs = timekeeping_get_ns() + timekeeper.wall_to_monotonic.tv_nsec; - nsecs += timekeeping_get_ns(); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -291,22 +328,21 @@ void ktime_get_ts(struct timespec *ts) { struct timespec tomono; unsigned int seq; - s64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqbegin(&timekeeper.lock); - *ts = timekeeper.xtime; + ts->tv_sec = timekeeper.xtime_sec; + ts->tv_nsec = timekeeping_get_ns(); tomono = timekeeper.wall_to_monotonic; - nsecs = timekeeping_get_ns(); /* If arch requires, add in gettimeoffset() */ - nsecs += arch_gettimeoffset(); + ts->tv_nsec += arch_gettimeoffset(); } while (read_seqretry(&timekeeper.lock, seq)); set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, - ts->tv_nsec + tomono.tv_nsec + nsecs); + ts->tv_nsec + tomono.tv_nsec); } EXPORT_SYMBOL_GPL(ktime_get_ts); @@ -334,7 +370,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) seq = read_seqbegin(&timekeeper.lock); *ts_raw = timekeeper.raw_time; - *ts_real = timekeeper.xtime; + ts_real->tv_sec = timekeeper.xtime_sec; + ts_real->tv_nsec = 0; nsecs_raw = timekeeping_get_ns_raw(); nsecs_real = timekeeping_get_ns(); @@ -377,7 +414,7 @@ EXPORT_SYMBOL(do_gettimeofday); */ int do_settimeofday(const struct timespec *tv) { - struct timespec ts_delta; + struct timespec ts_delta, xt; unsigned long flags; if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) @@ -387,12 +424,15 @@ int do_settimeofday(const struct timespec *tv) timekeeping_forward_now(); - ts_delta.tv_sec = tv->tv_sec - timekeeper.xtime.tv_sec; - ts_delta.tv_nsec = tv->tv_nsec - timekeeper.xtime.tv_nsec; + xt = tk_xtime(&timekeeper); + ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; + ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec; + timekeeper.wall_to_monotonic = timespec_sub(timekeeper.wall_to_monotonic, ts_delta); - timekeeper.xtime = *tv; + tk_set_xtime(&timekeeper, tv); + timekeeping_update(true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -422,7 +462,8 @@ int timekeeping_inject_offset(struct timespec *ts) timekeeping_forward_now(); - timekeeper.xtime = timespec_add(timekeeper.xtime, *ts); + + tk_xtime_add(&timekeeper, ts); timekeeper.wall_to_monotonic = timespec_sub(timekeeper.wall_to_monotonic, *ts); @@ -606,14 +647,12 @@ void __init timekeeping_init(void) clock->enable(clock); timekeeper_setup_internals(clock); - timekeeper.xtime.tv_sec = now.tv_sec; - timekeeper.xtime.tv_nsec = now.tv_nsec; + tk_set_xtime(&timekeeper, &now); timekeeper.raw_time.tv_sec = 0; timekeeper.raw_time.tv_nsec = 0; - if (boot.tv_sec == 0 && boot.tv_nsec == 0) { - boot.tv_sec = timekeeper.xtime.tv_sec; - boot.tv_nsec = timekeeper.xtime.tv_nsec; - } + if (boot.tv_sec == 0 && boot.tv_nsec == 0) + boot = tk_xtime(&timekeeper); + set_normalized_timespec(&timekeeper.wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); update_rt_offset(); @@ -646,7 +685,7 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta) return; } - timekeeper.xtime = timespec_add(timekeeper.xtime, *delta); + tk_xtime_add(&timekeeper, delta); timekeeper.wall_to_monotonic = timespec_sub(timekeeper.wall_to_monotonic, *delta); update_sleep_time(timespec_add(timekeeper.total_sleep_time, *delta)); @@ -742,7 +781,7 @@ static int timekeeping_suspend(void) * try to compensate so the difference in system time * and persistent_clock time stays close to constant. */ - delta = timespec_sub(timekeeper.xtime, timekeeping_suspend_time); + delta = timespec_sub(tk_xtime(&timekeeper), timekeeping_suspend_time); delta_delta = timespec_sub(delta, old_delta); if (abs(delta_delta.tv_sec) >= 2) { /* @@ -977,9 +1016,9 @@ static cycle_t logarithmic_accumulation(cycle_t offset, u32 shift) while (timekeeper.xtime_nsec >= nsecps) { int leap; timekeeper.xtime_nsec -= nsecps; - timekeeper.xtime.tv_sec++; - leap = second_overflow(timekeeper.xtime.tv_sec); - timekeeper.xtime.tv_sec += leap; + timekeeper.xtime_sec++; + leap = second_overflow(timekeeper.xtime_sec); + timekeeper.xtime_sec += leap; timekeeper.wall_to_monotonic.tv_sec -= leap; if (leap) clock_was_set_delayed(); @@ -1015,6 +1054,7 @@ static void update_wall_time(void) cycle_t offset; int shift = 0, maxshift; unsigned long flags; + s64 remainder; write_seqlock_irqsave(&timekeeper.lock, flags); @@ -1029,8 +1069,6 @@ static void update_wall_time(void) #else offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #endif - timekeeper.xtime_nsec = (s64)timekeeper.xtime.tv_nsec << - timekeeper.shift; /* * With NO_HZ we may have to accumulate many cycle_intervals @@ -1076,28 +1114,31 @@ static void update_wall_time(void) timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; } - /* - * Store full nanoseconds into xtime after rounding it up and - * add the remainder to the error difference. - */ - timekeeper.xtime.tv_nsec = ((s64)timekeeper.xtime_nsec >> - timekeeper.shift) + 1; - timekeeper.xtime_nsec -= (s64)timekeeper.xtime.tv_nsec << - timekeeper.shift; - timekeeper.ntp_error += timekeeper.xtime_nsec << - timekeeper.ntp_error_shift; + * Store only full nanoseconds into xtime_nsec after rounding + * it up and add the remainder to the error difference. + * XXX - This is necessary to avoid small 1ns inconsistnecies caused + * by truncating the remainder in vsyscalls. However, it causes + * additional work to be done in timekeeping_adjust(). Once + * the vsyscall implementations are converted to use xtime_nsec + * (shifted nanoseconds), this can be killed. + */ + remainder = timekeeper.xtime_nsec & ((1 << timekeeper.shift) - 1); + timekeeper.xtime_nsec -= remainder; + timekeeper.xtime_nsec += 1 << timekeeper.shift; + timekeeper.ntp_error += remainder << timekeeper.ntp_error_shift; /* * Finally, make sure that after the rounding - * xtime.tv_nsec isn't larger than NSEC_PER_SEC + * xtime_nsec isn't larger than NSEC_PER_SEC */ - if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { + if (unlikely(timekeeper.xtime_nsec >= + ((u64)NSEC_PER_SEC << timekeeper.shift))) { int leap; - timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; - timekeeper.xtime.tv_sec++; - leap = second_overflow(timekeeper.xtime.tv_sec); - timekeeper.xtime.tv_sec += leap; + timekeeper.xtime_nsec -= (u64)NSEC_PER_SEC << timekeeper.shift; + timekeeper.xtime_sec++; + leap = second_overflow(timekeeper.xtime_sec); + timekeeper.xtime_sec += leap; timekeeper.wall_to_monotonic.tv_sec -= leap; if (leap) clock_was_set_delayed(); @@ -1148,21 +1189,20 @@ void get_monotonic_boottime(struct timespec *ts) { struct timespec tomono, sleep; unsigned int seq; - s64 nsecs; WARN_ON(timekeeping_suspended); do { seq = read_seqbegin(&timekeeper.lock); - *ts = timekeeper.xtime; + ts->tv_sec = timekeeper.xtime_sec; + ts->tv_nsec = timekeeping_get_ns(); tomono = timekeeper.wall_to_monotonic; sleep = timekeeper.total_sleep_time; - nsecs = timekeeping_get_ns(); } while (read_seqretry(&timekeeper.lock, seq)); set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec, - ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs); + ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec); } EXPORT_SYMBOL_GPL(get_monotonic_boottime); @@ -1195,13 +1235,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased); unsigned long get_seconds(void) { - return timekeeper.xtime.tv_sec; + return timekeeper.xtime_sec; } EXPORT_SYMBOL(get_seconds); struct timespec __current_kernel_time(void) { - return timekeeper.xtime; + return tk_xtime(&timekeeper); } struct timespec current_kernel_time(void) @@ -1212,7 +1252,7 @@ struct timespec current_kernel_time(void) do { seq = read_seqbegin(&timekeeper.lock); - now = timekeeper.xtime; + now = tk_xtime(&timekeeper); } while (read_seqretry(&timekeeper.lock, seq)); return now; @@ -1227,7 +1267,7 @@ struct timespec get_monotonic_coarse(void) do { seq = read_seqbegin(&timekeeper.lock); - now = timekeeper.xtime; + now = tk_xtime(&timekeeper); mono = timekeeper.wall_to_monotonic; } while (read_seqretry(&timekeeper.lock, seq)); @@ -1262,7 +1302,7 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, do { seq = read_seqbegin(&timekeeper.lock); - *xtim = timekeeper.xtime; + *xtim = tk_xtime(&timekeeper); *wtom = timekeeper.wall_to_monotonic; *sleep = timekeeper.total_sleep_time; } while (read_seqretry(&timekeeper.lock, seq)); @@ -1286,9 +1326,8 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) do { seq = read_seqbegin(&timekeeper.lock); - secs = timekeeper.xtime.tv_sec; - nsecs = timekeeper.xtime.tv_nsec; - nsecs += timekeeping_get_ns(); + secs = timekeeper.xtime_sec; + nsecs = timekeeping_get_ns(); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); -- cgit v1.2.3-70-g09d2 From 1f4f948706bcec1b51bf6492bf04057d2e21e273 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:54 -0400 Subject: time: Refactor accumulation of nsecs to secs We do the exact same logic moving nsecs to secs in the timekeeper in multiple places, so condense this into a single function. Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-6-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 54 ++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 22 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b98d9bd73e5..cb4a433bab9 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -990,6 +990,35 @@ static void timekeeping_adjust(s64 offset) } +/** + * accumulate_nsecs_to_secs - Accumulates nsecs into secs + * + * Helper function that accumulates a the nsecs greater then a second + * from the xtime_nsec field to the xtime_secs field. + * It also calls into the NTP code to handle leapsecond processing. + * + */ +static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) +{ + u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; + + while (tk->xtime_nsec >= nsecps) { + int leap; + + tk->xtime_nsec -= nsecps; + tk->xtime_sec++; + + /* Figure out if its a leap sec and apply if needed */ + leap = second_overflow(tk->xtime_sec); + tk->xtime_sec += leap; + tk->wall_to_monotonic.tv_sec -= leap; + if (leap) + clock_was_set_delayed(); + + } +} + + /** * logarithmic_accumulation - shifted accumulation of cycles * @@ -1001,7 +1030,6 @@ static void timekeeping_adjust(s64 offset) */ static cycle_t logarithmic_accumulation(cycle_t offset, u32 shift) { - u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; u64 raw_nsecs; /* If the offset is smaller than a shifted interval, do nothing */ @@ -1013,16 +1041,8 @@ static cycle_t logarithmic_accumulation(cycle_t offset, u32 shift) timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift; timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; - while (timekeeper.xtime_nsec >= nsecps) { - int leap; - timekeeper.xtime_nsec -= nsecps; - timekeeper.xtime_sec++; - leap = second_overflow(timekeeper.xtime_sec); - timekeeper.xtime_sec += leap; - timekeeper.wall_to_monotonic.tv_sec -= leap; - if (leap) - clock_was_set_delayed(); - } + + accumulate_nsecs_to_secs(&timekeeper); /* Accumulate raw time */ raw_nsecs = timekeeper.raw_interval << shift; @@ -1132,17 +1152,7 @@ static void update_wall_time(void) * Finally, make sure that after the rounding * xtime_nsec isn't larger than NSEC_PER_SEC */ - if (unlikely(timekeeper.xtime_nsec >= - ((u64)NSEC_PER_SEC << timekeeper.shift))) { - int leap; - timekeeper.xtime_nsec -= (u64)NSEC_PER_SEC << timekeeper.shift; - timekeeper.xtime_sec++; - leap = second_overflow(timekeeper.xtime_sec); - timekeeper.xtime_sec += leap; - timekeeper.wall_to_monotonic.tv_sec -= leap; - if (leap) - clock_was_set_delayed(); - } + accumulate_nsecs_to_secs(&timekeeper); timekeeping_update(false); -- cgit v1.2.3-70-g09d2 From f2a5a0854efc62abe7f69e9947842cb135837f9a Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:55 -0400 Subject: time: Move arch_gettimeoffset() usage into timekeeping_get_ns() Since we call arch_gettimeoffset() in all the accessor functions, move arch_gettimeoffset() calls into timekeeping_get_ns() and timekeeping_get_ns_raw() to simplify the code. This also makes the code easier to maintain as we don't have to worry about forgetting the arch_gettimeoffset() as has happened in the past. Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-7-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cb4a433bab9..e43289df28c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -191,13 +191,17 @@ static inline s64 timekeeping_get_ns(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; nsec = cycle_delta * timekeeper.mult + timekeeper.xtime_nsec; - return nsec >> timekeeper.shift; + nsec >>= timekeeper.shift; + + /* If arch requires, add in gettimeoffset() */ + return nsec + arch_gettimeoffset(); } static inline s64 timekeeping_get_ns_raw(void) { cycle_t cycle_now, cycle_delta; struct clocksource *clock; + s64 nsec; /* read clocksource: */ clock = timekeeper.clock; @@ -206,8 +210,11 @@ static inline s64 timekeeping_get_ns_raw(void) /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; - /* return delta convert to nanoseconds. */ - return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); + /* convert delta to nanoseconds. */ + nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); + + /* If arch requires, add in gettimeoffset() */ + return nsec + arch_gettimeoffset(); } static void update_rt_offset(void) @@ -282,9 +289,6 @@ void getnstimeofday(struct timespec *ts) ts->tv_sec = timekeeper.xtime_sec; ts->tv_nsec = timekeeping_get_ns(); - /* If arch requires, add in gettimeoffset() */ - nsecs += arch_gettimeoffset(); - } while (read_seqretry(&timekeeper.lock, seq)); timespec_add_ns(ts, nsecs); @@ -304,8 +308,6 @@ ktime_t ktime_get(void) timekeeper.wall_to_monotonic.tv_sec; nsecs = timekeeping_get_ns() + timekeeper.wall_to_monotonic.tv_nsec; - /* If arch requires, add in gettimeoffset() */ - nsecs += arch_gettimeoffset(); } while (read_seqretry(&timekeeper.lock, seq)); /* @@ -336,8 +338,6 @@ void ktime_get_ts(struct timespec *ts) ts->tv_sec = timekeeper.xtime_sec; ts->tv_nsec = timekeeping_get_ns(); tomono = timekeeper.wall_to_monotonic; - /* If arch requires, add in gettimeoffset() */ - ts->tv_nsec += arch_gettimeoffset(); } while (read_seqretry(&timekeeper.lock, seq)); @@ -365,8 +365,6 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) WARN_ON_ONCE(timekeeping_suspended); do { - u32 arch_offset; - seq = read_seqbegin(&timekeeper.lock); *ts_raw = timekeeper.raw_time; @@ -376,11 +374,6 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) nsecs_raw = timekeeping_get_ns_raw(); nsecs_real = timekeeping_get_ns(); - /* If arch requires, add in gettimeoffset() */ - arch_offset = arch_gettimeoffset(); - nsecs_raw += arch_offset; - nsecs_real += arch_offset; - } while (read_seqretry(&timekeeper.lock, seq)); timespec_add_ns(ts_raw, nsecs_raw); @@ -1338,8 +1331,6 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) secs = timekeeper.xtime_sec; nsecs = timekeeping_get_ns(); - /* If arch requires, add in gettimeoffset() */ - nsecs += arch_gettimeoffset(); *offs_real = timekeeper.offs_real; *offs_boot = timekeeper.offs_boot; -- cgit v1.2.3-70-g09d2 From 2a8c0883c3cfffcc148ea606e2a4e7453cd75e73 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:56 -0400 Subject: time: Move xtime_nsec adjustment underflow handling timekeeping_adjust When we make adjustments speeding up the clock, its possible for xtime_nsec to underflow. We already handle this properly, but we do so from update_wall_time() instead of the more logical timekeeping_adjust(), where the possible underflow actually occurs. Thus, move the correction logic to the timekeeping_adjust, which is the function that causes the issue. Making update_wall_time() more readable. Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-8-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e43289df28c..aeeaab8cba6 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -980,6 +980,27 @@ static void timekeeping_adjust(s64 offset) timekeeper.xtime_nsec -= offset; timekeeper.ntp_error -= (interval - offset) << timekeeper.ntp_error_shift; + + /* + * It may be possible that when we entered this function, xtime_nsec + * was very small. Further, if we're slightly speeding the clocksource + * in the code above, its possible the required corrective factor to + * xtime_nsec could cause it to underflow. + * + * Now, since we already accumulated the second, cannot simply roll + * the accumulated second back, since the NTP subsystem has been + * notified via second_overflow. So instead we push xtime_nsec forward + * by the amount we underflowed, and add that amount into the error. + * + * We'll correct this error next time through this function, when + * xtime_nsec is not as small. + */ + if (unlikely((s64)timekeeper.xtime_nsec < 0)) { + s64 neg = -(s64)timekeeper.xtime_nsec; + timekeeper.xtime_nsec = 0; + timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; + } + } @@ -1105,27 +1126,6 @@ static void update_wall_time(void) /* correct the clock when NTP error is too big */ timekeeping_adjust(offset); - /* - * Since in the loop above, we accumulate any amount of time - * in xtime_nsec over a second into xtime.tv_sec, its possible for - * xtime_nsec to be fairly small after the loop. Further, if we're - * slightly speeding the clocksource up in timekeeping_adjust(), - * its possible the required corrective factor to xtime_nsec could - * cause it to underflow. - * - * Now, we cannot simply roll the accumulated second back, since - * the NTP subsystem has been notified via second_overflow. So - * instead we push xtime_nsec forward by the amount we underflowed, - * and add that amount into the error. - * - * We'll correct this error next time through this function, when - * xtime_nsec is not as small. - */ - if (unlikely((s64)timekeeper.xtime_nsec < 0)) { - s64 neg = -(s64)timekeeper.xtime_nsec; - timekeeper.xtime_nsec = 0; - timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; - } /* * Store only full nanoseconds into xtime_nsec after rounding -- cgit v1.2.3-70-g09d2 From f726a697d06102e7a1fc0a87308cb30a84580205 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 13 Jul 2012 01:21:57 -0400 Subject: time: Rework timekeeping functions to take timekeeper ptr as argument As part of cleaning up the timekeeping code, this patch converts a number of internal functions to takei a timekeeper ptr as an argument, so that the internal functions don't access the global timekeeper structure directly. This allows for further optimizations to reduce lock hold time later. This patch has been updated to include more consistent usage of the timekeeper value, by making sure it is always passed as a argument to non top-level functions. Signed-off-by: John Stultz Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1342156917-25092-9-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 208 +++++++++++++++++++++++----------------------- 1 file changed, 103 insertions(+), 105 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index aeeaab8cba6..5980e902978 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -127,14 +127,14 @@ static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) * * Unless you're the timekeeping code, you should not be using this! */ -static void timekeeper_setup_internals(struct clocksource *clock) +static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) { cycle_t interval; u64 tmp, ntpinterval; struct clocksource *old_clock; - old_clock = timekeeper.clock; - timekeeper.clock = clock; + old_clock = tk->clock; + tk->clock = clock; clock->cycle_last = clock->read(clock); /* Do the ns -> cycle conversion first, using original mult */ @@ -147,64 +147,64 @@ static void timekeeper_setup_internals(struct clocksource *clock) tmp = 1; interval = (cycle_t) tmp; - timekeeper.cycle_interval = interval; + tk->cycle_interval = interval; /* Go back from cycles -> shifted ns */ - timekeeper.xtime_interval = (u64) interval * clock->mult; - timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval; - timekeeper.raw_interval = + tk->xtime_interval = (u64) interval * clock->mult; + tk->xtime_remainder = ntpinterval - tk->xtime_interval; + tk->raw_interval = ((u64) interval * clock->mult) >> clock->shift; /* if changing clocks, convert xtime_nsec shift units */ if (old_clock) { int shift_change = clock->shift - old_clock->shift; if (shift_change < 0) - timekeeper.xtime_nsec >>= -shift_change; + tk->xtime_nsec >>= -shift_change; else - timekeeper.xtime_nsec <<= shift_change; + tk->xtime_nsec <<= shift_change; } - timekeeper.shift = clock->shift; + tk->shift = clock->shift; - timekeeper.ntp_error = 0; - timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; + tk->ntp_error = 0; + tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; /* * The timekeeper keeps its own mult values for the currently * active clocksource. These value will be adjusted via NTP * to counteract clock drifting. */ - timekeeper.mult = clock->mult; + tk->mult = clock->mult; } /* Timekeeper helper functions. */ -static inline s64 timekeeping_get_ns(void) +static inline s64 timekeeping_get_ns(struct timekeeper *tk) { cycle_t cycle_now, cycle_delta; struct clocksource *clock; s64 nsec; /* read clocksource: */ - clock = timekeeper.clock; + clock = tk->clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; - nsec = cycle_delta * timekeeper.mult + timekeeper.xtime_nsec; - nsec >>= timekeeper.shift; + nsec = cycle_delta * tk->mult + tk->xtime_nsec; + nsec >>= tk->shift; /* If arch requires, add in gettimeoffset() */ return nsec + arch_gettimeoffset(); } -static inline s64 timekeeping_get_ns_raw(void) +static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) { cycle_t cycle_now, cycle_delta; struct clocksource *clock; s64 nsec; /* read clocksource: */ - clock = timekeeper.clock; + clock = tk->clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ @@ -217,27 +217,26 @@ static inline s64 timekeeping_get_ns_raw(void) return nsec + arch_gettimeoffset(); } -static void update_rt_offset(void) +static void update_rt_offset(struct timekeeper *tk) { - struct timespec tmp, *wtm = &timekeeper.wall_to_monotonic; + struct timespec tmp, *wtm = &tk->wall_to_monotonic; set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec); - timekeeper.offs_real = timespec_to_ktime(tmp); + tk->offs_real = timespec_to_ktime(tmp); } /* must hold write on timekeeper.lock */ -static void timekeeping_update(bool clearntp) +static void timekeeping_update(struct timekeeper *tk, bool clearntp) { struct timespec xt; if (clearntp) { - timekeeper.ntp_error = 0; + tk->ntp_error = 0; ntp_clear(); } - update_rt_offset(); - xt = tk_xtime(&timekeeper); - update_vsyscall(&xt, &timekeeper.wall_to_monotonic, - timekeeper.clock, timekeeper.mult); + update_rt_offset(tk); + xt = tk_xtime(tk); + update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); } @@ -248,26 +247,26 @@ static void timekeeping_update(bool clearntp) * update_wall_time(). This is useful before significant clock changes, * as it avoids having to deal with this time offset explicitly. */ -static void timekeeping_forward_now(void) +static void timekeeping_forward_now(struct timekeeper *tk) { cycle_t cycle_now, cycle_delta; struct clocksource *clock; s64 nsec; - clock = timekeeper.clock; + clock = tk->clock; cycle_now = clock->read(clock); cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - timekeeper.xtime_nsec += cycle_delta * timekeeper.mult; + tk->xtime_nsec += cycle_delta * tk->mult; /* If arch requires, add in gettimeoffset() */ - timekeeper.xtime_nsec += arch_gettimeoffset() << timekeeper.shift; + tk->xtime_nsec += arch_gettimeoffset() << tk->shift; - tk_normalize_xtime(&timekeeper); + tk_normalize_xtime(tk); nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); - timespec_add_ns(&timekeeper.raw_time, nsec); + timespec_add_ns(&tk->raw_time, nsec); } /** @@ -287,7 +286,7 @@ void getnstimeofday(struct timespec *ts) seq = read_seqbegin(&timekeeper.lock); ts->tv_sec = timekeeper.xtime_sec; - ts->tv_nsec = timekeeping_get_ns(); + ts->tv_nsec = timekeeping_get_ns(&timekeeper); } while (read_seqretry(&timekeeper.lock, seq)); @@ -306,7 +305,7 @@ ktime_t ktime_get(void) seq = read_seqbegin(&timekeeper.lock); secs = timekeeper.xtime_sec + timekeeper.wall_to_monotonic.tv_sec; - nsecs = timekeeping_get_ns() + + nsecs = timekeeping_get_ns(&timekeeper) + timekeeper.wall_to_monotonic.tv_nsec; } while (read_seqretry(&timekeeper.lock, seq)); @@ -336,7 +335,7 @@ void ktime_get_ts(struct timespec *ts) do { seq = read_seqbegin(&timekeeper.lock); ts->tv_sec = timekeeper.xtime_sec; - ts->tv_nsec = timekeeping_get_ns(); + ts->tv_nsec = timekeeping_get_ns(&timekeeper); tomono = timekeeper.wall_to_monotonic; } while (read_seqretry(&timekeeper.lock, seq)); @@ -371,8 +370,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) ts_real->tv_sec = timekeeper.xtime_sec; ts_real->tv_nsec = 0; - nsecs_raw = timekeeping_get_ns_raw(); - nsecs_real = timekeeping_get_ns(); + nsecs_raw = timekeeping_get_ns_raw(&timekeeper); + nsecs_real = timekeeping_get_ns(&timekeeper); } while (read_seqretry(&timekeeper.lock, seq)); @@ -415,7 +414,7 @@ int do_settimeofday(const struct timespec *tv) write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); xt = tk_xtime(&timekeeper); ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; @@ -426,7 +425,7 @@ int do_settimeofday(const struct timespec *tv) tk_set_xtime(&timekeeper, tv); - timekeeping_update(true); + timekeeping_update(&timekeeper, true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -453,14 +452,14 @@ int timekeeping_inject_offset(struct timespec *ts) write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); tk_xtime_add(&timekeeper, ts); timekeeper.wall_to_monotonic = timespec_sub(timekeeper.wall_to_monotonic, *ts); - timekeeping_update(true); + timekeeping_update(&timekeeper, true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -485,14 +484,14 @@ static int change_clocksource(void *data) write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); if (!new->enable || new->enable(new) == 0) { old = timekeeper.clock; - timekeeper_setup_internals(new); + tk_setup_internals(&timekeeper, new); if (old->disable) old->disable(old); } - timekeeping_update(true); + timekeeping_update(&timekeeper, true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -542,7 +541,7 @@ void getrawmonotonic(struct timespec *ts) do { seq = read_seqbegin(&timekeeper.lock); - nsecs = timekeeping_get_ns_raw(); + nsecs = timekeeping_get_ns_raw(&timekeeper); *ts = timekeeper.raw_time; } while (read_seqretry(&timekeeper.lock, seq)); @@ -638,7 +637,7 @@ void __init timekeeping_init(void) clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); - timekeeper_setup_internals(clock); + tk_setup_internals(&timekeeper, clock); tk_set_xtime(&timekeeper, &now); timekeeper.raw_time.tv_sec = 0; @@ -648,7 +647,7 @@ void __init timekeeping_init(void) set_normalized_timespec(&timekeeper.wall_to_monotonic, -boot.tv_sec, -boot.tv_nsec); - update_rt_offset(); + update_rt_offset(&timekeeper); timekeeper.total_sleep_time.tv_sec = 0; timekeeper.total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -670,7 +669,8 @@ static void update_sleep_time(struct timespec t) * Takes a timespec offset measuring a suspend interval and properly * adds the sleep offset to the timekeeping variables. */ -static void __timekeeping_inject_sleeptime(struct timespec *delta) +static void __timekeeping_inject_sleeptime(struct timekeeper *tk, + struct timespec *delta) { if (!timespec_valid(delta)) { printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " @@ -678,10 +678,9 @@ static void __timekeeping_inject_sleeptime(struct timespec *delta) return; } - tk_xtime_add(&timekeeper, delta); - timekeeper.wall_to_monotonic = - timespec_sub(timekeeper.wall_to_monotonic, *delta); - update_sleep_time(timespec_add(timekeeper.total_sleep_time, *delta)); + tk_xtime_add(tk, delta); + tk->wall_to_monotonic = timespec_sub(tk->wall_to_monotonic, *delta); + update_sleep_time(timespec_add(tk->total_sleep_time, *delta)); } @@ -707,11 +706,11 @@ void timekeeping_inject_sleeptime(struct timespec *delta) write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); - __timekeeping_inject_sleeptime(delta); + __timekeeping_inject_sleeptime(&timekeeper, delta); - timekeeping_update(true); + timekeeping_update(&timekeeper, true); write_sequnlock_irqrestore(&timekeeper.lock, flags); @@ -740,7 +739,7 @@ static void timekeeping_resume(void) if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { ts = timespec_sub(ts, timekeeping_suspend_time); - __timekeeping_inject_sleeptime(&ts); + __timekeeping_inject_sleeptime(&timekeeper, &ts); } /* re-base the last cycle value */ timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); @@ -765,7 +764,7 @@ static int timekeeping_suspend(void) read_persistent_clock(&timekeeping_suspend_time); write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(); + timekeeping_forward_now(&timekeeper); timekeeping_suspended = 1; /* @@ -813,7 +812,8 @@ device_initcall(timekeeping_init_ops); * If the error is already larger, we look ahead even further * to compensate for late or lost adjustments. */ -static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, +static __always_inline int timekeeping_bigadjust(struct timekeeper *tk, + s64 error, s64 *interval, s64 *offset) { s64 tick_error, i; @@ -829,7 +829,7 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, * here. This is tuned so that an error of about 1 msec is adjusted * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). */ - error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); + error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); error2 = abs(error2); for (look_ahead = 0; error2 > 0; look_ahead++) error2 >>= 2; @@ -838,8 +838,8 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, * Now calculate the error in (1 << look_ahead) ticks, but first * remove the single look ahead already included in the error. */ - tick_error = ntp_tick_length() >> (timekeeper.ntp_error_shift + 1); - tick_error -= timekeeper.xtime_interval >> 1; + tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1); + tick_error -= tk->xtime_interval >> 1; error = ((error - tick_error) >> look_ahead) + tick_error; /* Finally calculate the adjustment shift value. */ @@ -864,9 +864,9 @@ static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, * this is optimized for the most common adjustments of -1,0,1, * for other values we can do a bit more work. */ -static void timekeeping_adjust(s64 offset) +static void timekeeping_adjust(struct timekeeper *tk, s64 offset) { - s64 error, interval = timekeeper.cycle_interval; + s64 error, interval = tk->cycle_interval; int adj; /* @@ -882,7 +882,7 @@ static void timekeeping_adjust(s64 offset) * * Note: It does not "save" on aggravation when reading the code. */ - error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1); + error = tk->ntp_error >> (tk->ntp_error_shift - 1); if (error > interval) { /* * We now divide error by 4(via shift), which checks if @@ -904,7 +904,8 @@ static void timekeeping_adjust(s64 offset) if (likely(error <= interval)) adj = 1; else - adj = timekeeping_bigadjust(error, &interval, &offset); + adj = timekeeping_bigadjust(tk, error, &interval, + &offset); } else if (error < -interval) { /* See comment above, this is just switched for the negative */ error >>= 2; @@ -913,18 +914,17 @@ static void timekeeping_adjust(s64 offset) interval = -interval; offset = -offset; } else - adj = timekeeping_bigadjust(error, &interval, &offset); - } else /* No adjustment needed */ + adj = timekeeping_bigadjust(tk, error, &interval, + &offset); + } else return; - if (unlikely(timekeeper.clock->maxadj && - (timekeeper.mult + adj > - timekeeper.clock->mult + timekeeper.clock->maxadj))) { + if (unlikely(tk->clock->maxadj && + (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { printk_once(KERN_WARNING "Adjusting %s more than 11%% (%ld vs %ld)\n", - timekeeper.clock->name, (long)timekeeper.mult + adj, - (long)timekeeper.clock->mult + - timekeeper.clock->maxadj); + tk->clock->name, (long)tk->mult + adj, + (long)tk->clock->mult + tk->clock->maxadj); } /* * So the following can be confusing. @@ -975,11 +975,10 @@ static void timekeeping_adjust(s64 offset) * * XXX - TODO: Doc ntp_error calculation. */ - timekeeper.mult += adj; - timekeeper.xtime_interval += interval; - timekeeper.xtime_nsec -= offset; - timekeeper.ntp_error -= (interval - offset) << - timekeeper.ntp_error_shift; + tk->mult += adj; + tk->xtime_interval += interval; + tk->xtime_nsec -= offset; + tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; /* * It may be possible that when we entered this function, xtime_nsec @@ -995,10 +994,10 @@ static void timekeeping_adjust(s64 offset) * We'll correct this error next time through this function, when * xtime_nsec is not as small. */ - if (unlikely((s64)timekeeper.xtime_nsec < 0)) { - s64 neg = -(s64)timekeeper.xtime_nsec; - timekeeper.xtime_nsec = 0; - timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; + if (unlikely((s64)tk->xtime_nsec < 0)) { + s64 neg = -(s64)tk->xtime_nsec; + tk->xtime_nsec = 0; + tk->ntp_error += neg << tk->ntp_error_shift; } } @@ -1042,37 +1041,36 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) * * Returns the unconsumed cycles. */ -static cycle_t logarithmic_accumulation(cycle_t offset, u32 shift) +static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, + u32 shift) { u64 raw_nsecs; - /* If the offset is smaller than a shifted interval, do nothing */ - if (offset < timekeeper.cycle_interval<cycle_interval<cycle_last += timekeeper.cycle_interval << shift; + offset -= tk->cycle_interval << shift; + tk->clock->cycle_last += tk->cycle_interval << shift; - timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; - - accumulate_nsecs_to_secs(&timekeeper); + tk->xtime_nsec += tk->xtime_interval << shift; + accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ - raw_nsecs = timekeeper.raw_interval << shift; - raw_nsecs += timekeeper.raw_time.tv_nsec; + raw_nsecs = tk->raw_interval << shift; + raw_nsecs += tk->raw_time.tv_nsec; if (raw_nsecs >= NSEC_PER_SEC) { u64 raw_secs = raw_nsecs; raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); - timekeeper.raw_time.tv_sec += raw_secs; + tk->raw_time.tv_sec += raw_secs; } - timekeeper.raw_time.tv_nsec = raw_nsecs; + tk->raw_time.tv_nsec = raw_nsecs; /* Accumulate error between NTP and clock interval */ - timekeeper.ntp_error += ntp_tick_length() << shift; - timekeeper.ntp_error -= - (timekeeper.xtime_interval + timekeeper.xtime_remainder) << - (timekeeper.ntp_error_shift + shift); + tk->ntp_error += ntp_tick_length() << shift; + tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) << + (tk->ntp_error_shift + shift); return offset; } @@ -1118,13 +1116,13 @@ static void update_wall_time(void) maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; shift = min(shift, maxshift); while (offset >= timekeeper.cycle_interval) { - offset = logarithmic_accumulation(offset, shift); + offset = logarithmic_accumulation(&timekeeper, offset, shift); if(offset < timekeeper.cycle_interval<tv_sec = timekeeper.xtime_sec; - ts->tv_nsec = timekeeping_get_ns(); + ts->tv_nsec = timekeeping_get_ns(&timekeeper); tomono = timekeeper.wall_to_monotonic; sleep = timekeeper.total_sleep_time; @@ -1330,7 +1328,7 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) seq = read_seqbegin(&timekeeper.lock); secs = timekeeper.xtime_sec; - nsecs = timekeeping_get_ns(); + nsecs = timekeeping_get_ns(&timekeeper); *offs_real = timekeeper.offs_real; *offs_boot = timekeeper.offs_boot; -- cgit v1.2.3-70-g09d2 From 3e997130bd2e8c6f5aaa49d6e3161d4d29b43ab0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Jul 2012 12:50:42 -0400 Subject: timekeeping: Add missing update call in timekeeping_resume() The leap second rework unearthed another issue of inconsistent data. On timekeeping_resume() the timekeeper data is updated, but nothing calls timekeeping_update(), so now the update code in the timer interrupt sees stale values. This has been the case before those changes, but then the timer interrupt was using stale data as well so this went unnoticed for quite some time. Add the missing update call, so all the data is consistent everywhere. Reported-by: Andreas Schwab Reported-and-tested-by: "Rafael J. Wysocki" Reported-and-tested-by: Martin Steigerwald Cc: LKML Cc: Linux PM list Cc: John Stultz Cc: Ingo Molnar Cc: Peter Zijlstra , Cc: Prarit Bhargava Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz Signed-off-by: Linus Torvalds --- kernel/time/timekeeping.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 269b1fe5f2a..3447cfaf11e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -717,6 +717,7 @@ static void timekeeping_resume(void) timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.ntp_error = 0; timekeeping_suspended = 0; + timekeeping_update(false); write_sequnlock_irqrestore(&timekeeper.lock, flags); touch_softlockup_watchdog(); -- cgit v1.2.3-70-g09d2 From b44d50dcacea0d485ca2ff9140f8cc28ee22f28d Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 23 Jul 2012 16:22:37 -0400 Subject: time: Fix casting issue in tk_set_xtime and tk_xtime_add commit 1e75fa8b (time: Condense timekeeper.xtime into xtime_sec) introduced helper functions which apply a timespec to the core internal timekeeper data. The internal storage type is u64. The timespec tv_nsec value must be shifted before set or added to the internal value. tv_nsec is a long, which is 32bit on a 32bit system, so without casting tv_nsec to u64 we lose the bits which are shifted over the 32bit boundary. Add the proper typecasts. Reported-by: Konrad Rzeszutek Wilk Tested-by: Konrad Rzeszutek Wilk Signed-off-by: John Stultz Acked-by: Prarit Bhargava Link: http://lkml.kernel.org/r/1343074957-16541-1-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/timekeeping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 5980e902978..8f2aba1246f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -108,13 +108,13 @@ static struct timespec tk_xtime(struct timekeeper *tk) static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts) { tk->xtime_sec = ts->tv_sec; - tk->xtime_nsec = ts->tv_nsec << tk->shift; + tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift; } static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) { tk->xtime_sec += ts->tv_sec; - tk->xtime_nsec += ts->tv_nsec << tk->shift; + tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift; } /** -- cgit v1.2.3-70-g09d2 From 02ab20ae38337b99b5c29c81090f594b8fd61283 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 27 Jul 2012 14:48:10 -0400 Subject: time/jiffies: Rename ACTHZ to SHIFTED_HZ Ingo noted that ACTHZ is a confusing name, and requested it be renamed, so this patch renames ACTHZ to SHIFTED_HZ to better describe it. Signed-off-by: John Stultz Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1343414893-45779-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- include/linux/jiffies.h | 21 +++++++++++++-------- include/linux/timex.h | 2 +- kernel/time/jiffies.c | 2 +- kernel/time/ntp.c | 2 +- 4 files changed, 16 insertions(+), 11 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 7d24466fb80..82680541576 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -55,21 +55,26 @@ /* LATCH is used in the interval timer and ftape setup. */ # define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */ -/* HZ is the requested value. ACTHZ is actual HZ ("<< 8" is for accuracy) */ -# define ACTHZ (SH_DIV(CLOCK_TICK_RATE, LATCH, 8)) +/* + * HZ is the requested value. However the CLOCK_TICK_RATE may not allow + * for exactly HZ. So SHIFTED_HZ is high res HZ ("<< 8" is for accuracy) + */ +# define SHIFTED_HZ (SH_DIV(CLOCK_TICK_RATE, LATCH, 8)) #else -# define ACTHZ (HZ << 8) +# define SHIFTED_HZ (HZ << 8) #endif -/* TICK_NSEC is the time between ticks in nsec assuming real ACTHZ */ -#define TICK_NSEC (SH_DIV (1000000UL * 1000, ACTHZ, 8)) +/* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */ +#define TICK_NSEC (SH_DIV(1000000UL * 1000, SHIFTED_HZ, 8)) /* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ #define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) -/* TICK_USEC_TO_NSEC is the time between ticks in nsec assuming real ACTHZ and */ -/* a value TUSEC for TICK_USEC (can be set bij adjtimex) */ -#define TICK_USEC_TO_NSEC(TUSEC) (SH_DIV (TUSEC * USER_HZ * 1000, ACTHZ, 8)) +/* + * TICK_USEC_TO_NSEC is the time between ticks in nsec assuming SHIFTED_HZ and + * a value TUSEC for TICK_USEC (can be set bij adjtimex) + */ +#define TICK_USEC_TO_NSEC(TUSEC) (SH_DIV(TUSEC * USER_HZ * 1000, SHIFTED_HZ, 8)) /* some arch's have a small-data section that can be accessed register-relative * but that can only take up to, say, 4-byte variables. jiffies being part of diff --git a/include/linux/timex.h b/include/linux/timex.h index 99bc88b1fc0..7c5ceb20e03 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -232,7 +232,7 @@ struct timex { * estimated error = NTP dispersion. */ extern unsigned long tick_usec; /* USER_HZ period (usec) */ -extern unsigned long tick_nsec; /* ACTHZ period (nsec) */ +extern unsigned long tick_nsec; /* SHIFTED_HZ period (nsec) */ extern void ntp_init(void); extern void ntp_clear(void); diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index a470154e040..46da0537c10 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -37,7 +37,7 @@ * requested HZ value. It is also not recommended * for "tick-less" systems. */ -#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ)) +#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/SHIFTED_HZ)) /* Since jiffies uses a simple NSEC_PER_JIFFY multiplier * conversion, the .shift value could be zero. However diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index b7fbadc5c97..24174b4d669 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -28,7 +28,7 @@ DEFINE_SPINLOCK(ntp_lock); /* USER_HZ period (usecs): */ unsigned long tick_usec = TICK_USEC; -/* ACTHZ period (nsecs): */ +/* SHIFTED_HZ period (nsecs): */ unsigned long tick_nsec; static u64 tick_length; -- cgit v1.2.3-70-g09d2 From d4e3ab384b2343c7074f713ac330f839c38c52ee Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 27 Jul 2012 14:48:11 -0400 Subject: time: Clean up stray newlines Ingo noted inconsistent newline usage between functions. This patch cleans those up. Signed-off-by: John Stultz Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1343414893-45779-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cf364db5589..05b37a5ec7f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -239,7 +239,6 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp) update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); } - /** * timekeeping_forward_now - update clock to the current time * @@ -436,7 +435,6 @@ int do_settimeofday(const struct timespec *tv) } EXPORT_SYMBOL(do_settimeofday); - /** * timekeeping_inject_offset - Adds or subtracts from the current time. * @tv: pointer to the timespec variable containing the offset @@ -550,7 +548,6 @@ void getrawmonotonic(struct timespec *ts) } EXPORT_SYMBOL(getrawmonotonic); - /** * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres */ @@ -683,7 +680,6 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, update_sleep_time(timespec_add(tk->total_sleep_time, *delta)); } - /** * timekeeping_inject_sleeptime - Adds suspend interval to timeekeeping values * @delta: pointer to a timespec delta value @@ -718,7 +714,6 @@ void timekeeping_inject_sleeptime(struct timespec *delta) clock_was_set(); } - /** * timekeeping_resume - Resumes the generic timekeeping subsystem. * @@ -1003,7 +998,6 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) } - /** * accumulate_nsecs_to_secs - Accumulates nsecs into secs * @@ -1032,7 +1026,6 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) } } - /** * logarithmic_accumulation - shifted accumulation of cycles * @@ -1076,7 +1069,6 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, return offset; } - /** * update_wall_time - Uses the current clocksource to increment the wall time * @@ -1177,7 +1169,6 @@ void getboottime(struct timespec *ts) } EXPORT_SYMBOL_GPL(getboottime); - /** * get_monotonic_boottime - Returns monotonic time since boot * @ts: pointer to the timespec to be set @@ -1358,7 +1349,6 @@ ktime_t ktime_get_monotonic_offset(void) } EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset); - /** * xtime_update() - advances the timekeeping infrastructure * @ticks: number of ticks, that have elapsed since the last call. -- cgit v1.2.3-70-g09d2 From 6d0ef903e2bda70da124c10d8ad89f2382c87991 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 27 Jul 2012 14:48:12 -0400 Subject: time: Clean up offs_real/wall_to_mono and offs_boot/total_sleep_time updates For performance reasons, we maintain ktime_t based duplicates of wall_to_monotonic (offs_real) and total_sleep_time (offs_boot). Since large problems could occur (such as the resume regression on 3.5-rc7, or the leapsecond hrtimer issue) if these value pairs were to be inconsistently updated, this patch this cleans up how we modify these value pairs to ensure we are always consistent. As a side-effect this is also more efficient as we only caulculate the duplicate values when they are changed, rather then every update_wall_time call. This also provides WARN_ONs to detect if future changes break the invariants. Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Richard Cochran Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1343414893-45779-5-git-send-email-john.stultz@linaro.org [ Cleaned up minor style issues. ] Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 90 ++++++++++++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 36 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 05b37a5ec7f..4da65592b4d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -65,14 +65,14 @@ struct timekeeper { * used instead. */ struct timespec wall_to_monotonic; - /* time spent in suspend */ - struct timespec total_sleep_time; - /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ - struct timespec raw_time; /* Offset clock monotonic -> clock realtime */ ktime_t offs_real; + /* time spent in suspend */ + struct timespec total_sleep_time; /* Offset clock monotonic -> clock boottime */ ktime_t offs_boot; + /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */ + struct timespec raw_time; /* Seqlock for all timekeeper values */ seqlock_t lock; }; @@ -117,6 +117,31 @@ static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift; } +static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm) +{ + struct timespec tmp; + + /* + * Verify consistency of: offset_real = -wall_to_monotonic + * before modifying anything + */ + set_normalized_timespec(&tmp, -tk->wall_to_monotonic.tv_sec, + -tk->wall_to_monotonic.tv_nsec); + WARN_ON_ONCE(tk->offs_real.tv64 != timespec_to_ktime(tmp).tv64); + tk->wall_to_monotonic = wtm; + set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec); + tk->offs_real = timespec_to_ktime(tmp); +} + +static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t) +{ + /* Verify consistency before modifying */ + WARN_ON_ONCE(tk->offs_boot.tv64 != timespec_to_ktime(tk->total_sleep_time).tv64); + + tk->total_sleep_time = t; + tk->offs_boot = timespec_to_ktime(t); +} + /** * timekeeper_setup_internals - Set up internals to use clocksource clock. * @@ -217,14 +242,6 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) return nsec + arch_gettimeoffset(); } -static void update_rt_offset(struct timekeeper *tk) -{ - struct timespec tmp, *wtm = &tk->wall_to_monotonic; - - set_normalized_timespec(&tmp, -wtm->tv_sec, -wtm->tv_nsec); - tk->offs_real = timespec_to_ktime(tmp); -} - /* must hold write on timekeeper.lock */ static void timekeeping_update(struct timekeeper *tk, bool clearntp) { @@ -234,7 +251,6 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp) tk->ntp_error = 0; ntp_clear(); } - update_rt_offset(tk); xt = tk_xtime(tk); update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult); } @@ -419,8 +435,8 @@ int do_settimeofday(const struct timespec *tv) ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec; - timekeeper.wall_to_monotonic = - timespec_sub(timekeeper.wall_to_monotonic, ts_delta); + tk_set_wall_to_mono(&timekeeper, + timespec_sub(timekeeper.wall_to_monotonic, ts_delta)); tk_set_xtime(&timekeeper, tv); @@ -454,8 +470,8 @@ int timekeeping_inject_offset(struct timespec *ts) tk_xtime_add(&timekeeper, ts); - timekeeper.wall_to_monotonic = - timespec_sub(timekeeper.wall_to_monotonic, *ts); + tk_set_wall_to_mono(&timekeeper, + timespec_sub(timekeeper.wall_to_monotonic, *ts)); timekeeping_update(&timekeeper, true); @@ -621,7 +637,7 @@ void __init timekeeping_init(void) { struct clocksource *clock; unsigned long flags; - struct timespec now, boot; + struct timespec now, boot, tmp; read_persistent_clock(&now); read_boot_clock(&boot); @@ -642,23 +658,19 @@ void __init timekeeping_init(void) if (boot.tv_sec == 0 && boot.tv_nsec == 0) boot = tk_xtime(&timekeeper); - set_normalized_timespec(&timekeeper.wall_to_monotonic, - -boot.tv_sec, -boot.tv_nsec); - update_rt_offset(&timekeeper); - timekeeper.total_sleep_time.tv_sec = 0; - timekeeper.total_sleep_time.tv_nsec = 0; + set_normalized_timespec(&tmp, -boot.tv_sec, -boot.tv_nsec); + tk_set_wall_to_mono(&timekeeper, tmp); + + tmp.tv_sec = 0; + tmp.tv_nsec = 0; + tk_set_sleep_time(&timekeeper, tmp); + write_sequnlock_irqrestore(&timekeeper.lock, flags); } /* time in seconds when suspend began */ static struct timespec timekeeping_suspend_time; -static void update_sleep_time(struct timespec t) -{ - timekeeper.total_sleep_time = t; - timekeeper.offs_boot = timespec_to_ktime(t); -} - /** * __timekeeping_inject_sleeptime - Internal function to add sleep interval * @delta: pointer to a timespec delta value @@ -674,10 +686,9 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, "sleep delta value!\n"); return; } - tk_xtime_add(tk, delta); - tk->wall_to_monotonic = timespec_sub(tk->wall_to_monotonic, *delta); - update_sleep_time(timespec_add(tk->total_sleep_time, *delta)); + tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta)); + tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta)); } /** @@ -1018,11 +1029,18 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk) /* Figure out if its a leap sec and apply if needed */ leap = second_overflow(tk->xtime_sec); - tk->xtime_sec += leap; - tk->wall_to_monotonic.tv_sec -= leap; - if (leap) - clock_was_set_delayed(); + if (unlikely(leap)) { + struct timespec ts; + + tk->xtime_sec += leap; + ts.tv_sec = leap; + ts.tv_nsec = 0; + tk_set_wall_to_mono(tk, + timespec_sub(tk->wall_to_monotonic, ts)); + + clock_was_set_delayed(); + } } } -- cgit v1.2.3-70-g09d2 From 4e250fdde9be50581c7dd5fed88c9b9960615314 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 27 Jul 2012 14:48:13 -0400 Subject: time: Remove all direct references to timekeeper Ingo noted that the numerous timekeeper.value references made the timekeeping code ugly and caused many long lines that had to be broken up. He recommended replacing timekeeper.value references with tk->value. This patch provides a local tk value for all top level time functions and sets it to &timekeeper. Then all timekeeper access is done via a tk pointer. Signed-off-by: John Stultz Cc: Prarit Bhargava Link: http://lkml.kernel.org/r/1343414893-45779-6-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 282 +++++++++++++++++++++++++--------------------- 1 file changed, 154 insertions(+), 128 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4da65592b4d..2988bc81918 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -292,18 +292,19 @@ static void timekeeping_forward_now(struct timekeeper *tk) */ void getnstimeofday(struct timespec *ts) { + struct timekeeper *tk = &timekeeper; unsigned long seq; s64 nsecs = 0; WARN_ON(timekeeping_suspended); do { - seq = read_seqbegin(&timekeeper.lock); + seq = read_seqbegin(&tk->lock); - ts->tv_sec = timekeeper.xtime_sec; - ts->tv_nsec = timekeeping_get_ns(&timekeeper); + ts->tv_sec = tk->xtime_sec; + ts->tv_nsec = timekeeping_get_ns(tk); - } while (read_seqretry(&timekeeper.lock, seq)); + } while (read_seqretry(&tk->lock, seq)); timespec_add_ns(ts, nsecs); } @@ -311,19 +312,18 @@ EXPORT_SYMBOL(getnstimeofday); ktime_t ktime_get(void) { + struct timekeeper *tk = &timekeeper; unsigned int seq; s64 secs, nsecs; WARN_ON(timekeeping_suspended); do { - seq = read_seqbegin(&timekeeper.lock); - secs = timekeeper.xtime_sec + - timekeeper.wall_to_monotonic.tv_sec; - nsecs = timekeeping_get_ns(&timekeeper) + - timekeeper.wall_to_monotonic.tv_nsec; + seq = read_seqbegin(&tk->lock); + secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; + nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec; - } while (read_seqretry(&timekeeper.lock, seq)); + } while (read_seqretry(&tk->lock, seq)); /* * Use ktime_set/ktime_add_ns to create a proper ktime on * 32-bit architectures without CONFIG_KTIME_SCALAR. @@ -342,18 +342,19 @@ EXPORT_SYMBOL_GPL(ktime_get); */ void ktime_get_ts(struct timespec *ts) { + struct timekeeper *tk = &timekeeper; struct timespec tomono; unsigned int seq; WARN_ON(timekeeping_suspended); do { - seq = read_seqbegin(&timekeeper.lock); - ts->tv_sec = timekeeper.xtime_sec; - ts->tv_nsec = timekeeping_get_ns(&timekeeper); - tomono = timekeeper.wall_to_monotonic; + seq = read_seqbegin(&tk->lock); + ts->tv_sec = tk->xtime_sec; + ts->tv_nsec = timekeeping_get_ns(tk); + tomono = tk->wall_to_monotonic; - } while (read_seqretry(&timekeeper.lock, seq)); + } while (read_seqretry(&tk->lock, seq)); set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, ts->tv_nsec + tomono.tv_nsec); @@ -373,22 +374,23 @@ EXPORT_SYMBOL_GPL(ktime_get_ts); */ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) { + struct timekeeper *tk = &timekeeper; unsigned long seq; s64 nsecs_raw, nsecs_real; WARN_ON_ONCE(timekeeping_suspended); do { - seq = read_seqbegin(&timekeeper.lock); + seq = read_seqbegin(&tk->lock); - *ts_raw = timekeeper.raw_time; - ts_real->tv_sec = timekeeper.xtime_sec; + *ts_raw = tk->raw_time; + ts_real->tv_sec = tk->xtime_sec; ts_real->tv_nsec = 0; - nsecs_raw = timekeeping_get_ns_raw(&timekeeper); - nsecs_real = timekeeping_get_ns(&timekeeper); + nsecs_raw = timekeeping_get_ns_raw(tk); + nsecs_real = timekeeping_get_ns(tk); - } while (read_seqretry(&timekeeper.lock, seq)); + } while (read_seqretry(&tk->lock, seq)); timespec_add_ns(ts_raw, nsecs_raw); timespec_add_ns(ts_real, nsecs_real); @@ -421,28 +423,28 @@ EXPORT_SYMBOL(do_gettimeofday); */ int do_settimeofday(const struct timespec *tv) { + struct timekeeper *tk = &timekeeper; struct timespec ts_delta, xt; unsigned long flags; if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irqsave(&timekeeper.lock, flags); + write_seqlock_irqsave(&tk->lock, flags); - timekeeping_forward_now(&timekeeper); + timekeeping_forward_now(tk); - xt = tk_xtime(&timekeeper); + xt = tk_xtime(tk); ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec; - tk_set_wall_to_mono(&timekeeper, - timespec_sub(timekeeper.wall_to_monotonic, ts_delta)); + tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, ts_delta)); - tk_set_xtime(&timekeeper, tv); + tk_set_xtime(tk, tv); - timekeeping_update(&timekeeper, true); + timekeeping_update(tk, true); - write_sequnlock_irqrestore(&timekeeper.lock, flags); + write_sequnlock_irqrestore(&tk->lock, flags); /* signal hrtimers about time change */ clock_was_set(); @@ -459,23 +461,23 @@ EXPORT_SYMBOL(do_settimeofday); */ int timekeeping_inject_offset(struct timespec *ts) { + struct timekeeper *tk = &timekeeper; unsigned long flags; if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - write_seqlock_irqsave(&timekeeper.lock, flags); + write_seqlock_irqsave(&tk->lock, flags); - timekeeping_forward_now(&timekeeper); + timekeeping_forward_now(tk); - tk_xtime_add(&timekeeper, ts); - tk_set_wall_to_mono(&timekeeper, - timespec_sub(timekeeper.wall_to_monotonic, *ts)); + tk_xtime_add(tk, ts); + tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); - timekeeping_update(&timekeeper, true); + timekeeping_update(tk, true); - write_sequnlock_irqrestore(&timekeeper.lock, flags); + write_sequnlock_irqrestore(&tk->lock, flags); /* signal hrtimers about time change */ clock_was_set(); @@ -491,23 +493,24 @@ EXPORT_SYMBOL(timekeeping_inject_offset); */ static int change_clocksource(void *data) { + struct timekeeper *tk = &timekeeper; struct clocksource *new, *old; unsigned long flags; new = (struct clocksource *) data; - write_seqlock_irqsave(&timekeeper.lock, flags); + write_seqlock_irqsave(&tk->lock, flags); - timekeeping_forward_now(&timekeeper); + timekeeping_forward_now(tk); if (!new->enable || new->enable(new) == 0) { - old = timekeeper.clock; - tk_setup_internals(&timekeeper, new); + old = tk->clock; + tk_setup_internals(tk, new); if (old->disable) old->disable(old); } - timekeeping_update(&timekeeper, true); + timekeeping_update(tk, true); - write_sequnlock_irqrestore(&timekeeper.lock, flags); + write_sequnlock_irqrestore(&tk->lock, flags); return 0; } @@ -521,7 +524,9 @@ static int change_clocksource(void *data) */ void timekeeping_notify(struct clocksource *clock) { - if (timekeeper.clock == clock) + struct timekeeper *tk = &timekeeper; + + if (tk->clock == clock) return; stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); @@ -550,15 +555,16 @@ EXPORT_SYMBOL_GPL(ktime_get_real); */ void getrawmonotonic(struct timespec *ts) { + struct timekeeper *tk = &timekeeper; unsigned long seq; s64 nsecs; do { - seq = read_seqbegin(&timekeeper.lock); - nsecs = timekeeping_get_ns_raw(&timekeeper); - *ts = timekeeper.raw_time; + seq = read_seqbegin(&tk->lock); + nsecs = timekeeping_get_ns_raw(tk); + *ts = tk->raw_time; - } while (read_seqretry(&timekeeper.lock, seq)); + } while (read_seqretry(&tk->lock, seq)); timespec_add_ns(ts, nsecs); } @@ -569,15 +575,16 @@ EXPORT_SYMBOL(getrawmonotonic); */ int timekeeping_valid_for_hres(void) { + struct timekeeper *tk = &timekeeper; unsigned long seq; int ret; do { - seq = read_seqbegin(&timekeeper.lock); + seq = read_seqbegin(&tk->lock); - ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; + ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; - } while (read_seqretry(&timekeeper.lock, seq)); + } while (read_seqretry(&tk->lock, seq)); return ret; } @@ -587,15 +594,16 @@ int timekeeping_valid_for_hres(void) */ u64 timekeeping_max_deferment(void) { + struct timekeeper *tk = &timekeeper; unsigned long seq; u64 ret; do { - seq = read_seqbegin(&timekeeper.lock); + seq = read_seqbegin(&tk->lock); - ret = timekeeper.clock->max_idle_ns; + ret = tk->clock->max_idle_ns; - } while (read_seqretry(&timekeeper.lock, seq)); + } while (read_seqretry(&tk->lock, seq)); return ret; } @@ -635,6 +643,7 @@ void __attribute__((weak)) read_boot_clock(struct timespec *ts) */ void __init timekeeping_init(void) { + struct timekeeper *tk = &timekeeper; struct clocksource *clock; unsigned long flags; struct timespec now, boot, tmp; @@ -642,30 +651,30 @@ void __init timekeeping_init(void) read_persistent_clock(&now); read_boot_clock(&boot); - seqlock_init(&timekeeper.lock); + seqlock_init(&tk->lock); ntp_init(); - write_seqlock_irqsave(&timekeeper.lock, flags); + write_seqlock_irqsave(&tk->lock, flags); clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); - tk_setup_internals(&timekeeper, clock); + tk_setup_internals(tk, clock); - tk_set_xtime(&timekeeper, &now); - timekeeper.raw_time.tv_sec = 0; - timekeeper.raw_time.tv_nsec = 0; + tk_set_xtime(tk, &now); + tk->raw_time.tv_sec = 0; + tk->raw_time.tv_nsec = 0; if (boot.tv_sec == 0 && boot.tv_nsec == 0) - boot = tk_xtime(&timekeeper); + boot = tk_xtime(tk); set_normalized_timespec(&tmp, -boot.tv_sec, -boot.tv_nsec); - tk_set_wall_to_mono(&timekeeper, tmp); + tk_set_wall_to_mono(tk, tmp); tmp.tv_sec = 0; tmp.tv_nsec = 0; - tk_set_sleep_time(&timekeeper, tmp); + tk_set_sleep_time(tk, tmp); - write_sequnlock_irqrestore(&timekeeper.lock, flags); + write_sequnlock_irqrestore(&tk->lock, flags); } /* time in seconds when suspend began */ @@ -703,6 +712,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, */ void timekeeping_inject_sleeptime(struct timespec *delta) { + struct timekeeper *tk = &timekeeper; unsigned long flags; struct timespec ts; @@ -711,15 +721,15 @@ void timekeeping_inject_sleeptime(struct timespec *delta) if (!(ts.tv_sec == 0 && ts.tv_nsec == 0)) return; - write_seqlock_irqsave(&timekeeper.lock, flags); + write_seqlock_irqsave(&tk->lock, flags); - timekeeping_forward_now(&timekeeper); + timekeeping_forward_now(tk); - __timekeeping_inject_sleeptime(&timekeeper, delta); + __timekeeping_inject_sleeptime(tk, delta); - timekeeping_update(&timekeeper, true); + timekeeping_update(tk, true); - write_sequnlock_irqrestore(&timekeeper.lock, flags); + write_sequnlock_irqrestore(&tk->lock, flags); /* signal hrtimers about time change */ clock_was_set(); @@ -734,6 +744,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) */ static void timekeeping_resume(void) { + struct timekeeper *tk = &timekeeper; unsigned long flags; struct timespec ts; @@ -741,18 +752,18 @@ static void timekeeping_resume(void) clocksource_resume(); - write_seqlock_irqsave(&timekeeper.lock, flags); + write_seqlock_irqsave(&tk->lock, flags); if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { ts = timespec_sub(ts, timekeeping_suspend_time); - __timekeeping_inject_sleeptime(&timekeeper, &ts); + __timekeeping_inject_sleeptime(tk, &ts); } /* re-base the last cycle value */ - timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); - timekeeper.ntp_error = 0; + tk->clock->cycle_last = tk->clock->read(tk->clock); + tk->ntp_error = 0; timekeeping_suspended = 0; - timekeeping_update(&timekeeper, false); - write_sequnlock_irqrestore(&timekeeper.lock, flags); + timekeeping_update(tk, false); + write_sequnlock_irqrestore(&tk->lock, flags); touch_softlockup_watchdog(); @@ -764,14 +775,15 @@ static void timekeeping_resume(void) static int timekeeping_suspend(void) { + struct timekeeper *tk = &timekeeper; unsigned long flags; struct timespec delta, delta_delta; static struct timespec old_delta; read_persistent_clock(&timekeeping_suspend_time); - write_seqlock_irqsave(&timekeeper.lock, flags); - timekeeping_forward_now(&timekeeper); + write_seqlock_irqsave(&tk->lock, flags); + timekeeping_forward_now(tk); timekeeping_suspended = 1; /* @@ -780,7 +792,7 @@ static int timekeeping_suspend(void) * try to compensate so the difference in system time * and persistent_clock time stays close to constant. */ - delta = timespec_sub(tk_xtime(&timekeeper), timekeeping_suspend_time); + delta = timespec_sub(tk_xtime(tk), timekeeping_suspend_time); delta_delta = timespec_sub(delta, old_delta); if (abs(delta_delta.tv_sec) >= 2) { /* @@ -793,7 +805,7 @@ static int timekeeping_suspend(void) timekeeping_suspend_time = timespec_add(timekeeping_suspend_time, delta_delta); } - write_sequnlock_irqrestore(&timekeeper.lock, flags); + write_sequnlock_irqrestore(&tk->lock, flags); clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); clocksource_suspend(); @@ -904,7 +916,7 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) * the error. This causes the likely below to be unlikely. * * The proper fix is to avoid rounding up by using - * the high precision timekeeper.xtime_nsec instead of + * the high precision tk->xtime_nsec instead of * xtime.tv_nsec everywhere. Fixing this will take some * time. */ @@ -1094,21 +1106,22 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, static void update_wall_time(void) { struct clocksource *clock; + struct timekeeper *tk = &timekeeper; cycle_t offset; int shift = 0, maxshift; unsigned long flags; s64 remainder; - write_seqlock_irqsave(&timekeeper.lock, flags); + write_seqlock_irqsave(&tk->lock, flags); /* Make sure we're fully resumed: */ if (unlikely(timekeeping_suspended)) goto out; - clock = timekeeper.clock; + clock = tk->clock; #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET - offset = timekeeper.cycle_interval; + offset = tk->cycle_interval; #else offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #endif @@ -1121,19 +1134,19 @@ static void update_wall_time(void) * chunk in one go, and then try to consume the next smaller * doubled multiple. */ - shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); + shift = ilog2(offset) - ilog2(tk->cycle_interval); shift = max(0, shift); /* Bound shift to one less than what overflows tick_length */ maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; shift = min(shift, maxshift); - while (offset >= timekeeper.cycle_interval) { - offset = logarithmic_accumulation(&timekeeper, offset, shift); - if(offset < timekeeper.cycle_interval<= tk->cycle_interval) { + offset = logarithmic_accumulation(tk, offset, shift); + if (offset < tk->cycle_interval<xtime_nsec & ((1 << tk->shift) - 1); + tk->xtime_nsec -= remainder; + tk->xtime_nsec += 1 << tk->shift; + tk->ntp_error += remainder << tk->ntp_error_shift; /* * Finally, make sure that after the rounding * xtime_nsec isn't larger than NSEC_PER_SEC */ - accumulate_nsecs_to_secs(&timekeeper); + accumulate_nsecs_to_secs(tk); - timekeeping_update(&timekeeper, false); + timekeeping_update(tk, false); out: - write_sequnlock_irqrestore(&timekeeper.lock, flags); + write_sequnlock_irqrestore(&tk->lock, flags); } @@ -1176,11 +1189,12 @@ out: */ void getboottime(struct timespec *ts) { + struct timekeeper *tk = &timekeeper; struct timespec boottime = { - .tv_sec = timekeeper.wall_to_monotonic.tv_sec + - timekeeper.total_sleep_time.tv_sec, - .tv_nsec = timekeeper.wall_to_monotonic.tv_nsec + - timekeeper.total_sleep_time.tv_nsec + .tv_sec = tk->wall_to_monotonic.tv_sec + + tk->total_sleep_time.tv_sec, + .tv_nsec = tk->wall_to_monotonic.tv_nsec + + tk->total_sleep_time.tv_nsec }; set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); @@ -1198,19 +1212,20 @@ EXPORT_SYMBOL_GPL(getboottime); */ void get_monotonic_boottime(struct timespec *ts) { + struct timekeeper *tk = &timekeeper; struct timespec tomono, sleep; unsigned int seq; WARN_ON(timekeeping_suspended); do { - seq = read_seqbegin(&timekeeper.lock); - ts->tv_sec = timekeeper.xtime_sec; - ts->tv_nsec = timekeeping_get_ns(&timekeeper); - tomono = timekeeper.wall_to_monotonic; - sleep = timekeeper.total_sleep_time; + seq = read_seqbegin(&tk->lock); + ts->tv_sec = tk->xtime_sec; + ts->tv_nsec = timekeeping_get_ns(tk); + tomono = tk->wall_to_monotonic; + sleep = tk->total_sleep_time; - } while (read_seqretry(&timekeeper.lock, seq)); + } while (read_seqretry(&tk->lock, seq)); set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec, ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec); @@ -1240,31 +1255,38 @@ EXPORT_SYMBOL_GPL(ktime_get_boottime); */ void monotonic_to_bootbased(struct timespec *ts) { - *ts = timespec_add(*ts, timekeeper.total_sleep_time); + struct timekeeper *tk = &timekeeper; + + *ts = timespec_add(*ts, tk->total_sleep_time); } EXPORT_SYMBOL_GPL(monotonic_to_bootbased); unsigned long get_seconds(void) { - return timekeeper.xtime_sec; + struct timekeeper *tk = &timekeeper; + + return tk->xtime_sec; } EXPORT_SYMBOL(get_seconds); struct timespec __current_kernel_time(void) { - return tk_xtime(&timekeeper); + struct timekeeper *tk = &timekeeper; + + return tk_xtime(tk); } struct timespec current_kernel_time(void) { + struct timekeeper *tk = &timekeeper; struct timespec now; unsigned long seq; do { - seq = read_seqbegin(&timekeeper.lock); + seq = read_seqbegin(&tk->lock); - now = tk_xtime(&timekeeper); - } while (read_seqretry(&timekeeper.lock, seq)); + now = tk_xtime(tk); + } while (read_seqretry(&tk->lock, seq)); return now; } @@ -1272,15 +1294,16 @@ EXPORT_SYMBOL(current_kernel_time); struct timespec get_monotonic_coarse(void) { + struct timekeeper *tk = &timekeeper; struct timespec now, mono; unsigned long seq; do { - seq = read_seqbegin(&timekeeper.lock); + seq = read_seqbegin(&tk->lock); - now = tk_xtime(&timekeeper); - mono = timekeeper.wall_to_monotonic; - } while (read_seqretry(&timekeeper.lock, seq)); + now = tk_xtime(tk); + mono = tk->wall_to_monotonic; + } while (read_seqretry(&tk->lock, seq)); set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, now.tv_nsec + mono.tv_nsec); @@ -1309,14 +1332,15 @@ void do_timer(unsigned long ticks) void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, struct timespec *wtom, struct timespec *sleep) { + struct timekeeper *tk = &timekeeper; unsigned long seq; do { - seq = read_seqbegin(&timekeeper.lock); - *xtim = tk_xtime(&timekeeper); - *wtom = timekeeper.wall_to_monotonic; - *sleep = timekeeper.total_sleep_time; - } while (read_seqretry(&timekeeper.lock, seq)); + seq = read_seqbegin(&tk->lock); + *xtim = tk_xtime(tk); + *wtom = tk->wall_to_monotonic; + *sleep = tk->total_sleep_time; + } while (read_seqretry(&tk->lock, seq)); } #ifdef CONFIG_HIGH_RES_TIMERS @@ -1330,19 +1354,20 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, */ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) { + struct timekeeper *tk = &timekeeper; ktime_t now; unsigned int seq; u64 secs, nsecs; do { - seq = read_seqbegin(&timekeeper.lock); + seq = read_seqbegin(&tk->lock); - secs = timekeeper.xtime_sec; - nsecs = timekeeping_get_ns(&timekeeper); + secs = tk->xtime_sec; + nsecs = timekeeping_get_ns(tk); - *offs_real = timekeeper.offs_real; - *offs_boot = timekeeper.offs_boot; - } while (read_seqretry(&timekeeper.lock, seq)); + *offs_real = tk->offs_real; + *offs_boot = tk->offs_boot; + } while (read_seqretry(&tk->lock, seq)); now = ktime_add_ns(ktime_set(secs, 0), nsecs); now = ktime_sub(now, *offs_real); @@ -1355,13 +1380,14 @@ ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) */ ktime_t ktime_get_monotonic_offset(void) { + struct timekeeper *tk = &timekeeper; unsigned long seq; struct timespec wtom; do { - seq = read_seqbegin(&timekeeper.lock); - wtom = timekeeper.wall_to_monotonic; - } while (read_seqretry(&timekeeper.lock, seq)); + seq = read_seqbegin(&tk->lock); + wtom = tk->wall_to_monotonic; + } while (read_seqretry(&tk->lock, seq)); return timespec_to_ktime(wtom); } -- cgit v1.2.3-70-g09d2 From 1d17d17484d40f2d5b35c79518597a2b25296996 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Aug 2012 21:21:14 +0200 Subject: time: Fix adjustment cleanup bug in timekeeping_adjust() Tetsuo Handa reported that sporadically the system clock starts counting up too quickly which is enough to confuse the hangcheck timer to print a bogus stall warning. Commit 2a8c0883 "time: Move xtime_nsec adjustment underflow handling timekeeping_adjust" overlooked this exit path: } else return; which should really be a proper exit sequence, fixing the bug as a side effect. Also make the flow more readable by properly balancing curly braces. Reported-by: Tetsuo Handa wrote: Tested-by: Tetsuo Handa wrote: Signed-off-by: Ingo Molnar Cc: john.stultz@linaro.org Cc: a.p.zijlstra@chello.nl Cc: richardcochran@gmail.com Cc: prarit@redhat.com Link: http://lkml.kernel.org/r/20120804192114.GA28347@gmail.com Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2988bc81918..e16af197a2b 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -923,20 +923,22 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) if (likely(error <= interval)) adj = 1; else - adj = timekeeping_bigadjust(tk, error, &interval, - &offset); - } else if (error < -interval) { - /* See comment above, this is just switched for the negative */ - error >>= 2; - if (likely(error >= -interval)) { - adj = -1; - interval = -interval; - offset = -offset; - } else - adj = timekeeping_bigadjust(tk, error, &interval, - &offset); - } else - return; + adj = timekeeping_bigadjust(tk, error, &interval, &offset); + } else { + if (error < -interval) { + /* See comment above, this is just switched for the negative */ + error >>= 2; + if (likely(error >= -interval)) { + adj = -1; + interval = -interval; + offset = -offset; + } else { + adj = timekeeping_bigadjust(tk, error, &interval, &offset); + } + } else { + goto out_adjust; + } + } if (unlikely(tk->clock->maxadj && (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { @@ -999,6 +1001,7 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) tk->xtime_nsec -= offset; tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; +out_adjust: /* * It may be possible that when we entered this function, xtime_nsec * was very small. Further, if we're slightly speeding the clocksource -- cgit v1.2.3-70-g09d2