From 71120f183bff04ba4f7ba3cc554202061912d548 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Jul 2007 01:49:16 -0700 Subject: timekeeping: fixup shadow variable argument clocksource_adjust() has a clock argument, which shadows the file global clock variable. Fix this up. Signed-off-by: Thomas Gleixner Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/timekeeping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 728cedfd3cb..89698776613 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -401,7 +401,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * this is optimized for the most common adjustments of -1,0,1, * for other values we can do a bit more work. */ -static void clocksource_adjust(struct clocksource *clock, s64 offset) +static void clocksource_adjust(s64 offset) { s64 error, interval = clock->cycle_interval; int adj; @@ -476,7 +476,7 @@ void update_wall_time(void) } /* correct the clock when NTP error is too big */ - clocksource_adjust(clock, offset); + clocksource_adjust(offset); /* store full nanoseconds into xtime */ xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; -- cgit v1.2.3-70-g09d2 From 1f564ad6d4182859612cbae452122e5eb2d62a76 Mon Sep 17 00:00:00 2001 From: Bob Picco Date: Wed, 18 Jul 2007 15:51:28 -0700 Subject: [IA64] remove time interpolator Remove time_interpolator code (This is generic code, but only user was ia64. It has been superseded by the CONFIG_GENERIC_TIME code). Signed-off-by: Bob Picco Signed-off-by: John Stultz Signed-off-by: Peter Keilty Signed-off-by: Tony Luck --- Documentation/time_interpolators.txt | 41 -------- include/linux/timex.h | 60 ----------- kernel/time.c | 88 ---------------- kernel/time/ntp.c | 10 -- kernel/time/timekeeping.c | 4 - kernel/timer.c | 188 ----------------------------------- 6 files changed, 391 deletions(-) delete mode 100644 Documentation/time_interpolators.txt (limited to 'kernel/time') diff --git a/Documentation/time_interpolators.txt b/Documentation/time_interpolators.txt deleted file mode 100644 index e3b60854fbc..00000000000 --- a/Documentation/time_interpolators.txt +++ /dev/null @@ -1,41 +0,0 @@ -Time Interpolators ------------------- - -Time interpolators are a base of time calculation between timer ticks and -allow an accurate determination of time down to the accuracy of the time -source in nanoseconds. - -The architecture specific code typically provides gettimeofday and -settimeofday under Linux. The time interpolator provides both if an arch -defines CONFIG_TIME_INTERPOLATION. The arch still must set up timer tick -operations and call the necessary functions to advance the clock. - -With the time interpolator a standardized interface exists for time -interpolation between ticks. The provided logic is highly scalable -and has been tested in SMP situations of up to 512 CPUs. - -If CONFIG_TIME_INTERPOLATION is defined then the architecture specific code -(or the device drivers - like HPET) may register time interpolators. -These are typically defined in the following way: - -static struct time_interpolator my_interpolator { - .frequency = MY_FREQUENCY, - .source = TIME_SOURCE_MMIO32, - .shift = 8, /* scaling for higher accuracy */ - .drift = -1, /* Unknown drift */ - .jitter = 0 /* time source is stable */ -}; - -void time_init(void) -{ - .... - /* Initialization of the timer *. - my_interpolator.address = &my_timer; - register_time_interpolator(&my_interpolator); - .... -} - -For more details see include/linux/timex.h and kernel/timer.c. - -Christoph Lameter , October 31, 2004 - diff --git a/include/linux/timex.h b/include/linux/timex.h index da929dbbea2..37ac3ff90fa 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -224,66 +224,6 @@ static inline int ntp_synced(void) __x < 0 ? -(-__x >> __s) : __x >> __s; \ }) - -#ifdef CONFIG_TIME_INTERPOLATION - -#define TIME_SOURCE_CPU 0 -#define TIME_SOURCE_MMIO64 1 -#define TIME_SOURCE_MMIO32 2 -#define TIME_SOURCE_FUNCTION 3 - -/* For proper operations time_interpolator clocks must run slightly slower - * than the standard clock since the interpolator may only correct by having - * time jump forward during a tick. A slower clock is usually a side effect - * of the integer divide of the nanoseconds in a second by the frequency. - * The accuracy of the division can be increased by specifying a shift. - * However, this may cause the clock not to be slow enough. - * The interpolator will self-tune the clock by slowing down if no - * resets occur or speeding up if the time jumps per analysis cycle - * become too high. - * - * Setting jitter compensates for a fluctuating timesource by comparing - * to the last value read from the timesource to insure that an earlier value - * is not returned by a later call. The price to pay - * for the compensation is that the timer routines are not as scalable anymore. - */ - -struct time_interpolator { - u16 source; /* time source flags */ - u8 shift; /* increases accuracy of multiply by shifting. */ - /* Note that bits may be lost if shift is set too high */ - u8 jitter; /* if set compensate for fluctuations */ - u32 nsec_per_cyc; /* set by register_time_interpolator() */ - void *addr; /* address of counter or function */ - cycles_t mask; /* mask the valid bits of the counter */ - unsigned long offset; /* nsec offset at last update of interpolator */ - u64 last_counter; /* counter value in units of the counter at last update */ - cycles_t last_cycle; /* Last timer value if TIME_SOURCE_JITTER is set */ - u64 frequency; /* frequency in counts/second */ - long drift; /* drift in parts-per-million (or -1) */ - unsigned long skips; /* skips forward */ - unsigned long ns_skipped; /* nanoseconds skipped */ - struct time_interpolator *next; -}; - -extern void register_time_interpolator(struct time_interpolator *); -extern void unregister_time_interpolator(struct time_interpolator *); -extern void time_interpolator_reset(void); -extern unsigned long time_interpolator_get_offset(void); -extern void time_interpolator_update(long delta_nsec); - -#else /* !CONFIG_TIME_INTERPOLATION */ - -static inline void time_interpolator_reset(void) -{ -} - -static inline void time_interpolator_update(long delta_nsec) -{ -} - -#endif /* !CONFIG_TIME_INTERPOLATION */ - #define TICK_LENGTH_SHIFT 32 #ifdef CONFIG_NO_HZ diff --git a/kernel/time.c b/kernel/time.c index ffe19149d77..e325597f5bf 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -136,7 +136,6 @@ static inline void warp_clock(void) write_seqlock_irq(&xtime_lock); wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; xtime.tv_sec += sys_tz.tz_minuteswest * 60; - time_interpolator_reset(); write_sequnlock_irq(&xtime_lock); clock_was_set(); } @@ -309,92 +308,6 @@ struct timespec timespec_trunc(struct timespec t, unsigned gran) } EXPORT_SYMBOL(timespec_trunc); -#ifdef CONFIG_TIME_INTERPOLATION -void getnstimeofday (struct timespec *tv) -{ - unsigned long seq,sec,nsec; - - do { - seq = read_seqbegin(&xtime_lock); - sec = xtime.tv_sec; - nsec = xtime.tv_nsec+time_interpolator_get_offset(); - } while (unlikely(read_seqretry(&xtime_lock, seq))); - - while (unlikely(nsec >= NSEC_PER_SEC)) { - nsec -= NSEC_PER_SEC; - ++sec; - } - tv->tv_sec = sec; - tv->tv_nsec = nsec; -} -EXPORT_SYMBOL_GPL(getnstimeofday); - -int do_settimeofday (struct timespec *tv) -{ - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec, nsec = tv->tv_nsec; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irq(&xtime_lock); - { - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; - time_interpolator_reset(); - } - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; -} -EXPORT_SYMBOL(do_settimeofday); - -void do_gettimeofday (struct timeval *tv) -{ - unsigned long seq, nsec, usec, sec, offset; - do { - seq = read_seqbegin(&xtime_lock); - offset = time_interpolator_get_offset(); - sec = xtime.tv_sec; - nsec = xtime.tv_nsec; - } while (unlikely(read_seqretry(&xtime_lock, seq))); - - usec = (nsec + offset) / 1000; - - while (unlikely(usec >= USEC_PER_SEC)) { - usec -= USEC_PER_SEC; - ++sec; - } - - tv->tv_sec = sec; - tv->tv_usec = usec; - - /* - * Make sure xtime.tv_sec [returned by sys_time()] always - * follows the gettimeofday() result precisely. This - * condition is extremely unlikely, it can hit at most - * once per second: - */ - if (unlikely(xtime.tv_sec != tv->tv_sec)) { - unsigned long flags; - - write_seqlock_irqsave(&xtime_lock, flags); - update_wall_time(); - write_sequnlock_irqrestore(&xtime_lock, flags); - } -} -EXPORT_SYMBOL(do_gettimeofday); - -#else /* CONFIG_TIME_INTERPOLATION */ - #ifndef CONFIG_GENERIC_TIME /* * Simulate gettimeofday using do_gettimeofday which only allows a timeval @@ -410,7 +323,6 @@ void getnstimeofday(struct timespec *tv) } EXPORT_SYMBOL_GPL(getnstimeofday); #endif -#endif /* CONFIG_TIME_INTERPOLATION */ /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 438c6b723ee..b5e352597cb 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -116,11 +116,6 @@ void second_overflow(void) if (xtime.tv_sec % 86400 == 0) { xtime.tv_sec--; wall_to_monotonic.tv_sec++; - /* - * The timer interpolator will make time change - * gradually instead of an immediate jump by one second - */ - time_interpolator_update(-NSEC_PER_SEC); time_state = TIME_OOP; printk(KERN_NOTICE "Clock: inserting leap second " "23:59:60 UTC\n"); @@ -130,11 +125,6 @@ void second_overflow(void) if ((xtime.tv_sec + 1) % 86400 == 0) { xtime.tv_sec++; wall_to_monotonic.tv_sec--; - /* - * Use of time interpolator for a gradual change of - * time - */ - time_interpolator_update(NSEC_PER_SEC); time_state = TIME_WAIT; printk(KERN_NOTICE "Clock: deleting leap second " "23:59:59 UTC\n"); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 728cedfd3cb..027d46c906e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -466,10 +466,6 @@ void update_wall_time(void) second_overflow(); } - /* interpolator bits */ - time_interpolator_update(clock->xtime_interval - >> clock->shift); - /* accumulate error between NTP and clock interval */ clock->error += current_tick_length(); clock->error -= clock->xtime_interval << (TICK_LENGTH_SHIFT - clock->shift); diff --git a/kernel/timer.c b/kernel/timer.c index b7792fb0338..dbc03ab14ee 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1349,194 +1349,6 @@ void __init init_timers(void) open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); } -#ifdef CONFIG_TIME_INTERPOLATION - -struct time_interpolator *time_interpolator __read_mostly; -static struct time_interpolator *time_interpolator_list __read_mostly; -static DEFINE_SPINLOCK(time_interpolator_lock); - -static inline cycles_t time_interpolator_get_cycles(unsigned int src) -{ - unsigned long (*x)(void); - - switch (src) - { - case TIME_SOURCE_FUNCTION: - x = time_interpolator->addr; - return x(); - - case TIME_SOURCE_MMIO64 : - return readq_relaxed((void __iomem *)time_interpolator->addr); - - case TIME_SOURCE_MMIO32 : - return readl_relaxed((void __iomem *)time_interpolator->addr); - - default: return get_cycles(); - } -} - -static inline u64 time_interpolator_get_counter(int writelock) -{ - unsigned int src = time_interpolator->source; - - if (time_interpolator->jitter) - { - cycles_t lcycle; - cycles_t now; - - do { - lcycle = time_interpolator->last_cycle; - now = time_interpolator_get_cycles(src); - if (lcycle && time_after(lcycle, now)) - return lcycle; - - /* When holding the xtime write lock, there's no need - * to add the overhead of the cmpxchg. Readers are - * force to retry until the write lock is released. - */ - if (writelock) { - time_interpolator->last_cycle = now; - return now; - } - /* Keep track of the last timer value returned. The use of cmpxchg here - * will cause contention in an SMP environment. - */ - } while (unlikely(cmpxchg(&time_interpolator->last_cycle, lcycle, now) != lcycle)); - return now; - } - else - return time_interpolator_get_cycles(src); -} - -void time_interpolator_reset(void) -{ - time_interpolator->offset = 0; - time_interpolator->last_counter = time_interpolator_get_counter(1); -} - -#define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift) - -unsigned long time_interpolator_get_offset(void) -{ - /* If we do not have a time interpolator set up then just return zero */ - if (!time_interpolator) - return 0; - - return time_interpolator->offset + - GET_TI_NSECS(time_interpolator_get_counter(0), time_interpolator); -} - -#define INTERPOLATOR_ADJUST 65536 -#define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST - -void time_interpolator_update(long delta_nsec) -{ - u64 counter; - unsigned long offset; - - /* If there is no time interpolator set up then do nothing */ - if (!time_interpolator) - return; - - /* - * The interpolator compensates for late ticks by accumulating the late - * time in time_interpolator->offset. A tick earlier than expected will - * lead to a reset of the offset and a corresponding jump of the clock - * forward. Again this only works if the interpolator clock is running - * slightly slower than the regular clock and the tuning logic insures - * that. - */ - - counter = time_interpolator_get_counter(1); - offset = time_interpolator->offset + - GET_TI_NSECS(counter, time_interpolator); - - if (delta_nsec < 0 || (unsigned long) delta_nsec < offset) - time_interpolator->offset = offset - delta_nsec; - else { - time_interpolator->skips++; - time_interpolator->ns_skipped += delta_nsec - offset; - time_interpolator->offset = 0; - } - time_interpolator->last_counter = counter; - - /* Tuning logic for time interpolator invoked every minute or so. - * Decrease interpolator clock speed if no skips occurred and an offset is carried. - * Increase interpolator clock speed if we skip too much time. - */ - if (jiffies % INTERPOLATOR_ADJUST == 0) - { - if (time_interpolator->skips == 0 && time_interpolator->offset > tick_nsec) - time_interpolator->nsec_per_cyc--; - if (time_interpolator->ns_skipped > INTERPOLATOR_MAX_SKIP && time_interpolator->offset == 0) - time_interpolator->nsec_per_cyc++; - time_interpolator->skips = 0; - time_interpolator->ns_skipped = 0; - } -} - -static inline int -is_better_time_interpolator(struct time_interpolator *new) -{ - if (!time_interpolator) - return 1; - return new->frequency > 2*time_interpolator->frequency || - (unsigned long)new->drift < (unsigned long)time_interpolator->drift; -} - -void -register_time_interpolator(struct time_interpolator *ti) -{ - unsigned long flags; - - /* Sanity check */ - BUG_ON(ti->frequency == 0 || ti->mask == 0); - - ti->nsec_per_cyc = ((u64)NSEC_PER_SEC << ti->shift) / ti->frequency; - spin_lock(&time_interpolator_lock); - write_seqlock_irqsave(&xtime_lock, flags); - if (is_better_time_interpolator(ti)) { - time_interpolator = ti; - time_interpolator_reset(); - } - write_sequnlock_irqrestore(&xtime_lock, flags); - - ti->next = time_interpolator_list; - time_interpolator_list = ti; - spin_unlock(&time_interpolator_lock); -} - -void -unregister_time_interpolator(struct time_interpolator *ti) -{ - struct time_interpolator *curr, **prev; - unsigned long flags; - - spin_lock(&time_interpolator_lock); - prev = &time_interpolator_list; - for (curr = *prev; curr; curr = curr->next) { - if (curr == ti) { - *prev = curr->next; - break; - } - prev = &curr->next; - } - - write_seqlock_irqsave(&xtime_lock, flags); - if (ti == time_interpolator) { - /* we lost the best time-interpolator: */ - time_interpolator = NULL; - /* find the next-best interpolator */ - for (curr = time_interpolator_list; curr; curr = curr->next) - if (is_better_time_interpolator(curr)) - time_interpolator = curr; - time_interpolator_reset(); - } - write_sequnlock_irqrestore(&xtime_lock, flags); - spin_unlock(&time_interpolator_lock); -} -#endif /* CONFIG_TIME_INTERPOLATION */ - /** * msleep - sleep safely even with waitqueue interruptions * @msecs: Time in milliseconds to sleep for -- cgit v1.2.3-70-g09d2 From 18de5bc4c1f1f1fa5e14f354a7603bd6e9d4e3b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 21 Jul 2007 04:37:34 -0700 Subject: clockevents: fix resume logic We need to make sure, that the clockevent devices are resumed, before the tick is resumed. The current resume logic does not guarantee this. Add CLOCK_EVT_MODE_RESUME and call the set mode functions of the clock event devices before resuming the tick / oneshot functionality. Fixup the existing users. Thanks to Nigel Cunningham for tracking down a long standing thinko, which affected the jinxed VAIO. [akpm@linux-foundation.org: xen build fix] Signed-off-by: Thomas Gleixner Cc: john stultz Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-davinci/time.c | 2 ++ arch/arm/mach-imx/time.c | 1 + arch/arm/mach-ixp4xx/common.c | 2 ++ arch/arm/mach-omap1/time.c | 1 + arch/arm/plat-omap/timer32k.c | 2 ++ arch/i386/kernel/apic.c | 3 ++ arch/i386/kernel/hpet.c | 71 +++------------------------------------ arch/i386/kernel/i8253.c | 26 +++++++------- arch/i386/kernel/vmiclock.c | 1 + arch/i386/xen/time.c | 3 ++ arch/sh/kernel/timers/timer-tmu.c | 1 + arch/sparc64/kernel/time.c | 1 + drivers/lguest/lguest.c | 2 ++ include/linux/clockchips.h | 1 + kernel/time/tick-broadcast.c | 6 ++-- kernel/time/tick-common.c | 16 +++++---- 16 files changed, 51 insertions(+), 88 deletions(-) (limited to 'kernel/time') diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c index 4d8425de692..e96a3dcdc1a 100644 --- a/arch/arm/mach-davinci/time.c +++ b/arch/arm/mach-davinci/time.c @@ -285,6 +285,8 @@ static void davinci_set_mode(enum clock_event_mode mode, case CLOCK_EVT_MODE_SHUTDOWN: t->opts = TIMER_OPTS_DISABLED; break; + case CLOCK_EVT_MODE_RESUME: + break; } } diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c index 010f6fa984a..d86d124aea2 100644 --- a/arch/arm/mach-imx/time.c +++ b/arch/arm/mach-imx/time.c @@ -159,6 +159,7 @@ static void imx_set_mode(enum clock_event_mode mode, struct clock_event_device * break; case CLOCK_EVT_MODE_SHUTDOWN: case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_RESUME: /* Left event sources disabled, no more interrupts appears */ break; } diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 8112f726ffa..23e7fba6d3e 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -459,6 +459,8 @@ static void ixp4xx_set_mode(enum clock_event_mode mode, default: osrt = opts = 0; break; + case CLOCK_EVT_MODE_RESUME: + break; } *IXP4XX_OSRT1 = osrt | opts; diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c index 3705d20c4e5..237651ebae5 100644 --- a/arch/arm/mach-omap1/time.c +++ b/arch/arm/mach-omap1/time.c @@ -156,6 +156,7 @@ static void omap_mpu_set_mode(enum clock_event_mode mode, break; case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: + case CLOCK_EVT_MODE_RESUME: break; } } diff --git a/arch/arm/plat-omap/timer32k.c b/arch/arm/plat-omap/timer32k.c index 2feceec8ecc..b0af014b0e2 100644 --- a/arch/arm/plat-omap/timer32k.c +++ b/arch/arm/plat-omap/timer32k.c @@ -156,6 +156,8 @@ static void omap_32k_timer_set_mode(enum clock_event_mode mode, case CLOCK_EVT_MODE_SHUTDOWN: omap_32k_timer_stop(); break; + case CLOCK_EVT_MODE_RESUME: + break; } } diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 67824f3bb97..610f44b2436 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -263,6 +263,9 @@ static void lapic_timer_setup(enum clock_event_mode mode, v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write_around(APIC_LVTT, v); break; + case CLOCK_EVT_MODE_RESUME: + /* Nothing to do here */ + break; } local_irq_restore(flags); diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c index 17d73459fc5..cfbf792a0bf 100644 --- a/arch/i386/kernel/hpet.c +++ b/arch/i386/kernel/hpet.c @@ -187,6 +187,10 @@ static void hpet_set_mode(enum clock_event_mode mode, cfg &= ~HPET_TN_ENABLE; hpet_writel(cfg, HPET_T0_CFG); break; + + case CLOCK_EVT_MODE_RESUME: + hpet_enable_int(); + break; } } @@ -217,6 +221,7 @@ static struct clocksource clocksource_hpet = { .mask = HPET_MASK, .shift = HPET_SHIFT, .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .resume = hpet_start_counter, }; /* @@ -291,7 +296,6 @@ int __init hpet_enable(void) clocksource_register(&clocksource_hpet); - if (id & HPET_ID_LEGSUP) { hpet_enable_int(); hpet_reserve_platform_timers(id); @@ -524,68 +528,3 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } #endif - - -/* - * Suspend/resume part - */ - -#ifdef CONFIG_PM - -static int hpet_suspend(struct sys_device *sys_device, pm_message_t state) -{ - unsigned long cfg = hpet_readl(HPET_CFG); - - cfg &= ~(HPET_CFG_ENABLE|HPET_CFG_LEGACY); - hpet_writel(cfg, HPET_CFG); - - return 0; -} - -static int hpet_resume(struct sys_device *sys_device) -{ - unsigned int id; - - hpet_start_counter(); - - id = hpet_readl(HPET_ID); - - if (id & HPET_ID_LEGSUP) - hpet_enable_int(); - - return 0; -} - -static struct sysdev_class hpet_class = { - set_kset_name("hpet"), - .suspend = hpet_suspend, - .resume = hpet_resume, -}; - -static struct sys_device hpet_device = { - .id = 0, - .cls = &hpet_class, -}; - - -static __init int hpet_register_sysfs(void) -{ - int err; - - if (!is_hpet_capable()) - return 0; - - err = sysdev_class_register(&hpet_class); - - if (!err) { - err = sysdev_register(&hpet_device); - if (err) - sysdev_class_unregister(&hpet_class); - } - - return err; -} - -device_initcall(hpet_register_sysfs); - -#endif diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c index f8a3c4054c7..931eabe1e56 100644 --- a/arch/i386/kernel/i8253.c +++ b/arch/i386/kernel/i8253.c @@ -3,11 +3,11 @@ * */ #include -#include +#include +#include #include -#include #include -#include +#include #include #include @@ -41,26 +41,24 @@ static void init_pit_timer(enum clock_event_mode mode, case CLOCK_EVT_MODE_PERIODIC: /* binary, mode 2, LSB/MSB, ch 0 */ outb_p(0x34, PIT_MODE); - udelay(10); outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ - udelay(10); outb(LATCH >> 8 , PIT_CH0); /* MSB */ break; - /* - * Avoid unnecessary state transitions, as it confuses - * Geode / Cyrix based boxen. - */ case CLOCK_EVT_MODE_SHUTDOWN: - if (evt->mode == CLOCK_EVT_MODE_UNUSED) - break; case CLOCK_EVT_MODE_UNUSED: - if (evt->mode == CLOCK_EVT_MODE_SHUTDOWN) - break; + outb_p(0x30, PIT_MODE); + outb_p(0, PIT_CH0); /* LSB */ + outb_p(0, PIT_CH0); /* MSB */ + break; + case CLOCK_EVT_MODE_ONESHOT: /* One shot setup */ outb_p(0x38, PIT_MODE); - udelay(10); + break; + + case CLOCK_EVT_MODE_RESUME: + /* Nothing to do here */ break; } spin_unlock_irqrestore(&i8253_lock, flags); diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c index f9b845f4e69..d23077cca45 100644 --- a/arch/i386/kernel/vmiclock.c +++ b/arch/i386/kernel/vmiclock.c @@ -142,6 +142,7 @@ static void vmi_timer_set_mode(enum clock_event_mode mode, switch (mode) { case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_MODE_RESUME: break; case CLOCK_EVT_MODE_PERIODIC: cycles_per_hz = vmi_timer_ops.get_cycle_frequency(); diff --git a/arch/i386/xen/time.c b/arch/i386/xen/time.c index 51fdabf1fd4..dfd6db69ead 100644 --- a/arch/i386/xen/time.c +++ b/arch/i386/xen/time.c @@ -412,6 +412,7 @@ static void xen_timerop_set_mode(enum clock_event_mode mode, break; case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_MODE_RESUME: break; case CLOCK_EVT_MODE_UNUSED: @@ -474,6 +475,8 @@ static void xen_vcpuop_set_mode(enum clock_event_mode mode, HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) BUG(); break; + case CLOCK_EVT_MODE_RESUME: + break; } } diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c index 097ebd49f1b..7aca37d7976 100644 --- a/arch/sh/kernel/timers/timer-tmu.c +++ b/arch/sh/kernel/timers/timer-tmu.c @@ -80,6 +80,7 @@ static void tmu_set_mode(enum clock_event_mode mode, break; case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: + case CLOCK_EVT_MODE_RESUME: break; } } diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index e340eb401fb..87c10a7544d 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -931,6 +931,7 @@ static void sparc64_timer_setup(enum clock_event_mode mode, { switch (mode) { case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_MODE_RESUME: break; case CLOCK_EVT_MODE_SHUTDOWN: diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c index 434fea1e82f..18dade06d4a 100644 --- a/drivers/lguest/lguest.c +++ b/drivers/lguest/lguest.c @@ -398,6 +398,8 @@ static void lguest_clockevent_set_mode(enum clock_event_mode mode, break; case CLOCK_EVT_MODE_PERIODIC: BUG(); + case CLOCK_EVT_MODE_RESUME: + break; } } diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 8d7a39019ac..e0bd46eb241 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -23,6 +23,7 @@ enum clock_event_mode { CLOCK_EVT_MODE_SHUTDOWN, CLOCK_EVT_MODE_PERIODIC, CLOCK_EVT_MODE_ONESHOT, + CLOCK_EVT_MODE_RESUME, }; /* Clock event notification values */ diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 8001d37071f..8339af229cb 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -49,7 +49,7 @@ cpumask_t *tick_get_broadcast_mask(void) */ static void tick_broadcast_start_periodic(struct clock_event_device *bc) { - if (bc && bc->mode == CLOCK_EVT_MODE_SHUTDOWN) + if (bc) tick_setup_periodic(bc, 1); } @@ -299,7 +299,7 @@ void tick_suspend_broadcast(void) spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; - if (bc && tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + if (bc) clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); spin_unlock_irqrestore(&tick_broadcast_lock, flags); @@ -316,6 +316,8 @@ int tick_resume_broadcast(void) bc = tick_broadcast_device.evtdev; if (bc) { + clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME); + switch (tick_broadcast_device.mode) { case TICKDEV_MODE_PERIODIC: if(!cpus_empty(tick_broadcast_mask)) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index a96ec9ab345..77a21abc871 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -318,12 +318,17 @@ static void tick_resume(void) { struct tick_device *td = &__get_cpu_var(tick_cpu_device); unsigned long flags; + int broadcast = tick_resume_broadcast(); spin_lock_irqsave(&tick_device_lock, flags); - if (td->mode == TICKDEV_MODE_PERIODIC) - tick_setup_periodic(td->evtdev, 0); - else - tick_resume_oneshot(); + clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); + + if (!broadcast) { + if (td->mode == TICKDEV_MODE_PERIODIC) + tick_setup_periodic(td->evtdev, 0); + else + tick_resume_oneshot(); + } spin_unlock_irqrestore(&tick_device_lock, flags); } @@ -360,8 +365,7 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason, break; case CLOCK_EVT_NOTIFY_RESUME: - if (!tick_resume_broadcast()) - tick_resume(); + tick_resume(); break; default: -- cgit v1.2.3-70-g09d2 From 5590a536c0bc403fc73908c66c1c88cbed735ecb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 21 Jul 2007 04:37:35 -0700 Subject: clockevents: fix device replacement When a device is replaced by a better rated device, then the broadcast mode needs to be evaluated again. When the new device has no requirement for broadcasting, then the broadcast bits for the CPU must be cleared. Signed-off-by: Thomas Gleixner Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/tick-broadcast.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 8339af229cb..db8e0f3d409 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -31,6 +31,12 @@ struct tick_device tick_broadcast_device; static cpumask_t tick_broadcast_mask; static DEFINE_SPINLOCK(tick_broadcast_lock); +#ifdef CONFIG_TICK_ONESHOT +static void tick_broadcast_clear_oneshot(int cpu); +#else +static inline void tick_broadcast_clear_oneshot(int cpu) { } +#endif + /* * Debugging: see timer_list.c */ @@ -99,8 +105,19 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) cpu_set(cpu, tick_broadcast_mask); tick_broadcast_start_periodic(tick_broadcast_device.evtdev); ret = 1; - } + } else { + /* + * When the new device is not affected by the stop + * feature and the cpu is marked in the broadcast mask + * then clear the broadcast bit. + */ + if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { + int cpu = smp_processor_id(); + cpu_clear(cpu, tick_broadcast_mask); + tick_broadcast_clear_oneshot(cpu); + } + } spin_unlock_irqrestore(&tick_broadcast_lock, flags); return ret; } @@ -487,6 +504,16 @@ out: spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +/* + * Reset the one shot broadcast for a cpu + * + * Called with tick_broadcast_lock held + */ +static void tick_broadcast_clear_oneshot(int cpu) +{ + cpu_clear(cpu, tick_broadcast_oneshot_mask); +} + /** * tick_broadcast_setup_highres - setup the broadcast device for highres */ -- cgit v1.2.3-70-g09d2 From 3704540b48295253bd9c87a5e7ff545f9d47a3b8 Mon Sep 17 00:00:00 2001 From: john stultz Date: Sat, 21 Jul 2007 04:37:35 -0700 Subject: tick management: spread timer interrupt After discussing w/ Thomas over IRC, it seems the issue is the sched tick fires on every cpu at the same time, causing extra lock contention. This smaller change, adds an extra offset per cpu so the ticks don't line up. This patch also drops the idle latency from 40us down to under 20us. Signed-off-by: john stultz Signed-off-by: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/tick-sched.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 52db9e3c526..b416995b975 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -546,6 +546,7 @@ void tick_setup_sched_timer(void) { struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); ktime_t now = ktime_get(); + u64 offset; /* * Emulate tick processing via per-CPU hrtimers: @@ -554,8 +555,12 @@ void tick_setup_sched_timer(void) ts->sched_timer.function = tick_sched_timer; ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ; - /* Get the next period */ + /* Get the next period (per cpu) */ ts->sched_timer.expires = tick_init_jiffy_update(); + offset = ktime_to_ns(tick_period) >> 1; + do_div(offset, NR_CPUS); + offset *= smp_processor_id(); + ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset); for (;;) { hrtimer_forward(&ts->sched_timer, now, tick_period); -- cgit v1.2.3-70-g09d2 From 820de5c39ef7f6866d2c9e6c7d208bcd2a6e1942 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 21 Jul 2007 04:37:36 -0700 Subject: highres: improve debug output Add some more debug information to the hrtimer and clock events code. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 3 +++ kernel/hrtimer.c | 5 ++++- kernel/time/tick-oneshot.c | 15 ++++++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 610f44b2436..83988c3c8e2 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -524,6 +524,9 @@ void __init setup_boot_APIC_clock(void) */ if (nmi_watchdog != NMI_IO_APIC) lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + else + printk(KERN_WARNING "APIC timer registered as dummy," + " due to nmi_watchdog=1!\n"); } /* Setup the lapic or request the broadcast */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 72d034258ba..065a8978662 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -558,7 +558,8 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, */ static int hrtimer_switch_to_hres(void) { - struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); + int cpu = smp_processor_id(); + struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); unsigned long flags; if (base->hres_active) @@ -568,6 +569,8 @@ static int hrtimer_switch_to_hres(void) if (tick_init_highres()) { local_irq_restore(flags); + printk(KERN_WARNING "Could not switch to high resolution " + "mode on CPU %d\n", cpu); return 0; } base->hres_active = 1; diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index f6997ab0c3c..0258d3115d5 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -73,8 +73,21 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) struct clock_event_device *dev = td->evtdev; if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) || - !tick_device_is_functional(dev)) + !tick_device_is_functional(dev)) { + + printk(KERN_INFO "Clockevents: " + "could not switch to one-shot mode:"); + if (!dev) { + printk(" no tick device\n"); + } else { + if (!tick_device_is_functional(dev)) + printk(" %s is not functional.\n", dev->name); + else + printk(" %s does not support one-shot mode.\n", + dev->name); + } return -EINVAL; + } td->mode = TICKDEV_MODE_ONESHOT; dev->event_handler = handler; -- cgit v1.2.3-70-g09d2 From 82644459c592a28a3eab682f9b88d81019ddfe8b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 21 Jul 2007 04:37:37 -0700 Subject: NTP: move the cmos update code into ntp.c i386 and sparc64 have the identical code to update the cmos clock. Move it into kernel/time/ntp.c as there are other architectures coming along with the same requirements. [akpm@linux-foundation.org: build fixes] Signed-off-by: Thomas Gleixner Cc: Chris Wright Cc: Ingo Molnar Cc: john stultz Cc: David Miller Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 4 ++++ arch/i386/kernel/time.c | 50 ++------------------------------------- arch/sparc64/Kconfig | 4 ++++ arch/sparc64/kernel/time.c | 53 ++--------------------------------------- include/asm-i386/timer.h | 1 - include/linux/time.h | 3 +++ kernel/time/ntp.c | 59 +++++++++++++++++++++++++++++++++++++++++++--- 7 files changed, 71 insertions(+), 103 deletions(-) (limited to 'kernel/time') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 7a11b905ef4..361aca8b3ec 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -18,6 +18,10 @@ config GENERIC_TIME bool default y +config GENERIC_CMOS_UPDATE + bool + default y + config CLOCKSOURCE_WATCHDOG bool default y diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index a665df61f08..19a6c678d02 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -207,55 +207,9 @@ unsigned long read_persistent_clock(void) return retval; } -static void sync_cmos_clock(unsigned long dummy); - -static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); -int no_sync_cmos_clock; - -static void sync_cmos_clock(unsigned long dummy) -{ - struct timeval now, next; - int fail = 1; - - /* - * If we have an externally synchronized Linux clock, then update - * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be - * called as close as possible to 500 ms before the new second starts. - * This code is run on a timer. If the clock is set, that timer - * may not expire at the correct time. Thus, we adjust... - */ - if (!ntp_synced()) - /* - * Not synced, exit, do not restart a timer (if one is - * running, let it run out). - */ - return; - - do_gettimeofday(&now); - if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && - now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) - fail = set_rtc_mmss(now.tv_sec); - - next.tv_usec = USEC_AFTER - now.tv_usec; - if (next.tv_usec <= 0) - next.tv_usec += USEC_PER_SEC; - - if (!fail) - next.tv_sec = 659; - else - next.tv_sec = 0; - - if (next.tv_usec >= USEC_PER_SEC) { - next.tv_sec++; - next.tv_usec -= USEC_PER_SEC; - } - mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next)); -} - -void notify_arch_cmos_timer(void) +int update_persistent_clock(struct timespec now) { - if (!no_sync_cmos_clock) - mod_timer(&sync_cmos_timer, jiffies + 1); + return set_rtc_mmss(now.tv_sec); } extern void (*late_time_init)(void); diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index f1cc55677ff..33dabf588bd 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -23,6 +23,10 @@ config GENERIC_TIME bool default y +config GENERIC_CMOS_UPDATE + bool + default y + config GENERIC_CLOCKEVENTS bool default y diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index 87c10a7544d..49063ca2efc 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -403,58 +403,9 @@ static struct sparc64_tick_ops hbtick_operations __read_mostly = { static unsigned long timer_ticks_per_nsec_quotient __read_mostly; -#define TICK_SIZE (tick_nsec / 1000) - -#define USEC_AFTER 500000 -#define USEC_BEFORE 500000 - -static void sync_cmos_clock(unsigned long dummy); - -static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); - -static void sync_cmos_clock(unsigned long dummy) -{ - struct timeval now, next; - int fail = 1; - - /* - * If we have an externally synchronized Linux clock, then update - * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be - * called as close as possible to 500 ms before the new second starts. - * This code is run on a timer. If the clock is set, that timer - * may not expire at the correct time. Thus, we adjust... - */ - if (!ntp_synced()) - /* - * Not synced, exit, do not restart a timer (if one is - * running, let it run out). - */ - return; - - do_gettimeofday(&now); - if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && - now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) - fail = set_rtc_mmss(now.tv_sec); - - next.tv_usec = USEC_AFTER - now.tv_usec; - if (next.tv_usec <= 0) - next.tv_usec += USEC_PER_SEC; - - if (!fail) - next.tv_sec = 659; - else - next.tv_sec = 0; - - if (next.tv_usec >= USEC_PER_SEC) { - next.tv_sec++; - next.tv_usec -= USEC_PER_SEC; - } - mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next)); -} - -void notify_arch_cmos_timer(void) +int update_persistent_clock(struct timespec now) { - mod_timer(&sync_cmos_timer, jiffies + 1); + return set_rtc_mmss(now.tv_sec); } /* Kick start a stopped clock (procedure from the Sun NVRAM/hostid FAQ). */ diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h index 51a713e33a9..b371667cfde 100644 --- a/include/asm-i386/timer.h +++ b/include/asm-i386/timer.h @@ -11,7 +11,6 @@ unsigned long native_calculate_cpu_khz(void); extern int timer_ack; extern int no_timer_check; -extern int no_sync_cmos_clock; extern int recalibrate_cpu_khz(void); #ifndef CONFIG_PARAVIRT diff --git a/include/linux/time.h b/include/linux/time.h index ec3b0ced0af..e6aea5146e5 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -4,6 +4,7 @@ #include #ifdef __KERNEL__ +# include # include #endif @@ -94,6 +95,8 @@ extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock __attribute__((weak)); extern unsigned long read_persistent_clock(void); +extern int update_persistent_clock(struct timespec now); +extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); static inline unsigned long get_seconds(void) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index b5e352597cb..cd91237dbfe 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -175,12 +176,64 @@ u64 current_tick_length(void) return tick_length; } +#ifdef CONFIG_GENERIC_CMOS_UPDATE -void __attribute__ ((weak)) notify_arch_cmos_timer(void) +/* Disable the cmos update - used by virtualization and embedded */ +int no_sync_cmos_clock __read_mostly; + +static void sync_cmos_clock(unsigned long dummy); + +static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); + +static void sync_cmos_clock(unsigned long dummy) +{ + struct timespec now, next; + int fail = 1; + + /* + * If we have an externally synchronized Linux clock, then update + * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be + * called as close as possible to 500 ms before the new second starts. + * This code is run on a timer. If the clock is set, that timer + * may not expire at the correct time. Thus, we adjust... + */ + if (!ntp_synced()) + /* + * Not synced, exit, do not restart a timer (if one is + * running, let it run out). + */ + return; + + getnstimeofday(&now); + if (abs(xtime.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) + fail = update_persistent_clock(now); + + next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec; + if (next.tv_nsec <= 0) + next.tv_nsec += NSEC_PER_SEC; + + if (!fail) + next.tv_sec = 659; + else + next.tv_sec = 0; + + if (next.tv_nsec >= NSEC_PER_SEC) { + next.tv_sec++; + next.tv_nsec -= NSEC_PER_SEC; + } + mod_timer(&sync_cmos_timer, jiffies + timespec_to_jiffies(&next)); +} + +static void notify_cmos_timer(void) { - return; + if (no_sync_cmos_clock) + mod_timer(&sync_cmos_timer, jiffies + 1); } +#else +static inline void notify_cmos_timer(void) { } +#endif + /* adjtimex mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. */ @@ -345,6 +398,6 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0) txc->stbcnt = 0; write_sequnlock_irq(&xtime_lock); do_gettimeofday(&txc->time); - notify_arch_cmos_timer(); + notify_cmos_timer(); return(result); } -- cgit v1.2.3-70-g09d2 From 2c6b47de17c75d553de3e2fb426d8298d2074585 Mon Sep 17 00:00:00 2001 From: john stultz Date: Tue, 24 Jul 2007 17:47:43 -0700 Subject: Cleanup non-arch xtime uses, use get_seconds() or current_kernel_time(). This avoids use of the kernel-internal "xtime" variable directly outside of the actual time-related functions. Instead, use the helper functions that we already have available to us. This doesn't actually change any behaviour, but this will allow us to fix the fact that "xtime" isn't updated very often with CONFIG_NO_HZ (because much of the realtime information is maintained as separate offsets to 'xtime'), which has caused interfaces that use xtime directly to get a time that is out of sync with the real-time clock by up to a third of a second or so. Signed-off-by: John Stultz Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Linus Torvalds --- drivers/rtc/class.c | 5 +++-- drivers/s390/net/ctcmain.c | 6 +++--- drivers/s390/net/netiucv.c | 4 ++-- include/linux/time.h | 2 +- kernel/acct.c | 2 +- kernel/hrtimer.c | 2 +- kernel/time.c | 16 ---------------- kernel/time/timekeeping.c | 16 ++++++++++++++++ kernel/tsacct.c | 2 +- net/rxrpc/af_rxrpc.c | 2 +- net/rxrpc/ar-connection.c | 4 ++-- net/rxrpc/ar-transport.c | 4 ++-- net/rxrpc/rxkad.c | 2 +- 13 files changed, 34 insertions(+), 33 deletions(-) (limited to 'kernel/time') diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 8b3cd31d6a6..10ab3b71ffc 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -46,6 +46,7 @@ static int rtc_suspend(struct device *dev, pm_message_t mesg) { struct rtc_device *rtc = to_rtc_device(dev); struct rtc_time tm; + struct timespec ts = current_kernel_time(); if (strncmp(rtc->dev.bus_id, CONFIG_RTC_HCTOSYS_DEVICE, @@ -57,8 +58,8 @@ static int rtc_suspend(struct device *dev, pm_message_t mesg) /* RTC precision is 1 second; adjust delta for avg 1/2 sec err */ set_normalized_timespec(&delta, - xtime.tv_sec - oldtime, - xtime.tv_nsec - (NSEC_PER_SEC >> 1)); + ts.tv_sec - oldtime, + ts.tv_nsec - (NSEC_PER_SEC >> 1)); return 0; } diff --git a/drivers/s390/net/ctcmain.c b/drivers/s390/net/ctcmain.c index b20fd068173..92e8a37b502 100644 --- a/drivers/s390/net/ctcmain.c +++ b/drivers/s390/net/ctcmain.c @@ -674,7 +674,7 @@ ch_action_txdone(fsm_instance * fi, int event, void *arg) int first = 1; int i; unsigned long duration; - struct timespec done_stamp = xtime; + struct timespec done_stamp = current_kernel_time(); DBF_TEXT(trace, 4, __FUNCTION__); @@ -730,7 +730,7 @@ ch_action_txdone(fsm_instance * fi, int event, void *arg) spin_unlock(&ch->collect_lock); ch->ccw[1].count = ch->trans_skb->len; fsm_addtimer(&ch->timer, CTC_TIMEOUT_5SEC, CH_EVENT_TIMER, ch); - ch->prof.send_stamp = xtime; + ch->prof.send_stamp = current_kernel_time(); rc = ccw_device_start(ch->cdev, &ch->ccw[0], (unsigned long) ch, 0xff, 0); ch->prof.doios_multi++; @@ -2281,7 +2281,7 @@ transmit_skb(struct channel *ch, struct sk_buff *skb) fsm_newstate(ch->fsm, CH_STATE_TX); fsm_addtimer(&ch->timer, CTC_TIMEOUT_5SEC, CH_EVENT_TIMER, ch); spin_lock_irqsave(get_ccwdev_lock(ch->cdev), saveflags); - ch->prof.send_stamp = xtime; + ch->prof.send_stamp = current_kernel_time(); rc = ccw_device_start(ch->cdev, &ch->ccw[ccw_idx], (unsigned long) ch, 0xff, 0); spin_unlock_irqrestore(get_ccwdev_lock(ch->cdev), saveflags); diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 3d28e1a5bf7..26888947433 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -753,7 +753,7 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg) header.next = 0; memcpy(skb_put(conn->tx_buff, NETIUCV_HDRLEN), &header, NETIUCV_HDRLEN); - conn->prof.send_stamp = xtime; + conn->prof.send_stamp = current_kernel_time(); txmsg.class = 0; txmsg.tag = 0; rc = iucv_message_send(conn->path, &txmsg, 0, 0, @@ -1185,7 +1185,7 @@ static int netiucv_transmit_skb(struct iucv_connection *conn, memcpy(skb_put(nskb, NETIUCV_HDRLEN), &header, NETIUCV_HDRLEN); fsm_newstate(conn->fsm, CONN_STATE_TX); - conn->prof.send_stamp = xtime; + conn->prof.send_stamp = current_kernel_time(); msg.tag = 1; msg.class = 0; diff --git a/include/linux/time.h b/include/linux/time.h index e6aea5146e5..71181df8b74 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -107,7 +107,7 @@ static inline unsigned long get_seconds(void) struct timespec current_kernel_time(void); #define CURRENT_TIME (current_kernel_time()) -#define CURRENT_TIME_SEC ((struct timespec) { xtime.tv_sec, 0 }) +#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday(struct timespec *tv); diff --git a/kernel/acct.c b/kernel/acct.c index 70d0d88e555..24f0f8b2ba7 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -468,7 +468,7 @@ static void do_acct_process(struct file *file) } #endif do_div(elapsed, AHZ); - ac.ac_btime = xtime.tv_sec - elapsed; + ac.ac_btime = get_seconds() - elapsed; /* we really need to bite the bullet and change layout */ ac.ac_uid = current->uid; ac.ac_gid = current->gid; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index eb1ddebd2c0..a7bb05e6cb6 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -144,7 +144,7 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) #ifdef CONFIG_NO_HZ getnstimeofday(&xts); #else - xts = xtime; + xts = current_kernel_time(); #endif tom = wall_to_monotonic; } while (read_seqretry(&xtime_lock, seq)); diff --git a/kernel/time.c b/kernel/time.c index 5b81da08bbd..2289a8d6831 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -215,22 +215,6 @@ asmlinkage long sys_adjtimex(struct timex __user *txc_p) return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; } -inline struct timespec current_kernel_time(void) -{ - struct timespec now; - unsigned long seq; - - do { - seq = read_seqbegin(&xtime_lock); - - now = xtime; - } while (read_seqretry(&xtime_lock, seq)); - - return now; -} - -EXPORT_SYMBOL(current_kernel_time); - /** * current_fs_time - Return FS time * @sb: Superblock. diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 88c81026e00..07a3f1420c2 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -509,3 +509,19 @@ void monotonic_to_bootbased(struct timespec *ts) { ts->tv_sec += total_sleep_time; } + +struct timespec current_kernel_time(void) +{ + struct timespec now; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + + now = xtime; + } while (read_seqretry(&xtime_lock, seq)); + + return now; +} + +EXPORT_SYMBOL(current_kernel_time); diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 658f638c402..c122131a122 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -39,7 +39,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) ac_etime = timespec_to_ns(&ts); do_div(ac_etime, NSEC_PER_USEC); stats->ac_etime = ac_etime; - stats->ac_btime = xtime.tv_sec - ts.tv_sec; + stats->ac_btime = get_seconds() - ts.tv_sec; if (thread_group_leader(tsk)) { stats->ac_exitcode = tsk->exit_code; if (tsk->flags & PF_FORKNOEXEC) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 16a68df4e36..c58fa0d1be2 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -787,7 +787,7 @@ static int __init af_rxrpc_init(void) BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof(dummy_skb->cb)); - rxrpc_epoch = htonl(xtime.tv_sec); + rxrpc_epoch = htonl(get_seconds()); ret = -ENOMEM; rxrpc_call_jar = kmem_cache_create( diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 482750efc23..372b24466dc 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -791,7 +791,7 @@ void rxrpc_put_connection(struct rxrpc_connection *conn) ASSERTCMP(atomic_read(&conn->usage), >, 0); - conn->put_time = xtime.tv_sec; + conn->put_time = get_seconds(); if (atomic_dec_and_test(&conn->usage)) { _debug("zombie"); rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); @@ -835,7 +835,7 @@ void rxrpc_connection_reaper(struct work_struct *work) _enter(""); - now = xtime.tv_sec; + now = get_seconds(); earliest = ULONG_MAX; write_lock_bh(&rxrpc_connection_lock); diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c index d43d78f1930..bb282a6a19f 100644 --- a/net/rxrpc/ar-transport.c +++ b/net/rxrpc/ar-transport.c @@ -183,7 +183,7 @@ void rxrpc_put_transport(struct rxrpc_transport *trans) ASSERTCMP(atomic_read(&trans->usage), >, 0); - trans->put_time = xtime.tv_sec; + trans->put_time = get_seconds(); if (unlikely(atomic_dec_and_test(&trans->usage))) _debug("zombie"); /* let the reaper determine the timeout to avoid a race with @@ -219,7 +219,7 @@ static void rxrpc_transport_reaper(struct work_struct *work) _enter(""); - now = xtime.tv_sec; + now = get_seconds(); earliest = ULONG_MAX; /* extract all the transports that have been dead too long */ diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 5ec705144e1..ac3cabdca78 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -916,7 +916,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, issue = be32_to_cpu(stamp); } p += 4; - now = xtime.tv_sec; + now = get_seconds(); _debug("KIV ISSUE: %lx [%lx]", issue, now); /* check the ticket is in date */ -- cgit v1.2.3-70-g09d2 From 17c38b7490b3f0300c7812aefdae2ddda7ab4112 Mon Sep 17 00:00:00 2001 From: john stultz Date: Tue, 24 Jul 2007 18:38:34 -0700 Subject: Cache xtime every call to update_wall_time This avoids xtime lag seen with dynticks, because while 'xtime' itself is still not updated often, we keep a 'xtime_cache' variable around that contains the approximate real-time that _is_ updated each time we do a 'update_wall_time()', and is thus never off by more than one tick. IOW, this restores the original semantics for 'xtime' users, as long as you use the proper abstraction functions (ie 'current_kernel_time()' or 'get_seconds()' depending on whether you want a timespec or just the seconds field). [ Updated Patch. As penance for my sins I've also yanked another #ifdef that was added to avoid the xtime lag w/ hrtimers. ] Signed-off-by: John Stultz Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Linus Torvalds --- include/linux/time.h | 6 +----- kernel/hrtimer.c | 4 ---- kernel/time/timekeeping.c | 26 +++++++++++++++++++++++--- 3 files changed, 24 insertions(+), 12 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/time.h b/include/linux/time.h index 71181df8b74..6a5f503b4f1 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -99,11 +99,7 @@ extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); -static inline unsigned long get_seconds(void) -{ - return xtime.tv_sec; -} - +unsigned long get_seconds(void); struct timespec current_kernel_time(void); #define CURRENT_TIME (current_kernel_time()) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index a7bb05e6cb6..c21ca6bfaa6 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -141,11 +141,7 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) do { seq = read_seqbegin(&xtime_lock); -#ifdef CONFIG_NO_HZ - getnstimeofday(&xts); -#else xts = current_kernel_time(); -#endif tom = wall_to_monotonic; } while (read_seqretry(&xtime_lock, seq)); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 07a3f1420c2..acc417b5a9b 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -47,10 +47,22 @@ EXPORT_SYMBOL(xtime_lock); struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); static unsigned long total_sleep_time; /* seconds */ - EXPORT_SYMBOL(xtime); +#ifdef CONFIG_NO_HZ +static struct timespec xtime_cache __attribute__ ((aligned (16))); +static inline void update_xtime_cache(u64 nsec) +{ + xtime_cache = xtime; + timespec_add_ns(&xtime_cache, nsec); +} +#else +#define xtime_cache xtime +/* We do *not* want to evaluate the argument for this case */ +#define update_xtime_cache(n) do { } while (0) +#endif + static struct clocksource *clock; /* pointer to current clocksource */ @@ -478,6 +490,8 @@ void update_wall_time(void) xtime.tv_nsec = (s64)clock->xtime_nsec >> clock->shift; clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; + update_xtime_cache(cyc2ns(clock, offset)); + /* check to see if there is a new clocksource to use */ change_clocksource(); update_vsyscall(&xtime, clock); @@ -510,6 +524,13 @@ void monotonic_to_bootbased(struct timespec *ts) ts->tv_sec += total_sleep_time; } +unsigned long get_seconds(void) +{ + return xtime_cache.tv_sec; +} +EXPORT_SYMBOL(get_seconds); + + struct timespec current_kernel_time(void) { struct timespec now; @@ -518,10 +539,9 @@ struct timespec current_kernel_time(void) do { seq = read_seqbegin(&xtime_lock); - now = xtime; + now = xtime_cache; } while (read_seqretry(&xtime_lock, seq)); return now; } - EXPORT_SYMBOL(current_kernel_time); -- cgit v1.2.3-70-g09d2 From 5ea473a1dfeca2ee38c5dd458c1174d129e6b64e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 31 Jul 2007 00:38:50 -0700 Subject: Fix leaks on /proc/{*/sched,sched_debug,timer_list,timer_stats} On every open/close one struct seq_operations leaks. Kudos to /proc/slab_allocators. Signed-off-by: Alexey Dobriyan Acked-by: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 +- kernel/sched_debug.c | 2 +- kernel/time/timer_list.c | 2 +- kernel/time/timer_stats.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/fs/proc/base.c b/fs/proc/base.c index 3c77d5a64e7..19489b0d555 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -927,7 +927,7 @@ static const struct file_operations proc_pid_sched_operations = { .read = seq_read, .write = sched_write, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; #endif diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 42970f723a9..0eca442b779 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -200,7 +200,7 @@ static struct file_operations sched_debug_fops = { .open = sched_debug_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static int __init init_sched_debug_procfs(void) diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index e5edc3a22a0..fdb2e03d4fe 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -267,7 +267,7 @@ static struct file_operations timer_list_fops = { .open = timer_list_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; static int __init init_timer_list_procfs(void) diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 8ed62fda16c..3c38fb5eae1 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -399,7 +399,7 @@ static struct file_operations tstats_fops = { .read = seq_read, .write = tstats_write, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; void __init init_timer_stats(void) -- cgit v1.2.3-70-g09d2 From 6ddfca9548d8ecc26096a30667423ba919109533 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 10 Aug 2007 13:01:09 -0700 Subject: timer: remove clockevents_unregister_notifier I find a function(clockevents_unregister_notifier) which is not called by anything in tree. Signed-off-by: Miao Xie Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/clockchips.h | 1 - kernel/time/clockevents.c | 10 ---------- 2 files changed, 11 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index e0bd46eb241..def5a659b8a 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -123,7 +123,6 @@ extern void clockevents_exchange_device(struct clock_event_device *old, extern void clockevents_set_mode(struct clock_event_device *dev, enum clock_event_mode mode); extern int clockevents_register_notifier(struct notifier_block *nb); -extern void clockevents_unregister_notifier(struct notifier_block *nb); extern int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, ktime_t now); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 2ad1c37b8df..41dd3105ce7 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -113,16 +113,6 @@ int clockevents_register_notifier(struct notifier_block *nb) return ret; } -/** - * clockevents_unregister_notifier - unregister a clock events change listener - */ -void clockevents_unregister_notifier(struct notifier_block *nb) -{ - spin_lock(&clockevents_lock); - raw_notifier_chain_unregister(&clockevents_chain, nb); - spin_unlock(&clockevents_lock); -} - /* * Notify about a clock event change. Called with clockevents_lock * held. -- cgit v1.2.3-70-g09d2 From 298a5df45d497e66064fda22ef0abf13766d3333 Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Tue, 11 Sep 2007 15:24:03 -0700 Subject: Fix "no_sync_cmos_clock" logic inversion in kernel/time/ntp.c Seems to me that this timer will only get started on platforms that say they don't want it? Signed-off-by: Tony Breeds Cc: Paul Mackerras Cc: Gabriel Paubert Cc: Zachary Amsden Acked-by: Thomas Gleixner Cc: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/ntp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index cd91237dbfe..de6a2d6b3eb 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -226,7 +226,7 @@ static void sync_cmos_clock(unsigned long dummy) static void notify_cmos_timer(void) { - if (no_sync_cmos_clock) + if (!no_sync_cmos_clock) mod_timer(&sync_cmos_timer, jiffies + 1); } -- cgit v1.2.3-70-g09d2 From 3be9095063885d482b87d3875ea7f28e635882d0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 16 Sep 2007 15:36:43 +0200 Subject: timekeeping: access rtc outside of xtime lock Lockdep complains about the access of rtc in timekeeping_suspend inside the interrupt disabled region of the write locked xtime lock. Move the access outside. Signed-off-by: Thomas Gleixner Cc: John Stultz --- kernel/time/timekeeping.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index acc417b5a9b..f682091fa89 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -325,9 +325,10 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; + timekeeping_suspend_time = read_persistent_clock(); + write_seqlock_irqsave(&xtime_lock, flags); timekeeping_suspended = 1; - timekeeping_suspend_time = read_persistent_clock(); write_sequnlock_irqrestore(&xtime_lock, flags); clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); -- cgit v1.2.3-70-g09d2 From 6a669ee8a790487b7ec1edda762d39615a78264b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 16 Sep 2007 15:36:43 +0200 Subject: timekeeping: Prevent time going backwards on resume Timekeeping resume adjusts xtime by adding the slept time in seconds and resets the reference value of the clock source (clock->cycle_last). clock->cycle last is used to calculate the delta between the last xtime update and the readout of the clock source in __get_nsec_offset(). xtime plus the offset is the current time. The resume code ignores the delta which had already elapsed between the last xtime update and the actual time of suspend. If the suspend time is short, then we can see time going backwards on resume. Suspend: offs_s = clock->read() - clock->cycle_last; now = xtime + offs_s; timekeeping_suspend_time = read_rtc(); Resume: sleep_time = read_rtc() - timekeeping_suspend_time; xtime.tv_sec += sleep_time; clock->cycle_last = clock->read(); offs_r = clock->read() - clock->cycle_last; now = xtime + offs_r; if sleep_time_seconds == 0 and offs_r < offs_s, then time goes backwards. Fix this by storing the offset from the last xtime update and add it to xtime during resume, when we reset clock->cycle_last: sleep_time = read_rtc() - timekeeping_suspend_time; xtime.tv_sec += sleep_time; xtime += offs_s; /* Fixup xtime offset at suspend time */ clock->cycle_last = clock->read(); offs_r = clock->read() - clock->cycle_last; now = xtime + offs_r; Thanks to Marcelo for tracking this down on the OLPC and providing the necessary details to analyze the root cause. Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Tosatti --- kernel/time/timekeeping.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f682091fa89..4ad79f6bdec 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -217,6 +217,7 @@ static void change_clocksource(void) } #else static inline void change_clocksource(void) { } +static inline s64 __get_nsec_offset(void) { return 0; } #endif /** @@ -280,6 +281,8 @@ void __init timekeeping_init(void) static int timekeeping_suspended; /* time in seconds when suspend began */ static unsigned long timekeeping_suspend_time; +/* xtime offset when we went into suspend */ +static s64 timekeeping_suspend_nsecs; /** * timekeeping_resume - Resumes the generic timekeeping subsystem. @@ -305,6 +308,8 @@ static int timekeeping_resume(struct sys_device *dev) wall_to_monotonic.tv_sec -= sleep_length; total_sleep_time += sleep_length; } + /* Make sure that we have the correct xtime reference */ + timespec_add_ns(&xtime, timekeeping_suspend_nsecs); /* re-base the last cycle value */ clock->cycle_last = clocksource_read(clock); clock->error = 0; @@ -328,6 +333,8 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) timekeeping_suspend_time = read_persistent_clock(); write_seqlock_irqsave(&xtime_lock, flags); + /* Get the current xtime offset */ + timekeeping_suspend_nsecs = __get_nsec_offset(); timekeeping_suspended = 1; write_sequnlock_irqrestore(&xtime_lock, flags); -- cgit v1.2.3-70-g09d2 From 07eec6af448d13a6a520d9c6f06f2e87f61b567a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 16 Sep 2007 15:36:43 +0200 Subject: clockevents: Enforce oneshot broadcast when broadcast mask is set on resume The jinxed VAIO refuses to resume without hitting keys on the keyboard when this is not enforced. It is unclear why the cpu ends up in a lower C State without notifying the clock events layer, but enforcing the oneshot broadcast here is safe. Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index db8e0f3d409..947959fb2bb 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -382,12 +382,23 @@ static int tick_broadcast_set_event(ktime_t expires, int force) int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { + int cpu = smp_processor_id(); + + /* + * If the CPU is marked for broadcast, enforce oneshot + * broadcast mode. The jinxed VAIO does not resume otherwise. + * No idea why it ends up in a lower C State during resume + * without notifying the clock events layer. + */ + if (cpu_isset(cpu, tick_broadcast_mask)) + cpu_set(cpu, tick_broadcast_oneshot_mask); + clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); if(!cpus_empty(tick_broadcast_oneshot_mask)) tick_broadcast_set_event(ktime_get(), 1); - return cpu_isset(smp_processor_id(), tick_broadcast_oneshot_mask); + return cpu_isset(cpu, tick_broadcast_oneshot_mask); } /* -- cgit v1.2.3-70-g09d2 From 31d9b3938c0459e5e9755ce0a98ac1e24eeff972 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 16 Sep 2007 15:36:43 +0200 Subject: clockevents: do not shutdown the oneshot broadcast device When a cpu goes offline it is removed from the broadcast masks. If the mask becomes empty the code shuts down the broadcast device. This is wrong, because the broadcast device needs to be ready for the online cpu going idle (into a c-state, which stops the local apic timer). Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 947959fb2bb..aab881c86a1 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -560,20 +560,17 @@ void tick_broadcast_switch_to_oneshot(void) */ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { - struct clock_event_device *bc; unsigned long flags; unsigned int cpu = *cpup; spin_lock_irqsave(&tick_broadcast_lock, flags); - bc = tick_broadcast_device.evtdev; + /* + * Clear the broadcast mask flag for the dead cpu, but do not + * stop the broadcast device! + */ cpu_clear(cpu, tick_broadcast_oneshot_mask); - if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) { - if (bc && cpus_empty(tick_broadcast_oneshot_mask)) - clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); - } - spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -- cgit v1.2.3-70-g09d2 From 5e41d0d60a534d2a5dc9772600a58f44c8d12506 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 16 Sep 2007 15:36:43 +0200 Subject: clockevents: prevent stale tick update on offline cpu Taking a cpu offline removes the cpu from the online mask before the CPU_DEAD notification is done. The clock events layer does the cleanup of the dead CPU from the CPU_DEAD notifier chain. tick_do_timer_cpu is used to avoid xtime lock contention by assigning the task of jiffies xtime updates to one CPU. If a CPU is taken offline, then this assignment becomes stale. This went unnoticed because most of the time the offline CPU went dead before the online CPU reached __cpu_die(), where the CPU_DEAD state is checked. In the case that the offline CPU did not reach the DEAD state before we reach __cpu_die(), the code in there goes to sleep for 100ms. Due to the stale time update assignment, the system is stuck forever. Take the assignment away when a cpu is not longer in the cpu_online_mask. We do this in the last call to tick_nohz_stop_sched_tick() when the offline CPU is on the way to the final play_dead() idle entry. Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b416995b975..8c3fef1db09 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -160,6 +160,18 @@ void tick_nohz_stop_sched_tick(void) cpu = smp_processor_id(); ts = &per_cpu(tick_cpu_sched, cpu); + /* + * If this cpu is offline and it is the one which updates + * jiffies, then give up the assignment and let it be taken by + * the cpu which runs the tick timer next. If we don't drop + * this here the jiffies might be stale and do_timer() never + * invoked. + */ + if (unlikely(!cpu_online(cpu))) { + if (cpu == tick_do_timer_cpu) + tick_do_timer_cpu = -1; + } + if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) goto end; -- cgit v1.2.3-70-g09d2 From b7e113dc9d52c4a37d2da6fafe77959f3a28eccf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 22 Sep 2007 22:29:06 +0000 Subject: clockevents: remove the suspend/resume workaround^Wthinko In a desparate attempt to fix the suspend/resume problem on Andrews VAIO I added a workaround which enforced the broadcast of the oneshot timer on resume. This was actually resolving the problem on the VAIO but was just a stupid workaround, which was not tackling the root cause: the assignement of lower idle C-States in the ACPI processor_idle code. The cpuidle patches, which utilize the dynamic tick feature and go faster into deeper C-states exposed the problem again. The correct solution is the previous patch, which prevents lower C-states across the suspend/resume. Remove the enforcement code, including the conditional broadcast timer arming, which helped to pamper over the real problem for quite a time. The oneshot broadcast flag for the cpu, which runs the resume code can never be set at the time when this code is executed. It only gets set, when the CPU is entering a lower idle C-State. Signed-off-by: Thomas Gleixner Tested-by: Andrew Morton Cc: Len Brown Cc: Venkatesh Pallipadi Cc: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- kernel/time/tick-broadcast.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index aab881c86a1..0962e057766 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -382,23 +382,8 @@ static int tick_broadcast_set_event(ktime_t expires, int force) int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { - int cpu = smp_processor_id(); - - /* - * If the CPU is marked for broadcast, enforce oneshot - * broadcast mode. The jinxed VAIO does not resume otherwise. - * No idea why it ends up in a lower C State during resume - * without notifying the clock events layer. - */ - if (cpu_isset(cpu, tick_broadcast_mask)) - cpu_set(cpu, tick_broadcast_oneshot_mask); - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); - - if(!cpus_empty(tick_broadcast_oneshot_mask)) - tick_broadcast_set_event(ktime_get(), 1); - - return cpu_isset(cpu, tick_broadcast_oneshot_mask); + return 0; } /* -- cgit v1.2.3-70-g09d2 From 74922be1485818ed368c4cf4f0b100f70bf01e08 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 7 Oct 2007 00:24:31 -0700 Subject: Fix timer_stats printout of events/sec When using /proc/timer_stats on ppc64 I noticed the events/sec field wasnt accurate. Sometimes the integer part was incorrect due to rounding (we werent taking the fractional seconds into consideration). The fraction part is also wrong, we need to pad the printf statement and take the bottom three digits of 1000 times the value. Signed-off-by: Anton Blanchard Acked-by: Ingo Molnar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/timer_stats.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 3c38fb5eae1..c36bb7ed030 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -327,8 +327,9 @@ static int tstats_show(struct seq_file *m, void *v) ms = 1; if (events && period.tv_sec) - seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events, - events / period.tv_sec, events * 1000 / ms); + seq_printf(m, "%ld total events, %ld.%03ld events/sec\n", + events, events * 1000 / ms, + (events * 1000000 / ms) % 1000); else seq_printf(m, "%ld total events\n", events); -- cgit v1.2.3-70-g09d2 From de68d9b173ee657115dd0e584c2365b7954253a5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 Oct 2007 23:04:05 +0200 Subject: clockevents: Allow build w/o run-tine usage for migration purposes Migration aid to allow preparatory patches which introduce not yet used parts of clock events code. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven --- include/linux/clockchips.h | 8 ++++++-- kernel/time/Kconfig | 5 +++++ kernel/time/Makefile | 2 +- kernel/time/clockevents.c | 3 ++- 4 files changed, 14 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index f368a6fe8cc..d2ddea92689 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -8,7 +8,7 @@ #ifndef _LINUX_CLOCKCHIPS_H #define _LINUX_CLOCKCHIPS_H -#ifdef CONFIG_GENERIC_CLOCKEVENTS +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD #include #include @@ -126,9 +126,13 @@ extern int clockevents_register_notifier(struct notifier_block *nb); extern int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, ktime_t now); +#ifdef CONFIG_GENERIC_CLOCKEVENTS extern void clockevents_notify(unsigned long reason, void *arg); - #else +# define clockevents_notify(reason, arg) do { } while (0) +#endif + +#else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */ #define clockevents_notify(reason, arg) do { } while (0) diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index f6635112654..8d53106a0a9 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -23,3 +23,8 @@ config HIGH_RES_TIMERS hardware is not capable then this option only increases the size of the kernel image. +config GENERIC_CLOCKEVENTS_BUILD + bool + default y + depends on GENERIC_CLOCKEVENTS || GENERIC_CLOCKEVENTS_MIGR + diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 99b6034fc86..905b0b50792 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -1,6 +1,6 @@ obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o +obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 41dd3105ce7..822beebe664 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -194,6 +194,7 @@ void clockevents_exchange_device(struct clock_event_device *old, local_irq_restore(flags); } +#ifdef CONFIG_GENERIC_CLOCKEVENTS /** * clockevents_notify - notification about relevant events */ @@ -222,4 +223,4 @@ void clockevents_notify(unsigned long reason, void *arg) spin_unlock(&clockevents_lock); } EXPORT_SYMBOL_GPL(clockevents_notify); - +#endif -- cgit v1.2.3-70-g09d2 From c8a1d398de70a7774359b4720c392891cdd485f9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 Oct 2007 23:04:06 +0200 Subject: clockevents: fix periodic broadcast for oneshot devices The next_event member of the clock event device is used to keep track of the next periodic event. For one shot only devices it is wrong to clear the variable, as the next event will be based on it. Pointed out by Ralf Baechle Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven --- kernel/time/tick-broadcast.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 0962e057766..acf15b49e55 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -176,8 +176,6 @@ static void tick_do_periodic_broadcast(void) */ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) { - dev->next_event.tv64 = KTIME_MAX; - tick_do_periodic_broadcast(); /* -- cgit v1.2.3-70-g09d2 From 4a93232dab0a07074bcc5291a0f1f39919916f31 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Fri, 12 Oct 2007 23:04:23 +0200 Subject: clock events: allow replacement of broadcast timer Change the broadcast timer, if a timer with higher rating becomes available. Signed-off-by: Venkatesh Pallipadi Cc: Andi Kleen Cc: john stultz Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 13 ++++++------- kernel/time/tick-common.c | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index acf15b49e55..298bc7c6f09 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -64,8 +64,9 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc) */ int tick_check_broadcast_device(struct clock_event_device *dev) { - if (tick_broadcast_device.evtdev || - (dev->features & CLOCK_EVT_FEAT_C3STOP)) + if ((tick_broadcast_device.evtdev && + tick_broadcast_device.evtdev->rating >= dev->rating) || + (dev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; clockevents_exchange_device(NULL, dev); @@ -513,11 +514,9 @@ static void tick_broadcast_clear_oneshot(int cpu) */ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { - if (bc->mode != CLOCK_EVT_MODE_ONESHOT) { - bc->event_handler = tick_handle_oneshot_broadcast; - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); - bc->next_event.tv64 = KTIME_MAX; - } + bc->event_handler = tick_handle_oneshot_broadcast; + clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + bc->next_event.tv64 = KTIME_MAX; } /* diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 77a21abc871..3f3ae390783 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -200,7 +200,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) cpu = smp_processor_id(); if (!cpu_isset(cpu, newdev->cpumask)) - goto out; + goto out_bc; td = &per_cpu(tick_cpu_device, cpu); curdev = td->evtdev; @@ -265,7 +265,7 @@ out_bc: */ if (tick_check_broadcast_device(newdev)) ret = NOTIFY_STOP; -out: + spin_unlock_irqrestore(&tick_device_lock, flags); return ret; -- cgit v1.2.3-70-g09d2