From 25985edcedea6396277003854657b5f3cb31a628 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 30 Mar 2011 22:57:33 -0300 Subject: Fix common misspellings Fixes generated by 'codespell' and manually reviewed. Signed-off-by: Lucas De Marchi --- kernel/time/jiffies.c | 2 +- kernel/time/timer_stats.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index b2fa506667c..a470154e040 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -34,7 +34,7 @@ * inaccuracies caused by missed or lost timer * interrupts and the inability for the timer * interrupt hardware to accuratly tick at the - * requested HZ value. It is also not reccomended + * requested HZ value. It is also not recommended * for "tick-less" systems. */ #define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ)) diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 2f3b585b8d7..a5d0a3a85dd 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -236,7 +236,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, unsigned int timer_flag) { /* - * It doesnt matter which lock we take: + * It doesn't matter which lock we take: */ raw_spinlock_t *lock; struct entry *entry, input; -- cgit v1.2.3-70-g09d2 From 4352d9d44b935e4d000be6ec89ddb55c2bf35f24 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Mon, 4 Apr 2011 08:31:23 -0700 Subject: ntp: fix non privileged system time shifting The ADJ_SETOFFSET bit added in commit 094aa188 ("ntp: Add ADJ_SETOFFSET mode bit") also introduced a way for any user to change the system time. Sneaky or buggy calls to adjtimex() could set ADJ_OFFSET_SS_READ | ADJ_SETOFFSET which would result in a successful call to timekeeping_inject_offset(). This patch fixes the issue by adding the capability check. 
Signed-off-by: Richard Cochran Signed-off-by: Linus Torvalds --- kernel/time/ntp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 5f1bb8e2008..f6117a4c7cb 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -652,6 +652,8 @@ int do_adjtimex(struct timex *txc) struct timespec delta; delta.tv_sec = txc->time.tv_sec; delta.tv_nsec = txc->time.tv_usec; + if (!capable(CAP_SYS_TIME)) + return -EPERM; if (!(txc->modes & ADJ_NANO)) delta.tv_nsec *= 1000; result = timekeeping_inject_offset(&delta); -- cgit v1.2.3-70-g09d2 From 1791f881435fab951939ad700e947b66c062e083 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 30 Mar 2011 15:24:21 +0200 Subject: posix clocks: Replace mutex with reader/writer semaphore A dynamic posix clock is protected from asynchronous removal by a mutex. However, using a mutex has the unwanted effect that a long running clock operation in one process will unnecessarily block other processes. For example, one process might call read() to get an external time stamp coming in at one pulse per second. A second process calling clock_gettime would have to wait for almost a whole second. This patch fixes the issue by using a reader/writer semaphore instead of a mutex. 
Signed-off-by: Richard Cochran Cc: John Stultz Link: http://lkml.kernel.org/r/%3C20110330132421.GA31771%40riccoc20.at.omicron.at%3E Signed-off-by: Thomas Gleixner --- include/linux/posix-clock.h | 5 +++-- kernel/time/posix-clock.c | 24 +++++++++--------------- 2 files changed, 12 insertions(+), 17 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 369e19d3750..7f1183dcd11 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -24,6 +24,7 @@ #include #include #include +#include struct posix_clock; @@ -104,7 +105,7 @@ struct posix_clock_operations { * @ops: Functional interface to the clock * @cdev: Character device instance for this clock * @kref: Reference count. - * @mutex: Protects the 'zombie' field from concurrent access. + * @rwsem: Protects the 'zombie' field from concurrent access. * @zombie: If 'zombie' is true, then the hardware has disappeared. * @release: A function to free the structure when the reference count reaches * zero. May be NULL if structure is statically allocated. 
@@ -117,7 +118,7 @@ struct posix_clock { struct posix_clock_operations ops; struct cdev cdev; struct kref kref; - struct mutex mutex; + struct rw_semaphore rwsem; bool zombie; void (*release)(struct posix_clock *clk); }; diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 25028dd4fa1..c340ca658f3 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -19,7 +19,6 @@ */ #include #include -#include #include #include #include @@ -34,19 +33,19 @@ static struct posix_clock *get_posix_clock(struct file *fp) { struct posix_clock *clk = fp->private_data; - mutex_lock(&clk->mutex); + down_read(&clk->rwsem); if (!clk->zombie) return clk; - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); return NULL; } static void put_posix_clock(struct posix_clock *clk) { - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); } static ssize_t posix_clock_read(struct file *fp, char __user *buf, @@ -156,7 +155,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) struct posix_clock *clk = container_of(inode->i_cdev, struct posix_clock, cdev); - mutex_lock(&clk->mutex); + down_read(&clk->rwsem); if (clk->zombie) { err = -ENODEV; @@ -172,7 +171,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) fp->private_data = clk; } out: - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); return err; } @@ -211,25 +210,20 @@ int posix_clock_register(struct posix_clock *clk, dev_t devid) int err; kref_init(&clk->kref); - mutex_init(&clk->mutex); + init_rwsem(&clk->rwsem); cdev_init(&clk->cdev, &posix_clock_file_operations); clk->cdev.owner = clk->ops.owner; err = cdev_add(&clk->cdev, devid, 1); - if (err) - goto no_cdev; return err; -no_cdev: - mutex_destroy(&clk->mutex); - return err; } EXPORT_SYMBOL_GPL(posix_clock_register); static void delete_clock(struct kref *kref) { struct posix_clock *clk = container_of(kref, struct posix_clock, kref); - mutex_destroy(&clk->mutex); + if (clk->release) clk->release(clk); } @@ -238,9 +232,9 @@ 
void posix_clock_unregister(struct posix_clock *clk) { cdev_del(&clk->cdev); - mutex_lock(&clk->mutex); + down_write(&clk->rwsem); clk->zombie = true; - mutex_unlock(&clk->mutex); + up_write(&clk->rwsem); kref_put(&clk->kref, delete_clock); } -- cgit v1.2.3-70-g09d2 From 304529b1b6f8612ccbb4582e997051b48b94f4a4 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 1 Apr 2011 14:32:09 -0700 Subject: time: Add timekeeping_inject_sleeptime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some platforms cannot implement read_persistent_clock, as their RTC devices are only accessible when interrupts are enabled. This keeps them from being used by the timekeeping code on resume to measure the time in suspend. The RTC layer tries to work around this, by calling do_settimeofday on resume after irqs are reenabled to set the time properly. However, this only corrects CLOCK_REALTIME, and does not properly adjust the sleep time value. This causes btime in /proc/stat to be incorrect as well as making the new CLOCK_BOOTTIME inaccurate. This patch resolves the issue by introducing a new timekeeping hook to allow the RTC layer to inject the sleep time on resume. The code also checks to make sure that read_persistent_clock is nonfunctional before setting the sleep time, so that should the RTC's HCTOSYS option be configured in on a system that does support read_persistent_clock we will not increase the total_sleep_time twice. 
CC: Arve Hjønnevåg CC: Thomas Gleixner Acked-by: Arnd Bergmann Signed-off-by: John Stultz --- drivers/rtc/class.c | 23 ++++++++----------- include/linux/time.h | 1 + kernel/time/timekeeping.c | 56 ++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 63 insertions(+), 17 deletions(-) (limited to 'kernel/time') diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 39013867cbd..4194e59e14c 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -41,26 +41,21 @@ static void rtc_device_release(struct device *dev) * system's wall clock; restore it on resume(). */ -static struct timespec delta; static time_t oldtime; +static struct timespec oldts; static int rtc_suspend(struct device *dev, pm_message_t mesg) { struct rtc_device *rtc = to_rtc_device(dev); struct rtc_time tm; - struct timespec ts = current_kernel_time(); if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) return 0; rtc_read_time(rtc, &tm); + ktime_get_ts(&oldts); rtc_tm_to_time(&tm, &oldtime); - /* RTC precision is 1 second; adjust delta for avg 1/2 sec err */ - set_normalized_timespec(&delta, - ts.tv_sec - oldtime, - ts.tv_nsec - (NSEC_PER_SEC >> 1)); - return 0; } @@ -70,10 +65,12 @@ static int rtc_resume(struct device *dev) struct rtc_time tm; time_t newtime; struct timespec time; + struct timespec newts; if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) return 0; + ktime_get_ts(&newts); rtc_read_time(rtc, &tm); if (rtc_valid_tm(&tm) != 0) { pr_debug("%s: bogus resume time\n", dev_name(&rtc->dev)); @@ -85,15 +82,13 @@ static int rtc_resume(struct device *dev) pr_debug("%s: time travel!\n", dev_name(&rtc->dev)); return 0; } + /* calculate the RTC time delta */ + set_normalized_timespec(&time, newtime - oldtime, 0); - /* restore wall clock using delta against this RTC; - * adjust again for avg 1/2 second RTC sampling error - */ - set_normalized_timespec(&time, - newtime + delta.tv_sec, - (NSEC_PER_SEC >> 1) + delta.tv_nsec); - do_settimeofday(&time); + 
/* subtract kernel time between rtc_suspend to rtc_resume */ + time = timespec_sub(time, timespec_sub(newts, oldts)); + timekeeping_inject_sleeptime(&time); return 0; } diff --git a/include/linux/time.h b/include/linux/time.h index 454a2620578..4ea5a75fcac 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -126,6 +126,7 @@ struct timespec __current_kernel_time(void); /* does not take xtime_lock */ struct timespec get_monotonic_coarse(void); void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, struct timespec *wtom, struct timespec *sleep); +void timekeeping_inject_sleeptime(struct timespec *delta); #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8ad5d576755..8e6a05a5915 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -595,6 +595,58 @@ void __init timekeeping_init(void) /* time in seconds when suspend began */ static struct timespec timekeeping_suspend_time; +/** + * __timekeeping_inject_sleeptime - Internal function to add sleep interval + * @delta: pointer to a timespec delta value + * + * Takes a timespec offset measuring a suspend interval and properly + * adds the sleep offset to the timekeeping variables. + */ +static void __timekeeping_inject_sleeptime(struct timespec *delta) +{ + xtime = timespec_add(xtime, *delta); + wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta); + total_sleep_time = timespec_add(total_sleep_time, *delta); +} + + +/** + * timekeeping_inject_sleeptime - Adds suspend interval to timekeeping values + * @delta: pointer to a timespec delta value + * + * This hook is for architectures that cannot support read_persistent_clock + * because their RTC/persistent clock is only accessible when irqs are enabled. + * + * This function should only be called by rtc_resume(), and allows + * a suspend offset to be injected into the timekeeping values. 
+ */ +void timekeeping_inject_sleeptime(struct timespec *delta) +{ + unsigned long flags; + struct timespec ts; + + /* Make sure we don't set the clock twice */ + read_persistent_clock(&ts); + if (!(ts.tv_sec == 0 && ts.tv_nsec == 0)) + return; + + write_seqlock_irqsave(&xtime_lock, flags); + timekeeping_forward_now(); + + __timekeeping_inject_sleeptime(delta); + + timekeeper.ntp_error = 0; + ntp_clear(); + update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock, + timekeeper.mult); + + write_sequnlock_irqrestore(&xtime_lock, flags); + + /* signal hrtimers about time change */ + clock_was_set(); +} + + /** * timekeeping_resume - Resumes the generic timekeeping subsystem. * @@ -615,9 +667,7 @@ static void timekeeping_resume(void) if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { ts = timespec_sub(ts, timekeeping_suspend_time); - xtime = timespec_add(xtime, ts); - wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); - total_sleep_time = timespec_add(total_sleep_time, ts); + __timekeeping_inject_sleeptime(&ts); } /* re-base the last cycle value */ timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); -- cgit v1.2.3-70-g09d2 From ff3ead96d17f47ee70c294a5cc2cce9b61e82f0f Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 11 Jan 2011 09:42:13 -0800 Subject: timers: Introduce in-kernel alarm-timer interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides the in kernel interface and infrastructure for alarm-timers. Alarm-timers are a hybrid style timer, similar to hrtimers, but when the system is suspended, the RTC device is set to fire and wake the system for when the soonest alarm-timer expires. The concept for Alarm-timers was inspired by the Android Alarm driver (by Arve Hjønnevåg) found in the Android kernel tree. 
See: http://android.git.kernel.org/?p=kernel/common.git;a=blob;f=drivers/rtc/alarm.c;h=1250edfbdf3302f5e4ea6194847c6ef4bb7beb1c;hb=android-2.6.36 This in-kernel interface should be fairly compatible with the Android alarm driver in-kernel interface, but has the advantage of utilizing the new RTC timerqueue code instead of doing direct RTC manipulation. CC: Arve Hjønnevåg CC: Thomas Gleixner CC: Alessandro Zummo Acked-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/alarmtimer.h | 30 ++++ kernel/time/Makefile | 2 +- kernel/time/alarmtimer.c | 375 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 406 insertions(+), 1 deletion(-) create mode 100644 include/linux/alarmtimer.h create mode 100644 kernel/time/alarmtimer.c (limited to 'kernel/time') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h new file mode 100644 index 00000000000..6b364b2e207 --- /dev/null +++ b/include/linux/alarmtimer.h @@ -0,0 +1,30 @@ +#ifndef _LINUX_ALARMTIMER_H +#define _LINUX_ALARMTIMER_H + +#include +#include +#include +#include + +enum alarmtimer_type { + ALARM_REALTIME, + ALARM_BOOTTIME, + + ALARM_NUMTYPE, +}; + +struct alarm { + struct timerqueue_node node; + ktime_t period; + void (*function)(struct alarm *); + enum alarmtimer_type type; + char enabled; + void *data; +}; + +void alarm_init(struct alarm *alarm, enum alarmtimer_type type, + void (*function)(struct alarm *)); +void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period); +void alarm_cancel(struct alarm *alarm); + +#endif diff --git a/kernel/time/Makefile b/kernel/time/Makefile index b0425991e9a..e2fd74b8e8c 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -1,5 +1,5 @@ obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o -obj-y += timeconv.o posix-clock.o +obj-y += timeconv.o posix-clock.o alarmtimer.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o diff --git 
a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c new file mode 100644 index 00000000000..48c2ee949e6 --- /dev/null +++ b/kernel/time/alarmtimer.c @@ -0,0 +1,375 @@ +/* + * Alarmtimer interface + * + * This interface provides a timer which is similar to hrtimers, + * but triggers a RTC alarm if the box is suspended. + * + * This interface is influenced by the Android RTC Alarm timer + * interface. + * + * Copyright (C) 2010 IBM Corporation + * + * Author: John Stultz + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +static struct alarm_base { + spinlock_t lock; + struct timerqueue_head timerqueue; + struct hrtimer timer; + ktime_t (*gettime)(void); + clockid_t base_clockid; + struct work_struct irqwork; +} alarm_bases[ALARM_NUMTYPE]; + +static struct rtc_timer rtctimer; +static struct rtc_device *rtcdev; + +static ktime_t freezer_delta; +static DEFINE_SPINLOCK(freezer_delta_lock); + + +/************************************************************************** + * alarmtimer management code + */ + +/* + * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue + * @base: pointer to the base where the timer is being run + * @alarm: pointer to alarm being enqueued. + * + * Adds alarm to an alarm_base timerqueue and if necessary sets + * an hrtimer to run. + * + * Must hold base->lock when calling. 
+ */ +static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) +{ + timerqueue_add(&base->timerqueue, &alarm->node); + if (&alarm->node == timerqueue_getnext(&base->timerqueue)) { + hrtimer_try_to_cancel(&base->timer); + hrtimer_start(&base->timer, alarm->node.expires, + HRTIMER_MODE_ABS); + } +} + +/* + * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue + * @base: pointer to the base where the timer is running + * @alarm: pointer to alarm being removed + * + * Removes alarm to a alarm_base timerqueue and if necessary sets + * a new timer to run. + * + * Must hold base->lock when calling. + */ +static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) +{ + struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue); + + timerqueue_del(&base->timerqueue, &alarm->node); + if (next == &alarm->node) { + hrtimer_try_to_cancel(&base->timer); + next = timerqueue_getnext(&base->timerqueue); + if (!next) + return; + hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS); + } +} + +/* + * alarmtimer_do_work - Handles alarm being fired. + * @work: pointer to workqueue being run + * + * When a timer fires, this runs through the timerqueue to see + * which alarm timers, and run those that expired. If there are + * more alarm timers queued, we set the hrtimer to fire in the + * future. 
+ */ +void alarmtimer_do_work(struct work_struct *work) +{ + struct alarm_base *base = container_of(work, struct alarm_base, + irqwork); + struct timerqueue_node *next; + unsigned long flags; + ktime_t now; + + spin_lock_irqsave(&base->lock, flags); + now = base->gettime(); + while ((next = timerqueue_getnext(&base->timerqueue))) { + struct alarm *alarm; + ktime_t expired = next->expires; + + if (expired.tv64 >= now.tv64) + break; + + alarm = container_of(next, struct alarm, node); + + timerqueue_del(&base->timerqueue, &alarm->node); + alarm->enabled = 0; + /* Re-add periodic timers */ + if (alarm->period.tv64) { + alarm->node.expires = ktime_add(expired, alarm->period); + timerqueue_add(&base->timerqueue, &alarm->node); + alarm->enabled = 1; + } + spin_unlock_irqrestore(&base->lock, flags); + if (alarm->function) + alarm->function(alarm); + spin_lock_irqsave(&base->lock, flags); + } + + if (next) { + hrtimer_start(&base->timer, next->expires, + HRTIMER_MODE_ABS); + } + spin_unlock_irqrestore(&base->lock, flags); +} + + +/* + * alarmtimer_fired - Handles alarm hrtimer being fired. + * @timer: pointer to hrtimer being run + * + * When a timer fires, this schedules the do_work function to + * be run. + */ +static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) +{ + struct alarm_base *base = container_of(timer, struct alarm_base, timer); + schedule_work(&base->irqwork); + return HRTIMER_NORESTART; +} + + +/* + * alarmtimer_suspend - Suspend time callback + * @dev: unused + * @state: unused + * + * When we are going into suspend, we look through the bases + * to see which is the soonest timer to expire. We then + * set an rtc timer to fire that far into the future, which + * will wake us from suspend. 
+ */ +static int alarmtimer_suspend(struct device *dev) +{ + struct rtc_time tm; + ktime_t min, now; + unsigned long flags; + int i; + + spin_lock_irqsave(&freezer_delta_lock, flags); + min = freezer_delta; + freezer_delta = ktime_set(0, 0); + spin_unlock_irqrestore(&freezer_delta_lock, flags); + + /* If we have no rtcdev, just return */ + if (!rtcdev) + return 0; + + /* Find the soonest timer to expire*/ + for (i = 0; i < ALARM_NUMTYPE; i++) { + struct alarm_base *base = &alarm_bases[i]; + struct timerqueue_node *next; + ktime_t delta; + + spin_lock_irqsave(&base->lock, flags); + next = timerqueue_getnext(&base->timerqueue); + spin_unlock_irqrestore(&base->lock, flags); + if (!next) + continue; + delta = ktime_sub(next->expires, base->gettime()); + if (!min.tv64 || (delta.tv64 < min.tv64)) + min = delta; + } + if (min.tv64 == 0) + return 0; + + /* XXX - Should we enforce a minimum sleep time? */ + WARN_ON(min.tv64 < NSEC_PER_SEC); + + /* Setup an rtc timer to fire that far in the future */ + rtc_timer_cancel(rtcdev, &rtctimer); + rtc_read_time(rtcdev, &tm); + now = rtc_tm_to_ktime(tm); + now = ktime_add(now, min); + + rtc_timer_start(rtcdev, &rtctimer, now, ktime_set(0, 0)); + + return 0; +} + + +/************************************************************************** + * alarm kernel interface code + */ + +/* + * alarm_init - Initialize an alarm structure + * @alarm: ptr to alarm to be initialized + * @type: the type of the alarm + * @function: callback that is run when the alarm fires + * + * In-kernel interface to initializes the alarm structure. 
+ */ +void alarm_init(struct alarm *alarm, enum alarmtimer_type type, + void (*function)(struct alarm *)) +{ + timerqueue_init(&alarm->node); + alarm->period = ktime_set(0, 0); + alarm->function = function; + alarm->type = type; + alarm->enabled = 0; +} + +/* + * alarm_start - Sets an alarm to fire + * @alarm: ptr to alarm to set + * @start: time to run the alarm + * @period: period at which the alarm will recur + * + * In-kernel interface set an alarm timer. + */ +void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + unsigned long flags; + + spin_lock_irqsave(&base->lock, flags); + if (alarm->enabled) + alarmtimer_remove(base, alarm); + alarm->node.expires = start; + alarm->period = period; + alarmtimer_enqueue(base, alarm); + alarm->enabled = 1; + spin_unlock_irqrestore(&base->lock, flags); +} + +/* + * alarm_cancel - Tries to cancel an alarm timer + * @alarm: ptr to alarm to be canceled + * + * In-kernel interface to cancel an alarm timer. + */ +void alarm_cancel(struct alarm *alarm) +{ + struct alarm_base *base = &alarm_bases[alarm->type]; + unsigned long flags; + + spin_lock_irqsave(&base->lock, flags); + if (alarm->enabled) + alarmtimer_remove(base, alarm); + alarm->enabled = 0; + spin_unlock_irqrestore(&base->lock, flags); +} + + + +/************************************************************************** + * alarmtimer initialization code + */ + +/* Suspend hook structures */ +static const struct dev_pm_ops alarmtimer_pm_ops = { + .suspend = alarmtimer_suspend, +}; + +static struct platform_driver alarmtimer_driver = { + .driver = { + .name = "alarmtimer", + .pm = &alarmtimer_pm_ops, + } +}; + +/** + * alarmtimer_init - Initialize alarm timer code + * + * This function initializes the alarm bases and registers + * the posix clock ids. 
+ */ +static int __init alarmtimer_init(void) +{ + int error = 0; + int i; + + /* Initialize alarm bases */ + alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; + alarm_bases[ALARM_REALTIME].gettime = &ktime_get_real; + alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME; + alarm_bases[ALARM_BOOTTIME].gettime = &ktime_get_boottime; + for (i = 0; i < ALARM_NUMTYPE; i++) { + timerqueue_init_head(&alarm_bases[i].timerqueue); + spin_lock_init(&alarm_bases[i].lock); + hrtimer_init(&alarm_bases[i].timer, + alarm_bases[i].base_clockid, + HRTIMER_MODE_ABS); + alarm_bases[i].timer.function = alarmtimer_fired; + INIT_WORK(&alarm_bases[i].irqwork, alarmtimer_do_work); + } + error = platform_driver_register(&alarmtimer_driver); + platform_device_register_simple("alarmtimer", -1, NULL, 0); + + return error; +} +device_initcall(alarmtimer_init); + +/** + * has_wakealarm - check rtc device has wakealarm ability + * @dev: current device + * @name_ptr: name to be returned + * + * This helper function checks to see if the rtc device can wake + * from suspend. + */ +static int __init has_wakealarm(struct device *dev, void *name_ptr) +{ + struct rtc_device *candidate = to_rtc_device(dev); + + if (!candidate->ops->set_alarm) + return 0; + if (!device_may_wakeup(candidate->dev.parent)) + return 0; + + *(const char **)name_ptr = dev_name(dev); + return 1; +} + +/** + * alarmtimer_init_late - Late initializing of alarmtimer code + * + * This function locates a rtc device to use for wakealarms. + * Run as late_initcall to make sure rtc devices have been + * registered. 
+ */ +static int __init alarmtimer_init_late(void) +{ + char *str; + + /* Find an rtc device and init the rtc_timer */ + class_find_device(rtc_class, NULL, &str, has_wakealarm); + if (str) + rtcdev = rtc_class_open(str); + if (!rtcdev) { + printk(KERN_WARNING "No RTC device found, ALARM timers will" + " not wake from suspend"); + } + rtc_timer_init(&rtctimer, NULL, NULL); + + return 0; +} +late_initcall(alarmtimer_init_late); -- cgit v1.2.3-70-g09d2 From 9a7adcf5c6dea63d2e47e6f6d2f7a6c9f48b9337 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 11 Jan 2011 09:54:33 -0800 Subject: timers: Posix interface for alarm-timers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch exposes alarm-timers to userland via the posix clock and timers interface, using two new clockids: CLOCK_REALTIME_ALARM and CLOCK_BOOTTIME_ALARM. Both clockids behave identically to CLOCK_REALTIME and CLOCK_BOOTTIME, respectively, but timers set against the _ALARM suffixed clockids will wake the system if it is suspended. Some background can be found here: https://lwn.net/Articles/429925/ The concept for Alarm-timers was inspired by the Android Alarm driver (by Arve Hjønnevåg) found in the Android kernel tree. See: http://android.git.kernel.org/?p=kernel/common.git;a=blob;f=drivers/rtc/alarm.c;h=1250edfbdf3302f5e4ea6194847c6ef4bb7beb1c;hb=android-2.6.36 While the in-kernel interface is pretty similar between alarm-timers and Android alarm driver, the user-space interface for the Android alarm driver is via ioctls to a new char device. As mentioned above, I've instead chosen to export this functionality via the posix interface, as it seemed a little simpler and avoids creating duplicate interfaces to things like CLOCK_REALTIME and CLOCK_MONOTONIC under alternate names (ie:ANDROID_ALARM_RTC and ANDROID_ALARM_SYSTEMTIME). The semantics of the Android alarm driver are different from what this posix interface provides. 
For instance, threads other than the thread waiting on the Android alarm driver are able to modify the alarm being waited on. Also this interface does not allow the same wakelock semantics that the Android driver provides (ie: kernel takes a wakelock on RTC alarm-interrupt, and holds it through process wakeup, and while the process runs, until the process either closes the char device or calls back in to wait on a new alarm). One potential way to implement similar semantics may be via the timerfd infrastructure, but this needs more research. There may also need to be some sort of sysfs system level policy hooks that allow alarm timers to be disabled to keep them from firing at inappropriate times (ie: laptop in a well insulated bag, mid-flight). CC: Arve Hjønnevåg CC: Thomas Gleixner CC: Alessandro Zummo Acked-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/capability.h | 7 +- include/linux/posix-timers.h | 2 + include/linux/time.h | 2 + kernel/time/alarmtimer.c | 330 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 340 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a20..7cb23eae693 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -355,7 +355,12 @@ struct cpu_vfs_cap_data { #define CAP_SYSLOG 34 -#define CAP_LAST_CAP CAP_SYSLOG +/* Allow triggering something that will wake the system */ + +#define CAP_WAKE_ALARM 35 + + +#define CAP_LAST_CAP CAP_WAKE_ALARM #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index d51243ae072..808227d40a6 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -5,6 +5,7 @@ #include #include #include +#include union cpu_time_count { cputime_t cpu; @@ -80,6 +81,7 @@ struct k_itimer { unsigned long incr; unsigned long expires; } mmtimer; + struct alarm alarmtimer; } it; }; diff --git 
a/include/linux/time.h b/include/linux/time.h index 4ea5a75fcac..b3061782dec 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -295,6 +295,8 @@ struct itimerval { #define CLOCK_REALTIME_COARSE 5 #define CLOCK_MONOTONIC_COARSE 6 #define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 /* * The IDs of various hardware clocks: diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 48c2ee949e6..4058ad79d55 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -215,6 +215,21 @@ static int alarmtimer_suspend(struct device *dev) } +static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) +{ + ktime_t delta; + unsigned long flags; + struct alarm_base *base = &alarm_bases[type]; + + delta = ktime_sub(absexp, base->gettime()); + + spin_lock_irqsave(&freezer_delta_lock, flags); + if (!freezer_delta.tv64 || (delta.tv64 < freezer_delta.tv64)) + freezer_delta = delta; + spin_unlock_irqrestore(&freezer_delta_lock, flags); +} + + /************************************************************************** * alarm kernel interface code */ @@ -279,6 +294,309 @@ void alarm_cancel(struct alarm *alarm) } +/************************************************************************** + * alarm posix interface code + */ + +/* + * clock2alarm - helper that converts from clockid to alarmtypes + * @clockid: clockid. + * + * Helper function that converts from clockids to alarmtypes + */ +static enum alarmtimer_type clock2alarm(clockid_t clockid) +{ + if (clockid == CLOCK_REALTIME_ALARM) + return ALARM_REALTIME; + if (clockid == CLOCK_BOOTTIME_ALARM) + return ALARM_BOOTTIME; + return -1; +} + +/* + * alarm_handle_timer - Callback for posix timers + * @alarm: alarm that fired + * + * Posix timer callback for expired alarm timers. 
+ */ +static void alarm_handle_timer(struct alarm *alarm) +{ + struct k_itimer *ptr = container_of(alarm, struct k_itimer, + it.alarmtimer); + if (posix_timer_event(ptr, 0) != 0) + ptr->it_overrun++; +} + +/* + * alarm_clock_getres - posix getres interface + * @which_clock: clockid + * @tp: timespec to fill + * + * Returns the granularity of underlying alarm base clock + */ +static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp) +{ + clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid; + + return hrtimer_get_res(baseid, tp); +} + +/** + * alarm_clock_get - posix clock_get interface + * @which_clock: clockid + * @tp: timespec to fill. + * + * Provides the underlying alarm base time. + */ +static int alarm_clock_get(clockid_t which_clock, struct timespec *tp) +{ + struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)]; + + *tp = ktime_to_timespec(base->gettime()); + return 0; +} + +/** + * alarm_timer_create - posix timer_create interface + * @new_timer: k_itimer pointer to manage + * + * Initializes the k_itimer structure. 
+ */ +static int alarm_timer_create(struct k_itimer *new_timer) +{ + enum alarmtimer_type type; + struct alarm_base *base; + + if (!capable(CAP_WAKE_ALARM)) + return -EPERM; + + type = clock2alarm(new_timer->it_clock); + base = &alarm_bases[type]; + alarm_init(&new_timer->it.alarmtimer, type, alarm_handle_timer); + return 0; +} + +/** + * alarm_timer_get - posix timer_get interface + * @new_timer: k_itimer pointer + * @cur_setting: itimerspec data to fill + * + * Copies the itimerspec data out from the k_itimer + */ +static void alarm_timer_get(struct k_itimer *timr, + struct itimerspec *cur_setting) +{ + cur_setting->it_interval = + ktime_to_timespec(timr->it.alarmtimer.period); + cur_setting->it_value = + ktime_to_timespec(timr->it.alarmtimer.node.expires); + return; +} + +/** + * alarm_timer_del - posix timer_del interface + * @timr: k_itimer pointer to be deleted + * + * Cancels any programmed alarms for the given timer. + */ +static int alarm_timer_del(struct k_itimer *timr) +{ + alarm_cancel(&timr->it.alarmtimer); + return 0; +} + +/** + * alarm_timer_set - posix timer_set interface + * @timr: k_itimer pointer to be deleted + * @flags: timer flags + * @new_setting: itimerspec to be used + * @old_setting: itimerspec being replaced + * + * Sets the timer to new_setting, and starts the timer. 
+ */ +static int alarm_timer_set(struct k_itimer *timr, int flags, + struct itimerspec *new_setting, + struct itimerspec *old_setting) +{ + /* Save old values */ + old_setting->it_interval = + ktime_to_timespec(timr->it.alarmtimer.period); + old_setting->it_value = + ktime_to_timespec(timr->it.alarmtimer.node.expires); + + /* If the timer was already set, cancel it */ + alarm_cancel(&timr->it.alarmtimer); + + /* start the timer */ + alarm_start(&timr->it.alarmtimer, + timespec_to_ktime(new_setting->it_value), + timespec_to_ktime(new_setting->it_interval)); + return 0; +} + +/** + * alarmtimer_nsleep_wakeup - Wakeup function for alarm_timer_nsleep + * @alarm: ptr to alarm that fired + * + * Wakes up the task that set the alarmtimer + */ +static void alarmtimer_nsleep_wakeup(struct alarm *alarm) +{ + struct task_struct *task = (struct task_struct *)alarm->data; + + alarm->data = NULL; + if (task) + wake_up_process(task); +} + +/** + * alarmtimer_do_nsleep - Internal alarmtimer nsleep implementation + * @alarm: ptr to alarmtimer + * @absexp: absolute expiration time + * + * Sets the alarm timer and sleeps until it is fired or interrupted. 
+ */ +static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp) +{ + alarm->data = (void *)current; + do { + set_current_state(TASK_INTERRUPTIBLE); + alarm_start(alarm, absexp, ktime_set(0, 0)); + if (likely(alarm->data)) + schedule(); + + alarm_cancel(alarm); + } while (alarm->data && !signal_pending(current)); + + __set_current_state(TASK_RUNNING); + + return (alarm->data == NULL); +} + + +/** + * update_rmtp - Update remaining timespec value + * @exp: expiration time + * @type: timer type + * @rmtp: user pointer to remaining timepsec value + * + * Helper function that fills in rmtp value with time between + * now and the exp value + */ +static int update_rmtp(ktime_t exp, enum alarmtimer_type type, + struct timespec __user *rmtp) +{ + struct timespec rmt; + ktime_t rem; + + rem = ktime_sub(exp, alarm_bases[type].gettime()); + + if (rem.tv64 <= 0) + return 0; + rmt = ktime_to_timespec(rem); + + if (copy_to_user(rmtp, &rmt, sizeof(*rmtp))) + return -EFAULT; + + return 1; + +} + +/** + * alarm_timer_nsleep_restart - restartblock alarmtimer nsleep + * @restart: ptr to restart block + * + * Handles restarted clock_nanosleep calls + */ +static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) +{ + enum alarmtimer_type type = restart->nanosleep.index; + ktime_t exp; + struct timespec __user *rmtp; + struct alarm alarm; + int ret = 0; + + exp.tv64 = restart->nanosleep.expires; + alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); + + if (alarmtimer_do_nsleep(&alarm, exp)) + goto out; + + if (freezing(current)) + alarmtimer_freezerset(exp, type); + + rmtp = restart->nanosleep.rmtp; + if (rmtp) { + ret = update_rmtp(exp, type, rmtp); + if (ret <= 0) + goto out; + } + + + /* The other values in restart are already filled in */ + ret = -ERESTART_RESTARTBLOCK; +out: + return ret; +} + +/** + * alarm_timer_nsleep - alarmtimer nanosleep + * @which_clock: clockid + * @flags: determins abstime or relative + * @tsreq: requested sleep time (abs or 
rel) + * @rmtp: remaining sleep time saved + * + * Handles clock_nanosleep calls against _ALARM clockids + */ +static int alarm_timer_nsleep(const clockid_t which_clock, int flags, + struct timespec *tsreq, struct timespec __user *rmtp) +{ + enum alarmtimer_type type = clock2alarm(which_clock); + struct alarm alarm; + ktime_t exp; + int ret = 0; + struct restart_block *restart; + + if (!capable(CAP_WAKE_ALARM)) + return -EPERM; + + alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); + + exp = timespec_to_ktime(*tsreq); + /* Convert (if necessary) to absolute time */ + if (flags != TIMER_ABSTIME) { + ktime_t now = alarm_bases[type].gettime(); + exp = ktime_add(now, exp); + } + + if (alarmtimer_do_nsleep(&alarm, exp)) + goto out; + + if (freezing(current)) + alarmtimer_freezerset(exp, type); + + /* abs timers don't set remaining time or restart */ + if (flags == TIMER_ABSTIME) { + ret = -ERESTARTNOHAND; + goto out; + } + + if (rmtp) { + ret = update_rmtp(exp, type, rmtp); + if (ret <= 0) + goto out; + } + + restart = &current_thread_info()->restart_block; + restart->fn = alarm_timer_nsleep_restart; + restart->nanosleep.index = type; + restart->nanosleep.expires = exp.tv64; + restart->nanosleep.rmtp = rmtp; + ret = -ERESTART_RESTARTBLOCK; + +out: + return ret; +} /************************************************************************** * alarmtimer initialization code @@ -306,6 +624,18 @@ static int __init alarmtimer_init(void) { int error = 0; int i; + struct k_clock alarm_clock = { + .clock_getres = alarm_clock_getres, + .clock_get = alarm_clock_get, + .timer_create = alarm_timer_create, + .timer_set = alarm_timer_set, + .timer_del = alarm_timer_del, + .timer_get = alarm_timer_get, + .nsleep = alarm_timer_nsleep, + }; + + posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); + posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); /* Initialize alarm bases */ alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; -- cgit v1.2.3-70-g09d2 From
180bf812ceaf01eb8ac69b86f3be0bd57f697668 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 28 Apr 2011 12:58:11 -0700 Subject: timers: Improve alarmtimer comments and minor fixes This patch addresses a number of minor comment improvements and other minor issues from Thomas' review of the alarmtimers code. CC: Thomas Gleixner Signed-off-by: John Stultz --- include/linux/alarmtimer.h | 12 ++++++++- kernel/time/alarmtimer.c | 67 +++++++++++++++++++--------------------------- 2 files changed, 38 insertions(+), 41 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 6b364b2e207..c5d6095b46f 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -13,12 +13,22 @@ enum alarmtimer_type { ALARM_NUMTYPE, }; +/** + * struct alarm - Alarm timer structure + * @node: timerqueue node for adding to the event list this value + * also includes the expiration time. + * @period: Period for recuring alarms + * @function: Function pointer to be executed when the timer fires. + * @type: Alarm type (BOOTTIME/REALTIME) + * @enabled: Flag that represents if the alarm is set to fire or not + * @data: Internal data value. 
+ */ struct alarm { struct timerqueue_node node; ktime_t period; void (*function)(struct alarm *); enum alarmtimer_type type; - char enabled; + bool enabled; void *data; }; diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 4058ad79d55..bed98004ae1 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -26,7 +26,15 @@ #include #include - +/** + * struct alarm_base - Alarm timer bases + * @lock: Lock for syncrhonized access to the base + * @timerqueue: Timerqueue head managing the list of events + * @timer: hrtimer used to schedule events while running + * @gettime: Function to read the time correlating to the base + * @base_clockid: clockid for the base + * @irqwork Delayed work structure for expiring timers + */ static struct alarm_base { spinlock_t lock; struct timerqueue_head timerqueue; @@ -36,18 +44,16 @@ static struct alarm_base { struct work_struct irqwork; } alarm_bases[ALARM_NUMTYPE]; +/* rtc timer and device for setting alarm wakeups at suspend */ static struct rtc_timer rtctimer; static struct rtc_device *rtcdev; +/* freezer delta & lock used to handle clock_nanosleep triggered wakeups */ static ktime_t freezer_delta; static DEFINE_SPINLOCK(freezer_delta_lock); -/************************************************************************** - * alarmtimer management code - */ - -/* +/** * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue * @base: pointer to the base where the timer is being run * @alarm: pointer to alarm being enqueued. @@ -67,7 +73,7 @@ static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) } } -/* +/** * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue * @base: pointer to the base where the timer is running * @alarm: pointer to alarm being removed @@ -91,16 +97,16 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) } } -/* +/** * alarmtimer_do_work - Handles alarm being fired. 
* @work: pointer to workqueue being run * - * When a timer fires, this runs through the timerqueue to see - * which alarm timers, and run those that expired. If there are - * more alarm timers queued, we set the hrtimer to fire in the - * future. + * When a alarm timer fires, this runs through the timerqueue to + * see which alarms expired, and runs those. If there are more alarm + * timers queued for the future, we set the hrtimer to fire when + * when the next future alarm timer expires. */ -void alarmtimer_do_work(struct work_struct *work) +static void alarmtimer_do_work(struct work_struct *work) { struct alarm_base *base = container_of(work, struct alarm_base, irqwork); @@ -141,7 +147,7 @@ void alarmtimer_do_work(struct work_struct *work) } -/* +/** * alarmtimer_fired - Handles alarm hrtimer being fired. * @timer: pointer to hrtimer being run * @@ -156,7 +162,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) } -/* +/** * alarmtimer_suspend - Suspend time callback * @dev: unused * @state: unused @@ -230,17 +236,11 @@ static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) } -/************************************************************************** - * alarm kernel interface code - */ - -/* +/** * alarm_init - Initialize an alarm structure * @alarm: ptr to alarm to be initialized * @type: the type of the alarm * @function: callback that is run when the alarm fires - * - * In-kernel interface to initializes the alarm structure. */ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, void (*function)(struct alarm *)) @@ -252,13 +252,11 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, alarm->enabled = 0; } -/* +/** * alarm_start - Sets an alarm to fire * @alarm: ptr to alarm to set * @start: time to run the alarm * @period: period at which the alarm will recur - * - * In-kernel interface set an alarm timer. 
*/ void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period) { @@ -275,11 +273,9 @@ void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period) spin_unlock_irqrestore(&base->lock, flags); } -/* +/** * alarm_cancel - Tries to cancel an alarm timer * @alarm: ptr to alarm to be canceled - * - * In-kernel interface to cancel an alarm timer. */ void alarm_cancel(struct alarm *alarm) { @@ -294,15 +290,9 @@ void alarm_cancel(struct alarm *alarm) } -/************************************************************************** - * alarm posix interface code - */ - -/* +/** * clock2alarm - helper that converts from clockid to alarmtypes * @clockid: clockid. - * - * Helper function that converts from clockids to alarmtypes */ static enum alarmtimer_type clock2alarm(clockid_t clockid) { @@ -313,7 +303,7 @@ static enum alarmtimer_type clock2alarm(clockid_t clockid) return -1; } -/* +/** * alarm_handle_timer - Callback for posix timers * @alarm: alarm that fired * @@ -327,7 +317,7 @@ static void alarm_handle_timer(struct alarm *alarm) ptr->it_overrun++; } -/* +/** * alarm_clock_getres - posix getres interface * @which_clock: clockid * @tp: timespec to fill @@ -598,9 +588,6 @@ out: return ret; } -/************************************************************************** - * alarmtimer initialization code - */ /* Suspend hook structures */ static const struct dev_pm_ops alarmtimer_pm_ops = { -- cgit v1.2.3-70-g09d2 From 7068b7a16270f1e85a8893d74b0f3c58d7826883 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 28 Apr 2011 13:29:18 -0700 Subject: timers: Remove delayed irqwork from alarmtimers implementation Thomas asked about the delayed irq work in the alarmtimers code, and I realized that it was a legacy from when the alarmtimer base lock was a mutex (due to concerns that we'd be interacting with the RTC device, which is protected by mutexes). 
Since the alarmtimer base is now protected by a spinlock, we can simply execute alarmtimer functions directly from the hrtimer callback. Should any future alarmtimer functions sleep, they can simply manage scheduling any delayed work themselves. CC: Thomas Gleixner Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index bed98004ae1..491e37b8de1 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -33,7 +33,6 @@ * @timer: hrtimer used to schedule events while running * @gettime: Function to read the time correlating to the base * @base_clockid: clockid for the base - * @irqwork Delayed work structure for expiring timers */ static struct alarm_base { spinlock_t lock; @@ -41,7 +40,6 @@ static struct alarm_base { struct hrtimer timer; ktime_t (*gettime)(void); clockid_t base_clockid; - struct work_struct irqwork; } alarm_bases[ALARM_NUMTYPE]; /* rtc timer and device for setting alarm wakeups at suspend */ @@ -97,22 +95,23 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm) } } + /** - * alarmtimer_do_work - Handles alarm being fired. - * @work: pointer to workqueue being run + * alarmtimer_fired - Handles alarm hrtimer being fired. + * @timer: pointer to hrtimer being run * * When a alarm timer fires, this runs through the timerqueue to * see which alarms expired, and runs those. If there are more alarm * timers queued for the future, we set the hrtimer to fire when * when the next future alarm timer expires. 
*/ -static void alarmtimer_do_work(struct work_struct *work) +static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) { - struct alarm_base *base = container_of(work, struct alarm_base, - irqwork); + struct alarm_base *base = container_of(timer, struct alarm_base, timer); struct timerqueue_node *next; unsigned long flags; ktime_t now; + int ret = HRTIMER_NORESTART; spin_lock_irqsave(&base->lock, flags); now = base->gettime(); @@ -140,25 +139,13 @@ static void alarmtimer_do_work(struct work_struct *work) } if (next) { - hrtimer_start(&base->timer, next->expires, - HRTIMER_MODE_ABS); + hrtimer_set_expires(&base->timer, next->expires); + ret = HRTIMER_RESTART; } spin_unlock_irqrestore(&base->lock, flags); -} + return ret; -/** - * alarmtimer_fired - Handles alarm hrtimer being fired. - * @timer: pointer to hrtimer being run - * - * When a timer fires, this schedules the do_work function to - * be run. - */ -static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) -{ - struct alarm_base *base = container_of(timer, struct alarm_base, timer); - schedule_work(&base->irqwork); - return HRTIMER_NORESTART; } @@ -636,7 +623,6 @@ static int __init alarmtimer_init(void) alarm_bases[i].base_clockid, HRTIMER_MODE_ABS); alarm_bases[i].timer.function = alarmtimer_fired; - INIT_WORK(&alarm_bases[i].irqwork, alarmtimer_do_work); } error = platform_driver_register(&alarmtimer_driver); platform_device_register_simple("alarmtimer", -1, NULL, 0); -- cgit v1.2.3-70-g09d2 From 472647dcd7e351dbeda750e5ab3e8f7b06d1199a Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 29 Apr 2011 15:03:10 -0700 Subject: timers: Fix alarmtimer build issues when CONFIG_RTC_CLASS=n Ingo pointed out that the alarmtimers won't build if CONFIG_RTC_CLASS=n. This patch adds proper ifdefs to the alarmtimer code to disable the rtc usage if it is not built in. 
Reported-by: Ingo Molnar Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner --- kernel/time/alarmtimer.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 491e37b8de1..9265014cb4d 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -42,9 +42,11 @@ static struct alarm_base { clockid_t base_clockid; } alarm_bases[ALARM_NUMTYPE]; +#ifdef CONFIG_RTC_CLASS /* rtc timer and device for setting alarm wakeups at suspend */ static struct rtc_timer rtctimer; static struct rtc_device *rtcdev; +#endif /* freezer delta & lock used to handle clock_nanosleep triggered wakeups */ static ktime_t freezer_delta; @@ -148,7 +150,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) } - +#ifdef CONFIG_RTC_CLASS /** * alarmtimer_suspend - Suspend time callback * @dev: unused @@ -206,7 +208,12 @@ static int alarmtimer_suspend(struct device *dev) return 0; } - +#else +static int alarmtimer_suspend(struct device *dev) +{ + return 0; +} +#endif static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) { @@ -631,6 +638,7 @@ static int __init alarmtimer_init(void) } device_initcall(alarmtimer_init); +#ifdef CONFIG_RTC_CLASS /** * has_wakealarm - check rtc device has wakealarm ability * @dev: current device @@ -675,4 +683,12 @@ static int __init alarmtimer_init_late(void) return 0; } +#else +static int __init alarmtimer_init_late(void) +{ + printk(KERN_WARNING "Kernel not built with RTC support, ALARM timers" + " will not wake from suspend"); + return 0; +} +#endif late_initcall(alarmtimer_init_late); -- cgit v1.2.3-70-g09d2 From b12a03ce4880bd13786a98db6de494a3e0123129 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 May 2011 16:48:57 +0200 Subject: hrtimers: Prepare for cancel on clock was set timers Make clock_was_set() unconditional and rename hres_timers_resume to hrtimers_resume. 
This is a preparatory patch for hrtimers which are cancelled when clock realtime was set. Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 16 ++---- kernel/hrtimer.c | 125 ++++++++++++++++++++++------------------------ kernel/time/timekeeping.c | 2 +- 3 files changed, 65 insertions(+), 78 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 62f500c724f..4135c88fe4f 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -148,9 +148,7 @@ struct hrtimer_clock_base { ktime_t resolution; ktime_t (*get_time)(void); ktime_t softirq_time; -#ifdef CONFIG_HIGH_RES_TIMERS ktime_t offset; -#endif }; enum hrtimer_base_type { @@ -256,8 +254,6 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) #ifdef CONFIG_HIGH_RES_TIMERS struct clock_event_device; -extern void clock_was_set(void); -extern void hres_timers_resume(void); extern void hrtimer_interrupt(struct clock_event_device *dev); /* @@ -291,16 +287,8 @@ extern void hrtimer_peek_ahead_timers(void); # define MONOTONIC_RES_NSEC LOW_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_LOW_RES -/* - * clock_was_set() is a NOP for non- high-resolution systems. The - * time-sorted order guarantees that a timer does not expire early and - * is expired in the next softirq when the clock was advanced. - */ -static inline void clock_was_set(void) { } static inline void hrtimer_peek_ahead_timers(void) { } -static inline void hres_timers_resume(void) { } - /* * In non high resolution mode the time reference is taken from * the base softirq time variable. 
@@ -316,11 +304,13 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) } #endif +extern void clock_was_set(void); +extern void hrtimers_resume(void); + extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); - DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index dbbbf7d4308..c145ed643bc 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -621,66 +621,6 @@ static int hrtimer_reprogram(struct hrtimer *timer, return res; } - -/* - * Retrigger next event is called after clock was set - * - * Called with interrupts disabled via on_each_cpu() - */ -static void retrigger_next_event(void *arg) -{ - struct hrtimer_cpu_base *base; - struct timespec realtime_offset, wtm, sleep; - - if (!hrtimer_hres_active()) - return; - - get_xtime_and_monotonic_and_sleep_offset(&realtime_offset, &wtm, - &sleep); - set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); - - base = &__get_cpu_var(hrtimer_bases); - - /* Adjust CLOCK_REALTIME offset */ - raw_spin_lock(&base->lock); - base->clock_base[HRTIMER_BASE_REALTIME].offset = - timespec_to_ktime(realtime_offset); - base->clock_base[HRTIMER_BASE_BOOTTIME].offset = - timespec_to_ktime(sleep); - - hrtimer_force_reprogram(base, 0); - raw_spin_unlock(&base->lock); -} - -/* - * Clock realtime was set - * - * Change the offset of the realtime clock vs. the monotonic - * clock. - * - * We might have to reprogram the high resolution timer interrupt. On - * SMP we call the architecture specific code to retrigger _all_ high - * resolution timer interrupts. On UP we just disable interrupts and - * call the high resolution interrupt code. 
- */ -void clock_was_set(void) -{ - /* Retrigger the CPU local events everywhere */ - on_each_cpu(retrigger_next_event, NULL, 1); -} - -/* - * During resume we might have to reprogram the high resolution timer - * interrupt (on the local CPU): - */ -void hres_timers_resume(void) -{ - WARN_ONCE(!irqs_disabled(), - KERN_INFO "hres_timers_resume() called with IRQs enabled!"); - - retrigger_next_event(NULL); -} - /* * Initialize the high resolution related parts of cpu_base */ @@ -714,12 +654,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } +static void retrigger_next_event(void *arg); + /* * Switch to high resolution mode */ static int hrtimer_switch_to_hres(void) { - int cpu = smp_processor_id(); + int i, cpu = smp_processor_id(); struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); unsigned long flags; @@ -735,9 +677,8 @@ static int hrtimer_switch_to_hres(void) return 0; } base->hres_active = 1; - base->clock_base[HRTIMER_BASE_REALTIME].resolution = KTIME_HIGH_RES; - base->clock_base[HRTIMER_BASE_MONOTONIC].resolution = KTIME_HIGH_RES; - base->clock_base[HRTIMER_BASE_BOOTTIME].resolution = KTIME_HIGH_RES; + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) + base->clock_base[i].resolution = KTIME_HIGH_RES; tick_setup_sched_timer(); @@ -764,6 +705,62 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } #endif /* CONFIG_HIGH_RES_TIMERS */ +/* + * Retrigger next event is called after clock was set + * + * Called with interrupts disabled via on_each_cpu() + */ +static void retrigger_next_event(void *arg) +{ + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); + struct timespec realtime_offset, xtim, wtm, sleep; + + if (!hrtimer_hres_active()) + return; + + get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); + set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); + + /* Adjust CLOCK_REALTIME offset */ + raw_spin_lock(&base->lock); + 
base->clock_base[HRTIMER_BASE_REALTIME].offset = + timespec_to_ktime(realtime_offset); + base->clock_base[HRTIMER_BASE_BOOTTIME].offset = + timespec_to_ktime(sleep); + + hrtimer_force_reprogram(base, 0); + raw_spin_unlock(&base->lock); +} + +/* + * Clock realtime was set + * + * Change the offset of the realtime clock vs. the monotonic + * clock. + * + * We might have to reprogram the high resolution timer interrupt. On + * SMP we call the architecture specific code to retrigger _all_ high + * resolution timer interrupts. On UP we just disable interrupts and + * call the high resolution interrupt code. + */ +void clock_was_set(void) +{ + /* Retrigger the CPU local events everywhere */ + on_each_cpu(retrigger_next_event, NULL, 1); +} + +/* + * During resume we might have to reprogram the high resolution timer + * interrupt (on the local CPU): + */ +void hrtimers_resume(void) +{ + WARN_ONCE(!irqs_disabled(), + KERN_INFO "hrtimers_resume() called with IRQs enabled!"); + + retrigger_next_event(NULL); +} + static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) { #ifdef CONFIG_TIMER_STATS diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8e6a05a5915..a61b8fa2d39 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -680,7 +680,7 @@ static void timekeeping_resume(void) clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); /* Resume hrtimers */ - hres_timers_resume(); + hrtimers_resume(); } static int timekeeping_suspend(void) -- cgit v1.2.3-70-g09d2 From 99ee5315dac6211e972fa3f23bcc9a0343ff58c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Apr 2011 14:16:42 +0200 Subject: timerfd: Allow timers to be cancelled when clock was set Some applications must be aware of clock realtime being set backward. A simple example is a clock applet which arms a timer for the next minute display. 
If clock realtime is set backward then the applet displays a stale time for the amount of time by which the clock was set backwards. Due to that, applications poll the time because we don't have an interface. Extend the timerfd interface by adding a flag which puts the timer onto a different internal realtime clock. All timers on this clock are expired whenever the clock was set. The timerfd core records the monotonic offset when the timer is created. When the timer is armed, then the current offset is compared to the previously recorded offset. When it has changed, then timerfd_settime returns -ECANCELED. When a timer is read the offset is compared and if it changed, -ECANCELED is returned to user space. Periodic timers are not rearmed in the cancelation case. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Chris Friesen Tested-by: Kay Sievers Cc: "Kirill A. Shutemov" Cc: Peter Zijlstra Cc: Davide Libenzi Reviewed-by: Alexander Shishkin Link: http://lkml.kernel.org/r/%3Calpine.LFD.2.02.1104271359580.3323%40ionos%3E Signed-off-by: Thomas Gleixner --- fs/timerfd.c | 57 ++++++++++++++++++++++++++++++++++++++++++----- include/linux/hrtimer.h | 2 ++ include/linux/time.h | 6 +++++ include/linux/timerfd.h | 3 ++- kernel/hrtimer.c | 36 +++++++++++++++++++++++++++++- kernel/time/timekeeping.c | 15 +++++++++++++ 6 files changed, 111 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/fs/timerfd.c b/fs/timerfd.c index 8c4fc1425b3..7e14c9e7c4e 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -26,10 +26,12 @@ struct timerfd_ctx { struct hrtimer tmr; ktime_t tintv; + ktime_t moffs; wait_queue_head_t wqh; u64 ticks; int expired; int clockid; + bool might_cancel; }; /* @@ -59,24 +61,52 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) return remaining.tv64 < 0 ?
ktime_set(0, 0): remaining; } -static void timerfd_setup(struct timerfd_ctx *ctx, int flags, - const struct itimerspec *ktmr) +static bool timerfd_canceled(struct timerfd_ctx *ctx) +{ + ktime_t moffs; + + if (!ctx->might_cancel) + return false; + + moffs = ktime_get_monotonic_offset(); + + if (moffs.tv64 == ctx->moffs.tv64) + return false; + + ctx->moffs = moffs; + return true; +} + +static int timerfd_setup(struct timerfd_ctx *ctx, int flags, + const struct itimerspec *ktmr) { enum hrtimer_mode htmode; ktime_t texp; + int clockid = ctx->clockid; htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; + ctx->might_cancel = false; + if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME && + (flags & TFD_TIMER_CANCELON_SET)) { + clockid = CLOCK_REALTIME_COS; + ctx->might_cancel = true; + } + texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; ctx->ticks = 0; ctx->tintv = timespec_to_ktime(ktmr->it_interval); - hrtimer_init(&ctx->tmr, ctx->clockid, htmode); + hrtimer_init(&ctx->tmr, clockid, htmode); hrtimer_set_expires(&ctx->tmr, texp); ctx->tmr.function = timerfd_tmrproc; - if (texp.tv64 != 0) + if (texp.tv64 != 0) { hrtimer_start(&ctx->tmr, texp, htmode); + if (timerfd_canceled(ctx)) + return -ECANCELED; + } + return 0; } static int timerfd_release(struct inode *inode, struct file *file) @@ -118,8 +148,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, res = -EAGAIN; else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); + if (ctx->ticks) { ticks = ctx->ticks; + + /* + * If clock has changed, we do not care about the + * ticks and we do not rearm the timer. Userspace must + * reevaluate anyway. 
+ */ + if (timerfd_canceled(ctx)) { + ticks = 0; + ctx->expired = 0; + res = -ECANCELED; + } + if (ctx->expired && ctx->tintv.tv64) { /* * If tintv.tv64 != 0, this is a periodic timer that @@ -183,6 +226,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) init_waitqueue_head(&ctx->wqh); ctx->clockid = clockid; hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); + ctx->moffs = ktime_get_monotonic_offset(); ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); @@ -199,6 +243,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, struct file *file; struct timerfd_ctx *ctx; struct itimerspec ktmr, kotmr; + int ret; if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) return -EFAULT; @@ -240,14 +285,14 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, /* * Re-program the timer to the new value ... */ - timerfd_setup(ctx, flags, &ktmr); + ret = timerfd_setup(ctx, flags, &ktmr); spin_unlock_irq(&ctx->wqh.lock); fput(file); if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) return -EFAULT; - return 0; + return ret; } SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4135c88fe4f..eda4ccde073 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -155,6 +155,7 @@ enum hrtimer_base_type { HRTIMER_BASE_REALTIME, HRTIMER_BASE_MONOTONIC, HRTIMER_BASE_BOOTTIME, + HRTIMER_BASE_REALTIME_COS, HRTIMER_MAX_CLOCK_BASES, }; @@ -310,6 +311,7 @@ extern void hrtimers_resume(void); extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); +extern ktime_t ktime_get_monotonic_offset(void); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/include/linux/time.h b/include/linux/time.h index b3061782dec..a9242773eb2 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -302,6 +302,12 @@ struct itimerval { * The IDs of various hardware clocks: 
*/ #define CLOCK_SGI_CYCLE 10 + +#ifdef __KERNEL__ +/* This clock is not exposed to user space */ +#define CLOCK_REALTIME_COS 15 +#endif + #define MAX_CLOCKS 16 #define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) #define CLOCKS_MONO CLOCK_MONOTONIC diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h index 2d0792983f8..e9571fc8f1a 100644 --- a/include/linux/timerfd.h +++ b/include/linux/timerfd.h @@ -19,6 +19,7 @@ * shared O_* flags. */ #define TFD_TIMER_ABSTIME (1 << 0) +#define TFD_TIMER_CANCELON_SET (1 << 1) #define TFD_CLOEXEC O_CLOEXEC #define TFD_NONBLOCK O_NONBLOCK @@ -26,6 +27,6 @@ /* Flags for timerfd_create. */ #define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS /* Flags for timerfd_settime. */ -#define TFD_SETTIME_FLAGS TFD_TIMER_ABSTIME +#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCELON_SET) #endif /* _LINUX_TIMERFD_H */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index c145ed643bc..eabcbd78143 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -78,6 +78,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, + { + .index = CLOCK_REALTIME_COS, + .get_time = &ktime_get_real, + .resolution = KTIME_LOW_RES, + }, } }; @@ -85,6 +90,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, + [CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS, }; static inline int hrtimer_clockid_to_base(clockid_t clock_id) @@ -110,6 +116,7 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; + base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim; } /* @@ -479,6 +486,8 @@ static inline void debug_deactivate(struct hrtimer *timer) 
trace_hrtimer_cancel(timer); } +static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base); + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -715,9 +724,14 @@ static void retrigger_next_event(void *arg) struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); struct timespec realtime_offset, xtim, wtm, sleep; - if (!hrtimer_hres_active()) + if (!hrtimer_hres_active()) { + raw_spin_lock(&base->lock); + hrtimer_expire_cancelable(base); + raw_spin_unlock(&base->lock); return; + } + /* Optimized out for !HIGH_RES */ get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); @@ -727,6 +741,10 @@ static void retrigger_next_event(void *arg) timespec_to_ktime(realtime_offset); base->clock_base[HRTIMER_BASE_BOOTTIME].offset = timespec_to_ktime(sleep); + base->clock_base[HRTIMER_BASE_REALTIME_COS].offset = + timespec_to_ktime(realtime_offset); + + hrtimer_expire_cancelable(base); hrtimer_force_reprogram(base, 0); raw_spin_unlock(&base->lock); @@ -1222,6 +1240,22 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) timer->state &= ~HRTIMER_STATE_CALLBACK; } +static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base) +{ + struct timerqueue_node *node; + struct hrtimer_clock_base *base; + ktime_t now = ktime_get_real(); + + base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS]; + + while ((node = timerqueue_getnext(&base->active))) { + struct hrtimer *timer; + + timer = container_of(node, struct hrtimer, node); + __run_hrtimer(timer, &now); + } +} + #ifdef CONFIG_HIGH_RES_TIMERS /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index a61b8fa2d39..342408cf68d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1098,6 +1098,21 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, } while (read_seqretry(&xtime_lock, seq)); } +/** + * ktime_get_monotonic_offset() - get 
wall_to_monotonic in ktime_t format + */ +ktime_t ktime_get_monotonic_offset(void) +{ + unsigned long seq; + struct timespec wtom; + + do { + seq = read_seqbegin(&xtime_lock); + wtom = wall_to_monotonic; + } while (read_seqretry(&xtime_lock, seq)); + return timespec_to_ktime(wtom); +} + /** * xtime_update() - advances the timekeeping infrastructure * @ticks: number of ticks, that have elapsed since the last call. -- cgit v1.2.3-70-g09d2 From ce788f930b0cdf821de7ee8f84cfe8cf7fcb6311 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 May 2011 08:00:47 +0200 Subject: alarmtimer: Check return value of class_find_device() alarmtimer_init_late() uses class_find_device() to find an alarm-capable rtc device. The match callback stores a pointer to the name in the char pointer handed in from the call site. alarmtimer_init_late() checks the char pointer for NULL, but the pointer is on the stack and not initialized to NULL before the call. So it can have random content when the match function did not identify a device, which leads to random access in the following rtc_open() call where the pointer is dereferenced. Instead of relying on the char pointer, check the return value of class_find_device. If a device is found then the name pointer is valid as well.
Reported-by: Ingo Molnar Cc: John Stultz Signed-off-by: Thomas Gleixner --- kernel/time/alarmtimer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 9265014cb4d..e5db9b00751 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -669,11 +669,13 @@ static int __init has_wakealarm(struct device *dev, void *name_ptr) */ static int __init alarmtimer_init_late(void) { + struct device *dev; char *str; /* Find an rtc device and init the rtc_timer */ - class_find_device(rtc_class, NULL, &str, has_wakealarm); - if (str) + dev = class_find_device(rtc_class, NULL, &str, has_wakealarm); + /* If we have a device then str is valid. See has_wakealarm() */ + if (dev) rtcdev = rtc_class_open(str); if (!rtcdev) { printk(KERN_WARNING "No RTC device found, ALARM timers will" -- cgit v1.2.3-70-g09d2 From 179eb03268aa1da03d90f1566ea85dc1478d3ae3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 May 2011 08:18:34 +0200 Subject: alarmtimer: Drop device refcount after rtc_open() class_find_device() takes a refcount on the rtc device. rtc_open() takes another one, so we can drop it after the rtc_open() call. Signed-off-by: Thomas Gleixner Cc: John Stultz --- kernel/time/alarmtimer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index e5db9b00751..c6027fe9a4e 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -675,8 +675,14 @@ static int __init alarmtimer_init_late(void) /* Find an rtc device and init the rtc_timer */ dev = class_find_device(rtc_class, NULL, &str, has_wakealarm); /* If we have a device then str is valid. See has_wakealarm() */ - if (dev) + if (dev) { rtcdev = rtc_class_open(str); + /* + * Drop the reference we got in class_find_device, + * rtc_open takes its own. 
+ */ + put_device(dev); + } if (!rtcdev) { printk(KERN_WARNING "No RTC device found, ALARM timers will" " not wake from suspend"); -- cgit v1.2.3-70-g09d2 From e05b2efb82596905ebfe88e8612ee81dec9b6592 Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 4 May 2011 18:16:50 -0700 Subject: clocksource: Install completely before selecting Christian Hoffmann reported that the command line clocksource override with acpi_pm timer fails: Kernel command line: clocksource=acpi_pm hpet clockevent registered Switching to clocksource hpet Override clocksource acpi_pm is not HRT compatible. Cannot switch while in HRT/NOHZ mode. The watchdog code is what enables CLOCK_SOURCE_VALID_FOR_HRES, but we actually end up selecting the clocksource before we enqueue it into the watchdog list, so that's why we see the warning and fail to switch to acpi_pm timer as requested. That's particularly bad when we want to debug timekeeping related problems in early boot. Put the selection call last. Reported-by: Christian Hoffmann Signed-off-by: John Stultz Cc: stable@kernel.org # 32... 
Link: http://lkml.kernel.org/r/%3C1304558210.2943.24.camel%40work-vm%3E Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 6519cf62d9c..0e17c10f8a9 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -685,8 +685,8 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) /* Add clocksource to the clcoksource list */ mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); - clocksource_select(); clocksource_enqueue_watchdog(cs); + clocksource_select(); mutex_unlock(&clocksource_mutex); return 0; } @@ -706,8 +706,8 @@ int clocksource_register(struct clocksource *cs) mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); - clocksource_select(); clocksource_enqueue_watchdog(cs); + clocksource_select(); mutex_unlock(&clocksource_mutex); return 0; } -- cgit v1.2.3-70-g09d2 From 7372b0b122af0f6675f3ab65bfd91c8a438e0480 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 4 May 2011 15:09:27 -0700 Subject: clockevents: Move C3 stop test outside lock Avoid taking broadcast_lock in the idle path for systems where the timer doesn't stop in C3. 
[ tglx: Removed the stale label and added comment ] Signed-off-by: Andi Kleen Cc: Dave Kleikamp Cc: Chris Mason Cc: Peter Zijlstra Cc: Tim Chen Cc: lenb@kernel.org Cc: paulmck@us.ibm.com Link: http://lkml.kernel.org/r/%3C20110504234806.GF2925%40one.firstfloor.org%3E Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index da800ffa810..827e0f862da 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -456,23 +456,27 @@ void tick_broadcast_oneshot_control(unsigned long reason) unsigned long flags; int cpu; - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - /* * Periodic mode does not care about the enter/exit of power * states */ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - goto out; + return; - bc = tick_broadcast_device.evtdev; + /* + * We are called with preemtion disabled from the depth of the + * idle code, so we can't be moved away. 
+ */ cpu = smp_processor_id(); td = &per_cpu(tick_cpu_device, cpu); dev = td->evtdev; if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) - goto out; + return; + + bc = tick_broadcast_device.evtdev; + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) { cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask()); @@ -489,8 +493,6 @@ void tick_broadcast_oneshot_control(unsigned long reason) tick_program_event(dev->next_event, 1); } } - -out: raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -- cgit v1.2.3-70-g09d2 From 07f4beb0b5bbfaf36a64aa00d59e670ec578a95a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 May 2011 11:07:48 +0200 Subject: tick: Clear broadcast active bit when switching to oneshot The first cpu which switches from periodic to oneshot mode switches also the broadcast device into oneshot mode. The broadcast device serves as a backup for per cpu timers which stop in deeper C-states. To avoid starvation of the cpus which might be in idle and depend on broadcast mode it marks the other cpus as broadcast active and sets the broadcast expiry value of those cpus to the next tick. The oneshot mode broadcast bit for the other cpus is sticky and gets only cleared when those cpus exit idle. If a cpu was not idle while the bit got set, the bit consequently prevents the broadcast device from being armed on behalf of that cpu when it enters idle for the first time after it switched to oneshot mode. In most cases that goes unnoticed as one of the other cpus has usually a timer pending which keeps the broadcast device armed with a short timeout. Now if the only cpu which has a short timer active has the bit set then the broadcast device will not be armed on behalf of that cpu and will fire way after the expected timer expiry.
In the case of Christian's bug report it took ~145 seconds which is about half of the wrap around time of HPET (the limit for that device) due to the fact that all other cpus had no timers armed which expired before the 145 seconds timeframe. The solution is simply to clear the broadcast active bit unconditionally when a cpu switches to oneshot mode after the first cpu switched the broadcast device over. It's not idle at that point, otherwise it would not be executing that code. [ I fundamentally hate that broadcast crap. Why the heck thought some folks that when going into deep idle it's a brilliant concept to switch off the last device which brings the cpu back from that state? ] Thanks to Christian for providing all the valuable debug information! Reported-and-tested-by: Christian Hoffmann Cc: John Stultz Link: http://lkml.kernel.org/r/%3Calpine.LFD.2.02.1105161105170.3078%40ionos%3E Cc: stable@kernel.org Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index da800ffa810..723c7637e55 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -522,10 +522,11 @@ static void tick_broadcast_init_next_event(struct cpumask *mask, */ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { + int cpu = smp_processor_id(); + /* Set it up only once !
*/ if (bc->event_handler != tick_handle_oneshot_broadcast) { int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; - int cpu = smp_processor_id(); bc->event_handler = tick_handle_oneshot_broadcast; clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); @@ -551,6 +552,15 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) tick_broadcast_set_event(tick_next_period, 1); } else bc->next_event.tv64 = KTIME_MAX; + } else { + /* + * The first cpu which switches to oneshot mode sets + * the bit for all other cpus which are in the general + * (periodic) broadcast mask. So the bit is set and + * would prevent the first broadcast enter after this + * to program the bc device. + */ + tick_broadcast_clear_oneshot(cpu); } } -- cgit v1.2.3-70-g09d2 From 724ed53e8ac2c5278af8955673049714c1073464 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:40 +0000 Subject: clocksource: Get rid of the hardcoded 5 seconds sleep time limit Slow clocksources can have a way longer sleep time than 5 seconds and even fast ones can easily cope with 600 seconds and still maintain proper accuracy. Signed-off-by: Thomas Gleixner Cc: John Stultz Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.109811585%40linutronix.de%3E --- kernel/time/clocksource.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 6519cf62d9c..6dbbbb1ae6b 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -626,19 +626,6 @@ static void clocksource_enqueue(struct clocksource *cs) list_add(&cs->list, entry); } - -/* - * Maximum time we expect to go between ticks. This includes idle - * tickless time. It provides the trade off between selecting a - * mult/shift pair that is very precise but can only handle a short - * period of time, vs. 
a mult/shift pair that can handle long periods - * of time but isn't as precise. - * - * This is a subsystem constant, and actual hardware limitations - * may override it (ie: clocksources that wrap every 3 seconds). - */ -#define MAX_UPDATE_LENGTH 5 /* Seconds */ - /** * __clocksource_updatefreq_scale - Used update clocksource with new freq * @t: clocksource to be registered @@ -652,15 +639,28 @@ static void clocksource_enqueue(struct clocksource *cs) */ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) { + unsigned long sec; + /* - * Ideally we want to use some of the limits used in - * clocksource_max_deferment, to provide a more informed - * MAX_UPDATE_LENGTH. But for now this just gets the - * register interface working properly. + * Calc the maximum number of seconds which we can run before + * wrapping around. For clocksources which have a mask > 32bit + * we need to limit the max sleep time to have a good + * conversion precision. 10 minutes is still a reasonable + * amount. That results in a shift value of 24 for a + * clocksource with mask >= 40bit and f >= 4GHz. That maps to + * ~ 0.06ppm granularity for NTP. We apply the same 12.5% + * margin as we do in clocksource_max_deferment() */ + sec = (cs->mask - (cs->mask >> 5)); + do_div(sec, freq); + do_div(sec, scale); + if (!sec) + sec = 1; + else if (sec > 600 && cs->mask > UINT_MAX) + sec = 600; + clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, - NSEC_PER_SEC/scale, - MAX_UPDATE_LENGTH*scale); + NSEC_PER_SEC / scale, sec * scale); cs->max_idle_ns = clocksource_max_deferment(cs); } EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); -- cgit v1.2.3-70-g09d2 From 57f0fcbe1dea8a36c9d1673086326059991c5f81 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:41 +0000 Subject: clockevents: Provide combined configure and register function All clockevent devices have the same open coded initialization functions. 
Provide an interface which does all necessary initialization in the core code. Signed-off-by: Thomas Gleixner Cc: John Stultz Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.331975870%40linutronix.de%3E --- include/linux/clockchips.h | 9 +++++++++ kernel/time/clockevents.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) (limited to 'kernel/time') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 9466eebc8e1..80acc79e0dc 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -69,6 +69,8 @@ enum clock_event_nofitiers { * @retries: number of forced programming retries * @set_mode: set mode function * @broadcast: function to broadcast events + * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration + * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) @@ -91,6 +93,9 @@ struct clock_event_device { void (*broadcast)(const struct cpumask *mask); void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); + unsigned long min_delta_ticks; + unsigned long max_delta_ticks; + const char *name; int rating; int irq; @@ -123,6 +128,10 @@ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); +extern void clockevents_config_and_register(struct clock_event_device *dev, + u32 freq, unsigned long min_delta, + unsigned long max_delta); + extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); extern void clockevents_set_mode(struct clock_event_device *dev, diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 0d74b9ba90c..c69e88c9444 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -194,6 +194,50 
@@ void clockevents_register_device(struct clock_event_device *dev) } EXPORT_SYMBOL_GPL(clockevents_register_device); +static void clockevents_config(struct clock_event_device *dev, + u32 freq) +{ + unsigned long sec; + + if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) + return; + + /* + * Calculate the maximum number of seconds we can sleep. Limit + * to 10 minutes for hardware which can program more than + * 32bit ticks so we still get reasonable conversion values. + */ + sec = dev->max_delta_ticks; + do_div(sec, freq); + if (!sec) + sec = 1; + else if (sec > 600 && dev->max_delta_ticks > UINT_MAX) + sec = 600; + + clockevents_calc_mult_shift(dev, freq, sec); + dev->min_delta_ns = clockevent_delta2ns(dev->min_delta_ticks, dev); + dev->max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev); +} + +/** + * clockevents_config_and_register - Configure and register a clock event device + * @dev: device to register + * @freq: The clock frequency + * @min_delta: The minimum clock ticks to program in oneshot mode + * @max_delta: The maximum clock ticks to program in oneshot mode + * + * min/max_delta can be 0 for devices which do not support oneshot mode. + */ +void clockevents_config_and_register(struct clock_event_device *dev, + u32 freq, unsigned long min_delta, + unsigned long max_delta) +{ + dev->min_delta_ticks = min_delta; + dev->max_delta_ticks = max_delta; + clockevents_config(dev, freq); + clockevents_register_device(dev); +} + /* * Noop handler when we shut down an event device */ -- cgit v1.2.3-70-g09d2 From 80b816b736cfa5b9582279127099b20a479ab7d9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 May 2011 21:33:42 +0000 Subject: clockevents: Provide interface to reconfigure an active clock event device Some ARM SoCs have clock event devices which have their frequency modified due to frequency scaling. Provide an interface which allows to reconfigure an active device. After reconfiguration reprogram the current pending event. 
Signed-off-by: Thomas Gleixner Cc: LAK Cc: John Stultz Acked-by: Linus Walleij Reviewed-by: Ingo Molnar Link: http://lkml.kernel.org/r/%3C20110518210136.437459958%40linutronix.de%3E --- include/linux/clockchips.h | 2 ++ kernel/time/clockevents.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'kernel/time') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 80acc79e0dc..d6733e27af3 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -132,6 +132,8 @@ extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); +extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); + extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); extern void clockevents_set_mode(struct clock_event_device *dev, diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index c69e88c9444..22a9da9a9c9 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -238,6 +238,26 @@ void clockevents_config_and_register(struct clock_event_device *dev, clockevents_register_device(dev); } +/** + * clockevents_update_freq - Update frequency and reprogram a clock event device. + * @dev: device to modify + * @freq: new device frequency + * + * Reconfigure and reprogram a clock event device in oneshot + * mode. Must be called on the cpu for which the device delivers per + * cpu timer events with interrupts disabled! Returns 0 on success, + * -ETIME when the event is in the past. 
+ */ +int clockevents_update_freq(struct clock_event_device *dev, u32 freq) +{ + clockevents_config(dev, freq); + + if (dev->mode != CLOCK_EVT_MODE_ONESHOT) + return 0; + + return clockevents_program_event(dev, dev->next_event, ktime_get()); +} + /* * Noop handler when we shut down an event device */ -- cgit v1.2.3-70-g09d2 From c0e299b1a91cbdb21ae08e382a4176200398bc36 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 10:50:52 +0200 Subject: clockevents/source: Use u64 to make 32bit happy unsigned long is not 64bit on 32bit machine. Signed-off-by: Thomas Gleixner --- kernel/time/clockevents.c | 2 +- kernel/time/clocksource.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 22a9da9a9c9..c027d4f602f 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -197,7 +197,7 @@ EXPORT_SYMBOL_GPL(clockevents_register_device); static void clockevents_config(struct clock_event_device *dev, u32 freq) { - unsigned long sec; + u64 sec; if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) return; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index d9d5f8c885f..1c95fd67732 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -639,7 +639,7 @@ static void clocksource_enqueue(struct clocksource *cs) */ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) { - unsigned long sec; + u64 sec; /* * Calc the maximum number of seconds which we can run before -- cgit v1.2.3-70-g09d2 From ab8177bc53e8ae3a3ba6d200ce2c2dae263f7ee5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 13:05:15 +0200 Subject: hrtimers: Avoid touching inactive timer bases Instead of iterating over all possible timer bases avoid it by marking the active bases in the cpu base. 
Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- include/linux/hrtimer.h | 7 +++++-- include/linux/thread_info.h | 2 +- kernel/hrtimer.c | 29 ++++++++++++++++++----------- kernel/posix-cpu-timers.c | 4 ++-- kernel/posix-timers.c | 2 +- kernel/time/alarmtimer.c | 4 ++-- 6 files changed, 29 insertions(+), 19 deletions(-) (limited to 'kernel/time') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index cc5f5f51db1..771c95802ed 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -143,7 +143,8 @@ struct hrtimer_sleeper { */ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; - clockid_t index; + int index; + clockid_t clockid; struct timerqueue_head active; ktime_t resolution; ktime_t (*get_time)(void); @@ -162,7 +163,7 @@ enum hrtimer_base_type { * struct hrtimer_cpu_base - the per cpu clock bases * @lock: lock protecting the base and associated clock bases * and timers - * @clock_base: array of clock bases for this cpu + * @active_bases: Bitfield to mark bases with active timers * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode @@ -171,9 +172,11 @@ enum hrtimer_base_type { * @nr_retries: Total number of hrtimer interrupt retries * @nr_hangs: Total number of hrtimer interrupt hangs * @max_hang_time: Maximum time spent in hrtimer_interrupt + * @clock_base: array of clock bases for this cpu */ struct hrtimer_cpu_base { raw_spinlock_t lock; + unsigned long active_bases; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 20fc303947d..8d03f079688 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -29,7 +29,7 @@ struct restart_block { } futex; /* For nanosleep */ struct { - clockid_t index; + clockid_t clockid; struct timespec __user *rmtp; #ifdef CONFIG_COMPAT struct compat_timespec __user *compat_rmtp; 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 26dd32f9f6b..1b08f6d67f1 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -64,17 +64,20 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clock_base = { { - .index = CLOCK_REALTIME, + .index = HRTIMER_BASE_REALTIME, + .clockid = CLOCK_REALTIME, .get_time = &ktime_get_real, .resolution = KTIME_LOW_RES, }, { - .index = CLOCK_MONOTONIC, + .index = HRTIMER_BASE_MONOTONIC, + .clockid = CLOCK_MONOTONIC, .get_time = &ktime_get, .resolution = KTIME_LOW_RES, }, { - .index = CLOCK_BOOTTIME, + .index = HRTIMER_BASE_BOOTTIME, + .clockid = CLOCK_BOOTTIME, .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, @@ -196,7 +199,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, struct hrtimer_cpu_base *new_cpu_base; int this_cpu = smp_processor_id(); int cpu = hrtimer_get_target(this_cpu, pinned); - int basenum = hrtimer_clockid_to_base(base->index); + int basenum = base->index; again: new_cpu_base = &per_cpu(hrtimer_bases, cpu); @@ -857,6 +860,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, debug_activate(timer); timerqueue_add(&base->active, &timer->node); + base->cpu_base->active_bases |= 1 << base->index; /* * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the @@ -898,6 +902,8 @@ static void __remove_hrtimer(struct hrtimer *timer, #endif } timerqueue_del(&base->active, &timer->node); + if (!timerqueue_getnext(&base->active)) + base->cpu_base->active_bases &= ~(1 << base->index); out: timer->state = newstate; } @@ -1235,7 +1241,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) void hrtimer_interrupt(struct clock_event_device *dev) { struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - struct hrtimer_clock_base *base; ktime_t expires_next, now, entry_time, delta; int i, retries = 0; @@ -1257,12 +1262,15 @@ retry: */ cpu_base->expires_next.tv64 = KTIME_MAX; - base = cpu_base->clock_base; - for (i = 0; i < 
HRTIMER_MAX_CLOCK_BASES; i++) { - ktime_t basenow; + struct hrtimer_clock_base *base; struct timerqueue_node *node; + ktime_t basenow; + + if (!(cpu_base->active_bases & (1 << i))) + continue; + base = cpu_base->clock_base + i; basenow = ktime_add(now, base->offset); while ((node = timerqueue_getnext(&base->active))) { @@ -1295,7 +1303,6 @@ retry: __run_hrtimer(timer, &basenow); } - base++; } /* @@ -1526,7 +1533,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) struct timespec __user *rmtp; int ret = 0; - hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, + hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid, HRTIMER_MODE_ABS); hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); @@ -1578,7 +1585,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, restart = ¤t_thread_info()->restart_block; restart->fn = hrtimer_nanosleep_restart; - restart->nanosleep.index = t.timer.base->index; + restart->nanosleep.clockid = t.timer.base->clockid; restart->nanosleep.rmtp = rmtp; restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 0791b13df7b..58f405b581e 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1514,7 +1514,7 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, return -EFAULT; restart_block->fn = posix_cpu_nsleep_restart; - restart_block->nanosleep.index = which_clock; + restart_block->nanosleep.clockid = which_clock; restart_block->nanosleep.rmtp = rmtp; restart_block->nanosleep.expires = timespec_to_ns(rqtp); } @@ -1523,7 +1523,7 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, static long posix_cpu_nsleep_restart(struct restart_block *restart_block) { - clockid_t which_clock = restart_block->nanosleep.index; + clockid_t which_clock = restart_block->nanosleep.clockid; struct timespec t; struct itimerspec it; int error; diff --git 
a/kernel/posix-timers.c b/kernel/posix-timers.c index e5498d7405c..a1b5edf1bf9 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -1056,7 +1056,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, */ long clock_nanosleep_restart(struct restart_block *restart_block) { - clockid_t which_clock = restart_block->nanosleep.index; + clockid_t which_clock = restart_block->nanosleep.clockid; struct k_clock *kc = clockid_to_kclock(which_clock); if (WARN_ON_ONCE(!kc || !kc->nsleep_restart)) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index c6027fe9a4e..2d966244ea6 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -494,7 +494,7 @@ static int update_rmtp(ktime_t exp, enum alarmtimer_type type, */ static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) { - enum alarmtimer_type type = restart->nanosleep.index; + enum alarmtimer_type type = restart->nanosleep.clockid; ktime_t exp; struct timespec __user *rmtp; struct alarm alarm; @@ -573,7 +573,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, restart = ¤t_thread_info()->restart_block; restart->fn = alarm_timer_nsleep_restart; - restart->nanosleep.index = type; + restart->nanosleep.clockid = type; restart->nanosleep.expires = exp.tv64; restart->nanosleep.rmtp = rmtp; ret = -ERESTART_RESTARTBLOCK; -- cgit v1.2.3-70-g09d2