aboutsummaryrefslogtreecommitdiff
path: root/kernel/time
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time')
-rw-r--r--kernel/time/clockevents.c45
-rw-r--r--kernel/time/clocksource.c105
-rw-r--r--kernel/time/tick-broadcast.c42
-rw-r--r--kernel/time/tick-common.c20
-rw-r--r--kernel/time/tick-internal.h1
-rw-r--r--kernel/time/tick-oneshot.c4
-rw-r--r--kernel/time/tick-sched.c150
-rw-r--r--kernel/time/timecompare.c8
-rw-r--r--kernel/time/timekeeping.c103
-rw-r--r--kernel/time/timer_list.c25
-rw-r--r--kernel/time/timer_stats.c17
11 files changed, 361 insertions, 159 deletions
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 620b58abdc3..6f740d9f094 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -20,6 +20,8 @@
#include <linux/sysdev.h>
#include <linux/tick.h>
+#include "tick-internal.h"
+
/* The registered clock event devices */
static LIST_HEAD(clockevent_devices);
static LIST_HEAD(clockevents_released);
@@ -28,7 +30,7 @@ static LIST_HEAD(clockevents_released);
static RAW_NOTIFIER_HEAD(clockevents_chain);
/* Protection for the above */
-static DEFINE_SPINLOCK(clockevents_lock);
+static DEFINE_RAW_SPINLOCK(clockevents_lock);
/**
* clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
@@ -37,10 +39,9 @@ static DEFINE_SPINLOCK(clockevents_lock);
*
* Math helper, returns latch value converted to nanoseconds (bound checked)
*/
-unsigned long clockevent_delta2ns(unsigned long latch,
- struct clock_event_device *evt)
+u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
{
- u64 clc = ((u64) latch << evt->shift);
+ u64 clc = (u64) latch << evt->shift;
if (unlikely(!evt->mult)) {
evt->mult = 1;
@@ -50,10 +51,10 @@ unsigned long clockevent_delta2ns(unsigned long latch,
do_div(clc, evt->mult);
if (clc < 1000)
clc = 1000;
- if (clc > LONG_MAX)
- clc = LONG_MAX;
+ if (clc > KTIME_MAX)
+ clc = KTIME_MAX;
- return (unsigned long) clc;
+ return clc;
}
EXPORT_SYMBOL_GPL(clockevent_delta2ns);
@@ -140,9 +141,9 @@ int clockevents_register_notifier(struct notifier_block *nb)
unsigned long flags;
int ret;
- spin_lock_irqsave(&clockevents_lock, flags);
+ raw_spin_lock_irqsave(&clockevents_lock, flags);
ret = raw_notifier_chain_register(&clockevents_chain, nb);
- spin_unlock_irqrestore(&clockevents_lock, flags);
+ raw_spin_unlock_irqrestore(&clockevents_lock, flags);
return ret;
}
@@ -184,13 +185,13 @@ void clockevents_register_device(struct clock_event_device *dev)
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
BUG_ON(!dev->cpumask);
- spin_lock_irqsave(&clockevents_lock, flags);
+ raw_spin_lock_irqsave(&clockevents_lock, flags);
list_add(&dev->list, &clockevent_devices);
clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
clockevents_notify_released();
- spin_unlock_irqrestore(&clockevents_lock, flags);
+ raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}
EXPORT_SYMBOL_GPL(clockevents_register_device);
@@ -237,10 +238,11 @@ void clockevents_exchange_device(struct clock_event_device *old,
*/
void clockevents_notify(unsigned long reason, void *arg)
{
- struct list_head *node, *tmp;
+ struct clock_event_device *dev, *tmp;
unsigned long flags;
+ int cpu;
- spin_lock_irqsave(&clockevents_lock, flags);
+ raw_spin_lock_irqsave(&clockevents_lock, flags);
clockevents_do_notify(reason, arg);
switch (reason) {
@@ -249,13 +251,24 @@ void clockevents_notify(unsigned long reason, void *arg)
* Unregister the clock event devices which were
* released from the users in the notify chain.
*/
- list_for_each_safe(node, tmp, &clockevents_released)
- list_del(node);
+ list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
+ list_del(&dev->list);
+ /*
+ * Now check whether the CPU has left unused per cpu devices
+ */
+ cpu = *((int *)arg);
+ list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
+ if (cpumask_test_cpu(cpu, dev->cpumask) &&
+ cpumask_weight(dev->cpumask) == 1) {
+ BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+ list_del(&dev->list);
+ }
+ }
break;
default:
break;
}
- spin_unlock_irqrestore(&clockevents_lock, flags);
+ raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
#endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 5e18c6ab2c6..e85c23404d3 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -39,7 +39,7 @@ void timecounter_init(struct timecounter *tc,
tc->cycle_last = cc->read(cc);
tc->nsec = start_tstamp;
}
-EXPORT_SYMBOL(timecounter_init);
+EXPORT_SYMBOL_GPL(timecounter_init);
/**
* timecounter_read_delta - get nanoseconds since last call of this function
@@ -83,7 +83,7 @@ u64 timecounter_read(struct timecounter *tc)
return nsec;
}
-EXPORT_SYMBOL(timecounter_read);
+EXPORT_SYMBOL_GPL(timecounter_read);
u64 timecounter_cyc2time(struct timecounter *tc,
cycle_t cycle_tstamp)
@@ -105,7 +105,60 @@ u64 timecounter_cyc2time(struct timecounter *tc,
return nsec;
}
-EXPORT_SYMBOL(timecounter_cyc2time);
+EXPORT_SYMBOL_GPL(timecounter_cyc2time);
+
+/**
+ * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
+ * @mult: pointer to mult variable
+ * @shift: pointer to shift variable
+ * @from: frequency to convert from
+ * @to: frequency to convert to
+ * @minsec: guaranteed runtime conversion range in seconds
+ *
+ * The function evaluates the shift/mult pair for the scaled math
+ * operations of clocksources and clockevents.
+ *
+ * @to and @from are frequency values in HZ. For clock sources @to is
+ * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
+ * event @to is the counter frequency and @from is NSEC_PER_SEC.
+ *
+ * The @minsec conversion range argument controls the time frame in
+ * seconds which must be covered by the runtime conversion with the
+ * calculated mult and shift factors. This guarantees that no 64bit
+ * overflow happens when the input value of the conversion is
+ * multiplied with the calculated mult factor. Larger ranges may
+ * reduce the conversion accuracy by chosing smaller mult and shift
+ * factors.
+ */
+void
+clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
+{
+ u64 tmp;
+ u32 sft, sftacc= 32;
+
+ /*
+ * Calculate the shift factor which is limiting the conversion
+ * range:
+ */
+ tmp = ((u64)minsec * from) >> 32;
+ while (tmp) {
+ tmp >>=1;
+ sftacc--;
+ }
+
+ /*
+ * Find the conversion shift/mult pair which has the best
+ * accuracy and fits the maxsec conversion range:
+ */
+ for (sft = 32; sft > 0; sft--) {
+ tmp = (u64) to << sft;
+ do_div(tmp, from);
+ if ((tmp >> sftacc) == 0)
+ break;
+ }
+ *mult = tmp;
+ *shift = sft;
+}
/*[Clocksource internal variables]---------
* curr_clocksource:
@@ -413,6 +466,47 @@ void clocksource_touch_watchdog(void)
clocksource_resume_watchdog();
}
+/**
+ * clocksource_max_deferment - Returns max time the clocksource can be deferred
+ * @cs: Pointer to clocksource
+ *
+ */
+static u64 clocksource_max_deferment(struct clocksource *cs)
+{
+ u64 max_nsecs, max_cycles;
+
+ /*
+ * Calculate the maximum number of cycles that we can pass to the
+ * cyc2ns function without overflowing a 64-bit signed result. The
+ * maximum number of cycles is equal to ULLONG_MAX/cs->mult which
+ * is equivalent to the below.
+ * max_cycles < (2^63)/cs->mult
+ * max_cycles < 2^(log2((2^63)/cs->mult))
+ * max_cycles < 2^(log2(2^63) - log2(cs->mult))
+ * max_cycles < 2^(63 - log2(cs->mult))
+ * max_cycles < 1 << (63 - log2(cs->mult))
+ * Please note that we add 1 to the result of the log2 to account for
+ * any rounding errors, ensure the above inequality is satisfied and
+ * no overflow will occur.
+ */
+ max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
+
+ /*
+ * The actual maximum number of cycles we can defer the clocksource is
+ * determined by the minimum of max_cycles and cs->mask.
+ */
+ max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
+ max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
+
+ /*
+ * To ensure that the clocksource does not wrap whilst we are idle,
+ * limit the time the clocksource can be deferred by 12.5%. Please
+ * note a margin of 12.5% is used because this can be computed with
+ * a shift, versus say 10% which would require division.
+ */
+ return max_nsecs - (max_nsecs >> 5);
+}
+
#ifdef CONFIG_GENERIC_TIME
/**
@@ -511,6 +605,9 @@ static void clocksource_enqueue(struct clocksource *cs)
*/
int clocksource_register(struct clocksource *cs)
{
+ /* calculate max idle time permitted for this clocksource */
+ cs->max_idle_ns = clocksource_max_deferment(cs);
+
mutex_lock(&clocksource_mutex);
clocksource_enqueue(cs);
clocksource_select();
@@ -580,7 +677,7 @@ sysfs_show_current_clocksources(struct sys_device *dev,
* @count: length of buffer
*
* Takes input from sysfs interface for manually overriding the default
- * clocksource selction.
+ * clocksource selection.
*/
static ssize_t sysfs_override_clocksource(struct sys_device *dev,
struct sysdev_attribute *attr,
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index c2ec25087a3..b3bafd5fc66 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -31,7 +31,7 @@ static struct tick_device tick_broadcast_device;
/* FIXME: Use cpumask_var_t. */
static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
static DECLARE_BITMAP(tmpmask, NR_CPUS);
-static DEFINE_SPINLOCK(tick_broadcast_lock);
+static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
static int tick_broadcast_force;
#ifdef CONFIG_TICK_ONESHOT
@@ -96,7 +96,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
unsigned long flags;
int ret = 0;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
/*
* Devices might be registered with both periodic and oneshot
@@ -122,7 +122,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
tick_broadcast_clear_oneshot(cpu);
}
}
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
return ret;
}
@@ -161,13 +161,13 @@ static void tick_do_broadcast(struct cpumask *mask)
*/
static void tick_do_periodic_broadcast(void)
{
- spin_lock(&tick_broadcast_lock);
+ raw_spin_lock(&tick_broadcast_lock);
cpumask_and(to_cpumask(tmpmask),
cpu_online_mask, tick_get_broadcast_mask());
tick_do_broadcast(to_cpumask(tmpmask));
- spin_unlock(&tick_broadcast_lock);
+ raw_spin_unlock(&tick_broadcast_lock);
}
/*
@@ -212,7 +212,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
unsigned long flags;
int cpu, bc_stopped;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
cpu = smp_processor_id();
td = &per_cpu(tick_cpu_device, cpu);
@@ -263,7 +263,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
tick_broadcast_setup_oneshot(bc);
}
out:
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
@@ -299,7 +299,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
unsigned long flags;
unsigned int cpu = *cpup;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
@@ -309,7 +309,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
clockevents_shutdown(bc);
}
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
void tick_suspend_broadcast(void)
@@ -317,13 +317,13 @@ void tick_suspend_broadcast(void)
struct clock_event_device *bc;
unsigned long flags;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
if (bc)
clockevents_shutdown(bc);
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
int tick_resume_broadcast(void)
@@ -332,7 +332,7 @@ int tick_resume_broadcast(void)
unsigned long flags;
int broadcast = 0;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
@@ -351,7 +351,7 @@ int tick_resume_broadcast(void)
break;
}
}
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
return broadcast;
}
@@ -405,7 +405,7 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
ktime_t now, next_event;
int cpu;
- spin_lock(&tick_broadcast_lock);
+ raw_spin_lock(&tick_broadcast_lock);
again:
dev->next_event.tv64 = KTIME_MAX;
next_event.tv64 = KTIME_MAX;
@@ -443,7 +443,7 @@ again:
if (tick_broadcast_set_event(next_event, 0))
goto again;
}
- spin_unlock(&tick_broadcast_lock);
+ raw_spin_unlock(&tick_broadcast_lock);
}
/*
@@ -457,7 +457,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
unsigned long flags;
int cpu;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
/*
* Periodic mode does not care about the enter/exit of power
@@ -492,7 +492,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)
}
out:
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
@@ -563,13 +563,13 @@ void tick_broadcast_switch_to_oneshot(void)
struct clock_event_device *bc;
unsigned long flags;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
bc = tick_broadcast_device.evtdev;
if (bc)
tick_broadcast_setup_oneshot(bc);
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
@@ -581,7 +581,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
unsigned long flags;
unsigned int cpu = *cpup;
- spin_lock_irqsave(&tick_broadcast_lock, flags);
+ raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
/*
* Clear the broadcast mask flag for the dead cpu, but do not
@@ -589,7 +589,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
*/
cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
- spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
/*
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 83c4417b6a3..b6b898d2eee 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -34,7 +34,7 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
ktime_t tick_next_period;
ktime_t tick_period;
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
-DEFINE_SPINLOCK(tick_device_lock);
+static DEFINE_RAW_SPINLOCK(tick_device_lock);
/*
* Debugging: see timer_list.c
@@ -209,7 +209,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
int cpu, ret = NOTIFY_OK;
unsigned long flags;
- spin_lock_irqsave(&tick_device_lock, flags);
+ raw_spin_lock_irqsave(&tick_device_lock, flags);
cpu = smp_processor_id();
if (!cpumask_test_cpu(cpu, newdev->cpumask))
@@ -268,7 +268,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
tick_oneshot_notify();
- spin_unlock_irqrestore(&tick_device_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_device_lock, flags);
return NOTIFY_STOP;
out_bc:
@@ -278,7 +278,7 @@ out_bc:
if (tick_check_broadcast_device(newdev))
ret = NOTIFY_STOP;
- spin_unlock_irqrestore(&tick_device_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_device_lock, flags);
return ret;
}
@@ -311,7 +311,7 @@ static void tick_shutdown(unsigned int *cpup)
struct clock_event_device *dev = td->evtdev;
unsigned long flags;
- spin_lock_irqsave(&tick_device_lock, flags);
+ raw_spin_lock_irqsave(&tick_device_lock, flags);
td->mode = TICKDEV_MODE_PERIODIC;
if (dev) {
/*
@@ -322,7 +322,7 @@ static void tick_shutdown(unsigned int *cpup)
clockevents_exchange_device(dev, NULL);
td->evtdev = NULL;
}
- spin_unlock_irqrestore(&tick_device_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_device_lock, flags);
}
static void tick_suspend(void)
@@ -330,9 +330,9 @@ static void tick_suspend(void)
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
unsigned long flags;
- spin_lock_irqsave(&tick_device_lock, flags);
+ raw_spin_lock_irqsave(&tick_device_lock, flags);
clockevents_shutdown(td->evtdev);
- spin_unlock_irqrestore(&tick_device_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_device_lock, flags);
}
static void tick_resume(void)
@@ -341,7 +341,7 @@ static void tick_resume(void)
unsigned long flags;
int broadcast = tick_resume_broadcast();
- spin_lock_irqsave(&tick_device_lock, flags);
+ raw_spin_lock_irqsave(&tick_device_lock, flags);
clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
if (!broadcast) {
@@ -350,7 +350,7 @@ static void tick_resume(void)
else
tick_resume_oneshot();
}
- spin_unlock_irqrestore(&tick_device_lock, flags);
+ raw_spin_unlock_irqrestore(&tick_device_lock, flags);
}
/*
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index b1c05bf75ee..290eefbc1f6 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -6,7 +6,6 @@
#define TICK_DO_TIMER_BOOT -2
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
-extern spinlock_t tick_device_lock;
extern ktime_t tick_next_period;
extern ktime_t tick_period;
extern int tick_do_timer_cpu __read_mostly;
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index a96c0e2b89c..0a8a213016f 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -50,9 +50,9 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
dev->min_delta_ns += dev->min_delta_ns >> 1;
printk(KERN_WARNING
- "CE: %s increasing min_delta_ns to %lu nsec\n",
+ "CE: %s increasing min_delta_ns to %llu nsec\n",
dev->name ? dev->name : "?",
- dev->min_delta_ns << 1);
+ (unsigned long long) dev->min_delta_ns << 1);
i = 0;
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e0f59a21c06..f992762d7f5 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -134,18 +134,13 @@ __setup("nohz=", setup_tick_nohz);
* value. We do this unconditionally on any cpu, as we don't know whether the
* cpu, which has the update task assigned is in a long sleep.
*/
-static void tick_nohz_update_jiffies(void)
+static void tick_nohz_update_jiffies(ktime_t now)
{
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
unsigned long flags;
- ktime_t now;
-
- if (!ts->tick_stopped)
- return;
cpumask_clear_cpu(cpu, nohz_cpu_mask);
- now = ktime_get();
ts->idle_waketime = now;
local_irq_save(flags);
@@ -155,20 +150,17 @@ static void tick_nohz_update_jiffies(void)
touch_softlockup_watchdog();
}
-static void tick_nohz_stop_idle(int cpu)
+static void tick_nohz_stop_idle(int cpu, ktime_t now)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ ktime_t delta;
- if (ts->idle_active) {
- ktime_t now, delta;
- now = ktime_get();
- delta = ktime_sub(now, ts->idle_entrytime);
- ts->idle_lastupdate = now;
- ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
- ts->idle_active = 0;
+ delta = ktime_sub(now, ts->idle_entrytime);
+ ts->idle_lastupdate = now;
+ ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+ ts->idle_active = 0;
- sched_clock_idle_wakeup_event(0);
- }
+ sched_clock_idle_wakeup_event(0);
}
static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
@@ -216,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle)
struct tick_sched *ts;
ktime_t last_update, expires, now;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+ u64 time_delta;
int cpu;
local_irq_save(flags);
@@ -231,6 +224,13 @@ void tick_nohz_stop_sched_tick(int inidle)
if (!inidle && !ts->inidle)
goto end;
+ /*
+ * Set ts->inidle unconditionally. Even if the system did not
+ * switch to NOHZ mode the cpu frequency governers rely on the
+ * update of the idle time accounting in tick_nohz_start_idle().
+ */
+ ts->inidle = 1;
+
now = tick_nohz_start_idle(ts);
/*
@@ -248,8 +248,6 @@ void tick_nohz_stop_sched_tick(int inidle)
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
goto end;
- ts->inidle = 1;
-
if (need_resched())
goto end;
@@ -258,7 +256,7 @@ void tick_nohz_stop_sched_tick(int inidle)
if (ratelimit < 10) {
printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
- local_softirq_pending());
+ (unsigned int) local_softirq_pending());
ratelimit++;
}
goto end;
@@ -270,14 +268,18 @@ void tick_nohz_stop_sched_tick(int inidle)
seq = read_seqbegin(&xtime_lock);
last_update = last_jiffies_update;
last_jiffies = jiffies;
+ time_delta = timekeeping_max_deferment();
} while (read_seqretry(&xtime_lock, seq));
- /* Get the next timer wheel timer */
- next_jiffies = get_next_timer_interrupt(last_jiffies);
- delta_jiffies = next_jiffies - last_jiffies;
-
- if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu))
+ if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
+ arch_needs_cpu(cpu)) {
+ next_jiffies = last_jiffies + 1;
delta_jiffies = 1;
+ } else {
+ /* Get the next timer wheel timer */
+ next_jiffies = get_next_timer_interrupt(last_jiffies);
+ delta_jiffies = next_jiffies - last_jiffies;
+ }
/*
* Do not stop the tick, if we are only one off
* or if the cpu is required for rcu
@@ -289,22 +291,51 @@ void tick_nohz_stop_sched_tick(int inidle)
if ((long)delta_jiffies >= 1) {
/*
- * calculate the expiry time for the next timer wheel
- * timer
- */
- expires = ktime_add_ns(last_update, tick_period.tv64 *
- delta_jiffies);
-
- /*
* If this cpu is the one which updates jiffies, then
* give up the assignment and let it be taken by the
* cpu which runs the tick timer next, which might be
* this cpu as well. If we don't drop this here the
* jiffies might be stale and do_timer() never
- * invoked.
+ * invoked. Keep track of the fact that it was the one
+ * which had the do_timer() duty last. If this cpu is
+ * the one which had the do_timer() duty last, we
+ * limit the sleep time to the timekeeping
+ * max_deferement value which we retrieved
+ * above. Otherwise we can sleep as long as we want.
*/
- if (cpu == tick_do_timer_cpu)
+ if (cpu == tick_do_timer_cpu) {
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ ts->do_timer_last = 1;
+ } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+ time_delta = KTIME_MAX;
+ ts->do_timer_last = 0;
+ } else if (!ts->do_timer_last) {
+ time_delta = KTIME_MAX;
+ }
+
+ /*
+ * calculate the expiry time for the next timer wheel
+ * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
+ * that there is no timer pending or at least extremely
+ * far into the future (12 days for HZ=1000). In this
+ * case we set the expiry to the end of time.
+ */
+ if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
+ /*
+ * Calculate the time delta for the next timer event.
+ * If the time delta exceeds the maximum time delta
+ * permitted by the current clocksource then adjust
+ * the time delta accordingly to ensure the
+ * clocksource does not wrap.
+ */
+ time_delta = min_t(u64, time_delta,
+ tick_period.tv64 * delta_jiffies);
+ }
+
+ if (time_delta < KTIME_MAX)
+ expires = ktime_add_ns(last_update, time_delta);
+ else
+ expires.tv64 = KTIME_MAX;
if (delta_jiffies > 1)
cpumask_set_cpu(cpu, nohz_cpu_mask);
@@ -337,22 +368,19 @@ void tick_nohz_stop_sched_tick(int inidle)
ts->idle_sleeps++;
+ /* Mark expires */
+ ts->idle_expires = expires;
+
/*
- * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that
- * there is no timer pending or at least extremly far
- * into the future (12 days for HZ=1000). In this case
- * we simply stop the tick timer:
+ * If the expiration time == KTIME_MAX, then
+ * in this case we simply stop the tick timer.
*/
- if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) {
- ts->idle_expires.tv64 = KTIME_MAX;
+ if (unlikely(expires.tv64 == KTIME_MAX)) {
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
hrtimer_cancel(&ts->sched_timer);
goto out;
}
- /* Mark expiries */
- ts->idle_expires = expires;
-
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start(&ts->sched_timer, expires,
HRTIMER_MODE_ABS_PINNED);
@@ -431,7 +459,11 @@ void tick_nohz_restart_sched_tick(void)
ktime_t now;
local_irq_disable();
- tick_nohz_stop_idle(cpu);
+ if (ts->idle_active || (ts->inidle && ts->tick_stopped))
+ now = ktime_get();
+
+ if (ts->idle_active)
+ tick_nohz_stop_idle(cpu, now);
if (!ts->inidle || !ts->tick_stopped) {
ts->inidle = 0;
@@ -445,7 +477,6 @@ void tick_nohz_restart_sched_tick(void)
/* Update jiffies first */
select_nohz_load_balancer(0);
- now = ktime_get();
tick_do_update_jiffies64(now);
cpumask_clear_cpu(cpu, nohz_cpu_mask);
@@ -579,22 +610,18 @@ static void tick_nohz_switch_to_nohz(void)
* timer and do not touch the other magic bits which need to be done
* when idle is left.
*/
-static void tick_nohz_kick_tick(int cpu)
+static void tick_nohz_kick_tick(int cpu, ktime_t now)
{
#if 0
/* Switch back to 2.6.27 behaviour */
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
- ktime_t delta, now;
-
- if (!ts->tick_stopped)
- return;
+ ktime_t delta;
/*
* Do not touch the tick device, when the next expiry is either
* already reached or less/equal than the tick period.
*/
- now = ktime_get();
delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
if (delta.tv64 <= tick_period.tv64)
return;
@@ -603,9 +630,26 @@ static void tick_nohz_kick_tick(int cpu)
#endif
}
+static inline void tick_check_nohz(int cpu)
+{
+ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ ktime_t now;
+
+ if (!ts->idle_active && !ts->tick_stopped)
+ return;
+ now = ktime_get();
+ if (ts->idle_active)
+ tick_nohz_stop_idle(cpu, now);
+ if (ts->tick_stopped) {
+ tick_nohz_update_jiffies(now);
+ tick_nohz_kick_tick(cpu, now);
+ }
+}
+
#else
static inline void tick_nohz_switch_to_nohz(void) { }
+static inline void tick_check_nohz(int cpu) { }
#endif /* NO_HZ */
@@ -615,11 +659,7 @@ static inline void tick_nohz_switch_to_nohz(void) { }
void tick_check_idle(int cpu)
{
tick_check_oneshot_broadcast(cpu);
-#ifdef CONFIG_NO_HZ
- tick_nohz_stop_idle(cpu);
- tick_nohz_update_jiffies();
- tick_nohz_kick_tick(cpu);
-#endif
+ tick_check_nohz(cpu);
}
/*
diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c
index 71e7f1a1915..12f5c55090b 100644
--- a/kernel/time/timecompare.c
+++ b/kernel/time/timecompare.c
@@ -40,7 +40,7 @@ ktime_t timecompare_transform(struct timecompare *sync,
return ns_to_ktime(nsec);
}
-EXPORT_SYMBOL(timecompare_transform);
+EXPORT_SYMBOL_GPL(timecompare_transform);
int timecompare_offset(struct timecompare *sync,
s64 *offset,
@@ -89,7 +89,7 @@ int timecompare_offset(struct timecompare *sync,
* source time
*/
sample.offset =
- ktime_to_ns(ktime_add(end, start)) / 2 -
+ (ktime_to_ns(end) + ktime_to_ns(start)) / 2 -
ts;
/* simple insertion sort based on duration */
@@ -131,7 +131,7 @@ int timecompare_offset(struct timecompare *sync,
return used;
}
-EXPORT_SYMBOL(timecompare_offset);
+EXPORT_SYMBOL_GPL(timecompare_offset);
void __timecompare_update(struct timecompare *sync,
u64 source_tstamp)
@@ -188,4 +188,4 @@ void __timecompare_update(struct timecompare *sync,
}
}
}
-EXPORT_SYMBOL(__timecompare_update);
+EXPORT_SYMBOL_GPL(__timecompare_update);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index fb0f46fa1ec..7faaa32fbf4 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -13,6 +13,7 @@
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
+#include <linux/sched.h>
#include <linux/sysdev.h>
#include <linux/clocksource.h>
#include <linux/jiffies.h>
@@ -176,7 +177,7 @@ void timekeeping_leap_insert(int leapsecond)
{
xtime.tv_sec += leapsecond;
wall_to_monotonic.tv_sec -= leapsecond;
- update_vsyscall(&xtime, timekeeper.clock);
+ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
}
#ifdef CONFIG_GENERIC_TIME
@@ -336,7 +337,7 @@ int do_settimeofday(struct timespec *tv)
timekeeper.ntp_error = 0;
ntp_clear();
- update_vsyscall(&xtime, timekeeper.clock);
+ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -487,6 +488,17 @@ int timekeeping_valid_for_hres(void)
}
/**
+ * timekeeping_max_deferment - Returns max time the clocksource can be deferred
+ *
+ * Caller must observe xtime_lock via read_seqbegin/read_seqretry to
+ * ensure that the clocksource does not change!
+ */
+u64 timekeeping_max_deferment(void)
+{
+ return timekeeper.clock->max_idle_ns;
+}
+
+/**
* read_persistent_clock - Return time from the persistent clock.
*
* Weak dummy function for arches that do not yet support it.
@@ -721,6 +733,51 @@ static void timekeeping_adjust(s64 offset)
timekeeper.ntp_error_shift;
}
+
+/**
+ * logarithmic_accumulation - shifted accumulation of cycles
+ *
+ * This functions accumulates a shifted interval of cycles into
+ * into a shifted interval nanoseconds. Allows for O(log) accumulation
+ * loop.
+ *
+ * Returns the unconsumed cycles.
+ */
+static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
+{
+ u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
+
+ /* If the offset is smaller then a shifted interval, do nothing */
+ if (offset < timekeeper.cycle_interval<<shift)
+ return offset;
+
+ /* Accumulate one shifted interval */
+ offset -= timekeeper.cycle_interval << shift;
+ timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift;
+
+ timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
+ while (timekeeper.xtime_nsec >= nsecps) {
+ timekeeper.xtime_nsec -= nsecps;
+ xtime.tv_sec++;
+ second_overflow();
+ }
+
+ /* Accumulate into raw time */
+ raw_time.tv_nsec += timekeeper.raw_interval << shift;;
+ while (raw_time.tv_nsec >= NSEC_PER_SEC) {
+ raw_time.tv_nsec -= NSEC_PER_SEC;
+ raw_time.tv_sec++;
+ }
+
+ /* Accumulate error between NTP and clock interval */
+ timekeeper.ntp_error += tick_length << shift;
+ timekeeper.ntp_error -= timekeeper.xtime_interval <<
+ (timekeeper.ntp_error_shift + shift);
+
+ return offset;
+}
+
+
/**
* update_wall_time - Uses the current clocksource to increment the wall time
*
@@ -731,6 +788,7 @@ void update_wall_time(void)
struct clocksource *clock;
cycle_t offset;
u64 nsecs;
+ int shift = 0, maxshift;
/* Make sure we're fully resumed: */
if (unlikely(timekeeping_suspended))
@@ -744,33 +802,22 @@ void update_wall_time(void)
#endif
timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
- /* normally this loop will run just once, however in the
- * case of lost or late ticks, it will accumulate correctly.
+ /*
+ * With NO_HZ we may have to accumulate many cycle_intervals
+ * (think "ticks") worth of time at once. To do this efficiently,
+ * we calculate the largest doubling multiple of cycle_intervals
+ * that is smaller then the offset. We then accumulate that
+ * chunk in one go, and then try to consume the next smaller
+ * doubled multiple.
*/
+ shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
+ shift = max(0, shift);
+ /* Bound shift to one less then what overflows tick_length */
+ maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1;
+ shift = min(shift, maxshift);
while (offset >= timekeeper.cycle_interval) {
- u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
-
- /* accumulate one interval */
- offset -= timekeeper.cycle_interval;
- clock->cycle_last += timekeeper.cycle_interval;
-
- timekeeper.xtime_nsec += timekeeper.xtime_interval;
- if (timekeeper.xtime_nsec >= nsecps) {
- timekeeper.xtime_nsec -= nsecps;
- xtime.tv_sec++;
- second_overflow();
- }
-
- raw_time.tv_nsec += timekeeper.raw_interval;
- if (raw_time.tv_nsec >= NSEC_PER_SEC) {
- raw_time.tv_nsec -= NSEC_PER_SEC;
- raw_time.tv_sec++;
- }
-
- /* accumulate error between NTP and clock interval */
- timekeeper.ntp_error += tick_length;
- timekeeper.ntp_error -= timekeeper.xtime_interval <<
- timekeeper.ntp_error_shift;
+ offset = logarithmic_accumulation(offset, shift);
+ shift--;
}
/* correct the clock when NTP error is too big */
@@ -810,7 +857,7 @@ void update_wall_time(void)
update_xtime_cache(nsecs);
/* check to see if there is a new clocksource to use */
- update_vsyscall(&xtime, timekeeper.clock);
+ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
}
/**
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 1b5b7aa2fdf..bdfb8dd1050 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -84,7 +84,7 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
next_one:
i = 0;
- spin_lock_irqsave(&base->cpu_base->lock, flags);
+ raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
curr = base->first;
/*
@@ -100,13 +100,13 @@ next_one:
timer = rb_entry(curr, struct hrtimer, node);
tmp = *timer;
- spin_unlock_irqrestore(&base->cpu_base->lock, flags);
+ raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
print_timer(m, timer, &tmp, i, now);
next++;
goto next_one;
}
- spin_unlock_irqrestore(&base->cpu_base->lock, flags);
+ raw_spin_unlock_irqrestore(&base->cpu_base->lock, flags);
}
static void
@@ -150,6 +150,9 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
P_ns(expires_next);
P(hres_active);
P(nr_events);
+ P(nr_retries);
+ P(nr_hangs);
+ P_ns(max_hang_time);
#endif
#undef P
#undef P_ns
@@ -204,10 +207,12 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
return;
}
SEQ_printf(m, "%s\n", dev->name);
- SEQ_printf(m, " max_delta_ns: %lu\n", dev->max_delta_ns);
- SEQ_printf(m, " min_delta_ns: %lu\n", dev->min_delta_ns);
- SEQ_printf(m, " mult: %lu\n", dev->mult);
- SEQ_printf(m, " shift: %d\n", dev->shift);
+ SEQ_printf(m, " max_delta_ns: %llu\n",
+ (unsigned long long) dev->max_delta_ns);
+ SEQ_printf(m, " min_delta_ns: %llu\n",
+ (unsigned long long) dev->min_delta_ns);
+ SEQ_printf(m, " mult: %u\n", dev->mult);
+ SEQ_printf(m, " shift: %u\n", dev->shift);
SEQ_printf(m, " mode: %d\n", dev->mode);
SEQ_printf(m, " next_event: %Ld nsecs\n",
(unsigned long long) ktime_to_ns(dev->next_event));
@@ -232,10 +237,10 @@ static void timer_list_show_tickdevices(struct seq_file *m)
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
print_tickdevice(m, tick_get_broadcast_device(), -1);
SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
- tick_get_broadcast_mask()->bits[0]);
+ cpumask_bits(tick_get_broadcast_mask())[0]);
#ifdef CONFIG_TICK_ONESHOT
SEQ_printf(m, "tick_broadcast_oneshot_mask: %08lx\n",
- tick_get_broadcast_oneshot_mask()->bits[0]);
+ cpumask_bits(tick_get_broadcast_oneshot_mask())[0]);
#endif
SEQ_printf(m, "\n");
#endif
@@ -252,7 +257,7 @@ static int timer_list_show(struct seq_file *m, void *v)
u64 now = ktime_to_ns(ktime_get());
int cpu;
- SEQ_printf(m, "Timer List Version: v0.4\n");
+ SEQ_printf(m, "Timer List Version: v0.5\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 63b117e9eba..2f3b585b8d7 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -86,7 +86,7 @@ static DEFINE_SPINLOCK(table_lock);
/*
* Per-CPU lookup locks for fast hash lookup:
*/
-static DEFINE_PER_CPU(spinlock_t, tstats_lookup_lock);
+static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);
/*
* Mutex to serialize state changes with show-stats activities:
@@ -238,7 +238,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
/*
* It doesnt matter which lock we take:
*/
- spinlock_t *lock;
+ raw_spinlock_t *lock;
struct entry *entry, input;
unsigned long flags;
@@ -253,7 +253,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
input.pid = pid;
input.timer_flag = timer_flag;
- spin_lock_irqsave(lock, flags);
+ raw_spin_lock_irqsave(lock, flags);
if (!timer_stats_active)
goto out_unlock;
@@ -264,7 +264,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
atomic_inc(&overflow_count);
out_unlock:
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
}
static void print_name_offset(struct seq_file *m, unsigned long addr)
@@ -348,10 +348,11 @@ static void sync_access(void)
int cpu;
for_each_online_cpu(cpu) {
- spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
- spin_lock_irqsave(lock, flags);
+ raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
+
+ raw_spin_lock_irqsave(lock, flags);
/* nothing */
- spin_unlock_irqrestore(lock, flags);
+ raw_spin_unlock_irqrestore(lock, flags);
}
}
@@ -409,7 +410,7 @@ void __init init_timer_stats(void)
int cpu;
for_each_possible_cpu(cpu)
- spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
+ raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
}
static int __init init_tstats_procfs(void)