Diffstat (limited to 'kernel/time')
-rw-r--r--  kernel/time/Kconfig                   |   4
-rw-r--r--  kernel/time/Makefile                  |   5
-rw-r--r--  kernel/time/alarmtimer.c              |  24
-rw-r--r--  kernel/time/clockevents.c             | 117
-rw-r--r--  kernel/time/clocksource.c             |  52
-rw-r--r--  kernel/time/jiffies.c                 |   6
-rw-r--r--  kernel/time/ntp.c                     |  40
-rw-r--r--  kernel/time/sched_clock.c             | 145
-rw-r--r--  kernel/time/tick-broadcast-hrtimer.c  | 106
-rw-r--r--  kernel/time/tick-broadcast.c          |  93
-rw-r--r--  kernel/time/tick-common.c             |  34
-rw-r--r--  kernel/time/tick-internal.h           |  18
-rw-r--r--  kernel/time/tick-sched.c              |  87
-rw-r--r--  kernel/time/timekeeping.c             |  71
-rw-r--r--  kernel/time/timer_stats.c             |   8
-rw-r--r--  kernel/time/timekeeping_debug.c       |   2
16 files changed, 559 insertions, 253 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 2b62fe86f9e..f448513a45e 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -100,7 +100,7 @@ config NO_HZ_FULL  	# RCU_USER_QS dependency  	depends on HAVE_CONTEXT_TRACKING  	# VIRT_CPU_ACCOUNTING_GEN dependency -	depends on 64BIT +	depends on HAVE_VIRT_CPU_ACCOUNTING_GEN  	select NO_HZ_COMMON  	select RCU_USER_QS  	select RCU_NOCB_CPU @@ -124,7 +124,7 @@ config NO_HZ_FULL  endchoice  config NO_HZ_FULL_ALL -       bool "Full dynticks system on all CPUs by default" +       bool "Full dynticks system on all CPUs by default (except CPU 0)"         depends on NO_HZ_FULL         help           If the user doesn't pass the nohz_full boot option to diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 9250130646f..57a413fd0eb 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -3,7 +3,10 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o  obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)		+= clockevents.o  obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= tick-common.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= tick-broadcast.o +ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) + obj-y						+= tick-broadcast.o + obj-$(CONFIG_TICK_ONESHOT)			+= tick-broadcast-hrtimer.o +endif  obj-$(CONFIG_GENERIC_SCHED_CLOCK)		+= sched_clock.o  obj-$(CONFIG_TICK_ONESHOT)			+= tick-oneshot.o  obj-$(CONFIG_TICK_ONESHOT)			+= tick-sched.o diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index eec50fcef9e..fe75444ae7e 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -490,7 +490,7 @@ static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp)  	clockid_t baseid = alarm_bases[clock2alarm(which_clock)].base_clockid;  	if (!alarmtimer_get_rtcdev()) -		return -ENOTSUPP; +		return -EINVAL;  	return hrtimer_get_res(baseid, tp);  } @@ -507,7 +507,7 @@ static int alarm_clock_get(clockid_t which_clock, struct timespec *tp)  	struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)];  	if (!alarmtimer_get_rtcdev()) -		return -ENOTSUPP; +		return -EINVAL;  	*tp = ktime_to_timespec(base->gettime());  	return 0; @@ -585,9 +585,14 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,  				struct itimerspec *new_setting,  				struct itimerspec *old_setting)  { +	ktime_t exp; +  	if (!rtcdev)  		return -ENOTSUPP; +	if (flags & ~TIMER_ABSTIME) +		return -EINVAL; +  	if (old_setting)  		alarm_timer_get(timr, old_setting); @@ -597,8 +602,16 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,  	/* start the timer */  	timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval); -	alarm_start(&timr->it.alarm.alarmtimer, -			timespec_to_ktime(new_setting->it_value)); +	exp = timespec_to_ktime(new_setting->it_value); +	/* Convert (if necessary) to absolute time */ +	if (flags != TIMER_ABSTIME) { +		ktime_t now; + +		now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime(); +		exp = ktime_add(now, exp); +	} + +	alarm_start(&timr->it.alarm.alarmtimer, exp);  	return 0;  } @@ -730,6 +743,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,  	if (!alarmtimer_get_rtcdev())  		return -ENOTSUPP; +	if (flags & ~TIMER_ABSTIME) +		return -EINVAL; +  	if (!capable(CAP_WAKE_ALARM))  		return -EPERM; diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 38959c86678..9c94c19f130 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -33,29 +33,64 @@ struct ce_unbind {  	int res;  }; -/** - * clockevents_delta2ns - Convert a 
latch value (device ticks) to nanoseconds - * @latch:	value to convert - * @evt:	pointer to clock event device descriptor - * - * Math helper, returns latch value converted to nanoseconds (bound checked) - */ -u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) +static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt, +			bool ismax)  {  	u64 clc = (u64) latch << evt->shift; +	u64 rnd;  	if (unlikely(!evt->mult)) {  		evt->mult = 1;  		WARN_ON(1);  	} +	rnd = (u64) evt->mult - 1; + +	/* +	 * Upper bound sanity check. If the backwards conversion is +	 * not equal latch, we know that the above shift overflowed. +	 */ +	if ((clc >> evt->shift) != (u64)latch) +		clc = ~0ULL; + +	/* +	 * Scaled math oddities: +	 * +	 * For mult <= (1 << shift) we can safely add mult - 1 to +	 * prevent integer rounding loss. So the backwards conversion +	 * from nsec to device ticks will be correct. +	 * +	 * For mult > (1 << shift), i.e. device frequency is > 1GHz we +	 * need to be careful. Adding mult - 1 will result in a value +	 * which when converted back to device ticks can be larger +	 * than latch by up to (mult - 1) >> shift. For the min_delta +	 * calculation we still want to apply this in order to stay +	 * above the minimum device ticks limit. For the upper limit +	 * we would end up with a latch value larger than the upper +	 * limit of the device, so we omit the add to stay below the +	 * device upper boundary. +	 * +	 * Also omit the add if it would overflow the u64 boundary. +	 */ +	if ((~0ULL - clc > rnd) && +	    (!ismax || evt->mult <= (1U << evt->shift))) +		clc += rnd;  	do_div(clc, evt->mult); -	if (clc < 1000) -		clc = 1000; -	if (clc > KTIME_MAX) -		clc = KTIME_MAX; -	return clc; +	/* Deltas less than 1usec are pointless noise */ +	return clc > 1000 ? clc : 1000; +} + +/** + * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds + * @latch:	value to convert + * @evt:	pointer to clock event device descriptor + * + * Math helper, returns latch value converted to nanoseconds (bound checked) + */ +u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) +{ +	return cev_delta2ns(latch, evt, false);  }  EXPORT_SYMBOL_GPL(clockevent_delta2ns); @@ -111,7 +146,8 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev)  {  	/* Nothing to do if we already reached the limit */  	if (dev->min_delta_ns >= MIN_DELTA_LIMIT) { -		printk(KERN_WARNING "CE: Reprogramming failure. Giving up\n"); +		printk_deferred(KERN_WARNING +				"CE: Reprogramming failure. Giving up\n");  		dev->next_event.tv64 = KTIME_MAX;  		return -ETIME;  	} @@ -124,9 +160,10 @@ static int clockevents_increase_min_delta(struct clock_event_device *dev)  	if (dev->min_delta_ns > MIN_DELTA_LIMIT)  		dev->min_delta_ns = MIN_DELTA_LIMIT; -	printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n", -	       dev->name ? dev->name : "?", -	       (unsigned long long) dev->min_delta_ns); +	printk_deferred(KERN_WARNING +			"CE: %s increased min_delta_ns to %llu nsec\n", +			dev->name ? 
dev->name : "?", +			(unsigned long long) dev->min_delta_ns);  	return 0;  } @@ -380,8 +417,8 @@ void clockevents_config(struct clock_event_device *dev, u32 freq)  		sec = 600;  	clockevents_calc_mult_shift(dev, freq, sec); -	dev->min_delta_ns = clockevent_delta2ns(dev->min_delta_ticks, dev); -	dev->max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev); +	dev->min_delta_ns = cev_delta2ns(dev->min_delta_ticks, dev, false); +	dev->max_delta_ns = cev_delta2ns(dev->max_delta_ticks, dev, true);  }  /** @@ -404,6 +441,19 @@ void clockevents_config_and_register(struct clock_event_device *dev,  }  EXPORT_SYMBOL_GPL(clockevents_config_and_register); +int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) +{ +	clockevents_config(dev, freq); + +	if (dev->mode == CLOCK_EVT_MODE_ONESHOT) +		return clockevents_program_event(dev, dev->next_event, false); + +	if (dev->mode == CLOCK_EVT_MODE_PERIODIC) +		dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev); + +	return 0; +} +  /**   * clockevents_update_freq - Update frequency and reprogram a clock event device.   * @dev:	device to modify @@ -411,17 +461,22 @@ EXPORT_SYMBOL_GPL(clockevents_config_and_register);   *   * Reconfigure and reprogram a clock event device in oneshot   * mode. Must be called on the cpu for which the device delivers per - * cpu timer events with interrupts disabled!  Returns 0 on success, - * -ETIME when the event is in the past. + * cpu timer events. If called for the broadcast device the core takes + * care of serialization. + * + * Returns 0 on success, -ETIME when the event is in the past.   */  int clockevents_update_freq(struct clock_event_device *dev, u32 freq)  { -	clockevents_config(dev, freq); - -	if (dev->mode != CLOCK_EVT_MODE_ONESHOT) -		return 0; +	unsigned long flags; +	int ret; -	return clockevents_program_event(dev, dev->next_event, false); +	local_irq_save(flags); +	ret = tick_broadcast_update_freq(dev, freq); +	if (ret == -ENODEV) +		ret = __clockevents_update_freq(dev, freq); +	local_irq_restore(flags); +	return ret;  }  /* @@ -489,12 +544,13 @@ void clockevents_resume(void)  #ifdef CONFIG_GENERIC_CLOCKEVENTS  /**   * clockevents_notify - notification about relevant events + * Returns 0 on success, any other value on error   */ -void clockevents_notify(unsigned long reason, void *arg) +int clockevents_notify(unsigned long reason, void *arg)  {  	struct clock_event_device *dev, *tmp;  	unsigned long flags; -	int cpu; +	int cpu, ret = 0;  	raw_spin_lock_irqsave(&clockevents_lock, flags); @@ -507,7 +563,7 @@ void clockevents_notify(unsigned long reason, void *arg)  	case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:  	case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: -		tick_broadcast_oneshot_control(reason); +		ret = tick_broadcast_oneshot_control(reason);  		break;  	case CLOCK_EVT_NOTIFY_CPU_DYING: @@ -550,6 +606,7 @@ void clockevents_notify(unsigned long reason, void *arg)  		break;  	}  	raw_spin_unlock_irqrestore(&clockevents_lock, flags); +	return ret;  }  EXPORT_SYMBOL_GPL(clockevents_notify); @@ -584,7 +641,7 @@ static ssize_t sysfs_unbind_tick_dev(struct device *dev,  				     const char *buf, size_t count)  {  	char name[CS_NAME_LEN]; -	size_t ret = sysfs_get_uname(buf, name, count); +	ssize_t ret = sysfs_get_uname(buf, name, count);  	struct clock_event_device *ce;  	if (ret < 0) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 50a8736757f..ba3e502c955 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -479,6 +479,7 @@ static inline void 
clocksource_dequeue_watchdog(struct clocksource *cs) { }  static inline void clocksource_resume_watchdog(void) { }  static inline int __clocksource_watchdog_kthread(void) { return 0; }  static bool clocksource_is_watchdog(struct clocksource *cs) { return false; } +void clocksource_mark_unstable(struct clocksource *cs) { }  #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */ @@ -537,40 +538,55 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)  }  /** - * clocksource_max_deferment - Returns max time the clocksource can be deferred - * @cs:         Pointer to clocksource - * + * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted + * @mult:	cycle to nanosecond multiplier + * @shift:	cycle to nanosecond divisor (power of two) + * @maxadj:	maximum adjustment value to mult (~11%) + * @mask:	bitmask for two's complement subtraction of non 64 bit counters   */ -static u64 clocksource_max_deferment(struct clocksource *cs) +u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)  {  	u64 max_nsecs, max_cycles;  	/*  	 * Calculate the maximum number of cycles that we can pass to the  	 * cyc2ns function without overflowing a 64-bit signed result. The -	 * maximum number of cycles is equal to ULLONG_MAX/(cs->mult+cs->maxadj) +	 * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)  	 * which is equivalent to the below. -	 * max_cycles < (2^63)/(cs->mult + cs->maxadj) -	 * max_cycles < 2^(log2((2^63)/(cs->mult + cs->maxadj))) -	 * max_cycles < 2^(log2(2^63) - log2(cs->mult + cs->maxadj)) -	 * max_cycles < 2^(63 - log2(cs->mult + cs->maxadj)) -	 * max_cycles < 1 << (63 - log2(cs->mult + cs->maxadj)) +	 * max_cycles < (2^63)/(mult + maxadj) +	 * max_cycles < 2^(log2((2^63)/(mult + maxadj))) +	 * max_cycles < 2^(log2(2^63) - log2(mult + maxadj)) +	 * max_cycles < 2^(63 - log2(mult + maxadj)) +	 * max_cycles < 1 << (63 - log2(mult + maxadj))  	 * Please note that we add 1 to the result of the log2 to account for  	 * any rounding errors, ensure the above inequality is satisfied and  	 * no overflow will occur.  	 */ -	max_cycles = 1ULL << (63 - (ilog2(cs->mult + cs->maxadj) + 1)); +	max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));  	/*  	 * The actual maximum number of cycles we can defer the clocksource is -	 * determined by the minimum of max_cycles and cs->mask. +	 * determined by the minimum of max_cycles and mask.  	 * Note: Here we subtract the maxadj to make sure we don't sleep for  	 * too long if there's a large negative adjustment.  	 */ -	max_cycles = min_t(u64, max_cycles, (u64) cs->mask); -	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult - cs->maxadj, -					cs->shift); +	max_cycles = min(max_cycles, mask); +	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift); + +	return max_nsecs; +} + +/** + * clocksource_max_deferment - Returns max time the clocksource can be deferred + * @cs:         Pointer to clocksource + * + */ +static u64 clocksource_max_deferment(struct clocksource *cs) +{ +	u64 max_nsecs; +	max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj, +					  cs->mask);  	/*  	 * To ensure that the clocksource does not wrap whilst we are idle,  	 * limit the time the clocksource can be deferred by 12.5%. 
Please @@ -893,7 +909,7 @@ sysfs_show_current_clocksources(struct device *dev,  	return count;  } -size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt) +ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)  {  	size_t ret = cnt; @@ -924,7 +940,7 @@ static ssize_t sysfs_override_clocksource(struct device *dev,  					  struct device_attribute *attr,  					  const char *buf, size_t count)  { -	size_t ret; +	ssize_t ret;  	mutex_lock(&clocksource_mutex); @@ -952,7 +968,7 @@ static ssize_t sysfs_unbind_clocksource(struct device *dev,  {  	struct clocksource *cs;  	char name[CS_NAME_LEN]; -	size_t ret; +	ssize_t ret;  	ret = sysfs_get_uname(buf, name, count);  	if (ret < 0) diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 7a925ba456f..a6a5bf53e86 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -51,7 +51,13 @@   * HZ shrinks, so values greater than 8 overflow 32bits when   * HZ=100.   */ +#if HZ < 34 +#define JIFFIES_SHIFT	6 +#elif HZ < 67 +#define JIFFIES_SHIFT	7 +#else  #define JIFFIES_SHIFT	8 +#endif  static cycle_t jiffies_read(struct clocksource *cs)  { diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index bb2215174f0..33db43a3951 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -165,21 +165,21 @@ static inline void pps_set_freq(s64 freq)  static inline int is_error_status(int status)  { -	return (time_status & (STA_UNSYNC|STA_CLOCKERR)) +	return (status & (STA_UNSYNC|STA_CLOCKERR))  		/* PPS signal lost when either PPS time or  		 * PPS frequency synchronization requested  		 */ -		|| ((time_status & (STA_PPSFREQ|STA_PPSTIME)) -			&& !(time_status & STA_PPSSIGNAL)) +		|| ((status & (STA_PPSFREQ|STA_PPSTIME)) +			&& !(status & STA_PPSSIGNAL))  		/* PPS jitter exceeded when  		 * PPS time synchronization requested */ -		|| ((time_status & (STA_PPSTIME|STA_PPSJITTER)) +		|| ((status & (STA_PPSTIME|STA_PPSJITTER))  			== (STA_PPSTIME|STA_PPSJITTER))  		/* PPS wander exceeded or calibration error when  		 * PPS frequency synchronization requested  		 */ -		|| ((time_status & STA_PPSFREQ) -			&& (time_status & (STA_PPSWANDER|STA_PPSERROR))); +		|| ((status & STA_PPSFREQ) +			&& (status & (STA_PPSWANDER|STA_PPSERROR)));  }  static inline void pps_fill_timex(struct timex *txc) @@ -475,6 +475,7 @@ static void sync_cmos_clock(struct work_struct *work)  	 * called as close as possible to 500 ms before the new second starts.  	 * This code is run on a timer.  If the clock is set, that timer  	 * may not expire at the correct time.  Thus, we adjust... +	 * We want the clock to be within a couple of ticks from the target.  	 
*/  	if (!ntp_synced()) {  		/* @@ -485,7 +486,7 @@ static void sync_cmos_clock(struct work_struct *work)  	}  	getnstimeofday(&now); -	if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) { +	if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec * 5) {  		struct timespec adjust = now;  		fail = -ENODEV; @@ -513,12 +514,13 @@ static void sync_cmos_clock(struct work_struct *work)  		next.tv_sec++;  		next.tv_nsec -= NSEC_PER_SEC;  	} -	schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next)); +	queue_delayed_work(system_power_efficient_wq, +			   &sync_cmos_work, timespec_to_jiffies(&next));  }  void ntp_notify_cmos_timer(void)  { -	schedule_delayed_work(&sync_cmos_work, 0); +	queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);  }  #else @@ -784,8 +786,9 @@ static long hardpps_update_freq(struct pps_normtime freq_norm)  		time_status |= STA_PPSERROR;  		pps_errcnt++;  		pps_dec_freq_interval(); -		pr_err("hardpps: PPSERROR: interval too long - %ld s\n", -				freq_norm.sec); +		printk_deferred(KERN_ERR +			"hardpps: PPSERROR: interval too long - %ld s\n", +			freq_norm.sec);  		return 0;  	} @@ -798,7 +801,8 @@ static long hardpps_update_freq(struct pps_normtime freq_norm)  	delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT);  	pps_freq = ftemp;  	if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) { -		pr_warning("hardpps: PPSWANDER: change=%ld\n", delta); +		printk_deferred(KERN_WARNING +				"hardpps: PPSWANDER: change=%ld\n", delta);  		time_status |= STA_PPSWANDER;  		pps_stbcnt++;  		pps_dec_freq_interval(); @@ -842,8 +846,9 @@ static void hardpps_update_phase(long error)  	 * the time offset is updated.  	 */  	if (jitter > (pps_jitter << PPS_POPCORN)) { -		pr_warning("hardpps: PPSJITTER: jitter=%ld, limit=%ld\n", -		       jitter, (pps_jitter << PPS_POPCORN)); +		printk_deferred(KERN_WARNING +				"hardpps: PPSJITTER: jitter=%ld, limit=%ld\n", +				jitter, (pps_jitter << PPS_POPCORN));  		time_status |= STA_PPSJITTER;  		pps_jitcnt++;  	} else if (time_status & STA_PPSTIME) { @@ -900,7 +905,7 @@ void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)  		time_status |= STA_PPSJITTER;  		/* restart the frequency calibration interval */  		pps_fbase = *raw_ts; -		pr_err("hardpps: PPSJITTER: bad pulse\n"); +		printk_deferred(KERN_ERR "hardpps: PPSJITTER: bad pulse\n");  		return;  	} @@ -921,7 +926,10 @@ void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)  static int __init ntp_tick_adj_setup(char *str)  { -	ntp_tick_adj = simple_strtol(str, NULL, 0); +	int rc = kstrtol(str, 0, (long *)&ntp_tick_adj); + +	if (rc) +		return rc;  	ntp_tick_adj <<= NTP_SCALE_SHIFT;  	return 1; diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 0b479a6a22b..01d2d15aa66 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -8,25 +8,28 @@  #include <linux/clocksource.h>  #include <linux/init.h>  #include <linux/jiffies.h> +#include <linux/ktime.h>  #include <linux/kernel.h>  #include <linux/moduleparam.h>  #include <linux/sched.h>  #include <linux/syscore_ops.h> -#include <linux/timer.h> +#include <linux/hrtimer.h>  #include <linux/sched_clock.h> +#include <linux/seqlock.h> +#include <linux/bitops.h>  struct clock_data { +	ktime_t wrap_kt;  	u64 epoch_ns; -	u32 epoch_cyc; -	u32 epoch_cyc_copy; +	u64 epoch_cyc; +	seqcount_t seq;  	unsigned long rate;  	u32 mult;  	u32 shift;  	bool suspended;  }; -static void sched_clock_poll(unsigned long wrap_ticks); -static 
DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0); +static struct hrtimer sched_clock_timer;  static int irqtime = -1;  core_param(irqtime, irqtime, int, 0400); @@ -35,42 +38,39 @@ static struct clock_data cd = {  	.mult	= NSEC_PER_SEC / HZ,  }; -static u32 __read_mostly sched_clock_mask = 0xffffffff; +static u64 __read_mostly sched_clock_mask; -static u32 notrace jiffy_sched_clock_read(void) +static u64 notrace jiffy_sched_clock_read(void)  { -	return (u32)(jiffies - INITIAL_JIFFIES); +	/* +	 * We don't need to use get_jiffies_64 on 32-bit arches here +	 * because we register with BITS_PER_LONG +	 */ +	return (u64)(jiffies - INITIAL_JIFFIES);  } -static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; +static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;  static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)  {  	return (cyc * mult) >> shift;  } -static unsigned long long notrace sched_clock_32(void) +unsigned long long notrace sched_clock(void)  {  	u64 epoch_ns; -	u32 epoch_cyc; -	u32 cyc; +	u64 epoch_cyc; +	u64 cyc; +	unsigned long seq;  	if (cd.suspended)  		return cd.epoch_ns; -	/* -	 * Load the epoch_cyc and epoch_ns atomically.  We do this by -	 * ensuring that we always write epoch_cyc, epoch_ns and -	 * epoch_cyc_copy in strict order, and read them in strict order. -	 * If epoch_cyc and epoch_cyc_copy are not equal, then we're in -	 * the middle of an update, and we should repeat the load. -	 */  	do { +		seq = raw_read_seqcount_begin(&cd.seq);  		epoch_cyc = cd.epoch_cyc; -		smp_rmb();  		epoch_ns = cd.epoch_ns; -		smp_rmb(); -	} while (epoch_cyc != cd.epoch_cyc_copy); +	} while (read_seqcount_retry(&cd.seq, seq));  	cyc = read_sched_clock();  	cyc = (cyc - epoch_cyc) & sched_clock_mask; @@ -83,49 +83,68 @@ static unsigned long long notrace sched_clock_32(void)  static void notrace update_sched_clock(void)  {  	unsigned long flags; -	u32 cyc; +	u64 cyc;  	u64 ns;  	cyc = read_sched_clock();  	ns = cd.epoch_ns +  		cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,  			  cd.mult, cd.shift); -	/* -	 * Write epoch_cyc and epoch_ns in a way that the update is -	 * detectable in cyc_to_fixed_sched_clock(). -	 */ +  	raw_local_irq_save(flags); -	cd.epoch_cyc_copy = cyc; -	smp_wmb(); +	raw_write_seqcount_begin(&cd.seq);  	cd.epoch_ns = ns; -	smp_wmb();  	cd.epoch_cyc = cyc; +	raw_write_seqcount_end(&cd.seq);  	raw_local_irq_restore(flags);  } -static void sched_clock_poll(unsigned long wrap_ticks) +static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)  { -	mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));  	update_sched_clock(); +	hrtimer_forward_now(hrt, cd.wrap_kt); +	return HRTIMER_RESTART;  } -void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate) +void __init sched_clock_register(u64 (*read)(void), int bits, +				 unsigned long rate)  { -	unsigned long r, w; -	u64 res, wrap; +	u64 res, wrap, new_mask, new_epoch, cyc, ns; +	u32 new_mult, new_shift; +	ktime_t new_wrap_kt; +	unsigned long r;  	char r_unit;  	if (cd.rate > rate)  		return; -	BUG_ON(bits > 32);  	WARN_ON(!irqs_disabled()); -	read_sched_clock = read; -	sched_clock_mask = (1ULL << bits) - 1; -	cd.rate = rate;  	/* calculate the mult/shift to convert counter ticks to ns. 
*/ -	clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0); +	clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); + +	new_mask = CLOCKSOURCE_MASK(bits); + +	/* calculate how many ns until we wrap */ +	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask); +	new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3)); + +	/* update epoch for new counter and update epoch_ns from old counter*/ +	new_epoch = read(); +	cyc = read_sched_clock(); +	ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, +			  cd.mult, cd.shift); + +	raw_write_seqcount_begin(&cd.seq); +	read_sched_clock = read; +	sched_clock_mask = new_mask; +	cd.rate = rate; +	cd.wrap_kt = new_wrap_kt; +	cd.mult = new_mult; +	cd.shift = new_shift; +	cd.epoch_cyc = new_epoch; +	cd.epoch_ns = ns; +	raw_write_seqcount_end(&cd.seq);  	r = rate;  	if (r >= 4000000) { @@ -137,27 +156,11 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)  	} else  		r_unit = ' '; -	/* calculate how many ns until we wrap */ -	wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift); -	do_div(wrap, NSEC_PER_MSEC); -	w = wrap; -  	/* calculate the ns resolution of this counter */ -	res = cyc_to_ns(1ULL, cd.mult, cd.shift); -	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n", -		bits, r, r_unit, res, w); +	res = cyc_to_ns(1ULL, new_mult, new_shift); -	/* -	 * Start the timer to keep sched_clock() properly updated and -	 * sets the initial epoch. -	 */ -	sched_clock_timer.data = msecs_to_jiffies(w - (w / 10)); -	update_sched_clock(); - -	/* -	 * Ensure that sched_clock() starts off at 0ns -	 */ -	cd.epoch_ns = 0; +	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n", +		bits, r, r_unit, res, wrap);  	/* Enable IRQ time accounting if we have a fast enough sched_clock */  	if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) @@ -166,13 +169,6 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)  	pr_debug("Registered %pF as sched_clock source\n", read);  } -unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32; - -unsigned long long notrace sched_clock(void) -{ -	return sched_clock_func(); -} -  void __init sched_clock_postinit(void)  {  	/* @@ -180,14 +176,23 @@ void __init sched_clock_postinit(void)  	 * make it the final one one.  	 */  	if (read_sched_clock == jiffy_sched_clock_read) -		setup_sched_clock(jiffy_sched_clock_read, 32, HZ); +		sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); + +	update_sched_clock(); -	sched_clock_poll(sched_clock_timer.data); +	/* +	 * Start the timer to keep sched_clock() properly updated and +	 * sets the initial epoch. 
+	 */ +	hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); +	sched_clock_timer.function = sched_clock_poll; +	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);  }  static int sched_clock_suspend(void)  { -	sched_clock_poll(sched_clock_timer.data); +	update_sched_clock(); +	hrtimer_cancel(&sched_clock_timer);  	cd.suspended = true;  	return 0;  } @@ -195,7 +200,7 @@ static int sched_clock_suspend(void)  static void sched_clock_resume(void)  {  	cd.epoch_cyc = read_sched_clock(); -	cd.epoch_cyc_copy = cd.epoch_cyc; +	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);  	cd.suspended = false;  } diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c new file mode 100644 index 00000000000..eb682d5c697 --- /dev/null +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -0,0 +1,106 @@ +/* + * linux/kernel/time/tick-broadcast-hrtimer.c + * This file emulates a local clock event device + * via a pseudo clock device. + */ +#include <linux/cpu.h> +#include <linux/err.h> +#include <linux/hrtimer.h> +#include <linux/interrupt.h> +#include <linux/percpu.h> +#include <linux/profile.h> +#include <linux/clockchips.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/module.h> + +#include "tick-internal.h" + +static struct hrtimer bctimer; + +static void bc_set_mode(enum clock_event_mode mode, +			struct clock_event_device *bc) +{ +	switch (mode) { +	case CLOCK_EVT_MODE_SHUTDOWN: +		/* +		 * Note, we cannot cancel the timer here as we might +		 * run into the following live lock scenario: +		 * +		 * cpu 0		cpu1 +		 * lock(broadcast_lock); +		 *			hrtimer_interrupt() +		 *			bc_handler() +		 *			   tick_handle_oneshot_broadcast(); +		 *			    lock(broadcast_lock); +		 * hrtimer_cancel() +		 *  wait_for_callback() +		 */ +		hrtimer_try_to_cancel(&bctimer); +		break; +	default: +		break; +	} +} + +/* + * This is called from the guts of the broadcast code when the cpu + * which is about to enter idle has the earliest broadcast timer event. + */ +static int bc_set_next(ktime_t expires, struct clock_event_device *bc) +{ +	/* +	 * We try to cancel the timer first. If the callback is on +	 * flight on some other cpu then we let it handle it. If we +	 * were able to cancel the timer nothing can rearm it as we +	 * own broadcast_lock. +	 * +	 * However we can also be called from the event handler of +	 * ce_broadcast_hrtimer itself when it expires. We cannot +	 * restart the timer because we are in the callback, but we +	 * can set the expiry time and let the callback return +	 * HRTIMER_RESTART. 
+	 */ +	if (hrtimer_try_to_cancel(&bctimer) >= 0) { +		hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED); +		/* Bind the "device" to the cpu */ +		bc->bound_on = smp_processor_id(); +	} else if (bc->bound_on == smp_processor_id()) { +		hrtimer_set_expires(&bctimer, expires); +	} +	return 0; +} + +static struct clock_event_device ce_broadcast_hrtimer = { +	.set_mode		= bc_set_mode, +	.set_next_ktime		= bc_set_next, +	.features		= CLOCK_EVT_FEAT_ONESHOT | +				  CLOCK_EVT_FEAT_KTIME | +				  CLOCK_EVT_FEAT_HRTIMER, +	.rating			= 0, +	.bound_on		= -1, +	.min_delta_ns		= 1, +	.max_delta_ns		= KTIME_MAX, +	.min_delta_ticks	= 1, +	.max_delta_ticks	= ULONG_MAX, +	.mult			= 1, +	.shift			= 0, +	.cpumask		= cpu_all_mask, +}; + +static enum hrtimer_restart bc_handler(struct hrtimer *t) +{ +	ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer); + +	if (ce_broadcast_hrtimer.next_event.tv64 == KTIME_MAX) +		return HRTIMER_NORESTART; + +	return HRTIMER_RESTART; +} + +void tick_setup_hrtimer_broadcast(void) +{ +	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); +	bctimer.function = bc_handler; +	clockevents_register_device(&ce_broadcast_hrtimer); +} diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 218bcb565fe..64c5990fd50 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -70,6 +70,7 @@ static bool tick_check_broadcast_device(struct clock_event_device *curdev,  					struct clock_event_device *newdev)  {  	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) || +	    (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||  	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))  		return false; @@ -119,6 +120,19 @@ int tick_is_broadcast_device(struct clock_event_device *dev)  	return (dev && tick_broadcast_device.evtdev == dev);  } +int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) +{ +	int ret = -ENODEV; + +	if (tick_is_broadcast_device(dev)) { +		raw_spin_lock(&tick_broadcast_lock); +		ret = __clockevents_update_freq(dev, freq); +		raw_spin_unlock(&tick_broadcast_lock); +	} +	return ret; +} + +  static void err_broadcast(const struct cpumask *mask)  {  	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n"); @@ -271,12 +285,8 @@ static void tick_do_broadcast(struct cpumask *mask)   */  static void tick_do_periodic_broadcast(void)  { -	raw_spin_lock(&tick_broadcast_lock); -  	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);  	tick_do_broadcast(tmpmask); - -	raw_spin_unlock(&tick_broadcast_lock);  }  /* @@ -286,13 +296,15 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)  {  	ktime_t next; +	raw_spin_lock(&tick_broadcast_lock); +  	tick_do_periodic_broadcast();  	/*  	 * The device is in periodic mode. No reprogramming necessary:  	 */  	if (dev->mode == CLOCK_EVT_MODE_PERIODIC) -		return; +		goto unlock;  	/*  	 * Setup the next period for devices, which do not have @@ -305,9 +317,11 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)  		next = ktime_add(next, tick_period);  		if (!clockevents_program_event(dev, next, false)) -			return; +			goto unlock;  		tick_do_periodic_broadcast();  	} +unlock: +	raw_spin_unlock(&tick_broadcast_lock);  }  /* @@ -537,10 +551,10 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)   * Called from irq_enter() when idle was interrupted to reenable the   * per cpu device.   
*/ -void tick_check_oneshot_broadcast(int cpu) +void tick_check_oneshot_broadcast_this_cpu(void)  { -	if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) { -		struct tick_device *td = &per_cpu(tick_cpu_device, cpu); +	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) { +		struct tick_device *td = &__get_cpu_var(tick_cpu_device);  		/*  		 * We might be in the middle of switching over from @@ -629,24 +643,61 @@ again:  	raw_spin_unlock(&tick_broadcast_lock);  } +static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu) +{ +	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER)) +		return 0; +	if (bc->next_event.tv64 == KTIME_MAX) +		return 0; +	return bc->bound_on == cpu ? -EBUSY : 0; +} + +static void broadcast_shutdown_local(struct clock_event_device *bc, +				     struct clock_event_device *dev) +{ +	/* +	 * For hrtimer based broadcasting we cannot shutdown the cpu +	 * local device if our own event is the first one to expire or +	 * if we own the broadcast timer. +	 */ +	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) { +		if (broadcast_needs_cpu(bc, smp_processor_id())) +			return; +		if (dev->next_event.tv64 < bc->next_event.tv64) +			return; +	} +	clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); +} + +static void broadcast_move_bc(int deadcpu) +{ +	struct clock_event_device *bc = tick_broadcast_device.evtdev; + +	if (!bc || !broadcast_needs_cpu(bc, deadcpu)) +		return; +	/* This moves the broadcast assignment to this cpu */ +	clockevents_program_event(bc, bc->next_event, 1); +} +  /*   * Powerstate information: The system enters/leaves a state, where   * affected devices might stop + * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.   */ -void tick_broadcast_oneshot_control(unsigned long reason) +int tick_broadcast_oneshot_control(unsigned long reason)  {  	struct clock_event_device *bc, *dev;  	struct tick_device *td;  	unsigned long flags;  	ktime_t now; -	int cpu; +	int cpu, ret = 0;  	/*  	 * Periodic mode does not care about the enter/exit of power  	 * states  	 */  	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) -		return; +		return 0;  	/*  	 * We are called with preemtion disabled from the depth of the @@ -657,7 +708,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)  	dev = td->evtdev;  	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) -		return; +		return 0;  	bc = tick_broadcast_device.evtdev; @@ -665,7 +716,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)  	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {  		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {  			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); -			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); +			broadcast_shutdown_local(bc, dev);  			/*  			 * We only reprogram the broadcast timer if we  			 * did not mark ourself in the force mask and @@ -678,6 +729,16 @@ void tick_broadcast_oneshot_control(unsigned long reason)  			    dev->next_event.tv64 < bc->next_event.tv64)  				tick_broadcast_set_event(bc, cpu, dev->next_event, 1);  		} +		/* +		 * If the current CPU owns the hrtimer broadcast +		 * mechanism, it cannot go deep idle and we remove the +		 * CPU from the broadcast mask. We don't have to go +		 * through the EXIT path as the local timer is not +		 * shutdown. 
+		 */ +		ret = broadcast_needs_cpu(bc, cpu); +		if (ret) +			cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);  	} else {  		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {  			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); @@ -745,6 +806,7 @@ void tick_broadcast_oneshot_control(unsigned long reason)  	}  out:  	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); +	return ret;  }  /* @@ -755,6 +817,7 @@ out:  static void tick_broadcast_clear_oneshot(int cpu)  {  	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); +	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);  }  static void tick_broadcast_init_next_event(struct cpumask *mask, @@ -850,6 +913,8 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)  	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);  	cpumask_clear_cpu(cpu, tick_broadcast_force_mask); +	broadcast_move_bc(cpu); +  	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);  } diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 64522ecdfe0..0a0608edeb2 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -33,6 +33,21 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device);   */  ktime_t tick_next_period;  ktime_t tick_period; + +/* + * tick_do_timer_cpu is a timer core internal variable which holds the CPU NR + * which is responsible for calling do_timer(), i.e. the timekeeping stuff. This + * variable has two functions: + * + * 1) Prevent a thundering herd issue of a gazillion of CPUs trying to grab the + *    timekeeping lock all at once. Only the CPU which is assigned to do the + *    update is handling it. + * + * 2) Hand off the duty in the NOHZ idle case by setting the value to + *    TICK_DO_TIMER_NONE, i.e. a non existing CPU. So the next cpu which looks + *    at it will take over and keep the time keeping alive.  The handover + *    procedure also covers cpu hotplug. + */  int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;  /* @@ -70,6 +85,7 @@ static void tick_periodic(int cpu)  		do_timer(1);  		write_sequnlock(&jiffies_lock); +		update_wall_time();  	}  	update_process_times(user_mode(get_irq_regs())); @@ -82,18 +98,19 @@ static void tick_periodic(int cpu)  void tick_handle_periodic(struct clock_event_device *dev)  {  	int cpu = smp_processor_id(); -	ktime_t next; +	ktime_t next = dev->next_event;  	tick_periodic(cpu);  	if (dev->mode != CLOCK_EVT_MODE_ONESHOT)  		return; -	/* -	 * Setup the next period for devices, which do not have -	 * periodic mode: -	 */ -	next = ktime_add(dev->next_event, tick_period);  	for (;;) { +		/* +		 * Setup the next period for devices, which do not have +		 * periodic mode: +		 */ +		next = ktime_add(next, tick_period); +  		if (!clockevents_program_event(dev, next, false))  			return;  		/* @@ -102,12 +119,11 @@ void tick_handle_periodic(struct clock_event_device *dev)  		 * to be sure we're using a real hardware clocksource.  		 * Otherwise we could get trapped in an infinite  		 * loop, as the tick_periodic() increments jiffies, -		 * when then will increment time, posibly causing +		 * which then will increment time, possibly causing  		 * the loop to trigger again and again.  		 
*/  		if (timekeeping_valid_for_hres())  			tick_periodic(cpu); -		next = ktime_add(next, tick_period);  	}  } @@ -260,7 +276,7 @@ static bool tick_check_preferred(struct clock_event_device *curdev,  bool tick_check_replacement(struct clock_event_device *curdev,  			    struct clock_event_device *newdev)  { -	if (tick_check_percpu(curdev, newdev, smp_processor_id())) +	if (!tick_check_percpu(curdev, newdev, smp_processor_id()))  		return false;  	return tick_check_preferred(curdev, newdev); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index bc906cad709..7ab92b19965 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -31,7 +31,7 @@ extern void tick_install_replacement(struct clock_event_device *dev);  extern void clockevents_shutdown(struct clock_event_device *dev); -extern size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); +extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);  /*   * NO_HZ / high resolution timer shared code @@ -46,23 +46,23 @@ extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));  extern void tick_resume_oneshot(void);  # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST  extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); -extern void tick_broadcast_oneshot_control(unsigned long reason); +extern int tick_broadcast_oneshot_control(unsigned long reason);  extern void tick_broadcast_switch_to_oneshot(void);  extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);  extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);  extern int tick_broadcast_oneshot_active(void); -extern void tick_check_oneshot_broadcast(int cpu); +extern void tick_check_oneshot_broadcast_this_cpu(void);  bool tick_broadcast_oneshot_available(void);  # else /* BROADCAST */  static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)  {  	BUG();  } -static inline void tick_broadcast_oneshot_control(unsigned long reason) { } +static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }  static inline void tick_broadcast_switch_to_oneshot(void) { }  static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }  static inline int tick_broadcast_oneshot_active(void) { return 0; } -static inline void tick_check_oneshot_broadcast(int cpu) { } +static inline void tick_check_oneshot_broadcast_this_cpu(void) { }  static inline bool tick_broadcast_oneshot_available(void) { return true; }  # endif /* !BROADCAST */ @@ -87,7 +87,7 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)  {  	BUG();  } -static inline void tick_broadcast_oneshot_control(unsigned long reason) { } +static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }  static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }  static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)  { @@ -111,6 +111,7 @@ extern int tick_resume_broadcast(void);  extern void tick_broadcast_init(void);  extern void  tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); +int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);  #else /* !BROADCAST */ @@ -133,6 +134,8 @@ static inline void tick_shutdown_broadcast(unsigned int *cpup) { }  static inline void tick_suspend_broadcast(void) { }  static inline int tick_resume_broadcast(void) { return 0; }  static inline void tick_broadcast_init(void) { } +static inline int 
tick_broadcast_update_freq(struct clock_event_device *dev, +					     u32 freq) { return -ENODEV; }  /*   * Set the periodic handler in non broadcast mode @@ -152,6 +155,9 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)  	return !(dev->features & CLOCK_EVT_FEAT_DUMMY);  } +int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); +  #endif  extern void do_timer(unsigned long ticks); +extern void update_wall_time(void); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3612fc77f83..6558b7ac112 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -84,8 +84,12 @@ static void tick_do_update_jiffies64(ktime_t now)  		/* Keep the tick_next_period variable up to date */  		tick_next_period = ktime_add(last_jiffies_update, tick_period); +	} else { +		write_sequnlock(&jiffies_lock); +		return;  	}  	write_sequnlock(&jiffies_lock); +	update_wall_time();  }  /* @@ -177,7 +181,7 @@ static bool can_stop_full_tick(void)  	 * TODO: kick full dynticks CPUs when  	 * sched_clock_stable is set.  	 */ -	if (!sched_clock_stable) { +	if (!sched_clock_stable()) {  		trace_tick_stop(0, "unstable sched clock\n");  		/*  		 * Don't allow the user to think they can get @@ -361,8 +365,8 @@ void __init tick_nohz_init(void)  /*   * NO HZ enabled ?   */ -int tick_nohz_enabled __read_mostly  = 1; - +static int tick_nohz_enabled __read_mostly  = 1; +int tick_nohz_active  __read_mostly;  /*   * Enable / Disable tickless mode   */ @@ -391,11 +395,9 @@ __setup("nohz=", setup_tick_nohz);   */  static void tick_nohz_update_jiffies(ktime_t now)  { -	int cpu = smp_processor_id(); -	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);  	unsigned long flags; -	ts->idle_waketime = now; +	__this_cpu_write(tick_cpu_sched.idle_waketime, now);  	local_irq_save(flags);  	tick_do_update_jiffies64(now); @@ -426,17 +428,15 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda  } -static void tick_nohz_stop_idle(int cpu, ktime_t now) +static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)  { -	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); - -	update_ts_time_stats(cpu, ts, now, NULL); +	update_ts_time_stats(smp_processor_id(), ts, now, NULL);  	ts->idle_active = 0;  	sched_clock_idle_wakeup_event(0);  } -static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) +static ktime_t tick_nohz_start_idle(struct tick_sched *ts)  {  	ktime_t now = ktime_get(); @@ -465,7 +465,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)  	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);  	ktime_t now, idle; -	if (!tick_nohz_enabled) +	if (!tick_nohz_active)  		return -1;  	now = ktime_get(); @@ -506,7 +506,7 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)  	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);  	ktime_t now, iowait; -	if (!tick_nohz_enabled) +	if (!tick_nohz_active)  		return -1;  	now = ktime_get(); @@ -536,12 +536,13 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,  	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;  	u64 time_delta; +	time_delta = timekeeping_max_deferment(); +  	/* Read jiffies and the time when jiffies were updated last */  	do {  		seq = read_seqbegin(&jiffies_lock);  		last_update = last_jiffies_update;  		last_jiffies = jiffies; -		time_delta = timekeeping_max_deferment();  	} while (read_seqretry(&jiffies_lock, seq));  	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || @@ -681,18 +682,18 @@ out:  static 
void tick_nohz_full_stop_tick(struct tick_sched *ts)  {  #ifdef CONFIG_NO_HZ_FULL -       int cpu = smp_processor_id(); +	int cpu = smp_processor_id(); -       if (!tick_nohz_full_cpu(cpu) || is_idle_task(current)) -               return; +	if (!tick_nohz_full_cpu(cpu) || is_idle_task(current)) +		return; -       if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) -	       return; +	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) +		return; -       if (!can_stop_full_tick()) -               return; +	if (!can_stop_full_tick()) +		return; -       tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); +	tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);  #endif  } @@ -711,8 +712,10 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)  		return false;  	} -	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) +	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) { +		ts->sleep_length = (ktime_t) { .tv64 = NSEC_PER_SEC/HZ };  		return false; +	}  	if (need_resched())  		return false; @@ -752,7 +755,7 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts)  	ktime_t now, expires;  	int cpu = smp_processor_id(); -	now = tick_nohz_start_idle(cpu, ts); +	now = tick_nohz_start_idle(ts);  	if (can_stop_idle_tick(cpu, ts)) {  		int was_stopped = ts->tick_stopped; @@ -799,11 +802,6 @@ void tick_nohz_idle_enter(void)  	local_irq_disable();  	ts = &__get_cpu_var(tick_cpu_sched); -	/* -	 * set ts->inidle unconditionally. even if the system did not -	 * switch to nohz mode the cpu frequency governers rely on the -	 * update of the idle time accounting in tick_nohz_start_idle(). -	 */  	ts->inidle = 1;  	__tick_nohz_idle_enter(ts); @@ -914,8 +912,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)   */  void tick_nohz_idle_exit(void)  { -	int cpu = smp_processor_id(); -	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); +	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);  	ktime_t now;  	local_irq_disable(); @@ -928,7 +925,7 @@ void tick_nohz_idle_exit(void)  		now = ktime_get();  	if (ts->idle_active) -		tick_nohz_stop_idle(cpu, now); +		tick_nohz_stop_idle(ts, now);  	if (ts->tick_stopped) {  		tick_nohz_restart_sched_tick(ts, now); @@ -981,7 +978,7 @@ static void tick_nohz_switch_to_nohz(void)  		local_irq_enable();  		return;  	} - +	tick_nohz_active = 1;  	ts->nohz_mode = NOHZ_MODE_LOWRES;  	/* @@ -1012,12 +1009,10 @@ static void tick_nohz_switch_to_nohz(void)   * timer and do not touch the other magic bits which need to be done   * when idle is left.   
*/ -static void tick_nohz_kick_tick(int cpu, ktime_t now) +static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now)  {  #if 0  	/* Switch back to 2.6.27 behaviour */ - -	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);  	ktime_t delta;  	/* @@ -1032,36 +1027,36 @@ static void tick_nohz_kick_tick(int cpu, ktime_t now)  #endif  } -static inline void tick_check_nohz(int cpu) +static inline void tick_nohz_irq_enter(void)  { -	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); +	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);  	ktime_t now;  	if (!ts->idle_active && !ts->tick_stopped)  		return;  	now = ktime_get();  	if (ts->idle_active) -		tick_nohz_stop_idle(cpu, now); +		tick_nohz_stop_idle(ts, now);  	if (ts->tick_stopped) {  		tick_nohz_update_jiffies(now); -		tick_nohz_kick_tick(cpu, now); +		tick_nohz_kick_tick(ts, now);  	}  }  #else  static inline void tick_nohz_switch_to_nohz(void) { } -static inline void tick_check_nohz(int cpu) { } +static inline void tick_nohz_irq_enter(void) { }  #endif /* CONFIG_NO_HZ_COMMON */  /*   * Called from irq_enter to notify about the possible interruption of idle()   */ -void tick_check_idle(int cpu) +void tick_irq_enter(void)  { -	tick_check_oneshot_broadcast(cpu); -	tick_check_nohz(cpu); +	tick_check_oneshot_broadcast_this_cpu(); +	tick_nohz_irq_enter();  }  /* @@ -1139,8 +1134,10 @@ void tick_setup_sched_timer(void)  	}  #ifdef CONFIG_NO_HZ_COMMON -	if (tick_nohz_enabled) +	if (tick_nohz_enabled) {  		ts->nohz_mode = NOHZ_MODE_HIGHRES; +		tick_nohz_active = 1; +	}  #endif  }  #endif /* HIGH_RES_TIMERS */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 947ba25a95a..32d8d6aaedb 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -22,6 +22,7 @@  #include <linux/tick.h>  #include <linux/stop_machine.h>  #include <linux/pvclock_gtod.h> +#include <linux/compiler.h>  #include "tick-internal.h"  #include "ntp_internal.h" @@ -77,7 +78,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)  	tk->wall_to_monotonic = wtm;  	set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec);  	tk->offs_real = timespec_to_ktime(tmp); -	tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tk->tai_offset, 0)); +	tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));  }  static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t) @@ -90,8 +91,9 @@ static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)  }  /** - * timekeeper_setup_internals - Set up internals to use clocksource clock. + * tk_setup_internals - Set up internals to use clocksource clock.   * + * @tk:		The target timekeeper to setup.   * @clock:		Pointer to clocksource.   
*   * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment @@ -595,7 +597,7 @@ s32 timekeeping_get_tai_offset(void)  static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)  {  	tk->tai_offset = tai_offset; -	tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tai_offset, 0)); +	tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0));  }  /** @@ -610,6 +612,7 @@ void timekeeping_set_tai_offset(s32 tai_offset)  	raw_spin_lock_irqsave(&timekeeper_lock, flags);  	write_seqcount_begin(&timekeeper_seq);  	__timekeeping_set_tai_offset(tk, tai_offset); +	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);  	write_seqcount_end(&timekeeper_seq);  	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);  	clock_was_set(); @@ -758,7 +761,7 @@ u64 timekeeping_max_deferment(void)   *   *  XXX - Do be sure to remove it once all arches implement it.   */ -void __attribute__((weak)) read_persistent_clock(struct timespec *ts) +void __weak read_persistent_clock(struct timespec *ts)  {  	ts->tv_sec = 0;  	ts->tv_nsec = 0; @@ -773,7 +776,7 @@ void __attribute__((weak)) read_persistent_clock(struct timespec *ts)   *   *  XXX - Do be sure to remove it once all arches implement it.   */ -void __attribute__((weak)) read_boot_clock(struct timespec *ts) +void __weak read_boot_clock(struct timespec *ts)  {  	ts->tv_sec = 0;  	ts->tv_nsec = 0; @@ -849,8 +852,9 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,  							struct timespec *delta)  {  	if (!timespec_valid_strict(delta)) { -		printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " -					"sleep delta value!\n"); +		printk_deferred(KERN_WARNING +				"__timekeeping_inject_sleeptime: Invalid " +				"sleep delta value!\n");  		return;  	}  	tk_xtime_add(tk, delta); @@ -1023,6 +1027,8 @@ static int timekeeping_suspend(void)  		timekeeping_suspend_time =  			timespec_add(timekeeping_suspend_time, delta_delta);  	} + +	timekeeping_update(tk, TK_MIRROR);  	write_seqcount_end(&timekeeper_seq);  	raw_spin_unlock_irqrestore(&timekeeper_lock, flags); @@ -1130,16 +1136,6 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)  		 * we can adjust by 1.  		 */  		error >>= 2; -		/* -		 * XXX - In update_wall_time, we round up to the next -		 * nanosecond, and store the amount rounded up into -		 * the error. This causes the likely below to be unlikely. -		 * -		 * The proper fix is to avoid rounding up by using -		 * the high precision tk->xtime_nsec instead of -		 * xtime.tv_nsec everywhere. Fixing this will take some -		 * time. 
-		 */  		if (likely(error <= interval))  			adj = 1;  		else @@ -1162,7 +1158,7 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)  	if (unlikely(tk->clock->maxadj &&  		(tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { -		printk_once(KERN_WARNING +		printk_deferred_once(KERN_WARNING  			"Adjusting %s more than 11%% (%ld vs %ld)\n",  			tk->clock->name, (long)tk->mult + adj,  			(long)tk->clock->mult + tk->clock->maxadj); @@ -1255,7 +1251,7 @@ out_adjust:  static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)  {  	u64 nsecps = (u64)NSEC_PER_SEC << tk->shift; -	unsigned int action = 0; +	unsigned int clock_set = 0;  	while (tk->xtime_nsec >= nsecps) {  		int leap; @@ -1277,11 +1273,10 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)  			__timekeeping_set_tai_offset(tk, tk->tai_offset - leap); -			clock_was_set_delayed(); -			action = TK_CLOCK_WAS_SET; +			clock_set = TK_CLOCK_WAS_SET;  		}  	} -	return action; +	return clock_set;  }  /** @@ -1294,7 +1289,8 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)   * Returns the unconsumed cycles.   */  static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, -						u32 shift) +						u32 shift, +						unsigned int *clock_set)  {  	cycle_t interval = tk->cycle_interval << shift;  	u64 raw_nsecs; @@ -1308,7 +1304,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,  	tk->cycle_last += interval;  	tk->xtime_nsec += tk->xtime_interval << shift; -	accumulate_nsecs_to_secs(tk); +	*clock_set |= accumulate_nsecs_to_secs(tk);  	/* Accumulate raw time */  	raw_nsecs = (u64)tk->raw_interval << shift; @@ -1347,7 +1343,7 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)  	tk->xtime_nsec -= remainder;  	tk->xtime_nsec += 1ULL << tk->shift;  	tk->ntp_error += remainder << tk->ntp_error_shift; - +	tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift;  }  #else  #define old_vsyscall_fixup(tk) @@ -1359,14 +1355,14 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)   * update_wall_time - Uses the current clocksource to increment the wall time   *   */ -static void update_wall_time(void) +void update_wall_time(void)  {  	struct clocksource *clock;  	struct timekeeper *real_tk = &timekeeper;  	struct timekeeper *tk = &shadow_timekeeper;  	cycle_t offset;  	int shift = 0, maxshift; -	unsigned int action; +	unsigned int clock_set = 0;  	unsigned long flags;  	raw_spin_lock_irqsave(&timekeeper_lock, flags); @@ -1401,7 +1397,8 @@ static void update_wall_time(void)  	maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;  	shift = min(shift, maxshift);  	while (offset >= tk->cycle_interval) { -		offset = logarithmic_accumulation(tk, offset, shift); +		offset = logarithmic_accumulation(tk, offset, shift, +							&clock_set);  		if (offset < tk->cycle_interval<<shift)  			shift--;  	} @@ -1419,7 +1416,7 @@ static void update_wall_time(void)  	 * Finally, make sure that after the rounding  	 * xtime_nsec isn't larger than NSEC_PER_SEC  	 */ -	action = accumulate_nsecs_to_secs(tk); +	clock_set |= accumulate_nsecs_to_secs(tk);  	write_seqcount_begin(&timekeeper_seq);  	/* Update clock->cycle_last with the new value */ @@ -1435,10 +1432,13 @@ static void update_wall_time(void)  	 * updating.  	 
*/  	memcpy(real_tk, tk, sizeof(*tk)); -	timekeeping_update(real_tk, action); +	timekeeping_update(real_tk, clock_set);  	write_seqcount_end(&timekeeper_seq);  out:  	raw_spin_unlock_irqrestore(&timekeeper_lock, flags); +	if (clock_set) +		/* Have to call _delayed version, since in irq context*/ +		clock_was_set_delayed();  }  /** @@ -1583,7 +1583,6 @@ struct timespec get_monotonic_coarse(void)  void do_timer(unsigned long ticks)  {  	jiffies_64 += ticks; -	update_wall_time();  	calc_global_load(ticks);  } @@ -1613,9 +1612,10 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,   * ktime_get_update_offsets - hrtimer helper   * @offs_real:	pointer to storage for monotonic -> realtime offset   * @offs_boot:	pointer to storage for monotonic -> boottime offset + * @offs_tai:	pointer to storage for monotonic -> clock tai offset   *   * Returns current monotonic time and updates the offsets - * Called from hrtimer_interupt() or retrigger_next_event() + * Called from hrtimer_interrupt() or retrigger_next_event()   */  ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot,  							ktime_t *offs_tai) @@ -1697,12 +1697,14 @@ int do_adjtimex(struct timex *txc)  	if (tai != orig_tai) {  		__timekeeping_set_tai_offset(tk, tai); -		update_pvclock_gtod(tk, true); -		clock_was_set_delayed(); +		timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);  	}  	write_seqcount_end(&timekeeper_seq);  	raw_spin_unlock_irqrestore(&timekeeper_lock, flags); +	if (tai != orig_tai) +		clock_was_set(); +  	ntp_notify_cmos_timer();  	return ret; @@ -1738,4 +1740,5 @@ void xtime_update(unsigned long ticks)  	write_seqlock(&jiffies_lock);  	do_timer(ticks);  	write_sequnlock(&jiffies_lock); +	update_wall_time();  } diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index 802433a4f5e..4d54f97558d 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -21,6 +21,8 @@  #include <linux/seq_file.h>  #include <linux/time.h> +#include "timekeeping_internal.h" +  static unsigned int sleep_time_bin[32] = {0};  static int tk_debug_show_sleep_time(struct seq_file *s, void *data) diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 0b537f27b55..1fb08f21302 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -298,15 +298,15 @@ static int tstats_show(struct seq_file *m, void *v)  	period = ktime_to_timespec(time);  	ms = period.tv_nsec / 1000000; -	seq_puts(m, "Timer Stats Version: v0.2\n"); +	seq_puts(m, "Timer Stats Version: v0.3\n");  	seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);  	if (atomic_read(&overflow_count)) -		seq_printf(m, "Overflow: %d entries\n", -			atomic_read(&overflow_count)); +		seq_printf(m, "Overflow: %d entries\n", atomic_read(&overflow_count)); +	seq_printf(m, "Collection: %s\n", timer_stats_active ? "active" : "inactive");  	for (i = 0; i < nr_entries; i++) {  		entry = entries + i; - 		if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) { +		if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) {  			seq_printf(m, "%4luD, %5d %-16s ",  				entry->count, entry->pid, entry->comm);  		} else {  | 
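
The clockevents hunk replaces the open-coded clockevent_delta2ns() body with a bound-checked helper. Roughly the same math can be exercised outside the kernel; the sketch below is a plain C stand-in (do_div() swapped for ordinary 64-bit division, the device reduced to its mult/shift fields, and a made-up 13MHz mult/shift pair), not the kernel implementation itself.

/*
 * Standalone sketch of the bound-checked latch -> nanoseconds conversion
 * the clockevents hunk introduces (cev_delta2ns()).  For a clock event
 * device, mult/shift scale nanoseconds to device ticks, so the reverse
 * conversion is ns = (ticks << shift) / mult.  The 13MHz parameters in
 * main() are illustrative assumptions, not real hardware values.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct evt_dev {
	uint32_t mult;		/* ns -> ticks multiplier */
	uint32_t shift;		/* ns -> ticks shift */
};

static uint64_t delta2ns(unsigned long latch, const struct evt_dev *evt,
			 bool ismax)
{
	uint64_t clc = (uint64_t)latch << evt->shift;
	uint64_t rnd;

	assert(evt->mult);		/* the kernel warns and forces mult = 1 */
	rnd = (uint64_t)evt->mult - 1;

	/* If the shift lost bits, saturate instead of returning garbage. */
	if ((clc >> evt->shift) != (uint64_t)latch)
		clc = ~0ULL;

	/*
	 * Add mult - 1 so the backwards ns -> ticks conversion cannot round
	 * below the requested latch.  Skip it for the upper limit of devices
	 * with mult > (1 << shift), i.e. frequency above 1GHz, and whenever
	 * the addition itself would overflow 64 bits.
	 */
	if ((~0ULL - clc > rnd) &&
	    (!ismax || evt->mult <= (1U << evt->shift)))
		clc += rnd;

	clc /= evt->mult;

	/* Deltas below 1usec are pointless noise. */
	return clc > 1000 ? clc : 1000;
}

int main(void)
{
	/* Hypothetical 13MHz timer: mult/shift approximate ticks = ns * 13e6 / 1e9 */
	struct evt_dev evt = { .mult = 27917287, .shift = 31 };

	/* One tick is ~77ns, so the result is clamped to the 1usec floor. */
	printf("min delta: %llu ns\n",
	       (unsigned long long)delta2ns(1, &evt, false));
	/* A 31-bit latch at 13MHz is roughly 165 seconds. */
	printf("max delta: %llu ns\n",
	       (unsigned long long)delta2ns(0x7fffffff, &evt, true));
	return 0;
}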
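
The clocksource hunk factors the maximum-deferment calculation out into clocks_calc_max_nsecs() so the sched_clock rewrite can reuse it for its wrap interval. The following userspace sketch reproduces that math under assumed parameters (a 32-bit counter at 24MHz); ilog2() and clocksource_cyc2ns() are emulated with a compiler builtin and a plain shift, so it is an illustration, not the kernel code.

/*
 * Userspace sketch of the max-deferment math factored out into
 * clocks_calc_max_nsecs().  ilog2() is emulated with __builtin_clzll(),
 * clocksource_cyc2ns() with (cycles * mult) >> shift, and the counter
 * parameters are illustrative assumptions.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static inline unsigned int ilog2_u64(uint64_t v)
{
	return 63 - __builtin_clzll(v);		/* v must be non-zero */
}

static inline uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}

static uint64_t calc_max_nsecs(uint32_t mult, uint32_t shift, uint32_t maxadj,
			       uint64_t mask)
{
	uint64_t max_cycles;

	/*
	 * Largest cycle count whose cyc2ns() result still fits in a signed
	 * 64-bit value: 2^(63 - log2(mult + maxadj)), with one extra bit
	 * dropped to cover log2 rounding.
	 */
	max_cycles = 1ULL << (63 - (ilog2_u64((uint64_t)mult + maxadj) + 1));

	/* Cannot defer past the point where the counter itself wraps. */
	if (max_cycles > mask)
		max_cycles = mask;

	/* Use mult - maxadj so a large negative adjustment cannot overrun. */
	return cyc2ns(max_cycles, mult - maxadj, shift);
}

int main(void)
{
	/* Hypothetical 32-bit counter at 24MHz: ns = cyc * 1e9 / 24e6 */
	uint32_t mult = 2796202667u, shift = 26;
	uint32_t maxadj = (uint64_t)mult * 11 / 100;	/* ~11%, as in the kernel */
	uint64_t mask = 0xffffffffULL;
	uint64_t max_ns = calc_max_nsecs(mult, shift, maxadj, mask);

	/* clocksource_max_deferment() additionally keeps a 12.5% margin. */
	printf("max deferment: %" PRIu64 " ns (%" PRIu64 " ns with margin)\n",
	       max_ns, max_ns - (max_ns >> 3));
	return 0;
}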
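
The sched_clock.c rewrite drops the epoch_cyc/epoch_cyc_copy double-write in favour of a seqcount around the epoch. A minimal single-threaded analogue of that reader/writer protocol, with C11 atomics standing in for the kernel's seqcount_t and illustrative field values, might look like this:

/*
 * Minimal analogue of the seqcount protocol sched_clock.c switches to:
 * the writer bumps the sequence to an odd value, updates epoch_cyc and
 * epoch_ns, then bumps it to an even value; readers retry if they see an
 * odd value or a change.  This keeps only the shape of the protocol - in
 * real concurrent code the data fields would also need fences or atomics.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct clock_data {
	atomic_uint seq;
	uint64_t epoch_cyc;
	uint64_t epoch_ns;
	uint32_t mult, shift;
};

static struct clock_data cd = { .mult = 1, .shift = 0 };
static uint64_t counter;	/* stands in for read_sched_clock() */

static inline uint64_t cyc_to_ns(uint64_t cyc, uint32_t mult, uint32_t shift)
{
	return (cyc * mult) >> shift;
}

static void update_epoch(void)
{
	uint64_t cyc = counter;
	uint64_t ns = cd.epoch_ns +
		      cyc_to_ns(cyc - cd.epoch_cyc, cd.mult, cd.shift);

	atomic_fetch_add_explicit(&cd.seq, 1, memory_order_release);	/* odd: update in flight */
	cd.epoch_ns = ns;
	cd.epoch_cyc = cyc;
	atomic_fetch_add_explicit(&cd.seq, 1, memory_order_release);	/* even: stable again */
}

static uint64_t sched_clock_ns(void)
{
	uint64_t epoch_cyc, epoch_ns;
	unsigned int seq;

	do {
		seq = atomic_load_explicit(&cd.seq, memory_order_acquire);
		epoch_cyc = cd.epoch_cyc;
		epoch_ns = cd.epoch_ns;
	} while ((seq & 1) ||
		 seq != atomic_load_explicit(&cd.seq, memory_order_acquire));

	return epoch_ns + cyc_to_ns(counter - epoch_cyc, cd.mult, cd.shift);
}

int main(void)
{
	counter = 1000;
	update_epoch();
	counter = 2500;
	printf("%llu ns since boot\n", (unsigned long long)sched_clock_ns());
	return 0;
}

Compared with the old epoch_cyc_copy trick, the sequence counter extends naturally from the 32-bit to the 64-bit epoch the patch introduces and gives readers a single, well-understood retry condition.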
