Diffstat (limited to 'kernel/time/clocksource.c')

 -rw-r--r--  kernel/time/clocksource.c | 510
 1 file changed, 352 insertions, 158 deletions
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c18d7efa1b4..ba3e502c955 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -23,14 +23,16 @@
  *   o Allow clocksource drivers to be unregistered
  */
 
+#include <linux/device.h>
 #include <linux/clocksource.h>
-#include <linux/sysdev.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
 #include <linux/tick.h>
 #include <linux/kthread.h>
 
+#include "tick-internal.h"
+
 void timecounter_init(struct timecounter *tc,
 		      const struct cyclecounter *cc,
 		      u64 start_tstamp)
@@ -113,7 +115,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time);
  * @shift:	pointer to shift variable
  * @from:	frequency to convert from
  * @to:		frequency to convert to
- * @minsec:	guaranteed runtime conversion range in seconds
+ * @maxsec:	guaranteed runtime conversion range in seconds
  *
  * The function evaluates the shift/mult pair for the scaled math
  * operations of clocksources and clockevents.
@@ -122,7 +124,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time);
  * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
  * event @to is the counter frequency and @from is NSEC_PER_SEC.
  *
- * The @minsec conversion range argument controls the time frame in
+ * The @maxsec conversion range argument controls the time frame in
  * seconds which must be covered by the runtime conversion with the
  * calculated mult and shift factors. This guarantees that no 64bit
  * overflow happens when the input value of the conversion is
@@ -131,7 +133,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time);
  * factors.
  */
 void
-clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
+clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
 {
 	u64 tmp;
 	u32 sft, sftacc= 32;
@@ -140,7 +142,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
 	 * Calculate the shift factor which is limiting the conversion
 	 * range:
 	 */
-	tmp = ((u64)minsec * from) >> 32;
+	tmp = ((u64)maxsec * from) >> 32;
 	while (tmp) {
 		tmp >>=1;
 		sftacc--;
@@ -152,6 +154,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
 	 */
 	for (sft = 32; sft > 0; sft--) {
 		tmp = (u64) to << sft;
+		tmp += from / 2;
 		do_div(tmp, from);
 		if ((tmp >> sftacc) == 0)
 			break;
@@ -173,19 +176,20 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
 static struct clocksource *curr_clocksource;
 static LIST_HEAD(clocksource_list);
 static DEFINE_MUTEX(clocksource_mutex);
-static char override_name[32];
+static char override_name[CS_NAME_LEN];
 static int finished_booting;
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 static void clocksource_watchdog_work(struct work_struct *work);
+static void clocksource_select(void);
 
 static LIST_HEAD(watchdog_list);
 static struct clocksource *watchdog;
 static struct timer_list watchdog_timer;
 static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
 static DEFINE_SPINLOCK(watchdog_lock);
-static cycle_t watchdog_last;
 static int watchdog_running;
+static atomic_t watchdog_reset_pending;
 
 static int clocksource_watchdog_kthread(void *data);
 static void __clocksource_change_rating(struct clocksource *cs, int rating);
@@ -247,16 +251,13 @@ static void clocksource_watchdog(unsigned long data)
 	struct clocksource *cs;
 	cycle_t csnow, wdnow;
 	int64_t wd_nsec, cs_nsec;
-	int next_cpu;
+	int next_cpu, reset_pending;
 
 	spin_lock(&watchdog_lock);
 	if (!watchdog_running)
 		goto out;
 
-	wdnow = watchdog->read(watchdog);
-	wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask,
-				     watchdog->mult, watchdog->shift);
-	watchdog_last = wdnow;
+	reset_pending = atomic_read(&watchdog_reset_pending);
 
 	list_for_each_entry(cs, &watchdog_list, wd_list) {
@@ -267,20 +268,33 @@ static void clocksource_watchdog(unsigned long data)
 			continue;
 		}
 
+		local_irq_disable();
 		csnow = cs->read(cs);
+		wdnow = watchdog->read(watchdog);
+		local_irq_enable();
 
 		/* Clocksource initialized ? */
-		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) {
+		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
+		    atomic_read(&watchdog_reset_pending)) {
 			cs->flags |= CLOCK_SOURCE_WATCHDOG;
-			cs->wd_last = csnow;
+			cs->wd_last = wdnow;
+			cs->cs_last = csnow;
 			continue;
 		}
 
-		/* Check the deviation from the watchdog clocksource. */
-		cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) &
+		wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask,
+					     watchdog->mult, watchdog->shift);
+
+		cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) &
 					     cs->mask, cs->mult, cs->shift);
-		cs->wd_last = csnow;
-		if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
+		cs->cs_last = csnow;
+		cs->wd_last = wdnow;
+
+		if (atomic_read(&watchdog_reset_pending))
+			continue;
+
+		/* Check the deviation from the watchdog clocksource. */
+		if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
 			clocksource_unstable(cs, cs_nsec - wd_nsec);
 			continue;
 		}
@@ -288,17 +302,41 @@ static void clocksource_watchdog(unsigned long data)
 		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
 		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
 		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
+			/* Mark it valid for high-res. */
 			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
+
+			/*
+			 * clocksource_done_booting() will sort it if
+			 * finished_booting is not set yet.
+			 */
+			if (!finished_booting)
+				continue;
+
 			/*
-			 * We just marked the clocksource as highres-capable,
-			 * notify the rest of the system as well so that we
-			 * transition into high-res mode:
+			 * If this is not the current clocksource let
+			 * the watchdog thread reselect it. Due to the
+			 * change to high res this clocksource might
+			 * be preferred now. If it is the current
+			 * clocksource let the tick code know about
+			 * that change.
 			 */
-			tick_clock_notify();
+			if (cs != curr_clocksource) {
+				cs->flags |= CLOCK_SOURCE_RESELECT;
+				schedule_work(&watchdog_work);
+			} else {
+				tick_clock_notify();
+			}
 		}
 	}
 
 	/*
+	 * We only clear the watchdog_reset_pending, when we did a
+	 * full cycle through all clocksources.
+	 */
+	if (reset_pending)
+		atomic_dec(&watchdog_reset_pending);
+
+	/*
 	 * Cycle through CPUs to check if the CPUs stay synchronized
 	 * to each other.
 	 */
@@ -317,7 +355,6 @@ static inline void clocksource_start_watchdog(void)
 		return;
 	init_timer(&watchdog_timer);
 	watchdog_timer.function = clocksource_watchdog;
-	watchdog_last = watchdog->read(watchdog);
 	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
 	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
 	watchdog_running = 1;
@@ -341,23 +378,7 @@ static inline void clocksource_reset_watchdog(void)
 
 static void clocksource_resume_watchdog(void)
 {
-	unsigned long flags;
-
-	/*
-	 * We use trylock here to avoid a potential dead lock when
-	 * kgdb calls this code after the kernel has been stopped with
-	 * watchdog_lock held. When watchdog_lock is held we just
-	 * return and accept, that the watchdog might trigger and mark
-	 * the monitored clock source (usually TSC) unstable.
-	 *
-	 * This does not affect the other caller clocksource_resume()
-	 * because at this point the kernel is UP, interrupts are
-	 * disabled and nothing can hold watchdog_lock.
-	 */
-	if (!spin_trylock_irqsave(&watchdog_lock, flags))
-		return;
-	clocksource_reset_watchdog();
-	spin_unlock_irqrestore(&watchdog_lock, flags);
+	atomic_inc(&watchdog_reset_pending);
 }
 
 static void clocksource_enqueue_watchdog(struct clocksource *cs)
@@ -387,44 +408,39 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
 
 static void clocksource_dequeue_watchdog(struct clocksource *cs)
 {
-	struct clocksource *tmp;
 	unsigned long flags;
 
 	spin_lock_irqsave(&watchdog_lock, flags);
-	if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
-		/* cs is a watched clocksource. */
-		list_del_init(&cs->wd_list);
-	} else if (cs == watchdog) {
-		/* Reset watchdog cycles */
-		clocksource_reset_watchdog();
-		/* Current watchdog is removed. Find an alternative. */
-		watchdog = NULL;
-		list_for_each_entry(tmp, &clocksource_list, list) {
-			if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY)
-				continue;
-			if (!watchdog || tmp->rating > watchdog->rating)
-				watchdog = tmp;
+	if (cs != watchdog) {
+		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
+			/* cs is a watched clocksource. */
+			list_del_init(&cs->wd_list);
+			/* Check if the watchdog timer needs to be stopped. */
+			clocksource_stop_watchdog();
 		}
 	}
-	cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
-	/* Check if the watchdog timer needs to be stopped. */
-	clocksource_stop_watchdog();
 	spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
-static int clocksource_watchdog_kthread(void *data)
+static int __clocksource_watchdog_kthread(void)
 {
 	struct clocksource *cs, *tmp;
 	unsigned long flags;
 	LIST_HEAD(unstable);
+	int select = 0;
 
-	mutex_lock(&clocksource_mutex);
 	spin_lock_irqsave(&watchdog_lock, flags);
-	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list)
+	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
 		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
 			list_del_init(&cs->wd_list);
 			list_add(&cs->wd_list, &unstable);
+			select = 1;
 		}
+		if (cs->flags & CLOCK_SOURCE_RESELECT) {
+			cs->flags &= ~CLOCK_SOURCE_RESELECT;
+			select = 1;
+		}
+	}
 	/* Check if the watchdog timer needs to be stopped. */
 	clocksource_stop_watchdog();
 	spin_unlock_irqrestore(&watchdog_lock, flags);
@@ -434,10 +450,23 @@ static int clocksource_watchdog_kthread(void *data)
 		list_del_init(&cs->wd_list);
 		__clocksource_change_rating(cs, 0);
 	}
+	return select;
+}
+
+static int clocksource_watchdog_kthread(void *data)
+{
+	mutex_lock(&clocksource_mutex);
+	if (__clocksource_watchdog_kthread())
+		clocksource_select();
 	mutex_unlock(&clocksource_mutex);
 	return 0;
 }
 
+static bool clocksource_is_watchdog(struct clocksource *cs)
+{
+	return cs == watchdog;
+}
+
 #else /* CONFIG_CLOCKSOURCE_WATCHDOG */
 
 static void clocksource_enqueue_watchdog(struct clocksource *cs)
@@ -448,7 +477,9 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
 
 static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
 static inline void clocksource_resume_watchdog(void) { }
-static inline int clocksource_watchdog_kthread(void *data) { return 0; }
+static inline int __clocksource_watchdog_kthread(void) { return 0; }
+static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
+void clocksource_mark_unstable(struct clocksource *cs) { }
 
 #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
 
@@ -491,66 +522,118 @@ void clocksource_touch_watchdog(void)
 }
 
 /**
- * clocksource_max_deferment - Returns max time the clocksource can be deferred
+ * clocksource_max_adjustment- Returns max adjustment amount
  * @cs:         Pointer to clocksource
  *
  */
-static u64 clocksource_max_deferment(struct clocksource *cs)
+static u32 clocksource_max_adjustment(struct clocksource *cs)
+{
+	u64 ret;
+	/*
+	 * We won't try to correct for more than 11% adjustments (110,000 ppm),
+	 */
+	ret = (u64)cs->mult * 11;
+	do_div(ret,100);
+	return (u32)ret;
+}
+
+/**
+ * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
+ * @mult:	cycle to nanosecond multiplier
+ * @shift:	cycle to nanosecond divisor (power of two)
+ * @maxadj:	maximum adjustment value to mult (~11%)
+ * @mask:	bitmask for two's complement subtraction of non 64 bit counters
+ */
+u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
 {
 	u64 max_nsecs, max_cycles;
 
 	/*
 	 * Calculate the maximum number of cycles that we can pass to the
 	 * cyc2ns function without overflowing a 64-bit signed result. The
-	 * maximum number of cycles is equal to ULLONG_MAX/cs->mult which
-	 * is equivalent to the below.
-	 * max_cycles < (2^63)/cs->mult
-	 * max_cycles < 2^(log2((2^63)/cs->mult))
-	 * max_cycles < 2^(log2(2^63) - log2(cs->mult))
-	 * max_cycles < 2^(63 - log2(cs->mult))
-	 * max_cycles < 1 << (63 - log2(cs->mult))
+	 * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
+	 * which is equivalent to the below.
+	 * max_cycles < (2^63)/(mult + maxadj)
+	 * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
+	 * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
+	 * max_cycles < 2^(63 - log2(mult + maxadj))
+	 * max_cycles < 1 << (63 - log2(mult + maxadj))
 	 * Please note that we add 1 to the result of the log2 to account for
 	 * any rounding errors, ensure the above inequality is satisfied and
 	 * no overflow will occur.
 	 */
-	max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
+	max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
 
 	/*
 	 * The actual maximum number of cycles we can defer the clocksource is
-	 * determined by the minimum of max_cycles and cs->mask.
+	 * determined by the minimum of max_cycles and mask.
+	 * Note: Here we subtract the maxadj to make sure we don't sleep for
+	 * too long if there's a large negative adjustment.
 	 */
-	max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
-	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
+	max_cycles = min(max_cycles, mask);
+	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
+
+	return max_nsecs;
+}
+
+/**
+ * clocksource_max_deferment - Returns max time the clocksource can be deferred
+ * @cs:         Pointer to clocksource
+ *
+ */
+static u64 clocksource_max_deferment(struct clocksource *cs)
+{
+	u64 max_nsecs;
+
+	max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
+					  cs->mask);
 	/*
 	 * To ensure that the clocksource does not wrap whilst we are idle,
 	 * limit the time the clocksource can be deferred by 12.5%. Please
 	 * note a margin of 12.5% is used because this can be computed with
 	 * a shift, versus say 10% which would require division.
 	 */
-	return max_nsecs - (max_nsecs >> 5);
+	return max_nsecs - (max_nsecs >> 3);
 }
 
 #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
 
-/**
- * clocksource_select - Select the best clocksource available
- *
- * Private function. Must hold clocksource_mutex when called.
- *
- * Select the clocksource with the best rating, or the clocksource,
- * which is selected by userspace override.
- */
-static void clocksource_select(void)
+static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
 {
-	struct clocksource *best, *cs;
+	struct clocksource *cs;
 
 	if (!finished_booting || list_empty(&clocksource_list))
+		return NULL;
+
+	/*
+	 * We pick the clocksource with the highest rating. If oneshot
+	 * mode is active, we pick the highres valid clocksource with
+	 * the best rating.
+	 */
+	list_for_each_entry(cs, &clocksource_list, list) {
+		if (skipcur && cs == curr_clocksource)
+			continue;
+		if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
+			continue;
+		return cs;
+	}
+	return NULL;
+}
+
+static void __clocksource_select(bool skipcur)
+{
+	bool oneshot = tick_oneshot_mode_active();
+	struct clocksource *best, *cs;
+
+	/* Find the best suitable clocksource */
+	best = clocksource_find_best(oneshot, skipcur);
+	if (!best)
 		return;
 
-	/* First clocksource on the list has the best rating. */
-	best = list_first_entry(&clocksource_list, struct clocksource, list);
+
 	/* Check for the override clocksource. */
 	list_for_each_entry(cs, &clocksource_list, list) {
+		if (skipcur && cs == curr_clocksource)
+			continue;
 		if (strcmp(cs->name, override_name) != 0)
 			continue;
 		/*
@@ -558,8 +641,7 @@ static void clocksource_select(void)
 		 * capable clocksource if the tick code is in oneshot
 		 * mode (highres or nohz)
 		 */
-		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
-		    tick_oneshot_mode_active()) {
+		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
 			/* Override clocksource cannot be used. */
 			printk(KERN_WARNING "Override clocksource %s is not "
 			       "HRT compatible. Cannot switch while in "
@@ -570,16 +652,35 @@ static void clocksource_select(void)
 			best = cs;
 		break;
 	}
-	if (curr_clocksource != best) {
-		printk(KERN_INFO "Switching to clocksource %s\n", best->name);
+
+	if (curr_clocksource != best && !timekeeping_notify(best)) {
+		pr_info("Switched to clocksource %s\n", best->name);
 		curr_clocksource = best;
-		timekeeping_notify(curr_clocksource);
 	}
 }
 
+/**
+ * clocksource_select - Select the best clocksource available
+ *
+ * Private function. Must hold clocksource_mutex when called.
+ *
+ * Select the clocksource with the best rating, or the clocksource,
+ * which is selected by userspace override.
+ */
+static void clocksource_select(void)
+{
+	return __clocksource_select(false);
+}
+
+static void clocksource_select_fallback(void)
+{
+	return __clocksource_select(true);
+}
+
 #else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
 
 static inline void clocksource_select(void) { }
+static inline void clocksource_select_fallback(void) { }
 
 #endif
 
@@ -594,16 +695,11 @@ static int __init clocksource_done_booting(void)
 {
 	mutex_lock(&clocksource_mutex);
 	curr_clocksource = clocksource_default_clock();
-	mutex_unlock(&clocksource_mutex);
-
 	finished_booting = 1;
-
 	/*
 	 * Run the watchdog first to eliminate unstable clock sources
 	 */
-	clocksource_watchdog_kthread(NULL);
-
-	mutex_lock(&clocksource_mutex);
+	__clocksource_watchdog_kthread();
 	clocksource_select();
 	mutex_unlock(&clocksource_mutex);
 	return 0;
@@ -625,22 +721,9 @@ static void clocksource_enqueue(struct clocksource *cs)
 	list_add(&cs->list, entry);
 }
 
-
-/*
- * Maximum time we expect to go between ticks. This includes idle
- * tickless time. It provides the trade off between selecting a
- * mult/shift pair that is very precise but can only handle a short
- * period of time, vs. a mult/shift pair that can handle long periods
- * of time but isn't as precise.
- *
- * This is a subsystem constant, and actual hardware limitations
- * may override it (ie: clocksources that wrap every 3 seconds).
- */
-#define MAX_UPDATE_LENGTH 5 /* Seconds */
-
 /**
  * __clocksource_updatefreq_scale - Used update clocksource with new freq
- * @t:		clocksource to be registered
+ * @cs:		clocksource to be registered
  * @scale:	Scale factor multiplied against freq to get clocksource hz
  * @freq:	clocksource frequency (cycles per second) divided by scale
  *
@@ -651,22 +734,48 @@ static void clocksource_enqueue(struct clocksource *cs)
  */
 void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
+	u64 sec;
+
 	/*
-	 * Ideally we want to use  some of the limits used in
-	 * clocksource_max_deferment, to provide a more informed
-	 * MAX_UPDATE_LENGTH. But for now this just gets the
-	 * register interface working properly.
+	 * Calc the maximum number of seconds which we can run before
+	 * wrapping around. For clocksources which have a mask > 32bit
+	 * we need to limit the max sleep time to have a good
+	 * conversion precision. 10 minutes is still a reasonable
+	 * amount. That results in a shift value of 24 for a
+	 * clocksource with mask >= 40bit and f >= 4GHz. That maps to
+	 * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
+	 * margin as we do in clocksource_max_deferment()
 	 */
+	sec = (cs->mask - (cs->mask >> 3));
+	do_div(sec, freq);
+	do_div(sec, scale);
+	if (!sec)
+		sec = 1;
+	else if (sec > 600 && cs->mask > UINT_MAX)
+		sec = 600;
+
 	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
-				      NSEC_PER_SEC/scale,
-				      MAX_UPDATE_LENGTH*scale);
+			       NSEC_PER_SEC / scale, sec * scale);
+
+	/*
+	 * for clocksources that have large mults, to avoid overflow.
+	 * Since mult may be adjusted by ntp, add an safety extra margin
+	 *
+	 */
+	cs->maxadj = clocksource_max_adjustment(cs);
+	while ((cs->mult + cs->maxadj < cs->mult)
+		|| (cs->mult - cs->maxadj > cs->mult)) {
+		cs->mult >>= 1;
+		cs->shift--;
+		cs->maxadj = clocksource_max_adjustment(cs);
+	}
+
 	cs->max_idle_ns = clocksource_max_deferment(cs);
 }
 EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
 
 /**
  * __clocksource_register_scale - Used to install new clocksources
- * @t:		clocksource to be registered
+ * @cs:		clocksource to be registered
  * @scale:	Scale factor multiplied against freq to get clocksource hz
  * @freq:	clocksource frequency (cycles per second) divided by scale
  *
@@ -678,14 +787,14 @@ EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
 
 int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
-	/* Intialize mult/shift and max_idle_ns */
+	/* Initialize mult/shift and max_idle_ns */
 	__clocksource_updatefreq_scale(cs, scale, freq);
 
 	/* Add clocksource to the clcoksource list */
 	mutex_lock(&clocksource_mutex);
 	clocksource_enqueue(cs);
-	clocksource_select();
 	clocksource_enqueue_watchdog(cs);
+	clocksource_select();
 	mutex_unlock(&clocksource_mutex);
 	return 0;
 }
@@ -694,19 +803,25 @@ EXPORT_SYMBOL_GPL(__clocksource_register_scale);
 
 /**
  * clocksource_register - Used to install new clocksources
- * @t:		clocksource to be registered
+ * @cs:		clocksource to be registered
  *
  * Returns -EBUSY if registration fails, zero otherwise.
  */
 int clocksource_register(struct clocksource *cs)
 {
+	/* calculate max adjustment for given mult/shift */
+	cs->maxadj = clocksource_max_adjustment(cs);
+	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
+		"Clocksource %s might overflow on 11%% adjustment\n",
+		cs->name);
+
 	/* calculate max idle time permitted for this clocksource */
 	cs->max_idle_ns = clocksource_max_deferment(cs);
 
 	mutex_lock(&clocksource_mutex);
 	clocksource_enqueue(cs);
-	clocksource_select();
 	clocksource_enqueue_watchdog(cs);
+	clocksource_select();
 	mutex_unlock(&clocksource_mutex);
 	return 0;
 }
@@ -717,30 +832,58 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
 	list_del(&cs->list);
 	cs->rating = rating;
 	clocksource_enqueue(cs);
-	clocksource_select();
 }
 
 /**
  * clocksource_change_rating - Change the rating of a registered clocksource
+ * @cs:		clocksource to be changed
+ * @rating:	new rating
  */
 void clocksource_change_rating(struct clocksource *cs, int rating)
 {
 	mutex_lock(&clocksource_mutex);
 	__clocksource_change_rating(cs, rating);
+	clocksource_select();
 	mutex_unlock(&clocksource_mutex);
 }
 EXPORT_SYMBOL(clocksource_change_rating);
 
+/*
+ * Unbind clocksource @cs. Called with clocksource_mutex held
+ */
+static int clocksource_unbind(struct clocksource *cs)
+{
+	/*
+	 * I really can't convince myself to support this on hardware
+	 * designed by lobotomized monkeys.
+	 */
+	if (clocksource_is_watchdog(cs))
+		return -EBUSY;
+
+	if (cs == curr_clocksource) {
+		/* Select and try to install a replacement clock source */
+		clocksource_select_fallback();
+		if (curr_clocksource == cs)
+			return -EBUSY;
+	}
+	clocksource_dequeue_watchdog(cs);
+	list_del_init(&cs->list);
+	return 0;
+}
+
 /**
  * clocksource_unregister - remove a registered clocksource
+ * @cs:	clocksource to be unregistered
  */
-void clocksource_unregister(struct clocksource *cs)
+int clocksource_unregister(struct clocksource *cs)
 {
+	int ret = 0;
+
 	mutex_lock(&clocksource_mutex);
-	clocksource_dequeue_watchdog(cs);
-	list_del(&cs->list);
-	clocksource_select();
+	if (!list_empty(&cs->list))
+		ret = clocksource_unbind(cs);
 	mutex_unlock(&clocksource_mutex);
+	return ret;
 }
 EXPORT_SYMBOL(clocksource_unregister);
 
@@ -748,13 +891,14 @@ EXPORT_SYMBOL(clocksource_unregister);
 /**
  * sysfs_show_current_clocksources - sysfs interface for current clocksource
  * @dev:	unused
+ * @attr:	unused
  * @buf:	char buffer to be filled with clocksource list
  *
  * Provides sysfs interface for listing current clocksource.
  */
 static ssize_t
-sysfs_show_current_clocksources(struct sys_device *dev,
-				struct sysdev_attribute *attr, char *buf)
+sysfs_show_current_clocksources(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	ssize_t count = 0;
 
@@ -765,35 +909,44 @@ sysfs_show_current_clocksources(struct sys_device *dev,
 	return count;
 }
 
+ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
+{
+	size_t ret = cnt;
+
+	/* strings from sysfs write are not 0 terminated! */
+	if (!cnt || cnt >= CS_NAME_LEN)
+		return -EINVAL;
+
+	/* strip of \n: */
+	if (buf[cnt-1] == '\n')
+		cnt--;
+	if (cnt > 0)
+		memcpy(dst, buf, cnt);
+	dst[cnt] = 0;
+	return ret;
+}
+
 /**
  * sysfs_override_clocksource - interface for manually overriding clocksource
  * @dev:	unused
+ * @attr:	unused
  * @buf:	name of override clocksource
  * @count:	length of buffer
  *
  * Takes input from sysfs interface for manually overriding the default
  * clocksource selection.
  */
-static ssize_t sysfs_override_clocksource(struct sys_device *dev,
-					  struct sysdev_attribute *attr,
+static ssize_t sysfs_override_clocksource(struct device *dev,
+					  struct device_attribute *attr,
 					  const char *buf, size_t count)
 {
-	size_t ret = count;
-
-	/* strings from sysfs write are not 0 terminated! */
-	if (count >= sizeof(override_name))
-		return -EINVAL;
-
-	/* strip of \n: */
-	if (buf[count-1] == '\n')
-		count--;
+	ssize_t ret;
 
 	mutex_lock(&clocksource_mutex);
-	if (count > 0)
-		memcpy(override_name, buf, count);
-	override_name[count] = 0;
-	clocksource_select();
+	ret = sysfs_get_uname(buf, override_name, count);
+	if (ret >= 0)
+		clocksource_select();
 
 	mutex_unlock(&clocksource_mutex);
 
@@ -801,15 +954,50 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 }
 
 /**
+ * sysfs_unbind_current_clocksource - interface for manually unbinding clocksource
+ * @dev:	unused
+ * @attr:	unused
+ * @buf:	unused
+ * @count:	length of buffer
+ *
+ * Takes input from sysfs interface for manually unbinding a clocksource.
+ */
+static ssize_t sysfs_unbind_clocksource(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct clocksource *cs;
+	char name[CS_NAME_LEN];
+	ssize_t ret;
+
+	ret = sysfs_get_uname(buf, name, count);
+	if (ret < 0)
+		return ret;
+
+	ret = -ENODEV;
+	mutex_lock(&clocksource_mutex);
+	list_for_each_entry(cs, &clocksource_list, list) {
+		if (strcmp(cs->name, name))
+			continue;
+		ret = clocksource_unbind(cs);
+		break;
+	}
+	mutex_unlock(&clocksource_mutex);
+
+	return ret ? ret : count;
+}
+
+/**
  * sysfs_show_available_clocksources - sysfs interface for listing clocksource
  * @dev:	unused
+ * @attr:	unused
  * @buf:	char buffer to be filled with clocksource list
  *
  * Provides sysfs interface for listing registered clocksources
  */
 static ssize_t
-sysfs_show_available_clocksources(struct sys_device *dev,
-				  struct sysdev_attribute *attr,
+sysfs_show_available_clocksources(struct device *dev,
+				  struct device_attribute *attr,
 				  char *buf)
 {
 	struct clocksource *src;
@@ -838,35 +1026,41 @@ sysfs_show_available_clocksources(struct sys_device *dev,
 /*
  * Sysfs setup bits:
  */
-static SYSDEV_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
+static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
 		   sysfs_override_clocksource);
 
-static SYSDEV_ATTR(available_clocksource, 0444,
+static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource);
+
+static DEVICE_ATTR(available_clocksource, 0444,
 		   sysfs_show_available_clocksources, NULL);
 
-static struct sysdev_class clocksource_sysclass = {
+static struct bus_type clocksource_subsys = {
 	.name = "clocksource",
+	.dev_name = "clocksource",
 };
 
-static struct sys_device device_clocksource = {
+static struct device device_clocksource = {
 	.id	= 0,
-	.cls	= &clocksource_sysclass,
+	.bus	= &clocksource_subsys,
 };
 
 static int __init init_clocksource_sysfs(void)
 {
-	int error = sysdev_class_register(&clocksource_sysclass);
+	int error = subsys_system_register(&clocksource_subsys, NULL);
 
 	if (!error)
-		error = sysdev_register(&device_clocksource);
+		error = device_register(&device_clocksource);
 	if (!error)
-		error = sysdev_create_file(
+		error = device_create_file(
 				&device_clocksource,
-				&attr_current_clocksource);
+				&dev_attr_current_clocksource);
+	if (!error)
+		error = device_create_file(&device_clocksource,
+					   &dev_attr_unbind_clocksource);
 	if (!error)
-		error = sysdev_create_file(
+		error = device_create_file(
 				&device_clocksource,
-				&attr_available_clocksource);
+				&dev_attr_available_clocksource);
 
 	return error;
 }
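
The rounding term the patch adds to clocks_calc_mult_shift() (the new "tmp += from / 2;") makes the computed mult round to nearest instead of truncating. The search itself is plain integer math and can be checked outside the kernel; below is a minimal userspace sketch, with do_div() replaced by ordinary 64-bit division and an invented 19.2 MHz counter frequency (not a value taken from the patch):

#include <stdint.h>
#include <stdio.h>

/* Userspace re-implementation of the kernel's clocks_calc_mult_shift(). */
static void calc_mult_shift(uint32_t *mult, uint32_t *shift,
			    uint32_t from, uint32_t to, uint32_t maxsec)
{
	uint64_t tmp;
	uint32_t sft, sftacc = 32;

	/* Reduce the available accuracy until maxsec * from fits in 64 bit. */
	tmp = ((uint64_t)maxsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}

	/* Largest shift whose (rounded) mult still fits in sftacc bits. */
	for (sft = 32; sft > 0; sft--) {
		tmp = (uint64_t)to << sft;
		tmp += from / 2;	/* round-closest, as the patch adds */
		tmp /= from;
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = (uint32_t)tmp;
	*shift = sft;
}

int main(void)
{
	uint32_t mult, shift;

	/* hypothetical 19.2 MHz counter, converted to nanoseconds */
	calc_mult_shift(&mult, &shift, 19200000, 1000000000, 600);
	/* one second's worth of cycles should convert to ~1e9 ns */
	printf("mult=%u shift=%u  1s -> %llu ns\n", mult, shift,
	       (unsigned long long)(((uint64_t)19200000 * mult) >> shift));
	return 0;
}

For these inputs the search settles on shift = 24, and one second's worth of cycles converts back to within a nanosecond of NSEC_PER_SEC.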
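The reworked watchdog reads both counters back to back with interrupts disabled and keeps a per-clocksource pair (cs_last, wd_last), so every watched clocksource is compared over its own interval. The "(now - last) & mask" idiom is what makes those deltas wrap-safe for counters narrower than 64 bit. A small standalone illustration with made-up values:

#include <stdint.h>
#include <stdio.h>

/* Same scaling as the kernel's clocksource_cyc2ns(): ns = cyc * mult >> shift */
static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}

int main(void)
{
	uint64_t mask = 0xffffffffULL;		/* 32-bit counter */
	uint64_t last = 0xfffffff0, now = 0x10;	/* counter wrapped in between */
	uint64_t delta = (now - last) & mask;	/* 0x20, not a huge bogus value */

	/* hypothetical mult/shift pair for a 1 GHz counter: 1 cycle = 1 ns */
	printf("delta=%llu cycles -> %llu ns\n",
	       (unsigned long long)delta,
	       (unsigned long long)cyc2ns(delta, 1u << 24, 24));
	return 0;
}

The watchdog then declares the clocksource unstable only if the two deltas disagree by more than WATCHDOG_THRESHOLD, and the new watchdog_reset_pending counter makes a resume simply re-initialize the last values for one full cycle instead of taking the old trylock path.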
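clocksource_max_adjustment() caps NTP-style frequency corrections at ~11% of mult, and clocks_calc_max_nsecs() then bounds the deferment against the worst case: mult + maxadj for the overflow limit, mult - maxadj so a large negative adjustment cannot make an idle CPU sleep past the counter wrap. A userspace sketch of that arithmetic; the mult/shift pair is invented (roughly a 2.4 GHz counter) and ilog2() is open-coded:

#include <stdint.h>
#include <stdio.h>

/* Open-coded stand-in for the kernel's ilog2() */
static int ilog2_64(uint64_t v)
{
	int l = -1;
	while (v) { v >>= 1; l++; }
	return l;
}

/* ~11% of mult, as in clocksource_max_adjustment() */
static uint32_t max_adjustment(uint32_t mult)
{
	return (uint32_t)(((uint64_t)mult * 11) / 100);
}

/* Sketch of clocks_calc_max_nsecs() */
static uint64_t calc_max_nsecs(uint32_t mult, uint32_t shift,
			       uint32_t maxadj, uint64_t mask)
{
	/* largest cycle count that cannot overflow 2^63 even at mult+maxadj */
	uint64_t max_cycles = 1ULL << (63 - (ilog2_64(mult + maxadj) + 1));

	if (max_cycles > mask)
		max_cycles = mask;
	/* scale with mult-maxadj so a negative adjustment can't wrap us */
	return (max_cycles * (mult - maxadj)) >> shift;
}

int main(void)
{
	uint32_t mult = 27962026, shift = 26;	/* invented: ~2.4 GHz counter */
	uint32_t maxadj = max_adjustment(mult);
	uint64_t max_nsecs = calc_max_nsecs(mult, shift, maxadj, ~0ULL);

	/* clocksource_max_deferment() keeps a 12.5% margin on top of this */
	printf("maxadj=%u max_idle_ns=%llu\n", maxadj,
	       (unsigned long long)(max_nsecs - (max_nsecs >> 3)));
	return 0;
}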
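__clocksource_updatefreq_scale() now derives the conversion range from the hardware instead of the old fixed MAX_UPDATE_LENGTH: run as many seconds as the mask allows (with the same 12.5% margin), clamped to 10 minutes for counters wider than 32 bit to keep conversion precision. A sketch of just that range computation, with an invented 56-bit, 24 MHz counter:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mask = (1ULL << 56) - 1;	/* hypothetical 56-bit counter */
	uint32_t freq = 24000000, scale = 1;	/* hypothetical 24 MHz, plain Hz */
	uint64_t sec = mask - (mask >> 3);	/* keep the 12.5% margin */

	sec /= freq;
	sec /= scale;
	if (!sec)
		sec = 1;			/* fast, narrow counters */
	else if (sec > 600 && mask > UINT32_MAX)
		sec = 600;			/* precision cap: 10 minutes */

	printf("conversion range: %llu s\n", (unsigned long long)sec);
	return 0;
}

After clocks_calc_mult_shift() runs with this range, the patch additionally halves mult (and decrements shift) until mult plus or minus maxadj can no longer wrap a u32, which is the same condition clocksource_register() now warns about with WARN_ONCE().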
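clocksource_unbind() only succeeds if a replacement can actually be installed: __clocksource_select(true) walks the rating-sorted list, skips the clocksource being unbound, and in oneshot mode also skips anything not valid for highres. A toy userspace model of that selection; the names, ratings, and flags are invented for illustration:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct cs {
	const char *name;
	int rating;
	bool hres;	/* stands in for CLOCK_SOURCE_VALID_FOR_HRES */
};

/* kept sorted by descending rating, like clocksource_enqueue() does */
static const struct cs list[] = {
	{ "tsc",     300, true  },
	{ "hpet",    250, true  },
	{ "acpi_pm", 200, false },
};

static const struct cs *find_best(bool oneshot, const struct cs *skip)
{
	for (size_t i = 0; i < sizeof(list) / sizeof(list[0]); i++) {
		if (&list[i] == skip)
			continue;		/* skipcur: ignore the one going away */
		if (oneshot && !list[i].hres)
			continue;		/* oneshot mode needs a highres source */
		return &list[i];
	}
	return NULL;
}

int main(void)
{
	/* unbinding "tsc" while in oneshot mode falls back to "hpet" */
	const struct cs *best = find_best(true, &list[0]);

	printf("fallback: %s\n", best ? best->name : "(none)");
	return 0;
}

If no candidate remains, curr_clocksource stays put and clocksource_unbind() returns -EBUSY; it also refuses outright when the clocksource is acting as the watchdog.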
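sysfs_get_uname() centralizes the parsing that sysfs_override_clocksource() used to do inline: sysfs write buffers are not NUL-terminated and typically carry the trailing newline that echo appends. A userspace replica of the helper (CS_NAME_LEN and the -1 error return are stand-ins for the kernel's constant and -EINVAL):

#include <stdio.h>
#include <string.h>
#include <sys/types.h>

#define CS_NAME_LEN 32	/* assumed size of the kernel's name buffers */

static ssize_t get_uname(const char *buf, char *dst, size_t cnt)
{
	ssize_t ret = cnt;

	if (!cnt || cnt >= CS_NAME_LEN)
		return -1;		/* kernel returns -EINVAL here */
	if (buf[cnt - 1] == '\n')	/* strip the echo-appended newline */
		cnt--;
	if (cnt > 0)
		memcpy(dst, buf, cnt);
	dst[cnt] = 0;			/* terminate; source buffer is not */
	return ret;
}

int main(void)
{
	char name[CS_NAME_LEN];
	const char raw[] = { 'h', 'p', 'e', 't', '\n' };	/* no NUL! */

	if (get_uname(raw, name, sizeof(raw)) >= 0)
		printf("parsed clocksource name: '%s'\n", name);
	return 0;
}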
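With the new write-only attribute, unbinding is driven entirely from userspace through /sys/devices/system/clocksource/clocksource0/unbind_clocksource; that path follows from subsys_system_register() with the "clocksource" name and device id 0. A hypothetical caller, assuming a kernel carrying this patch and a registered clocksource named "acpi_pm":

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/devices/system/clocksource/clocksource0/unbind_clocksource";
	const char *name = "acpi_pm\n";	/* example name; newline is stripped */
	int fd = open(path, O_WRONLY);	/* needs root */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* per the patch: -ENODEV if no such clocksource is registered,
	 * -EBUSY if it is the watchdog or no replacement could be installed */
	if (write(fd, name, strlen(name)) < 0)
		perror("write");
	close(fd);
	return 0;
}

The companion attributes work the same way: reading current_clocksource shows the active source, writing it sets the override, and available_clocksource lists everything registered.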
