Diffstat (limited to 'arch/powerpc/kernel/time.c')
| -rw-r--r-- | arch/powerpc/kernel/time.c | 1326 |
1 file changed, 701 insertions(+), 625 deletions(-)
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b1c89bc4bf9..9fff9cdcc51 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -17,8 +17,7 @@
  *
  * TODO (not necessarily in this file):
  * - improve precision and reproducibility of timebase frequency
- *    measurement at boot time. (for iSeries, we calibrate the timebase
- *    against the Titan chip's clock.)
+ *    measurement at boot time.
  * - for astronomical applications: add a new function to get
  *    non ambiguous timestamps even around leap seconds. This needs
  *    a new timestamp format and a good name.
@@ -32,9 +31,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h>
 #include <linux/errno.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/param.h>
@@ -44,12 +42,19 @@
 #include <linux/timex.h>
 #include <linux/kernel_stat.h>
 #include <linux/time.h>
+#include <linux/clockchips.h>
 #include <linux/init.h>
 #include <linux/profile.h>
 #include <linux/cpu.h>
 #include <linux/security.h>
 #include <linux/percpu.h>
 #include <linux/rtc.h>
+#include <linux/jiffies.h>
+#include <linux/posix-timers.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/irq_work.h>
+#include <asm/trace.h>
 
 #include <asm/io.h>
 #include <asm/processor.h>
@@ -61,27 +66,53 @@
 #include <asm/prom.h>
 #include <asm/irq.h>
 #include <asm/div64.h>
-#ifdef CONFIG_PPC64
-#include <asm/systemcfg.h>
-#include <asm/firmware.h>
-#endif
-#ifdef CONFIG_PPC_ISERIES
-#include <asm/iseries/it_lp_queue.h>
-#include <asm/iseries/hv_call_xm.h>
-#endif
 #include <asm/smp.h>
+#include <asm/vdso_datapage.h>
+#include <asm/firmware.h>
+#include <asm/cputime.h>
 
-/* keep track of when we need to update the rtc */
-time_t last_rtc_update;
-extern int piranha_simulator;
-#ifdef CONFIG_PPC_ISERIES
-unsigned long iSeries_recal_titan = 0;
-unsigned long iSeries_recal_tb = 0;
-static unsigned long first_settimeofday = 1;
-#endif
+/* powerpc clocksource/clockevent code */
+
+#include <linux/clockchips.h>
+#include <linux/timekeeper_internal.h>
+
+static cycle_t rtc_read(struct clocksource *);
+static struct clocksource clocksource_rtc = {
+	.name         = "rtc",
+	.rating       = 400,
+	.flags        = CLOCK_SOURCE_IS_CONTINUOUS,
+	.mask         = CLOCKSOURCE_MASK(64),
+	.read         = rtc_read,
+};
+
+static cycle_t timebase_read(struct clocksource *);
+static struct clocksource clocksource_timebase = {
+	.name         = "timebase",
+	.rating       = 400,
+	.flags        = CLOCK_SOURCE_IS_CONTINUOUS,
+	.mask         = CLOCKSOURCE_MASK(64),
+	.read         = timebase_read,
+};
 
-/* The decrementer counts down by 128 every 128ns on a 601. */
-#define DECREMENTER_COUNT_601	(1000000000 / HZ)
+#define DECREMENTER_MAX	0x7fffffff
+
+static int decrementer_set_next_event(unsigned long evt,
+				      struct clock_event_device *dev);
+static void decrementer_set_mode(enum clock_event_mode mode,
+				 struct clock_event_device *dev);
+
+struct clock_event_device decrementer_clockevent = {
+	.name           = "decrementer",
+	.rating         = 200,
+	.irq            = 0,
+	.set_next_event = decrementer_set_next_event,
+	.set_mode       = decrementer_set_mode,
+	.features       = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP,
+};
+EXPORT_SYMBOL(decrementer_clockevent);
+
+DEFINE_PER_CPU(u64, decrementers_next_tb);
+static DEFINE_PER_CPU(struct clock_event_device, decrementers);
 
 #define XSEC_PER_SEC (1024*1024)
 
@@ -96,218 +127,289 @@ unsigned long tb_ticks_per_jiffy;
 unsigned long tb_ticks_per_usec = 100; /* sane default */
 EXPORT_SYMBOL(tb_ticks_per_usec);
 unsigned long tb_ticks_per_sec;
-u64 tb_to_xs;
-unsigned tb_to_us;
-unsigned long processor_freq;
+EXPORT_SYMBOL(tb_ticks_per_sec);	/* for cputime_t conversions */
+
 DEFINE_SPINLOCK(rtc_lock);
 EXPORT_SYMBOL_GPL(rtc_lock);
 
-u64 tb_to_ns_scale;
-unsigned tb_to_ns_shift;
-
-struct gettimeofday_struct do_gtod;
-
-extern unsigned long wall_jiffies;
+static u64 tb_to_ns_scale __read_mostly;
+static unsigned tb_to_ns_shift __read_mostly;
+static u64 boot_tb __read_mostly;
 
 extern struct timezone sys_tz;
 static long timezone_offset;
 
-void ppc_adjtimex(void);
-
-static unsigned adjusting_time = 0;
-
 unsigned long ppc_proc_freq;
+EXPORT_SYMBOL_GPL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
+EXPORT_SYMBOL_GPL(ppc_tb_freq);
 
-#ifdef CONFIG_PPC32	/* XXX for now */
-#define boot_cpuid	0
-#endif
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+/*
+ * Factors for converting from cputime_t (timebase ticks) to
+ * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
+ * These are all stored as 0.64 fixed-point binary fractions.
+ */
+u64 __cputime_jiffies_factor;
+EXPORT_SYMBOL(__cputime_jiffies_factor);
+u64 __cputime_usec_factor;
+EXPORT_SYMBOL(__cputime_usec_factor);
+u64 __cputime_sec_factor;
+EXPORT_SYMBOL(__cputime_sec_factor);
+u64 __cputime_clockt_factor;
+EXPORT_SYMBOL(__cputime_clockt_factor);
+DEFINE_PER_CPU(unsigned long, cputime_last_delta);
+DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta);
+
+cputime_t cputime_one_jiffy;
+
+void (*dtl_consumer)(struct dtl_entry *, u64);
+
+static void calc_cputime_factors(void)
+{
+	struct div_result res;
 
-u64 tb_last_jiffy __cacheline_aligned_in_smp;
-unsigned long tb_last_stamp;
+	div128_by_32(HZ, 0, tb_ticks_per_sec, &res);
+	__cputime_jiffies_factor = res.result_low;
+	div128_by_32(1000000, 0, tb_ticks_per_sec, &res);
+	__cputime_usec_factor = res.result_low;
+	div128_by_32(1, 0, tb_ticks_per_sec, &res);
+	__cputime_sec_factor = res.result_low;
+	div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
+	__cputime_clockt_factor = res.result_low;
+}
 
 /*
- * Note that on ppc32 this only stores the bottom 32 bits of
- * the timebase value, but that's enough to tell when a jiffy
- * has passed.
+ * Read the SPURR on systems that have it, otherwise the PURR,
+ * or if that doesn't exist return the timebase value passed in.
  */
-DEFINE_PER_CPU(unsigned long, last_jiffy);
-
-static __inline__ void timer_check_rtc(void)
-{
-	/*
-	 * update the rtc when needed, this should be performed on the
-	 * right fraction of a second. Half or full second ?
-	 * Full second works on mk48t59 clocks, others need testing.
	 * Note that this update is basically only used through
-	 * the adjtimex system calls. Setting the HW clock in
-	 * any other way is a /dev/rtc and userland business.
-	 * This is still wrong by -0.5/+1.5 jiffies because of the
-	 * timer interrupt resolution and possible delay, but here we
-	 * hit a quantization limit which can only be solved by higher
-	 * resolution timers and decoupling time management from timer
-	 * interrupts. This is also wrong on the clocks
-	 * which require being written at the half second boundary.
-	 * We should have an rtc call that only sets the minutes and
-	 * seconds like on Intel to avoid problems with non UTC clocks.
-	 */
-	if (ppc_md.set_rtc_time && ntp_synced() &&
-	    xtime.tv_sec - last_rtc_update >= 659 &&
-	    abs((xtime.tv_nsec/1000) - (1000000-1000000/HZ)) < 500000/HZ &&
-	    jiffies - wall_jiffies == 1) {
-		struct rtc_time tm;
-		to_tm(xtime.tv_sec + 1 + timezone_offset, &tm);
-		tm.tm_year -= 1900;
-		tm.tm_mon -= 1;
-		if (ppc_md.set_rtc_time(&tm) == 0)
-			last_rtc_update = xtime.tv_sec + 1;
-		else
-			/* Try again one minute later */
-			last_rtc_update += 60;
-	}
+static u64 read_spurr(u64 tb)
+{
+	if (cpu_has_feature(CPU_FTR_SPURR))
+		return mfspr(SPRN_SPURR);
+	if (cpu_has_feature(CPU_FTR_PURR))
+		return mfspr(SPRN_PURR);
+	return tb;
 }
 
+#ifdef CONFIG_PPC_SPLPAR
+
 /*
- * This version of gettimeofday has microsecond resolution.
+ * Scan the dispatch trace log and count up the stolen time.
+ * Should be called with interrupts disabled.
  */
-static inline void __do_gettimeofday(struct timeval *tv, u64 tb_val)
+static u64 scan_dispatch_log(u64 stop_tb)
 {
-	unsigned long sec, usec;
-	u64 tb_ticks, xsec;
-	struct gettimeofday_vars *temp_varp;
-	u64 temp_tb_to_xs, temp_stamp_xsec;
+	u64 i = local_paca->dtl_ridx;
+	struct dtl_entry *dtl = local_paca->dtl_curr;
+	struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
+	struct lppaca *vpa = local_paca->lppaca_ptr;
+	u64 tb_delta;
+	u64 stolen = 0;
+	u64 dtb;
+
+	if (!dtl)
+		return 0;
 
-	/*
-	 * These calculations are faster (gets rid of divides)
-	 * if done in units of 1/2^20 rather than microseconds.
-	 * The conversion to microseconds at the end is done
-	 * without a divide (and in fact, without a multiply)
+	if (i == be64_to_cpu(vpa->dtl_idx))
+		return 0;
+	while (i < be64_to_cpu(vpa->dtl_idx)) {
+		dtb = be64_to_cpu(dtl->timebase);
+		tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
+			be32_to_cpu(dtl->ready_to_enqueue_time);
+		barrier();
+		if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
+			/* buffer has overflowed */
+			i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
+			dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+			continue;
+		}
+		if (dtb > stop_tb)
+			break;
+		if (dtl_consumer)
+			dtl_consumer(dtl, i);
+		stolen += tb_delta;
+		++i;
+		++dtl;
+		if (dtl == dtl_end)
+			dtl = local_paca->dispatch_log;
+	}
+	local_paca->dtl_ridx = i;
+	local_paca->dtl_curr = dtl;
+	return stolen;
+}
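The loop above is a lock-free ring consumer: the hypervisor keeps appending dispatch entries and bumping vpa->dtl_idx, and when the reader falls more than one ring behind it jumps forward to the oldest entry still guaranteed valid. The same catch-up discipline can be sketched in isolation; the names and the 16-entry ring below are illustrative, not the kernel's (the kernel's ring holds N_DISPATCH_LOG entries of struct dtl_entry):

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 16                    /* stands in for N_DISPATCH_LOG */

static uint64_t ring[RING_SIZE];        /* stands in for the DTL entries */

static uint64_t scan(uint64_t *read_idx, uint64_t write_idx)
{
	uint64_t i = *read_idx, sum = 0;

	while (i < write_idx) {
		if (i + RING_SIZE < write_idx) {
			/* writer lapped us: resume at oldest valid entry */
			i = write_idx - RING_SIZE;
			continue;
		}
		sum += ring[i % RING_SIZE];
		++i;
	}
	*read_idx = i;
	return sum;
}

int main(void)
{
	uint64_t rd = 0, wr;

	for (wr = 0; wr < 40; ++wr)     /* writer overruns the ring twice */
		ring[wr % RING_SIZE] = 1;
	/* only the last 16 entries are still valid, so this prints 16 */
	printf("consumed %llu of 40 entries\n",
	       (unsigned long long)scan(&rd, wr));
	return 0;
}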
+
+/*
+ * Accumulate stolen time by scanning the dispatch trace log.
+ * Called on entry from user mode.
+ */
+void accumulate_stolen_time(void)
+{
+	u64 sst, ust;
+
+	u8 save_soft_enabled = local_paca->soft_enabled;
+
+	/* We are called early in the exception entry, before
+	 * soft/hard_enabled are sync'ed to the expected state
+	 * for the exception. We are hard disabled but the PACA
+	 * needs to reflect that so various debug stuff doesn't
+	 * complain
 	 */
-	temp_varp = do_gtod.varp;
-	tb_ticks = tb_val - temp_varp->tb_orig_stamp;
-	temp_tb_to_xs = temp_varp->tb_to_xs;
-	temp_stamp_xsec = temp_varp->stamp_xsec;
-	xsec = temp_stamp_xsec + mulhdu(tb_ticks, temp_tb_to_xs);
-	sec = xsec / XSEC_PER_SEC;
-	usec = (unsigned long)xsec & (XSEC_PER_SEC - 1);
-	usec = SCALE_XSEC(usec, 1000000);
+	local_paca->soft_enabled = 0;
 
-	tv->tv_sec = sec;
-	tv->tv_usec = usec;
+	sst = scan_dispatch_log(local_paca->starttime_user);
+	ust = scan_dispatch_log(local_paca->starttime);
+	local_paca->system_time -= sst;
+	local_paca->user_time -= ust;
+	local_paca->stolen_time += ust + sst;
+
+	local_paca->soft_enabled = save_soft_enabled;
 }
 
-void do_gettimeofday(struct timeval *tv)
+static inline u64 calculate_stolen_time(u64 stop_tb)
 {
-	if (__USE_RTC()) {
-		/* do this the old way */
-		unsigned long flags, seq;
-		unsigned int sec, nsec, usec, lost;
+	u64 stolen = 0;
 
-		do {
-			seq = read_seqbegin_irqsave(&xtime_lock, flags);
-			sec = xtime.tv_sec;
-			nsec = xtime.tv_nsec + tb_ticks_since(tb_last_stamp);
-			lost = jiffies - wall_jiffies;
-		} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-		usec = nsec / 1000 + lost * (1000000 / HZ);
-		while (usec >= 1000000) {
-			usec -= 1000000;
-			++sec;
-		}
-		tv->tv_sec = sec;
-		tv->tv_usec = usec;
-		return;
+	if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) {
+		stolen = scan_dispatch_log(stop_tb);
+		get_paca()->system_time -= stolen;
 	}
-	__do_gettimeofday(tv, get_tb());
+
+	stolen += get_paca()->stolen_time;
+	get_paca()->stolen_time = 0;
+	return stolen;
 }
-EXPORT_SYMBOL(do_gettimeofday);
 
+#else /* CONFIG_PPC_SPLPAR */
+static inline u64 calculate_stolen_time(u64 stop_tb)
+{
+	return 0;
+}
 
-/* Synchronize xtime with do_gettimeofday */
+#endif /* CONFIG_PPC_SPLPAR */
 
-static inline void timer_sync_xtime(unsigned long cur_tb)
+/*
+ * Account time for a transition between system, hard irq
+ * or soft irq state.
+ */
+static u64 vtime_delta(struct task_struct *tsk,
+			u64 *sys_scaled, u64 *stolen)
 {
-#ifdef CONFIG_PPC64
-	/* why do we do this? */
-	struct timeval my_tv;
+	u64 now, nowscaled, deltascaled;
+	u64 udelta, delta, user_scaled;
+
+	WARN_ON_ONCE(!irqs_disabled());
 
-	__do_gettimeofday(&my_tv, cur_tb);
+	now = mftb();
+	nowscaled = read_spurr(now);
+	get_paca()->system_time += now - get_paca()->starttime;
+	get_paca()->starttime = now;
+	deltascaled = nowscaled - get_paca()->startspurr;
+	get_paca()->startspurr = nowscaled;
 
-	if (xtime.tv_sec <= my_tv.tv_sec) {
-		xtime.tv_sec = my_tv.tv_sec;
-		xtime.tv_nsec = my_tv.tv_usec * 1000;
+	*stolen = calculate_stolen_time(now);
+
+	delta = get_paca()->system_time;
+	get_paca()->system_time = 0;
+	udelta = get_paca()->user_time - get_paca()->utime_sspurr;
+	get_paca()->utime_sspurr = get_paca()->user_time;
+
+	/*
+	 * Because we don't read the SPURR on every kernel entry/exit,
+	 * deltascaled includes both user and system SPURR ticks.
+	 * Apportion these ticks to system SPURR ticks and user
+	 * SPURR ticks in the same ratio as the system time (delta)
+	 * and user time (udelta) values obtained from the timebase
+	 * over the same interval. The system ticks get accounted here;
+	 * the user ticks get saved up in paca->user_time_scaled to be
+	 * used by account_process_tick.
+	 */
+	*sys_scaled = delta;
+	user_scaled = udelta;
+	if (deltascaled != delta + udelta) {
+		if (udelta) {
+			*sys_scaled = deltascaled * delta / (delta + udelta);
+			user_scaled = deltascaled - *sys_scaled;
+		} else {
+			*sys_scaled = deltascaled;
+		}
 	}
-#endif
+	get_paca()->user_time_scaled += user_scaled;
+
+	return delta;
 }
 
-/*
- * There are two copies of tb_to_xs and stamp_xsec so that no
- * lock is needed to access and use these values in
- * do_gettimeofday.  We alternate the copies and as long as a
- * reasonable time elapses between changes, there will never
- * be inconsistent values.  ntpd has a minimum of one minute
- * between updates.
- */
-static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
-			       u64 new_tb_to_xs)
+void vtime_account_system(struct task_struct *tsk)
 {
-	unsigned temp_idx;
-	struct gettimeofday_vars *temp_varp;
+	u64 delta, sys_scaled, stolen;
 
-	temp_idx = (do_gtod.var_idx == 0);
-	temp_varp = &do_gtod.vars[temp_idx];
+	delta = vtime_delta(tsk, &sys_scaled, &stolen);
+	account_system_time(tsk, 0, delta, sys_scaled);
+	if (stolen)
+		account_steal_time(stolen);
+}
+EXPORT_SYMBOL_GPL(vtime_account_system);
 
-	temp_varp->tb_to_xs = new_tb_to_xs;
-	temp_varp->tb_orig_stamp = new_tb_stamp;
-	temp_varp->stamp_xsec = new_stamp_xsec;
-	smp_mb();
-	do_gtod.varp = temp_varp;
-	do_gtod.var_idx = temp_idx;
+void vtime_account_idle(struct task_struct *tsk)
+{
+	u64 delta, sys_scaled, stolen;
 
-#ifdef CONFIG_PPC64
-	/*
-	 * tb_update_count is used to allow the userspace gettimeofday code
-	 * to assure itself that it sees a consistent view of the tb_to_xs and
-	 * stamp_xsec variables.  It reads the tb_update_count, then reads
-	 * tb_to_xs and stamp_xsec and then reads tb_update_count again.  If
-	 * the two values of tb_update_count match and are even then the
-	 * tb_to_xs and stamp_xsec values are consistent.  If not, then it
-	 * loops back and reads them again until this criteria is met.
-	 */
-	++(systemcfg->tb_update_count);
-	smp_wmb();
-	systemcfg->tb_orig_stamp = new_tb_stamp;
-	systemcfg->stamp_xsec = new_stamp_xsec;
-	systemcfg->tb_to_xs = new_tb_to_xs;
-	smp_wmb();
-	++(systemcfg->tb_update_count);
-#endif
+	delta = vtime_delta(tsk, &sys_scaled, &stolen);
+	account_idle_time(delta + stolen);
 }
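The apportioning described in the comment inside vtime_delta() comes down to two integer expressions, and it can be checked in isolation. A minimal standalone sketch with made-up tick counts (the kernel reads these values from PACA fields, not parameters):

#include <stdint.h>
#include <stdio.h>

/* Split a combined scaled delta between system and user in the same
 * ratio as the unscaled timebase deltas, as vtime_delta() does. */
static void apportion(uint64_t deltascaled, uint64_t delta, uint64_t udelta,
		      uint64_t *sys_scaled, uint64_t *user_scaled)
{
	*sys_scaled = delta;
	*user_scaled = udelta;
	if (deltascaled != delta + udelta) {
		if (udelta) {
			*sys_scaled = deltascaled * delta / (delta + udelta);
			*user_scaled = deltascaled - *sys_scaled;
		} else {
			*sys_scaled = deltascaled;
		}
	}
}

int main(void)
{
	uint64_t sys, user;

	/* 3000 scaled ticks over 1000 system + 1000 user timebase ticks:
	 * the 50/50 timebase split apportions 1500 scaled ticks to each */
	apportion(3000, 1000, 1000, &sys, &user);
	printf("sys=%llu user=%llu\n",
	       (unsigned long long)sys, (unsigned long long)user);
	return 0;
}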
 
 /*
- * When the timebase - tb_orig_stamp gets too big, we do a manipulation
- * between tb_orig_stamp and stamp_xsec. The goal here is to keep the
- * difference tb - tb_orig_stamp small enough to always fit inside a
- * 32 bits number. This is a requirement of our fast 32 bits userland
- * implementation in the vdso. If we "miss" a call to this function
- * (interrupt latency, CPU locked in a spinlock, ...) and we end up
- * with a too big difference, then the vdso will fallback to calling
- * the syscall
+ * Transfer the user time accumulated in the paca
+ * by the exception entry and exit code to the generic
+ * process user time records.
+ * Must be called with interrupts disabled.
+ * Assumes that vtime_account_system/idle() has been called
+ * recently (i.e. since the last entry from usermode) so that
+ * get_paca()->user_time_scaled is up to date.
  */
-static __inline__ void timer_recalc_offset(u64 cur_tb)
+void vtime_account_user(struct task_struct *tsk)
 {
-	unsigned long offset;
-	u64 new_stamp_xsec;
+	cputime_t utime, utimescaled;
+
+	utime = get_paca()->user_time;
+	utimescaled = get_paca()->user_time_scaled;
+	get_paca()->user_time = 0;
+	get_paca()->user_time_scaled = 0;
+	get_paca()->utime_sspurr = 0;
+	account_user_time(tsk, utime, utimescaled);
+}
 
-	if (__USE_RTC())
-		return;
-	offset = cur_tb - do_gtod.varp->tb_orig_stamp;
-	if ((offset & 0x80000000u) == 0)
-		return;
-	new_stamp_xsec = do_gtod.varp->stamp_xsec
-		+ mulhdu(offset, do_gtod.varp->tb_to_xs);
-	update_gtod(cur_tb, new_stamp_xsec, do_gtod.varp->tb_to_xs);
+#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+#define calc_cputime_factors()
+#endif
+
+void __delay(unsigned long loops)
+{
+	unsigned long start;
+	int diff;
+
+	if (__USE_RTC()) {
+		start = get_rtcl();
+		do {
+			/* the RTCL register wraps at 1000000000 */
+			diff = get_rtcl() - start;
+			if (diff < 0)
+				diff += 1000000000;
+		} while (diff < loops);
+	} else {
+		start = get_tbl();
+		while (get_tbl() - start < loops)
+			HMT_low();
+		HMT_medium();
+	}
 }
+EXPORT_SYMBOL(__delay);
+
+void udelay(unsigned long usecs)
+{
+	__delay(tb_ticks_per_usec * usecs);
+}
+EXPORT_SYMBOL(udelay);
 
 #ifdef CONFIG_SMP
 unsigned long profile_pc(struct pt_regs *regs)
@@ -322,137 +424,90 @@ unsigned long profile_pc(struct pt_regs *regs)
 EXPORT_SYMBOL(profile_pc);
 #endif
 
-#ifdef CONFIG_PPC_ISERIES
+#ifdef CONFIG_IRQ_WORK
 
-/*
- * This function recalibrates the timebase based on the 49-bit time-of-day
- * value in the Titan chip.  The Titan is much more accurate than the value
- * returned by the service processor for the timebase frequency.
+/*
+ * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
  */
+#ifdef CONFIG_PPC64
+static inline unsigned long test_irq_work_pending(void)
+{
+	unsigned long x;
 
-static void iSeries_tb_recal(void)
-{
-	struct div_result divres;
-	unsigned long titan, tb;
-	tb = get_tb();
-	titan = HvCallXm_loadTod();
-	if ( iSeries_recal_titan ) {
-		unsigned long tb_ticks = tb - iSeries_recal_tb;
-		unsigned long titan_usec = (titan - iSeries_recal_titan) >> 12;
-		unsigned long new_tb_ticks_per_sec = (tb_ticks * USEC_PER_SEC)/titan_usec;
-		unsigned long new_tb_ticks_per_jiffy = (new_tb_ticks_per_sec+(HZ/2))/HZ;
-		long tick_diff = new_tb_ticks_per_jiffy - tb_ticks_per_jiffy;
-		char sign = '+';
-		/* make sure tb_ticks_per_sec and tb_ticks_per_jiffy are consistent */
-		new_tb_ticks_per_sec = new_tb_ticks_per_jiffy * HZ;
-
-		if ( tick_diff < 0 ) {
-			tick_diff = -tick_diff;
-			sign = '-';
-		}
-		if ( tick_diff ) {
-			if ( tick_diff < tb_ticks_per_jiffy/25 ) {
-				printk( "Titan recalibrate: new tb_ticks_per_jiffy = %lu (%c%ld)\n",
-					new_tb_ticks_per_jiffy, sign, tick_diff );
-				tb_ticks_per_jiffy = new_tb_ticks_per_jiffy;
-				tb_ticks_per_sec = new_tb_ticks_per_sec;
-				div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres );
-				do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
-				tb_to_xs = divres.result_low;
-				do_gtod.varp->tb_to_xs = tb_to_xs;
-				systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
-				systemcfg->tb_to_xs = tb_to_xs;
-			}
-			else {
-				printk( "Titan recalibrate: FAILED (difference > 4 percent)\n"
-					"	new tb_ticks_per_jiffy = %lu\n"
-					"	old tb_ticks_per_jiffy = %lu\n",
-					new_tb_ticks_per_jiffy, tb_ticks_per_jiffy );
-			}
-		}
-	}
-	iSeries_recal_titan = titan;
-	iSeries_recal_tb = tb;
+	asm volatile("lbz %0,%1(13)"
+		: "=r" (x)
+		: "i" (offsetof(struct paca_struct, irq_work_pending)));
+	return x;
 }
-#endif
 
-/*
- * For iSeries shared processors, we have to let the hypervisor
- * set the hardware decrementer.  We set a virtual decrementer
- * in the lppaca and call the hypervisor if the virtual
- * decrementer is less than the current value in the hardware
- * decrementer. (almost always the new decrementer value will
- * be greater than the current hardware decementer so the hypervisor
- * call will not be needed)
- */
+static inline void set_irq_work_pending_flag(void)
+{
+	asm volatile("stb %0,%1(13)" : :
+		"r" (1),
+		"i" (offsetof(struct paca_struct, irq_work_pending)));
+}
 
-/*
- * timer_interrupt - gets called when the decrementer overflows,
- * with interrupts disabled.
- */
-void timer_interrupt(struct pt_regs * regs)
+static inline void clear_irq_work_pending(void)
 {
-	int next_dec;
-	int cpu = smp_processor_id();
-	unsigned long ticks;
+	asm volatile("stb %0,%1(13)" : :
+		"r" (0),
+		"i" (offsetof(struct paca_struct, irq_work_pending)));
+}
 
-#ifdef CONFIG_PPC32
-	if (atomic_read(&ppc_n_lost_interrupts) != 0)
-		do_IRQ(regs);
-#endif
+#else /* 32-bit */
 
-	irq_enter();
+DEFINE_PER_CPU(u8, irq_work_pending);
 
-	profile_tick(CPU_PROFILING, regs);
+#define set_irq_work_pending_flag()	__get_cpu_var(irq_work_pending) = 1
+#define test_irq_work_pending()		__get_cpu_var(irq_work_pending)
+#define clear_irq_work_pending()	__get_cpu_var(irq_work_pending) = 0
 
-#ifdef CONFIG_PPC_ISERIES
-	get_paca()->lppaca.int_dword.fields.decr_int = 0;
-#endif
+#endif /* 32 vs 64 bit */
 
-	while ((ticks = tb_ticks_since(per_cpu(last_jiffy, cpu)))
-	       >= tb_ticks_per_jiffy) {
-		/* Update last_jiffy */
-		per_cpu(last_jiffy, cpu) += tb_ticks_per_jiffy;
-		/* Handle RTCL overflow on 601 */
-		if (__USE_RTC() && per_cpu(last_jiffy, cpu) >= 1000000000)
-			per_cpu(last_jiffy, cpu) -= 1000000000;
-
-		/*
-		 * We cannot disable the decrementer, so in the period
-		 * between this cpu's being marked offline in cpu_online_map
-		 * and calling stop-self, it is taking timer interrupts.
-		 * Avoid calling into the scheduler rebalancing code if this
-		 * is the case.
-		 */
-		if (!cpu_is_offline(cpu))
-			update_process_times(user_mode(regs));
-
-		/*
-		 * No need to check whether cpu is offline here; boot_cpuid
-		 * should have been fixed up by now.
-		 */
-		if (cpu != boot_cpuid)
-			continue;
+void arch_irq_work_raise(void)
+{
+	preempt_disable();
+	set_irq_work_pending_flag();
+	set_dec(1);
+	preempt_enable();
+}
+
+#else  /* CONFIG_IRQ_WORK */
+
+#define test_irq_work_pending()	0
+#define clear_irq_work_pending()
+
+#endif /* CONFIG_IRQ_WORK */
 
-		write_seqlock(&xtime_lock);
-		tb_last_jiffy += tb_ticks_per_jiffy;
-		tb_last_stamp = per_cpu(last_jiffy, cpu);
-		timer_recalc_offset(tb_last_jiffy);
-		do_timer(regs);
-		timer_sync_xtime(tb_last_jiffy);
-		timer_check_rtc();
-		write_sequnlock(&xtime_lock);
-		if (adjusting_time && (time_adjust == 0))
-			ppc_adjtimex();
+void __timer_interrupt(void)
+{
+	struct pt_regs *regs = get_irq_regs();
+	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+	struct clock_event_device *evt = &__get_cpu_var(decrementers);
+	u64 now;
+
+	trace_timer_interrupt_entry(regs);
+
+	if (test_irq_work_pending()) {
+		clear_irq_work_pending();
+		irq_work_run();
 	}
-
-	next_dec = tb_ticks_per_jiffy - ticks;
-	set_dec(next_dec);
-#ifdef CONFIG_PPC_ISERIES
-	if (hvlpevent_is_pending())
-		process_hvlpevents(regs);
-#endif
+	now = get_tb_or_rtc();
+	if (now >= *next_tb) {
+		*next_tb = ~(u64)0;
+		if (evt->event_handler)
+			evt->event_handler(evt);
+		__get_cpu_var(irq_stat).timer_irqs_event++;
+	} else {
+		now = *next_tb - now;
+		if (now <= DECREMENTER_MAX)
+			set_dec((int)now);
+		/* We may have raced with new irq work */
+		if (test_irq_work_pending())
+			set_dec(1);
+		__get_cpu_var(irq_stat).timer_irqs_others++;
+	}
 
 #ifdef CONFIG_PPC64
 	/* collect purr register values often, for accurate calculations */
@@ -462,36 +517,93 @@ void timer_interrupt(struct pt_regs * regs)
 	}
 #endif
 
+	trace_timer_interrupt_exit(regs);
+}
+
+/*
+ * timer_interrupt - gets called when the decrementer overflows,
+ * with interrupts disabled.
+ */
+void timer_interrupt(struct pt_regs * regs)
+{
+	struct pt_regs *old_regs;
+	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+
+	/* Ensure a positive value is written to the decrementer, or else
+	 * some CPUs will continue to take decrementer exceptions.
+	 */
+	set_dec(DECREMENTER_MAX);
+
+	/* Some implementations of hotplug will get timer interrupts while
+	 * offline, just ignore these and we also need to set
+	 * decrementers_next_tb as MAX to make sure __check_irq_replay
+	 * don't replay timer interrupt when return, otherwise we'll trap
+	 * here infinitely :(
+	 */
+	if (!cpu_online(smp_processor_id())) {
+		*next_tb = ~(u64)0;
+		return;
+	}
+
+	/* Conditionally hard-enable interrupts now that the DEC has been
+	 * bumped to its maximum value
+	 */
+	may_hard_irq_enable();
+
+
+#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
+	if (atomic_read(&ppc_n_lost_interrupts) != 0)
+		do_IRQ(regs);
+#endif
+
+	old_regs = set_irq_regs(regs);
+	irq_enter();
+
+	__timer_interrupt();
 	irq_exit();
+	set_irq_regs(old_regs);
 }
 
-void wakeup_decrementer(void)
+/*
+ * Hypervisor decrementer interrupts shouldn't occur but are sometimes
+ * left pending on exit from a KVM guest.  We don't need to do anything
+ * to clear them, as they are edge-triggered.
+ */
+void hdec_interrupt(struct pt_regs *regs)
 {
-	int i;
+}
 
-	set_dec(tb_ticks_per_jiffy);
-	/*
-	 * We don't expect this to be called on a machine with a 601,
-	 * so using get_tbl is fine.
+#ifdef CONFIG_SUSPEND
+static void generic_suspend_disable_irqs(void)
+{
+	/* Disable the decrementer, so that it doesn't interfere
+	 * with suspending.
 	 */
-	tb_last_stamp = tb_last_jiffy = get_tb();
-	for_each_cpu(i)
-		per_cpu(last_jiffy, i) = tb_last_stamp;
+
+	set_dec(DECREMENTER_MAX);
+	local_irq_disable();
+	set_dec(DECREMENTER_MAX);
 }
 
-#ifdef CONFIG_SMP
-void __init smp_space_timers(unsigned int max_cpus)
+static void generic_suspend_enable_irqs(void)
 {
-	int i;
-	unsigned long offset = tb_ticks_per_jiffy / max_cpus;
-	unsigned long previous_tb = per_cpu(last_jiffy, boot_cpuid);
+	local_irq_enable();
+}
 
-	for_each_cpu(i) {
-		if (i != boot_cpuid) {
-			previous_tb += offset;
-			per_cpu(last_jiffy, i) = previous_tb;
-		}
-	}
+/* Overrides the weak version in kernel/power/main.c */
+void arch_suspend_disable_irqs(void)
+{
+	if (ppc_md.suspend_disable_irqs)
+		ppc_md.suspend_disable_irqs();
+	generic_suspend_disable_irqs();
+}
+
+/* Overrides the weak version in kernel/power/main.c */
+void arch_suspend_enable_irqs(void)
+{
+	generic_suspend_enable_irqs();
+	if (ppc_md.suspend_enable_irqs)
+		ppc_md.suspend_enable_irqs();
 }
 #endif
 
@@ -506,177 +618,290 @@
 unsigned long long sched_clock(void)
 {
 	if (__USE_RTC())
 		return get_rtc();
-	return mulhdu(get_tb(), tb_to_ns_scale) << tb_to_ns_shift;
+	return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
 }
 
-int do_settimeofday(struct timespec *tv)
+static int __init get_freq(char *name, int cells, unsigned long *val)
 {
-	time_t wtm_sec, new_sec = tv->tv_sec;
-	long wtm_nsec, new_nsec = tv->tv_nsec;
-	unsigned long flags;
-	long int tb_delta;
-	u64 new_xsec, tb_delta_xs;
+	struct device_node *cpu;
+	const __be32 *fp;
+	int found = 0;
 
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
-		return -EINVAL;
+	/* The cpu node should have timebase and clock frequency properties */
+	cpu = of_find_node_by_type(NULL, "cpu");
 
-	write_seqlock_irqsave(&xtime_lock, flags);
+	if (cpu) {
+		fp = of_get_property(cpu, name, NULL);
+		if (fp) {
+			found = 1;
+			*val = of_read_ulong(fp, cells);
+		}
 
-	/*
-	 * Updating the RTC is not the job of this code. If the time is
-	 * stepped under NTP, the RTC will be updated after STA_UNSYNC
-	 * is cleared. Tools like clock/hwclock either copy the RTC
-	 * to the system time, in which case there is no point in writing
-	 * to the RTC again, or write to the RTC but then they don't call
-	 * settimeofday to perform this operation.
-	 */
-#ifdef CONFIG_PPC_ISERIES
-	if (first_settimeofday) {
-		iSeries_tb_recal();
-		first_settimeofday = 0;
+		of_node_put(cpu);
 	}
-#endif
 
-	tb_delta = tb_ticks_since(tb_last_stamp);
-	tb_delta += (jiffies - wall_jiffies) * tb_ticks_per_jiffy;
-	tb_delta_xs = mulhdu(tb_delta, do_gtod.varp->tb_to_xs);
-
-	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - new_sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - new_nsec);
+	return found;
+}
 
-	set_normalized_timespec(&xtime, new_sec, new_nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+void start_cpu_decrementer(void)
+{
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+	/* Clear any pending timer interrupts */
+	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
 
-	/* In case of a large backwards jump in time with NTP, we want the
-	 * clock to be updated as soon as the PLL is again in lock.
-	 */
-	last_rtc_update = new_sec - 658;
+	/* Enable decrementer interrupt */
+	mtspr(SPRN_TCR, TCR_DIE);
+#endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */
+}
 
-	ntp_clear();
+void __init generic_calibrate_decr(void)
+{
+	ppc_tb_freq = DEFAULT_TB_FREQ;		/* hardcoded default */
 
-	new_xsec = 0;
-	if (new_nsec != 0) {
-		new_xsec = (u64)new_nsec * XSEC_PER_SEC;
-		do_div(new_xsec, NSEC_PER_SEC);
+	if (!get_freq("ibm,extended-timebase-frequency", 2, &ppc_tb_freq) &&
+	    !get_freq("timebase-frequency", 1, &ppc_tb_freq)) {
+
+		printk(KERN_ERR "WARNING: Estimating decrementer frequency "
+				"(not found)\n");
 	}
-	new_xsec += (u64)new_sec * XSEC_PER_SEC - tb_delta_xs;
-	update_gtod(tb_last_jiffy, new_xsec, do_gtod.varp->tb_to_xs);
 
-#ifdef CONFIG_PPC64
-	systemcfg->tz_minuteswest = sys_tz.tz_minuteswest;
-	systemcfg->tz_dsttime = sys_tz.tz_dsttime;
-#endif
+	ppc_proc_freq = DEFAULT_PROC_FREQ;	/* hardcoded default */
 
-	write_sequnlock_irqrestore(&xtime_lock, flags);
-	clock_was_set();
-	return 0;
+	if (!get_freq("ibm,extended-clock-frequency", 2, &ppc_proc_freq) &&
+	    !get_freq("clock-frequency", 1, &ppc_proc_freq)) {
+
+		printk(KERN_ERR "WARNING: Estimating processor frequency "
+				"(not found)\n");
+	}
 }
-EXPORT_SYMBOL(do_settimeofday);
 
+int update_persistent_clock(struct timespec now)
+{
+	struct rtc_time tm;
+
+	if (!ppc_md.set_rtc_time)
+		return -ENODEV;
 
-void __init generic_calibrate_decr(void)
+	to_tm(now.tv_sec + 1 + timezone_offset, &tm);
+	tm.tm_year -= 1900;
+	tm.tm_mon -= 1;
+
+	return ppc_md.set_rtc_time(&tm);
+}
+
+static void __read_persistent_clock(struct timespec *ts)
 {
-	struct device_node *cpu;
-	unsigned int *fp;
-	int node_found;
+	struct rtc_time tm;
+	static int first = 1;
+
+	ts->tv_nsec = 0;
+	/* XXX this is a litle fragile but will work okay in the short term */
+	if (first) {
+		first = 0;
+		if (ppc_md.time_init)
+			timezone_offset = ppc_md.time_init();
+
+		/* get_boot_time() isn't guaranteed to be safe to call late */
+		if (ppc_md.get_boot_time) {
+			ts->tv_sec = ppc_md.get_boot_time() - timezone_offset;
+			return;
+		}
+	}
+	if (!ppc_md.get_rtc_time) {
+		ts->tv_sec = 0;
+		return;
+	}
+	ppc_md.get_rtc_time(&tm);
+
+	ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
+			    tm.tm_hour, tm.tm_min, tm.tm_sec);
+}
+
+void read_persistent_clock(struct timespec *ts)
+{
+	__read_persistent_clock(ts);
+
+	/* Sanitize it in case real time clock is set below EPOCH */
+	if (ts->tv_sec < 0) {
+		ts->tv_sec = 0;
+		ts->tv_nsec = 0;
+	}
+
+}
+
+/* clocksource code */
+static cycle_t rtc_read(struct clocksource *cs)
+{
+	return (cycle_t)get_rtc();
+}
+
+static cycle_t timebase_read(struct clocksource *cs)
+{
+	return (cycle_t)get_tb();
+}
+
+void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
+			struct clocksource *clock, u32 mult)
+{
+	u64 new_tb_to_xs, new_stamp_xsec;
+	u32 frac_sec;
+
+	if (clock != &clocksource_timebase)
+		return;
+
+	/* Make userspace gettimeofday spin until we're done. */
+	++vdso_data->tb_update_count;
+	smp_mb();
+
+	/* 19342813113834067 ~= 2^(20+64) / 1e9 */
+	new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);
+	new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC;
+	do_div(new_stamp_xsec, 1000000000);
+	new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC;
+
+	BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC);
+	/* this is tv_nsec / 1e9 as a 0.32 fraction */
+	frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32;
 
 	/*
-	 * The cpu node should have a timebase-frequency property
-	 * to tell us the rate at which the decrementer counts.
+	 * tb_update_count is used to allow the userspace gettimeofday code
+	 * to assure itself that it sees a consistent view of the tb_to_xs and
+	 * stamp_xsec variables.  It reads the tb_update_count, then reads
+	 * tb_to_xs and stamp_xsec and then reads tb_update_count again.  If
+	 * the two values of tb_update_count match and are even then the
+	 * tb_to_xs and stamp_xsec values are consistent.  If not, then it
+	 * loops back and reads them again until this criteria is met.
+	 * We expect the caller to have done the first increment of
+	 * vdso_data->tb_update_count already.
 	 */
-	cpu = of_find_node_by_type(NULL, "cpu");
+	vdso_data->tb_orig_stamp = clock->cycle_last;
+	vdso_data->stamp_xsec = new_stamp_xsec;
+	vdso_data->tb_to_xs = new_tb_to_xs;
+	vdso_data->wtom_clock_sec = wtm->tv_sec;
+	vdso_data->wtom_clock_nsec = wtm->tv_nsec;
+	vdso_data->stamp_xtime = *wall_time;
+	vdso_data->stamp_sec_fraction = frac_sec;
+	smp_wmb();
+	++(vdso_data->tb_update_count);
+}
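The reader side of the tb_update_count protocol described in that comment lives in the VDSO, but the retry discipline can be sketched on its own: snapshot the count, copy the fields, then retry if the count changed or is odd (odd means an update was in flight, since the writer increments once before and once after). The struct below is a stand-in for the relevant vdso_data fields, not the real layout:

#include <stdint.h>

struct snapshot {
	volatile uint64_t tb_update_count;
	uint64_t tb_orig_stamp;
	uint64_t stamp_xsec;
	uint64_t tb_to_xs;
};

/* Keep re-reading until we get a copy taken entirely between updates. */
static void read_consistent(const struct snapshot *s,
			    uint64_t *orig, uint64_t *xsec, uint64_t *to_xs)
{
	uint64_t count;

	do {
		count = s->tb_update_count;
		__atomic_thread_fence(__ATOMIC_ACQUIRE);
		*orig  = s->tb_orig_stamp;
		*xsec  = s->stamp_xsec;
		*to_xs = s->tb_to_xs;
		__atomic_thread_fence(__ATOMIC_ACQUIRE);
	} while (count != s->tb_update_count || (count & 1));
}

int main(void)
{
	struct snapshot s = { 2, 123, 456, 789 };	/* even = stable */
	uint64_t a, b, c;

	read_consistent(&s, &a, &b, &c);
	return !(a == 123 && b == 456 && c == 789);
}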
 
-	ppc_tb_freq = DEFAULT_TB_FREQ;		/* hardcoded default */
-	node_found = 0;
-	if (cpu != 0) {
-		fp = (unsigned int *)get_property(cpu, "timebase-frequency",
-						  NULL);
-		if (fp != 0) {
-			node_found = 1;
-			ppc_tb_freq = *fp;
-		}
-	}
-	if (!node_found)
-		printk(KERN_ERR "WARNING: Estimating decrementer frequency "
-				"(not found)\n");
+void update_vsyscall_tz(void)
+{
+	vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+	vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+}
 
-	ppc_proc_freq = DEFAULT_PROC_FREQ;
-	node_found = 0;
-	if (cpu != 0) {
-		fp = (unsigned int *)get_property(cpu, "clock-frequency",
-						  NULL);
-		if (fp != 0) {
-			node_found = 1;
-			ppc_proc_freq = *fp;
-		}
+static void __init clocksource_init(void)
+{
+	struct clocksource *clock;
+
+	if (__USE_RTC())
+		clock = &clocksource_rtc;
+	else
+		clock = &clocksource_timebase;
+
+	if (clocksource_register_hz(clock, tb_ticks_per_sec)) {
+		printk(KERN_ERR "clocksource: %s is already registered\n",
+		       clock->name);
+		return;
 	}
 
-#ifdef CONFIG_BOOKE
-	/* Set the time base to zero */
-	mtspr(SPRN_TBWL, 0);
-	mtspr(SPRN_TBWU, 0);
-	/* Clear any pending timer interrupts */
-	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
+	printk(KERN_INFO "clocksource: %s mult[%x] shift[%d] registered\n",
+	       clock->name, clock->mult, clock->shift);
+}
 
-	/* Enable decrementer interrupt */
-	mtspr(SPRN_TCR, TCR_DIE);
-#endif
-	if (!node_found)
-		printk(KERN_ERR "WARNING: Estimating processor frequency "
-				"(not found)\n");
+static int decrementer_set_next_event(unsigned long evt,
+				      struct clock_event_device *dev)
+{
+	__get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt;
+	set_dec(evt);
+
+	/* We may have raced with new irq work */
+	if (test_irq_work_pending())
+		set_dec(1);
 
-	of_node_put(cpu);
+	return 0;
 }
 
-unsigned long get_boot_time(void)
+static void decrementer_set_mode(enum clock_event_mode mode,
+				 struct clock_event_device *dev)
 {
-	struct rtc_time tm;
+	if (mode != CLOCK_EVT_MODE_ONESHOT)
+		decrementer_set_next_event(DECREMENTER_MAX, dev);
 }
 
-	if (ppc_md.get_boot_time)
-		return ppc_md.get_boot_time();
-	if (!ppc_md.get_rtc_time)
-		return 0;
-	ppc_md.get_rtc_time(&tm);
-	return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
-		      tm.tm_hour, tm.tm_min, tm.tm_sec);
+/* Interrupt handler for the timer broadcast IPI */
+void tick_broadcast_ipi_handler(void)
+{
+	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+
+	*next_tb = get_tb_or_rtc();
+	__timer_interrupt();
+}
+
+static void register_decrementer_clockevent(int cpu)
+{
+	struct clock_event_device *dec = &per_cpu(decrementers, cpu);
+
+	*dec = decrementer_clockevent;
+	dec->cpumask = cpumask_of(cpu);
+
+	printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
+		    dec->name, dec->mult, dec->shift, cpu);
+
+	clockevents_register_device(dec);
+}
+
+static void __init init_decrementer_clockevent(void)
+{
+	int cpu = smp_processor_id();
+
+	clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4);
+
+	decrementer_clockevent.max_delta_ns =
+		clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
+	decrementer_clockevent.min_delta_ns =
+		clockevent_delta2ns(2, &decrementer_clockevent);
+
+	register_decrementer_clockevent(cpu);
+}
+
+void secondary_cpu_time_init(void)
+{
+	/* Start the decrementer on CPUs that have manual control
+	 * such as BookE
+	 */
+	start_cpu_decrementer();
+
+	/* FIME: Should make unrelatred change to move snapshot_timebase
+	 * call here ! */
+	register_decrementer_clockevent(smp_processor_id());
+}
 
 /* This function is only called on the boot processor */
 void __init time_init(void)
 {
-	unsigned long flags;
-	unsigned long tm = 0;
 	struct div_result res;
 	u64 scale;
 	unsigned shift;
 
-	if (ppc_md.time_init != NULL)
-		timezone_offset = ppc_md.time_init();
-
 	if (__USE_RTC()) {
 		/* 601 processor: dec counts down by 128 every 128ns */
 		ppc_tb_freq = 1000000000;
-		tb_last_stamp = get_rtcl();
-		tb_last_jiffy = tb_last_stamp;
 	} else {
 		/* Normal PowerPC with timebase register */
 		ppc_md.calibrate_decr();
-		printk(KERN_INFO "time_init: decrementer frequency = %lu.%.6lu MHz\n",
+		printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
 		       ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
-		printk(KERN_INFO "time_init: processor frequency   = %lu.%.6lu MHz\n",
+		printk(KERN_DEBUG "time_init: processor frequency   = %lu.%.6lu MHz\n",
 		       ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
-		tb_last_stamp = tb_last_jiffy = get_tb();
 	}
 
 	tb_ticks_per_jiffy = ppc_tb_freq / HZ;
-	tb_ticks_per_sec = tb_ticks_per_jiffy * HZ;
+	tb_ticks_per_sec = ppc_tb_freq;
 	tb_ticks_per_usec = ppc_tb_freq / 1000000;
-	tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
-	div128_by_32(1024*1024, 0, tb_ticks_per_sec, &res);
-	tb_to_xs = res.result_low;
-
-#ifdef CONFIG_PPC64
-	get_paca()->default_decr = tb_ticks_per_jiffy;
-#endif
+	calc_cputime_factors();
+	setup_cputime_one_jiffy();
 
 	/*
 	 * Compute scale factor for sched_clock.
@@ -696,167 +921,28 @@ void __init time_init(void)
 	}
 	tb_to_ns_scale = scale;
 	tb_to_ns_shift = shift;
-
-#ifdef CONFIG_PPC_ISERIES
-	if (!piranha_simulator)
-#endif
-		tm = get_boot_time();
-
-	write_seqlock_irqsave(&xtime_lock, flags);
-	xtime.tv_sec = tm;
-	xtime.tv_nsec = 0;
-	do_gtod.varp = &do_gtod.vars[0];
-	do_gtod.var_idx = 0;
-	do_gtod.varp->tb_orig_stamp = tb_last_jiffy;
-	__get_cpu_var(last_jiffy) = tb_last_stamp;
-	do_gtod.varp->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC;
-	do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
-	do_gtod.varp->tb_to_xs = tb_to_xs;
-	do_gtod.tb_to_us = tb_to_us;
-#ifdef CONFIG_PPC64
-	systemcfg->tb_orig_stamp = tb_last_jiffy;
-	systemcfg->tb_update_count = 0;
-	systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
-	systemcfg->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
-	systemcfg->tb_to_xs = tb_to_xs;
-#endif
-
-	time_freq = 0;
+	/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
+	boot_tb = get_tb_or_rtc();
 
 	/* If platform provided a timezone (pmac), we correct the time */
-	if (timezone_offset) {
+	if (timezone_offset) {
 		sys_tz.tz_minuteswest = -timezone_offset / 60;
 		sys_tz.tz_dsttime = 0;
-		xtime.tv_sec -= timezone_offset;
-	}
-
-	last_rtc_update = xtime.tv_sec;
-	set_normalized_timespec(&wall_to_monotonic,
-				-xtime.tv_sec, -xtime.tv_nsec);
-	write_sequnlock_irqrestore(&xtime_lock, flags);
-
-	/* Not exact, but the timer interrupt takes care of this */
-	set_dec(tb_ticks_per_jiffy);
-}
-
-/*
- * After adjtimex is called, adjust the conversion of tb ticks
- * to microseconds to keep do_gettimeofday synchronized
- * with ntpd.
- *
- * Use the time_adjust, time_freq and time_offset computed by adjtimex to
- * adjust the frequency.
- */
-
-/* #define DEBUG_PPC_ADJTIMEX 1 */
-
-void ppc_adjtimex(void)
-{
-#ifdef CONFIG_PPC64
-	unsigned long den, new_tb_ticks_per_sec, tb_ticks, old_xsec,
-		new_tb_to_xs, new_xsec, new_stamp_xsec;
-	unsigned long tb_ticks_per_sec_delta;
-	long delta_freq, ltemp;
-	struct div_result divres;
-	unsigned long flags;
-	long singleshot_ppm = 0;
-
-	/*
-	 * Compute parts per million frequency adjustment to
-	 * accomplish the time adjustment implied by time_offset to be
-	 * applied over the elapsed time indicated by time_constant.
-	 * Use SHIFT_USEC to get it into the same units as
-	 * time_freq.
-	 */
-	if ( time_offset < 0 ) {
-		ltemp = -time_offset;
-		ltemp <<= SHIFT_USEC - SHIFT_UPDATE;
-		ltemp >>= SHIFT_KG + time_constant;
-		ltemp = -ltemp;
-	} else {
-		ltemp = time_offset;
-		ltemp <<= SHIFT_USEC - SHIFT_UPDATE;
-		ltemp >>= SHIFT_KG + time_constant;
-	}
-
-	/* If there is a single shot time adjustment in progress */
-	if ( time_adjust ) {
-#ifdef DEBUG_PPC_ADJTIMEX
-		printk("ppc_adjtimex: ");
-		if ( adjusting_time == 0 )
-			printk("starting ");
-		printk("single shot time_adjust = %ld\n", time_adjust);
-#endif
-
-		adjusting_time = 1;
-
-		/*
-		 * Compute parts per million frequency adjustment
-		 * to match time_adjust
-		 */
-		singleshot_ppm = tickadj * HZ;
-		/*
-		 * The adjustment should be tickadj*HZ to match the code in
-		 * linux/kernel/timer.c, but experiments show that this is too
-		 * large. 3/4 of tickadj*HZ seems about right
-		 */
-		singleshot_ppm -= singleshot_ppm / 4;
-		/* Use SHIFT_USEC to get it into the same units as time_freq */
-		singleshot_ppm <<= SHIFT_USEC;
-		if ( time_adjust < 0 )
-			singleshot_ppm = -singleshot_ppm;
-	}
-	else {
-#ifdef DEBUG_PPC_ADJTIMEX
-		if ( adjusting_time )
-			printk("ppc_adjtimex: ending single shot time_adjust\n");
-#endif
-		adjusting_time = 0;
-	}
-
-	/* Add up all of the frequency adjustments */
-	delta_freq = time_freq + ltemp + singleshot_ppm;
-
-	/*
-	 * Compute a new value for tb_ticks_per_sec based on
-	 * the frequency adjustment
-	 */
-	den = 1000000 * (1 << (SHIFT_USEC - 8));
-	if ( delta_freq < 0 ) {
-		tb_ticks_per_sec_delta = ( tb_ticks_per_sec * ( (-delta_freq) >> (SHIFT_USEC - 8))) / den;
-		new_tb_ticks_per_sec = tb_ticks_per_sec + tb_ticks_per_sec_delta;
-	}
-	else {
-		tb_ticks_per_sec_delta = ( tb_ticks_per_sec * ( delta_freq >> (SHIFT_USEC - 8))) / den;
-		new_tb_ticks_per_sec = tb_ticks_per_sec - tb_ticks_per_sec_delta;
-	}
-
-#ifdef DEBUG_PPC_ADJTIMEX
-	printk("ppc_adjtimex: ltemp = %ld, time_freq = %ld, singleshot_ppm = %ld\n", ltemp, time_freq, singleshot_ppm);
-	printk("ppc_adjtimex: tb_ticks_per_sec - base = %ld  new = %ld\n", tb_ticks_per_sec, new_tb_ticks_per_sec);
-#endif
-
-	/*
-	 * Compute a new value of tb_to_xs (used to convert tb to
-	 * microseconds) and a new value of stamp_xsec which is the
-	 * time (in 1/2^20 second units) corresponding to
-	 * tb_orig_stamp. This new value of stamp_xsec compensates
-	 * for the change in frequency (implied by the new tb_to_xs)
-	 * which guarantees that the current time remains the same.
-	 */
-	write_seqlock_irqsave( &xtime_lock, flags );
-	tb_ticks = get_tb() - do_gtod.varp->tb_orig_stamp;
-	div128_by_32(1024*1024, 0, new_tb_ticks_per_sec, &divres);
-	new_tb_to_xs = divres.result_low;
-	new_xsec = mulhdu(tb_ticks, new_tb_to_xs);
+	vdso_data->tb_update_count = 0;
+	vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
 
-	old_xsec = mulhdu(tb_ticks, do_gtod.varp->tb_to_xs);
-	new_stamp_xsec = do_gtod.varp->stamp_xsec + old_xsec - new_xsec;
+	/* Start the decrementer on CPUs that have manual control
+	 * such as BookE
+	 */
+	start_cpu_decrementer();
 
-	update_gtod(do_gtod.varp->tb_orig_stamp, new_stamp_xsec, new_tb_to_xs);
+	/* Register the clocksource */
+	clocksource_init();
 
-	write_sequnlock_irqrestore( &xtime_lock, flags );
-#endif /* CONFIG_PPC64 */
+	init_decrementer_clockevent();
+	tick_setup_hrtimer_broadcast();
 }
 
@@ -939,39 +1025,6 @@ void to_tm(int tim, struct rtc_time * tm)
 	GregorianDay(tm);
 }
 
-/* Auxiliary function to compute scaling factors */
-/* Actually the choice of a timebase running at 1/4 the of the bus
- * frequency giving resolution of a few tens of nanoseconds is quite nice.
- * It makes this computation very precise (27-28 bits typically) which
- * is optimistic considering the stability of most processor clock
- * oscillators and the precision with which the timebase frequency
- * is measured but does not harm.
- */
-unsigned mulhwu_scale_factor(unsigned inscale, unsigned outscale)
-{
-	unsigned mlt=0, tmp, err;
-	/* No concern for performance, it's done once: use a stupid
-	 * but safe and compact method to find the multiplier.
-	 */
-
-	for (tmp = 1U<<31; tmp != 0; tmp >>= 1) {
-		if (mulhwu(inscale, mlt|tmp) < outscale)
-			mlt |= tmp;
-	}
-
-	/* We might still be off by 1 for the best approximation.
-	 * A side effect of this is that if outscale is too large
-	 * the returned value will be zero.
-	 * Many corner cases have been checked and seem to work,
-	 * some might have been forgotten in the test however.
-	 */
-
-	err = inscale * (mlt+1);
-	if (err <= inscale/2)
-		mlt++;
-	return mlt;
-}
-
 /*
  * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit
  * result.
@@ -1004,3 +1057,26 @@ void div128_by_32(u64 dividend_high, u64 dividend_low,
 	dr->result_low  = ((u64)y << 32) + z;
 
 }
+
+/* We don't need to calibrate delay, we use the CPU timebase for that */
+void calibrate_delay(void)
+{
+	/* Some generic code (such as spinlock debug) use loops_per_jiffy
+	 * as the number of __delay(1) in a jiffy, so make it so
+	 */
+	loops_per_jiffy = tb_ticks_per_jiffy;
+}
+
+static int __init rtc_init(void)
+{
+	struct platform_device *pdev;
+
+	if (!ppc_md.get_rtc_time)
+		return -ENODEV;
+
+	pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+
+	return PTR_ERR_OR_ZERO(pdev);
+}
+
+module_init(rtc_init);
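div128_by_32() divides a 128-bit dividend (dividend_high:dividend_low) by a 32-bit divisor; calc_cputime_factors() uses it to build 0.64 fixed-point factors of the form 2^64 * num / tb_ticks_per_sec. On a 64-bit host the same results can be cross-checked with GCC's unsigned __int128; this is a sanity-check sketch, and the 512 MHz timebase frequency below is an illustrative value, not a real platform's:

#include <stdint.h>
#include <stdio.h>

/* result_low of div128_by_32(num, 0, den, &res): the 0.64 fraction num/den */
static uint64_t factor_0_64(uint64_t num, uint64_t den)
{
	return (uint64_t)(((unsigned __int128)num << 64) / den);
}

/* high 64 bits of the 128-bit product, like the ppc mulhdu instruction */
static uint64_t mulhdu(uint64_t a, uint64_t b)
{
	return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

int main(void)
{
	uint64_t tb_ticks_per_sec = 512000000;	/* example only */
	uint64_t usec_factor = factor_0_64(1000000, tb_ticks_per_sec);

	/* one second's worth of ticks converts to exactly 1000000 us */
	printf("%llu\n", (unsigned long long)mulhdu(tb_ticks_per_sec,
						    usec_factor));
	return 0;
}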