diff options
Diffstat (limited to 'arch/x86/xen/time.c')
| -rw-r--r-- | arch/x86/xen/time.c | 152 |
1 files changed, 102 insertions, 50 deletions
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index b2bb5aa3b05..7b78f88c170 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -14,6 +14,8 @@ #include <linux/kernel_stat.h> #include <linux/math64.h> #include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/pvclock_gtod.h> #include <asm/pvclock.h> #include <asm/xen/hypervisor.h> @@ -26,8 +28,6 @@ #include "xen-ops.h" -#define XEN_SHIFT 22 - /* Xen may fire a timer up to this many ns early */ #define TIMER_SLOP 100000 #define NS_PER_TICK (1000000000LL / HZ) @@ -38,9 +38,8 @@ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); /* snapshots of runstate info */ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); -/* unused ns of stolen and blocked time */ +/* unused ns of stolen time */ static DEFINE_PER_CPU(u64, xen_residual_stolen); -static DEFINE_PER_CPU(u64, xen_residual_blocked); /* return an consistent snapshot of 64-bit time/counter value */ static u64 get64(const u64 *p) @@ -117,7 +116,7 @@ static void do_stolen_accounting(void) { struct vcpu_runstate_info state; struct vcpu_runstate_info *snap; - s64 blocked, runnable, offline, stolen; + s64 runnable, offline, stolen; cputime_t ticks; get_runstate_snapshot(&state); @@ -127,7 +126,6 @@ static void do_stolen_accounting(void) snap = &__get_cpu_var(xen_runstate_snapshot); /* work out how much time the VCPU has not been runn*ing* */ - blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked]; runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; @@ -135,25 +133,14 @@ static void do_stolen_accounting(void) /* Add the appropriate number of ticks of stolen time, including any left-overs from last time. */ - stolen = runnable + offline + __get_cpu_var(xen_residual_stolen); + stolen = runnable + offline + __this_cpu_read(xen_residual_stolen); if (stolen < 0) stolen = 0; ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); - __get_cpu_var(xen_residual_stolen) = stolen; + __this_cpu_write(xen_residual_stolen, stolen); account_steal_ticks(ticks); - - /* Add the appropriate number of ticks of blocked time, - including any left-overs from last time. */ - blocked += __get_cpu_var(xen_residual_blocked); - - if (blocked < 0) - blocked = 0; - - ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); - __get_cpu_var(xen_residual_blocked) = blocked; - account_idle_ticks(ticks); } /* Get the TSC speed from Xen */ @@ -170,9 +157,10 @@ cycle_t xen_clocksource_read(void) struct pvclock_vcpu_time_info *src; cycle_t ret; - src = &get_cpu_var(xen_vcpu)->time; + preempt_disable_notrace(); + src = &__get_cpu_var(xen_vcpu)->time; ret = pvclock_clocksource_read(src); - put_cpu_var(xen_vcpu); + preempt_enable_notrace(); return ret; } @@ -192,27 +180,61 @@ static void xen_read_wallclock(struct timespec *ts) put_cpu_var(xen_vcpu); } -static unsigned long xen_get_wallclock(void) +static void xen_get_wallclock(struct timespec *now) { - struct timespec ts; - - xen_read_wallclock(&ts); - return ts.tv_sec; + xen_read_wallclock(now); } -static int xen_set_wallclock(unsigned long now) +static int xen_set_wallclock(const struct timespec *now) { - /* do nothing for domU */ return -1; } +static int xen_pvclock_gtod_notify(struct notifier_block *nb, + unsigned long was_set, void *priv) +{ + /* Protected by the calling core code serialization */ + static struct timespec next_sync; + + struct xen_platform_op op; + struct timespec now; + + now = __current_kernel_time(); + + /* + * We only take the expensive HV call when the clock was set + * or when the 11 minutes RTC synchronization time elapsed. + */ + if (!was_set && timespec_compare(&now, &next_sync) < 0) + return NOTIFY_OK; + + op.cmd = XENPF_settime; + op.u.settime.secs = now.tv_sec; + op.u.settime.nsecs = now.tv_nsec; + op.u.settime.system_time = xen_clocksource_read(); + + (void)HYPERVISOR_dom0_op(&op); + + /* + * Move the next drift compensation time 11 minutes + * ahead. That's emulating the sync_cmos_clock() update for + * the hardware RTC. + */ + next_sync = now; + next_sync.tv_sec += 11 * 60; + + return NOTIFY_OK; +} + +static struct notifier_block xen_pvclock_gtod_notifier = { + .notifier_call = xen_pvclock_gtod_notify, +}; + static struct clocksource xen_clocksource __read_mostly = { .name = "xen", .rating = 400, .read = xen_clocksource_get_cycles, .mask = ~0, - .mult = 1<<XEN_SHIFT, /* time directly in nanoseconds */ - .shift = XEN_SHIFT, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -366,11 +388,16 @@ static const struct clock_event_device xen_vcpuop_clockevent = { static const struct clock_event_device *xen_clockevent = &xen_timerop_clockevent; -static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events); + +struct xen_clock_event_device { + struct clock_event_device evt; + char *name; +}; +static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 }; static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) { - struct clock_event_device *evt = &__get_cpu_var(xen_clock_events); + struct clock_event_device *evt = &__get_cpu_var(xen_clock_events).evt; irqreturn_t ret; ret = IRQ_NONE; @@ -384,12 +411,31 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) return ret; } +void xen_teardown_timer(int cpu) +{ + struct clock_event_device *evt; + BUG_ON(cpu == 0); + evt = &per_cpu(xen_clock_events, cpu).evt; + + if (evt->irq >= 0) { + unbind_from_irqhandler(evt->irq, NULL); + evt->irq = -1; + kfree(per_cpu(xen_clock_events, cpu).name); + per_cpu(xen_clock_events, cpu).name = NULL; + } +} + void xen_setup_timer(int cpu) { - const char *name; + char *name; struct clock_event_device *evt; int irq; + evt = &per_cpu(xen_clock_events, cpu).evt; + WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu); + if (evt->irq >= 0) + xen_teardown_timer(cpu); + printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu); name = kasprintf(GFP_KERNEL, "timer%d", cpu); @@ -397,35 +443,32 @@ void xen_setup_timer(int cpu) name = "<timer kasprintf failed>"; irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, + IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| + IRQF_FORCE_RESUME, name, NULL); + (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX); - evt = &per_cpu(xen_clock_events, cpu); memcpy(evt, xen_clockevent, sizeof(*evt)); evt->cpumask = cpumask_of(cpu); evt->irq = irq; + per_cpu(xen_clock_events, cpu).name = name; } -void xen_teardown_timer(int cpu) -{ - struct clock_event_device *evt; - BUG_ON(cpu == 0); - evt = &per_cpu(xen_clock_events, cpu); - unbind_from_irqhandler(evt->irq, NULL); -} void xen_setup_cpu_clockevents(void) { BUG_ON(preemptible()); - clockevents_register_device(&__get_cpu_var(xen_clock_events)); + clockevents_register_device(&__get_cpu_var(xen_clock_events).evt); } void xen_timer_resume(void) { int cpu; + pvclock_resume(); + if (xen_clockevent != &xen_vcpuop_clockevent) return; @@ -435,16 +478,16 @@ void xen_timer_resume(void) } } -static const struct pv_time_ops xen_time_ops __initdata = { +static const struct pv_time_ops xen_time_ops __initconst = { .sched_clock = xen_clocksource_read, }; -static __init void xen_time_init(void) +static void __init xen_time_init(void) { int cpu = smp_processor_id(); struct timespec tp; - clocksource_register(&xen_clocksource); + clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC); if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { /* Successfully turned off 100Hz tick, so we have the @@ -462,9 +505,12 @@ static __init void xen_time_init(void) xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_setup_cpu_clockevents(); + + if (xen_initial_domain()) + pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } -__init void xen_init_time_ops(void) +void __init xen_init_time_ops(void) { pv_time_ops = xen_time_ops; @@ -474,7 +520,9 @@ __init void xen_init_time_ops(void) x86_platform.calibrate_tsc = xen_tsc_khz; x86_platform.get_wallclock = xen_get_wallclock; - x86_platform.set_wallclock = xen_set_wallclock; + /* Dom0 uses the native method to set the hardware RTC. */ + if (!xen_initial_domain()) + x86_platform.set_wallclock = xen_set_wallclock; } #ifdef CONFIG_XEN_PVHVM @@ -482,11 +530,15 @@ static void xen_hvm_setup_cpu_clockevents(void) { int cpu = smp_processor_id(); xen_setup_runstate_info(cpu); - xen_setup_timer(cpu); + /* + * xen_setup_timer(cpu) - snprintf is bad in atomic context. Hence + * doing it xen_hvm_cpu_notify (which gets called by smp_init during + * early bootup and also during CPU hotplug events). + */ xen_setup_cpu_clockevents(); } -__init void xen_hvm_init_time_ops(void) +void __init xen_hvm_init_time_ops(void) { /* vector callback is needed otherwise we cannot receive interrupts * on cpu > 0 and at this point we don't know how many cpus are |
