aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/kvmclock.c56
-rw-r--r--arch/x86/kernel/pvclock.c37
-rw-r--r--arch/x86/kernel/tboot.c1
3 files changed, 73 insertions, 21 deletions
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index feaeb0d3aa4..eb9b76c716c 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -29,6 +29,8 @@
#define KVM_SCALE 22
static int kvmclock = 1;
+static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
+static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
static int parse_no_kvmclock(char *arg)
{
@@ -54,7 +56,8 @@ static unsigned long kvm_get_wallclock(void)
low = (int)__pa_symbol(&wall_clock);
high = ((u64)__pa_symbol(&wall_clock) >> 32);
- native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
+
+ native_write_msr(msr_kvm_wall_clock, low, high);
vcpu_time = &get_cpu_var(hv_clock);
pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
@@ -130,7 +133,8 @@ static int kvm_register_clock(char *txt)
high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
cpu, high, low, txt);
- return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
+
+ return native_write_msr_safe(msr_kvm_system_time, low, high);
}
#ifdef CONFIG_X86_LOCAL_APIC
@@ -165,14 +169,14 @@ static void __init kvm_smp_prepare_boot_cpu(void)
#ifdef CONFIG_KEXEC
static void kvm_crash_shutdown(struct pt_regs *regs)
{
- native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+ native_write_msr(msr_kvm_system_time, 0, 0);
native_machine_crash_shutdown(regs);
}
#endif
static void kvm_shutdown(void)
{
- native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+ native_write_msr(msr_kvm_system_time, 0, 0);
native_machine_shutdown();
}
@@ -181,27 +185,37 @@ void __init kvmclock_init(void)
if (!kvm_para_available())
return;
- if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
- if (kvm_register_clock("boot clock"))
- return;
- pv_time_ops.sched_clock = kvm_clock_read;
- x86_platform.calibrate_tsc = kvm_get_tsc_khz;
- x86_platform.get_wallclock = kvm_get_wallclock;
- x86_platform.set_wallclock = kvm_set_wallclock;
+ if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) {
+ msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
+ msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
+ } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)))
+ return;
+
+ printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
+ msr_kvm_system_time, msr_kvm_wall_clock);
+
+ if (kvm_register_clock("boot clock"))
+ return;
+ pv_time_ops.sched_clock = kvm_clock_read;
+ x86_platform.calibrate_tsc = kvm_get_tsc_khz;
+ x86_platform.get_wallclock = kvm_get_wallclock;
+ x86_platform.set_wallclock = kvm_set_wallclock;
#ifdef CONFIG_X86_LOCAL_APIC
- x86_cpuinit.setup_percpu_clockev =
- kvm_setup_secondary_clock;
+ x86_cpuinit.setup_percpu_clockev =
+ kvm_setup_secondary_clock;
#endif
#ifdef CONFIG_SMP
- smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+ smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
#endif
- machine_ops.shutdown = kvm_shutdown;
+ machine_ops.shutdown = kvm_shutdown;
#ifdef CONFIG_KEXEC
- machine_ops.crash_shutdown = kvm_crash_shutdown;
+ machine_ops.crash_shutdown = kvm_crash_shutdown;
#endif
- kvm_get_preset_lpj();
- clocksource_register(&kvm_clock);
- pv_info.paravirt_enabled = 1;
- pv_info.name = "KVM";
- }
+ kvm_get_preset_lpj();
+ clocksource_register(&kvm_clock);
+ pv_info.paravirt_enabled = 1;
+ pv_info.name = "KVM";
+
+ if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
+ pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
}
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 03801f2f761..239427ca02a 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -31,8 +31,16 @@ struct pvclock_shadow_time {
u32 tsc_to_nsec_mul;
int tsc_shift;
u32 version;
+ u8 flags;
};
+static u8 valid_flags __read_mostly = 0;
+
+void pvclock_set_flags(u8 flags)
+{
+ valid_flags = flags;
+}
+
/*
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
* yielding a 64-bit result.
@@ -91,6 +99,7 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
dst->system_timestamp = src->system_time;
dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
dst->tsc_shift = src->tsc_shift;
+ dst->flags = src->flags;
rmb(); /* test version after fetching data */
} while ((src->version & 1) || (dst->version != src->version));
@@ -109,11 +118,14 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
return pv_tsc_khz;
}
+static atomic64_t last_value = ATOMIC64_INIT(0);
+
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
struct pvclock_shadow_time shadow;
unsigned version;
cycle_t ret, offset;
+ u64 last;
do {
version = pvclock_get_time_values(&shadow, src);
@@ -123,6 +135,31 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
barrier();
} while (version != src->version);
+ if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
+ (shadow.flags & PVCLOCK_TSC_STABLE_BIT))
+ return ret;
+
+ /*
+ * Assumption here is that last_value, a global accumulator, always goes
+ * forward. If we are less than that, we should not be much smaller.
+ * We assume there is an error marging we're inside, and then the correction
+ * does not sacrifice accuracy.
+ *
+ * For reads: global may have changed between test and return,
+ * but this means someone else updated poked the clock at a later time.
+ * We just need to make sure we are not seeing a backwards event.
+ *
+ * For updates: last_value = ret is not enough, since two vcpus could be
+ * updating at the same time, and one of them could be slightly behind,
+ * making the assumption that last_value always go forward fail to hold.
+ */
+ last = atomic64_read(&last_value);
+ do {
+ if (ret < last)
+ return last;
+ last = atomic64_cmpxchg(&last_value, last, ret);
+ } while (unlikely(last != ret));
+
return ret;
}
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index cc2c60474fd..c2f1b26141e 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -46,6 +46,7 @@
/* Global pointer to shared data; NULL means no measured launch. */
struct tboot *tboot __read_mostly;
+EXPORT_SYMBOL(tboot);
/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
#define AP_WAIT_TIMEOUT 1