diff options
Diffstat (limited to 'arch')
34 files changed, 416 insertions, 370 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 7336ba653b8..137b277f7e5 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -101,6 +101,9 @@ config GENERIC_IOMAP bool default y +config ARCH_CLOCKSOURCE_DATA + def_bool y + config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/ia64/include/asm/clocksource.h b/arch/ia64/include/asm/clocksource.h new file mode 100644 index 00000000000..5c8596e4cb0 --- /dev/null +++ b/arch/ia64/include/asm/clocksource.h @@ -0,0 +1,10 @@ +/* IA64-specific clocksource additions */ + +#ifndef _ASM_IA64_CLOCKSOURCE_H +#define _ASM_IA64_CLOCKSOURCE_H + +struct arch_clocksource_data { + void *fsys_mmio; /* used by fsyscall asm code */ +}; + +#endif /* _ASM_IA64_CLOCKSOURCE_H */ diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c index f64097b5118..4826ff957a3 100644 --- a/arch/ia64/kernel/cyclone.c +++ b/arch/ia64/kernel/cyclone.c @@ -115,7 +115,7 @@ int __init init_cyclone_clock(void) } /* initialize last tick */ cyclone_mc = cyclone_timer; - clocksource_cyclone.fsys_mmio = cyclone_timer; + clocksource_cyclone.archdata.fsys_mmio = cyclone_timer; clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ); return 0; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 85118dfe9bb..43920de425f 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -468,7 +468,7 @@ void update_vsyscall(struct timespec *wall, struct timespec *wtm, fsyscall_gtod_data.clk_mask = c->mask; fsyscall_gtod_data.clk_mult = mult; fsyscall_gtod_data.clk_shift = c->shift; - fsyscall_gtod_data.clk_fsys_mmio = c->fsys_mmio; + fsyscall_gtod_data.clk_fsys_mmio = c->archdata.fsys_mmio; fsyscall_gtod_data.clk_cycle_last = c->cycle_last; /* copy kernel time structures */ diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c index c34efda122e..0f8844e4936 100644 --- a/arch/ia64/sn/kernel/sn2/timer.c +++ b/arch/ia64/sn/kernel/sn2/timer.c @@ -54,7 +54,7 @@ ia64_sn_udelay (unsigned long usecs) void __init sn_timer_init(void) { - clocksource_sn2.fsys_mmio = RTC_COUNTER_ADDR; + clocksource_sn2.archdata.fsys_mmio = RTC_COUNTER_ADDR; clocksource_register_hz(&clocksource_sn2, sn_rtc_cycles_per_second); ia64_udelay = &ia64_sn_udelay; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fc76e420900..5f60ea190d5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -95,6 +95,10 @@ config CLOCKSOURCE_WATCHDOG config GENERIC_CLOCKEVENTS def_bool y +config ARCH_CLOCKSOURCE_DATA + def_bool y + depends on X86_64 + config GENERIC_CLOCKEVENTS_BROADCAST def_bool y depends on X86_64 || (X86_32 && X86_LOCAL_APIC) diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 94d420b360d..4554cc6fb96 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -17,8 +17,8 @@ .macro altinstruction_entry orig alt feature orig_len alt_len .align 8 - .quad \orig - .quad \alt + .long \orig - . + .long \alt - . .word \feature .byte \orig_len .byte \alt_len diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index bf535f947e8..23fb6d79f20 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -43,8 +43,8 @@ #endif struct alt_instr { - u8 *instr; /* original instruction */ - u8 *replacement; + s32 instr_offset; /* original instruction */ + s32 repl_offset; /* offset to replacement instruction */ u16 cpuid; /* cpuid bit set for replacement */ u8 instrlen; /* length of original instruction */ u8 replacementlen; /* length of new instruction, <= instrlen */ @@ -84,8 +84,8 @@ static inline int alternatives_text_reserved(void *start, void *end) "661:\n\t" oldinstr "\n662:\n" \ ".section .altinstructions,\"a\"\n" \ _ASM_ALIGN "\n" \ - _ASM_PTR "661b\n" /* label */ \ - _ASM_PTR "663f\n" /* new instruction */ \ + " .long 661b - .\n" /* label */ \ + " .long 663f - .\n" /* new instruction */ \ " .word " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* sourcelen */ \ " .byte 664f-663f\n" /* replacementlen */ \ diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h new file mode 100644 index 00000000000..0bdbbb3b9ce --- /dev/null +++ b/arch/x86/include/asm/clocksource.h @@ -0,0 +1,18 @@ +/* x86-specific clocksource additions */ + +#ifndef _ASM_X86_CLOCKSOURCE_H +#define _ASM_X86_CLOCKSOURCE_H + +#ifdef CONFIG_X86_64 + +#define VCLOCK_NONE 0 /* No vDSO clock available. */ +#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ +#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ + +struct arch_clocksource_data { + int vclock_mode; +}; + +#endif /* CONFIG_X86_64 */ + +#endif /* _ASM_X86_CLOCKSOURCE_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 71cc3800712..9929b35929f 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -331,8 +331,8 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) "2:\n" ".section .altinstructions,\"a\"\n" _ASM_ALIGN "\n" - _ASM_PTR "1b\n" - _ASM_PTR "0\n" /* no replacement */ + " .long 1b - .\n" + " .long 0\n" /* no replacement */ " .word %P0\n" /* feature bit */ " .byte 2b - 1b\n" /* source len */ " .byte 0\n" /* replacement len */ @@ -349,8 +349,8 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) "2:\n" ".section .altinstructions,\"a\"\n" _ASM_ALIGN "\n" - _ASM_PTR "1b\n" - _ASM_PTR "3f\n" + " .long 1b - .\n" + " .long 3f - .\n" " .word %P1\n" /* feature bit */ " .byte 2b - 1b\n" /* source len */ " .byte 4f - 3f\n" /* replacement len */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 4729b2b6311..460c74e4852 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -78,6 +78,7 @@ enum fixed_addresses { VSYSCALL_LAST_PAGE, VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, + VVAR_PAGE, VSYSCALL_HPET, #endif FIX_DBGP_BASE, diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 6665026ea3e..f9a320984a1 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -17,7 +17,8 @@ * Vectors 0 ... 31 : system traps and exceptions - hardcoded events * Vectors 32 ... 127 : device interrupts * Vector 128 : legacy int80 syscall interface - * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts + * Vector 204 : legacy x86_64 vsyscall emulation + * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts * * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. @@ -50,6 +51,9 @@ #ifdef CONFIG_X86_32 # define SYSCALL_VECTOR 0x80 #endif +#ifdef CONFIG_X86_64 +# define VSYSCALL_EMU_VECTOR 0xcc +#endif /* * Vectors 0x30-0x3f are used for ISA interrupts. diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d56187c6b83..013286a10c2 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -107,7 +107,8 @@ #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) #define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) #define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) -#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) +#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) +#define __PAGE_KERNEL_VVAR_NOCACHE (__PAGE_KERNEL_VVAR | _PAGE_PCD | _PAGE_PWT) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) @@ -129,7 +130,8 @@ #define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) #define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) -#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) +#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR) +#define PAGE_KERNEL_VVAR_NOCACHE __pgprot(__PAGE_KERNEL_VVAR_NOCACHE) #define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) #define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0310da67307..2bae0a513b4 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_TRAPS_H #define _ASM_X86_TRAPS_H +#include <linux/kprobes.h> + #include <asm/debugreg.h> #include <asm/siginfo.h> /* TRAP_TRACE, ... */ @@ -38,6 +40,7 @@ asmlinkage void alignment_check(void); asmlinkage void machine_check(void); #endif /* CONFIG_X86_MCE */ asmlinkage void simd_coprocessor_error(void); +asmlinkage void emulate_vsyscall(void); dotraplinkage void do_divide_error(struct pt_regs *, long); dotraplinkage void do_debug(struct pt_regs *, long); @@ -64,6 +67,7 @@ dotraplinkage void do_alignment_check(struct pt_regs *, long); dotraplinkage void do_machine_check(struct pt_regs *, long); #endif dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); +dotraplinkage void do_emulate_vsyscall(struct pt_regs *, long); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *, long); #endif diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 9db5583b6d3..83e2efd181e 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -51,10 +51,6 @@ extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); extern unsigned long native_calibrate_tsc(void); -#ifdef CONFIG_X86_64 -extern cycles_t vread_tsc(void); -#endif - /* * Boot-time check whether the TSCs are synchronized across * all CPUs/cores: diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 646b4c1ca69..815285bcace 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -11,10 +11,9 @@ struct vsyscall_gtod_data { time_t wall_time_sec; u32 wall_time_nsec; - int sysctl_enabled; struct timezone sys_tz; struct { /* extract of a clocksource struct */ - cycle_t (*vread)(void); + int vclock_mode; cycle_t cycle_last; cycle_t mask; u32 mult; diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d55597351f6..60107072c28 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -16,10 +16,6 @@ enum vsyscall_num { #ifdef __KERNEL__ #include <linux/seqlock.h> -/* Definitions for CONFIG_GENERIC_TIME definitions */ -#define __vsyscall_fn \ - __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace - #define VGETCPU_RDTSCP 1 #define VGETCPU_LSL 2 diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index 341b3559452..de656ac2af4 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -10,15 +10,14 @@ * In normal kernel code, they are used like any other variable. * In user code, they are accessed through the VVAR macro. * - * Each of these variables lives in the vsyscall page, and each - * one needs a unique offset within the little piece of the page - * reserved for vvars. Specify that offset in DECLARE_VVAR. - * (There are 896 bytes available. If you mess up, the linker will - * catch it.) + * These variables live in a page of kernel data that has an extra RO + * mapping for userspace. Each variable needs a unique offset within + * that page; specify that offset with the DECLARE_VVAR macro. (If + * you mess up, the linker will catch it.) */ -/* Offset of vars within vsyscall page */ -#define VSYSCALL_VARS_OFFSET (3072 + 128) +/* Base address of vvars. This is not ABI. */ +#define VVAR_ADDRESS (-10*1024*1024 - 4096) #if defined(__VVAR_KERNEL_LDS) @@ -26,17 +25,17 @@ * right place. */ #define DECLARE_VVAR(offset, type, name) \ - EMIT_VVAR(name, VSYSCALL_VARS_OFFSET + offset) + EMIT_VVAR(name, offset) #else #define DECLARE_VVAR(offset, type, name) \ static type const * const vvaraddr_ ## name = \ - (void *)(VSYSCALL_START + VSYSCALL_VARS_OFFSET + (offset)); + (void *)(VVAR_ADDRESS + (offset)); #define DEFINE_VVAR(type, name) \ - type __vvar_ ## name \ - __attribute__((section(".vsyscall_var_" #name), aligned(16))) + type name \ + __attribute__((section(".vvar_" #name), aligned(16))) #define VVAR(name) (*vvaraddr_ ## name) @@ -45,8 +44,7 @@ /* DECLARE_VVAR(offset, type, name) */ DECLARE_VVAR(0, volatile unsigned long, jiffies) -DECLARE_VVAR(8, int, vgetcpu_mode) +DECLARE_VVAR(16, int, vgetcpu_mode) DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) #undef DECLARE_VVAR -#undef VSYSCALL_VARS_OFFSET diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 11817ff8539..04105574c8e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -24,17 +24,12 @@ endif nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) CFLAGS_hpet.o := $(nostackp) -CFLAGS_vread_tsc_64.o := $(nostackp) CFLAGS_paravirt.o := $(nostackp) GCOV_PROFILE_vsyscall_64.o := n GCOV_PROFILE_hpet.o := n GCOV_PROFILE_tsc.o := n -GCOV_PROFILE_vread_tsc_64.o := n GCOV_PROFILE_paravirt.o := n -# vread_tsc_64 is hot and should be fully optimized: -CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls - obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o ldt.o dumpstack.o @@ -43,7 +38,8 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o -obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o +obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o +obj-$(CONFIG_X86_64) += vsyscall_emu_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a81f2d52f86..c6382281624 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -14,7 +14,6 @@ #include <asm/pgtable.h> #include <asm/mce.h> #include <asm/nmi.h> -#include <asm/vsyscall.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/io.h> @@ -250,7 +249,6 @@ static void __init_or_module add_nops(void *insns, unsigned int len) extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; -extern char __vsyscall_0; void *text_poke_early(void *addr, const void *opcode, size_t len); /* Replace instructions with better alternatives for this CPU type. @@ -263,6 +261,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start, struct alt_instr *end) { struct alt_instr *a; + u8 *instr, *replacement; u8 insnbuf[MAX_PATCH_LEN]; DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); @@ -276,25 +275,23 @@ void __init_or_module apply_alternatives(struct alt_instr *start, * order. */ for (a = start; a < end; a++) { - u8 *instr = a->instr; + instr = (u8 *)&a->instr_offset + a->instr_offset; + replacement = (u8 *)&a->repl_offset + a->repl_offset; BUG_ON(a->replacementlen > a->instrlen); BUG_ON(a->instrlen > sizeof(insnbuf)); BUG_ON(a->cpuid >= NCAPINTS*32); if (!boot_cpu_has(a->cpuid)) continue; -#ifdef CONFIG_X86_64 - /* vsyscall code is not mapped yet. resolve it manually. */ - if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { - instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); - DPRINTK("%s: vsyscall fixup: %p => %p\n", - __func__, a->instr, instr); - } -#endif - memcpy(insnbuf, a->replacement, a->replacementlen); + + memcpy(insnbuf, replacement, a->replacementlen); + + /* 0xe8 is a relative jump; fix the offset. */ if (*insnbuf == 0xe8 && a->replacementlen == 5) - *(s32 *)(insnbuf + 1) += a->replacement - a->instr; + *(s32 *)(insnbuf + 1) += replacement - instr; + add_nops(insnbuf + a->replacementlen, a->instrlen - a->replacementlen); + text_poke_early(instr, insnbuf, a->instrlen); } } diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 37e895a1c74..e13329d800c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -9,6 +9,8 @@ /* * entry.S contains the system-call and fault low-level handling routines. * + * Some of this is documented in Documentation/x86/entry_64.txt + * * NOTE: This code handles signal-recognition, which happens every time * after an interrupt and after each system call. * @@ -1109,6 +1111,8 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug zeroentry coprocessor_error do_coprocessor_error errorentry alignment_check do_alignment_check zeroentry simd_coprocessor_error do_simd_coprocessor_error +zeroentry emulate_vsyscall do_emulate_vsyscall + /* Reload gs selector with exception handling */ /* edi: new selector */ diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 0f4b0651cd3..4aecc54236a 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -72,7 +72,7 @@ static inline void hpet_set_mapping(void) { hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); #ifdef CONFIG_X86_64 - __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE); + __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VVAR_NOCACHE); #endif } @@ -739,13 +739,6 @@ static cycle_t read_hpet(struct clocksource *cs) return (cycle_t)hpet_readl(HPET_COUNTER); } -#ifdef CONFIG_X86_64 -static cycle_t __vsyscall_fn vread_hpet(void) -{ - return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); -} -#endif - static struct clocksource clocksource_hpet = { .name = "hpet", .rating = 250, @@ -754,7 +747,7 @@ static struct clocksource clocksource_hpet = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, .resume = hpet_resume_counter, #ifdef CONFIG_X86_64 - .vread = vread_hpet, + .archdata = { .vclock_mode = VCLOCK_HPET }, #endif }; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b9b67166f9d..fbc097a085c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -872,6 +872,12 @@ void __init trap_init(void) set_bit(SYSCALL_VECTOR, used_vectors); #endif +#ifdef CONFIG_X86_64 + BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors)); + set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall); + set_bit(VSYSCALL_EMU_VECTOR, used_vectors); +#endif + /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6cc6922262a..56c633a5db7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -777,7 +777,7 @@ static struct clocksource clocksource_tsc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_MUST_VERIFY, #ifdef CONFIG_X86_64 - .vread = vread_tsc, + .archdata = { .vclock_mode = VCLOCK_TSC }, #endif }; diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 89aed99aafc..4aa9c54a9b7 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -161,50 +161,47 @@ SECTIONS #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0) #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) -#define EMIT_VVAR(x, offset) .vsyscall_var_ ## x \ - ADDR(.vsyscall_0) + offset \ - : AT(VLOAD(.vsyscall_var_ ## x)) { \ - *(.vsyscall_var_ ## x) \ - } \ - x = VVIRT(.vsyscall_var_ ## x); . = ALIGN(4096); __vsyscall_0 = .; . = VSYSCALL_ADDR; - .vsyscall_0 : AT(VLOAD(.vsyscall_0)) { + .vsyscall : AT(VLOAD(.vsyscall)) { *(.vsyscall_0) - } :user - . = ALIGN(L1_CACHE_BYTES); - .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { - *(.vsyscall_fn) - } - - .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { + . = 1024; *(.vsyscall_1) - } - .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { - *(.vsyscall_2) - } - .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { - *(.vsyscall_3) - } - -#define __VVAR_KERNEL_LDS -#include <asm/vvar.h> -#undef __VVAR_KERNEL_LDS + . = 2048; + *(.vsyscall_2) - . = __vsyscall_0 + PAGE_SIZE; + . = 4096; /* Pad the whole page. */ + } :user =0xcc + . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE); #undef VSYSCALL_ADDR #undef VLOAD_OFFSET #undef VLOAD #undef VVIRT_OFFSET #undef VVIRT + + __vvar_page = .; + + .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { + + /* Place all vvars at the offsets in asm/vvar.h. */ +#define EMIT_VVAR(name, offset) \ + . = offset; \ + *(.vvar_ ## name) +#define __VVAR_KERNEL_LDS +#include <asm/vvar.h> +#undef __VVAR_KERNEL_LDS #undef EMIT_VVAR + } :data + + . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); + #endif /* CONFIG_X86_64 */ /* Init code and data - will be freed after init */ diff --git a/arch/x86/kernel/vread_tsc_64.c b/arch/x86/kernel/vread_tsc_64.c deleted file mode 100644 index a81aa9e9894..00000000000 --- a/arch/x86/kernel/vread_tsc_64.c +++ /dev/null @@ -1,36 +0,0 @@ -/* This code runs in userspace. */ - -#define DISABLE_BRANCH_PROFILING -#include <asm/vgtod.h> - -notrace cycle_t __vsyscall_fn vread_tsc(void) -{ - cycle_t ret; - u64 last; - - /* - * Empirically, a fence (of type that depends on the CPU) - * before rdtsc is enough to ensure that rdtsc is ordered - * with respect to loads. The various CPU manuals are unclear - * as to whether rdtsc can be reordered with later loads, - * but no one has ever seen it happen. - */ - rdtsc_barrier(); - ret = (cycle_t)vget_cycles(); - - last = VVAR(vsyscall_gtod_data).clock.cycle_last; - - if (likely(ret >= last)) - return ret; - - /* - * GCC likes to generate cmov here, but this branch is extremely - * predictable (it's just a funciton of time and the likely is - * very likely) and there's a data dependence, so force GCC - * to generate a branch instead. I don't barrier() because - * we don't actually need a barrier, and if this function - * ever gets inlined it will generate worse code. - */ - asm volatile (""); - return last; -} diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 3e682184d76..dda7dff9cef 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -2,6 +2,8 @@ * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE * Copyright 2003 Andi Kleen, SuSE Labs. * + * [ NOTE: this mechanism is now deprecated in favor of the vDSO. ] + * * Thanks to hpa@transmeta.com for some useful hint. * Special thanks to Ingo Molnar for his early experience with * a different vsyscall implementation for Linux/IA32 and for the name. @@ -11,10 +13,9 @@ * vsyscalls. One vsyscall can reserve more than 1 slot to avoid * jumping out of line if necessary. We cannot add more with this * mechanism because older kernels won't return -ENOSYS. - * If we want more than four we need a vDSO. * - * Note: the concept clashes with user mode linux. If you use UML and - * want per guest time just set the kernel.vsyscall64 sysctl to 0. + * Note: the concept clashes with user mode linux. UML users should + * use the vDSO. */ /* Disable profiling for userspace code: */ @@ -32,9 +33,12 @@ #include <linux/cpu.h> #include <linux/smp.h> #include <linux/notifier.h> +#include <linux/syscalls.h> +#include <linux/ratelimit.h> #include <asm/vsyscall.h> #include <asm/pgtable.h> +#include <asm/compat.h> #include <asm/page.h> #include <asm/unistd.h> #include <asm/fixmap.h> @@ -44,16 +48,12 @@ #include <asm/desc.h> #include <asm/topology.h> #include <asm/vgtod.h> - -#define __vsyscall(nr) \ - __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace -#define __syscall_clobber "r11","cx","memory" +#include <asm/traps.h> DEFINE_VVAR(int, vgetcpu_mode); DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = { .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), - .sysctl_enabled = 1, }; void update_vsyscall_tz(void) @@ -72,179 +72,149 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, unsigned long flags; write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + /* copy vsyscall data */ - vsyscall_gtod_data.clock.vread = clock->vread; - vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; - vsyscall_gtod_data.clock.mask = clock->mask; - vsyscall_gtod_data.clock.mult = mult; - vsyscall_gtod_data.clock.shift = clock->shift; - vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; - vsyscall_gtod_data.wall_to_monotonic = *wtm; - vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); + vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; + vsyscall_gtod_data |