diff options
Diffstat (limited to 'arch/x86/kernel/vsyscall_64.c')
| -rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 166 |
1 files changed, 81 insertions, 85 deletions
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index b07ba939356..ea5b5709aa7 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -18,6 +18,8 @@ * use the vDSO. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/time.h> #include <linux/init.h> #include <linux/kernel.h> @@ -26,7 +28,7 @@ #include <linux/jiffies.h> #include <linux/sysctl.h> #include <linux/topology.h> -#include <linux/clocksource.h> +#include <linux/timekeeper_internal.h> #include <linux/getcpu.h> #include <linux/cpu.h> #include <linux/smp.h> @@ -45,17 +47,12 @@ #include <asm/segment.h> #include <asm/desc.h> #include <asm/topology.h> -#include <asm/vgtod.h> #include <asm/traps.h> #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" DEFINE_VVAR(int, vgetcpu_mode); -DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = -{ - .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), -}; static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; @@ -78,59 +75,23 @@ static int __init vsyscall_setup(char *str) } early_param("vsyscall", vsyscall_setup); -void update_vsyscall_tz(void) -{ - unsigned long flags; - - write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); - /* sys_tz has changed */ - vsyscall_gtod_data.sys_tz = sys_tz; - write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); -} - -void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, - struct clocksource *clock, u32 mult) -{ - unsigned long flags; - - write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); - - /* copy vsyscall data */ - vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; - vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; - vsyscall_gtod_data.clock.mask = clock->mask; - vsyscall_gtod_data.clock.mult = mult; - vsyscall_gtod_data.clock.shift = clock->shift; - vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; - vsyscall_gtod_data.wall_to_monotonic = *wtm; - vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); - - write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); -} - static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, const char *message) { - static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - struct task_struct *tsk; - - if (!show_unhandled_signals || !__ratelimit(&rs)) + if (!show_unhandled_signals) return; - tsk = current; - - printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", - level, tsk->comm, task_pid_nr(tsk), - message, regs->ip, regs->cs, - regs->sp, regs->ax, regs->si, regs->di); + pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", + level, current->comm, task_pid_nr(current), + message, regs->ip, regs->cs, + regs->sp, regs->ax, regs->si, regs->di); } static int addr_to_vsyscall_nr(unsigned long addr) { int nr; - if ((addr & ~0xC00UL) != VSYSCALL_START) + if ((addr & ~0xC00UL) != VSYSCALL_ADDR) return -EINVAL; nr = (addr & 0xC00UL) >> 10; @@ -153,7 +114,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) thread->error_code = 6; /* user fault, no page, write */ thread->cr2 = ptr; - thread->trap_no = 14; + thread->trap_nr = X86_TRAP_PF; memset(&info, 0, sizeof(info)); info.si_signo = SIGSEGV; @@ -172,7 +133,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) { struct task_struct *tsk; unsigned long caller; - int vsyscall_nr; + int vsyscall_nr, syscall_nr, tmp; int prev_sig_on_uaccess_error; long ret; @@ -206,8 +167,63 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) } tsk = current; - if (seccomp_mode(&tsk->seccomp)) - do_exit(SIGKILL); + + /* + * Check for access_ok violations and find the syscall nr. + * + * NULL is a valid user pointer (in the access_ok sense) on 32-bit and + * 64-bit, so we don't need to special-case it here. For all the + * vsyscalls, NULL means "don't write anything" not "write it at + * address 0". + */ + switch (vsyscall_nr) { + case 0: + if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || + !write_ok_or_segv(regs->si, sizeof(struct timezone))) { + ret = -EFAULT; + goto check_fault; + } + + syscall_nr = __NR_gettimeofday; + break; + + case 1: + if (!write_ok_or_segv(regs->di, sizeof(time_t))) { + ret = -EFAULT; + goto check_fault; + } + + syscall_nr = __NR_time; + break; + + case 2: + if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || + !write_ok_or_segv(regs->si, sizeof(unsigned))) { + ret = -EFAULT; + goto check_fault; + } + + syscall_nr = __NR_getcpu; + break; + } + + /* + * Handle seccomp. regs->ip must be the original value. + * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt. + * + * We could optimize the seccomp disabled case, but performance + * here doesn't matter. + */ + regs->orig_ax = syscall_nr; + regs->ax = -ENOSYS; + tmp = secure_computing(syscall_nr); + if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { + warn_bad_vsyscall(KERN_DEBUG, regs, + "seccomp tried to change syscall nr or ip"); + do_exit(SIGSYS); + } + if (tmp) + goto do_ret; /* skip requested */ /* * With a real vsyscall, page faults cause SIGSEGV. We want to @@ -216,44 +232,28 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; current_thread_info()->sig_on_uaccess_error = 1; - /* - * 0 is a valid user pointer (in the access_ok sense) on 32-bit and - * 64-bit, so we don't need to special-case it here. For all the - * vsyscalls, 0 means "don't write anything" not "write it at - * address 0". - */ ret = -EFAULT; switch (vsyscall_nr) { case 0: - if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || - !write_ok_or_segv(regs->si, sizeof(struct timezone))) - break; - ret = sys_gettimeofday( (struct timeval __user *)regs->di, (struct timezone __user *)regs->si); break; case 1: - if (!write_ok_or_segv(regs->di, sizeof(time_t))) - break; - ret = sys_time((time_t __user *)regs->di); break; case 2: - if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || - !write_ok_or_segv(regs->si, sizeof(unsigned))) - break; - ret = sys_getcpu((unsigned __user *)regs->di, (unsigned __user *)regs->si, - 0); + NULL); break; } current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; +check_fault: if (ret == -EFAULT) { /* Bad news -- userspace fed a bad pointer to a vsyscall. */ warn_bad_vsyscall(KERN_INFO, regs, @@ -272,10 +272,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) regs->ax = ret; +do_ret: /* Emulate a ret instruction. */ regs->ip = caller; regs->sp += 8; - return true; sigsegv: @@ -287,7 +287,7 @@ sigsegv: * Assume __initcall executes before all user space. Hopefully kmod * doesn't violate that. We'll find out if it does. */ -static void __cpuinit vsyscall_set_cpu(int cpu) +static void vsyscall_set_cpu(int cpu) { unsigned long d; unsigned long node = 0; @@ -309,13 +309,13 @@ static void __cpuinit vsyscall_set_cpu(int cpu) write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); } -static void __cpuinit cpu_vsyscall_init(void *arg) +static void cpu_vsyscall_init(void *arg) { /* preemption should be already off */ vsyscall_set_cpu(raw_smp_processor_id()); } -static int __cpuinit +static int cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) { long cpu = (long)arg; @@ -330,28 +330,24 @@ void __init map_vsyscall(void) { extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - extern char __vvar_page; - unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); - __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall, + __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, vsyscall_mode == NATIVE ? PAGE_KERNEL_VSYSCALL : PAGE_KERNEL_VVAR); - BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) != - (unsigned long)VSYSCALL_START); - - __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); - BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != - (unsigned long)VVAR_ADDRESS); + BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != + (unsigned long)VSYSCALL_ADDR); } static int __init vsyscall_init(void) { - BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)); + cpu_notifier_register_begin(); on_each_cpu(cpu_vsyscall_init, NULL, 1); /* notifier priority > KVM */ - hotcpu_notifier(cpu_vsyscall_notifier, 30); + __hotcpu_notifier(cpu_vsyscall_notifier, 30); + + cpu_notifier_register_done(); return 0; } |
