diff options
Diffstat (limited to 'arch/um/kernel')
37 files changed, 1225 insertions, 1651 deletions
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index 499e5e95e60..d8b78a03855 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -3,17 +3,22 @@ # Licensed under the GPL # +CPPFLAGS_vmlinux.lds := -DSTART=$(LDS_START) \ + -DELF_ARCH=$(LDS_ELF_ARCH) \ + -DELF_FORMAT=$(LDS_ELF_FORMAT) \ + $(LDS_EXTRA) extra-y := vmlinux.lds clean-files := -obj-y = config.o exec.o exitcode.o init_task.o irq.o ksyms.o mem.o \ +obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ physmem.o process.o ptrace.o reboot.o sigio.o \ - signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o uaccess.o \ - um_arch.o umid.o skas/ + signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o \ + um_arch.o umid.o maccess.o skas/ obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o obj-$(CONFIG_GPROF) += gprof_syms.o obj-$(CONFIG_GCOV) += gmon_syms.o +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o USER_OBJS := config.o @@ -28,7 +33,7 @@ $(obj)/config.tmp: $(objtree)/.config FORCE $(call if_changed,quote1) quiet_cmd_quote1 = QUOTE $@ - cmd_quote1 = sed -e 's/"/\\"/g' -e 's/^/"/' -e 's/$$/\\n"/' \ + cmd_quote1 = sed -e 's/"/\\"/g' -e 's/^/"/' -e 's/$$/\\n",/' \ $< > $@ $(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE @@ -36,9 +41,9 @@ $(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE quiet_cmd_quote2 = QUOTE $@ cmd_quote2 = sed -e '/CONFIG/{' \ - -e 's/"CONFIG"\;/""/' \ + -e 's/"CONFIG"//' \ -e 'r $(obj)/config.tmp' \ -e 'a \' \ - -e '""\;' \ + -e '""' \ -e '}' \ $< > $@ diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c index 91ea538e161..1fb12235ab9 100644 --- a/arch/um/kernel/asm-offsets.c +++ b/arch/um/kernel/asm-offsets.c @@ -1 +1 @@ -#include "sysdep/kernel-offsets.h" +#include <sysdep/kernel-offsets.h> diff --git a/arch/um/kernel/config.c.in b/arch/um/kernel/config.c.in index c062cbfe386..972bf165956 100644 --- a/arch/um/kernel/config.c.in +++ b/arch/um/kernel/config.c.in @@ -5,13 +5,17 @@ #include <stdio.h> #include <stdlib.h> -#include "init.h" +#include <init.h> -static __initdata char *config = "CONFIG"; +static __initdata const char *config[] = { +"CONFIG" +}; static int __init print_config(char *line, int *add) { - printf("%s", config); + int i; + for (i = 0; i < sizeof(config)/sizeof(config[0]); i++) + printf("%s", config[i]); exit(0); } @@ -20,13 +24,3 @@ __uml_setup("--showconfig", print_config, " Prints the config file that this UML binary was generated from.\n\n" ); -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 26090b7f323..adde088aeef 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -1,4 +1,5 @@ #include <asm-generic/vmlinux.lds.h> +#include <asm/page.h> OUTPUT_FORMAT(ELF_FORMAT) OUTPUT_ARCH(ELF_ARCH) @@ -13,15 +14,9 @@ SECTIONS __binary_start = .; . = ALIGN(4096); /* Init code and data */ _text = .; - _stext = .; - __init_begin = .; - .init.text : { - _sinittext = .; - INIT_TEXT - _einittext = .; - } + INIT_TEXT_SECTION(PAGE_SIZE) - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); /* Read-only sections, merged into text segment: */ .hash : { *(.hash) } @@ -53,13 +48,24 @@ SECTIONS .rela.got : { *(.rela.got) } .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) } .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) } - .rel.plt : { *(.rel.plt) } - .rela.plt : { *(.rela.plt) } + .rel.plt : { + *(.rel.plt) + PROVIDE_HIDDEN(__rel_iplt_start = .); + *(.rel.iplt) + PROVIDE_HIDDEN(__rel_iplt_end = .); + } + .rela.plt : { + *(.rela.plt) + PROVIDE_HIDDEN(__rela_iplt_start = .); + *(.rela.iplt) + PROVIDE_HIDDEN(__rela_iplt_end = .); + } .init : { KEEP (*(.init)) } =0x90909090 .plt : { *(.plt) } .text : { + _stext = .; TEXT_TEXT SCHED_TEXT LOCK_TEXT @@ -68,9 +74,9 @@ SECTIONS /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); } =0x90909090 - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); .syscall_stub : { __syscall_stub_start = .; *(.__syscall_stub*) @@ -82,9 +88,11 @@ SECTIONS .kstrtab : { *(.kstrtab) } - #include "asm/common.lds.S" + #include <asm/common.lds.S> + __init_begin = .; init.data : { INIT_DATA } + __init_end = .; /* Ensure the __preinit_array_start label is properly aligned. We could instead move the label definition inside the section, but @@ -95,10 +103,9 @@ SECTIONS .init_array : { *(.init_array) } .fini_array : { *(.fini_array) } .data : { - . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ - *(.data.init_task) + INIT_TASK_DATA(KERNEL_STACK_SIZE) . = ALIGN(KERNEL_STACK_SIZE); - *(.data.init_irqstack) + *(.data..init_irqstack) DATA_DATA *(.data.* .gnu.linkonce.d.*) SORT(CONSTRUCTORS) @@ -149,10 +156,13 @@ SECTIONS . = ALIGN(32 / 8); . = ALIGN(32 / 8); } + __bss_stop = .; _end = .; PROVIDE (end = .); STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/um/kernel/early_printk.c b/arch/um/kernel/early_printk.c new file mode 100644 index 00000000000..4a0800bc37b --- /dev/null +++ b/arch/um/kernel/early_printk.c @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2011 Richard Weinberger <richrd@nod.at> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/console.h> +#include <linux/init.h> +#include <os.h> + +static void early_console_write(struct console *con, const char *s, unsigned int n) +{ + um_early_printk(s, n); +} + +static struct console early_console_dev = { + .name = "earlycon", + .write = early_console_write, + .flags = CON_BOOT, + .index = -1, +}; + +static int __init setup_early_printk(char *buf) +{ + if (!early_console) { + early_console = &early_console_dev; + register_console(&early_console_dev); + } + return 0; +} + +early_param("earlyprintk", setup_early_printk); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 8196450451c..0d7103c9eff 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -3,96 +3,48 @@ * Licensed under the GPL */ -#include "linux/stddef.h" -#include "linux/fs.h" -#include "linux/smp_lock.h" -#include "linux/ptrace.h" -#include "linux/sched.h" -#include "asm/current.h" -#include "asm/processor.h" -#include "asm/uaccess.h" -#include "as-layout.h" -#include "mem_user.h" -#include "skas.h" -#include "os.h" +#include <linux/stddef.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <asm/current.h> +#include <asm/processor.h> +#include <asm/uaccess.h> +#include <as-layout.h> +#include <mem_user.h> +#include <skas.h> +#include <os.h> void flush_thread(void) { void *data = NULL; - unsigned long end = proc_mm ? task_size : STUB_START; int ret; arch_flush_thread(¤t->thread.arch); - ret = unmap(¤t->mm->context.id, 0, end, 1, &data); + ret = unmap(¤t->mm->context.id, 0, STUB_START, 0, &data); + ret = ret || unmap(¤t->mm->context.id, STUB_END, + host_task_size - STUB_END, 1, &data); if (ret) { printk(KERN_ERR "flush_thread - clearing address space failed, " "err = %d\n", ret); force_sig(SIGKILL, current); } + get_safe_registers(current_pt_regs()->regs.gp, + current_pt_regs()->regs.fp); __switch_mm(¤t->mm->context.id); } void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { - set_fs(USER_DS); PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; -} - -#ifdef CONFIG_TTY_LOG -extern void log_exec(char **argv, void *tty); -#endif - -static long execve1(char *file, char __user * __user *argv, - char __user *__user *env) -{ - long error; -#ifdef CONFIG_TTY_LOG - struct tty_struct *tty; - - mutex_lock(&tty_mutex); - tty = get_current_tty(); - if (tty) - log_exec(argv, tty); - mutex_unlock(&tty_mutex); -#endif - error = do_execve(file, argv, env, ¤t->thread.regs); - if (error == 0) { - task_lock(current); - current->ptrace &= ~PT_DTRACE; + current->ptrace &= ~PT_DTRACE; #ifdef SUBARCH_EXECVE1 - SUBARCH_EXECVE1(¤t->thread.regs.regs); + SUBARCH_EXECVE1(regs->regs); #endif - task_unlock(current); - } - return error; -} - -long um_execve(char *file, char __user *__user *argv, char __user *__user *env) -{ - long err; - - err = execve1(file, argv, env); - if (!err) - UML_LONGJMP(current->thread.exec_buf, 1); - return err; -} - -long sys_execve(char __user *file, char __user *__user *argv, - char __user *__user *env) -{ - long error; - char *filename; - - lock_kernel(); - filename = getname(file); - error = PTR_ERR(filename); - if (IS_ERR(filename)) goto out; - error = execve1(filename, argv, env); - putname(filename); - out: - unlock_kernel(); - return error; } +EXPORT_SYMBOL(start_thread); diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c index c716b5a6db1..41ebbfebb33 100644 --- a/arch/um/kernel/exitcode.c +++ b/arch/um/kernel/exitcode.c @@ -1,71 +1,79 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "linux/kernel.h" -#include "linux/init.h" -#include "linux/ctype.h" -#include "linux/proc_fs.h" -#include "asm/uaccess.h" +#include <linux/ctype.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/types.h> +#include <asm/uaccess.h> -/* If read and write race, the read will still atomically read a valid +/* + * If read and write race, the read will still atomically read a valid * value. */ int uml_exitcode = 0; -static int read_proc_exitcode(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int exitcode_proc_show(struct seq_file *m, void *v) { - int len, val; + int val; - /* Save uml_exitcode in a local so that we don't need to guarantee + /* + * Save uml_exitcode in a local so that we don't need to guarantee * that sprintf accesses it atomically. */ val = uml_exitcode; - len = sprintf(page, "%d\n", val); - len -= off; - if(len <= off+count) - *eof = 1; - *start = page + off; - if(len > count) - len = count; - if(len < 0) - len = 0; - return len; + seq_printf(m, "%d\n", val); + return 0; +} + +static int exitcode_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, exitcode_proc_show, NULL); } -static int write_proc_exitcode(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t exitcode_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *pos) { char *end, buf[sizeof("nnnnn\0")]; + size_t size; int tmp; - if(copy_from_user(buf, buffer, count)) + size = min(count, sizeof(buf)); + if (copy_from_user(buf, buffer, size)) return -EFAULT; tmp = simple_strtol(buf, &end, 0); - if((*end != '\0') && !isspace(*end)) + if ((*end != '\0') && !isspace(*end)) return -EINVAL; uml_exitcode = tmp; return count; } +static const struct file_operations exitcode_proc_fops = { + .owner = THIS_MODULE, + .open = exitcode_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = exitcode_proc_write, +}; + static int make_proc_exitcode(void) { struct proc_dir_entry *ent; - ent = create_proc_entry("exitcode", 0600, &proc_root); - if(ent == NULL){ + ent = proc_create("exitcode", 0600, NULL, &exitcode_proc_fops); + if (ent == NULL) { printk(KERN_WARNING "make_proc_exitcode : Failed to register " "/proc/exitcode\n"); return 0; } - - ent->read_proc = read_proc_exitcode; - ent->write_proc = write_proc_exitcode; - return 0; } diff --git a/arch/um/kernel/gmon_syms.c b/arch/um/kernel/gmon_syms.c index 734f873cab1..1bf61266da8 100644 --- a/arch/um/kernel/gmon_syms.c +++ b/arch/um/kernel/gmon_syms.c @@ -1,23 +1,9 @@ -/* - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "linux/module.h" +#include <linux/module.h> extern void __bb_init_func(void *) __attribute__((weak)); EXPORT_SYMBOL(__bb_init_func); - -/* This is defined (and referred to in profiling stub code) only by some GCC - * versions in libgcov. - * - * Since SuSE backported the fix, we cannot handle it depending on GCC version. - * So, unconditionally export it. But also give it a weak declaration, which will - * be overridden by any other one. - */ - -extern void __gcov_init(void *) __attribute__((weak)); -EXPORT_SYMBOL(__gcov_init); - -extern void __gcov_merge_add(void *) __attribute__((weak)); -EXPORT_SYMBOL(__gcov_merge_add); diff --git a/arch/um/kernel/gprof_syms.c b/arch/um/kernel/gprof_syms.c index 9244f018d44..74ddb44288a 100644 --- a/arch/um/kernel/gprof_syms.c +++ b/arch/um/kernel/gprof_syms.c @@ -1,20 +1,9 @@ /* - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "linux/module.h" +#include <linux/module.h> extern void mcount(void); EXPORT_SYMBOL(mcount); - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c deleted file mode 100644 index dcfceca9505..00000000000 --- a/arch/um/kernel/init_task.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,intel.linux}.com) - * Licensed under the GPL - */ - -#include "linux/sched.h" -#include "linux/init_task.h" -#include "linux/fs.h" -#include "linux/module.h" -#include "linux/mqueue.h" -#include "asm/uaccess.h" - -static struct fs_struct init_fs = INIT_FS; -struct mm_struct init_mm = INIT_MM(init_mm); -static struct files_struct init_files = INIT_FILES; -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -EXPORT_SYMBOL(init_mm); - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ - -struct task_struct init_task = INIT_TASK(init_task); - -EXPORT_SYMBOL(init_task); - -/* - * Initial thread structure. - * - * We need to make sure that this is aligned due to the - * way process stacks are handled. This is done by having a special - * "init_task" linker map entry.. - */ - -union thread_union init_thread_union - __attribute__((__section__(".data.init_task"))) = - { INIT_THREAD_INFO(init_task) }; - -union thread_union cpu0_irqstack - __attribute__((__section__(".data.init_irqstack"))) = - { INIT_THREAD_INFO(init_task) }; diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c index 16dc43e9d94..55cead809b1 100644 --- a/arch/um/kernel/initrd.c +++ b/arch/um/kernel/initrd.c @@ -1,19 +1,18 @@ /* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "linux/init.h" -#include "linux/bootmem.h" -#include "linux/initrd.h" -#include "asm/types.h" -#include "kern_util.h" -#include "initrd.h" -#include "init.h" -#include "os.h" +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/initrd.h> +#include <asm/types.h> +#include <init.h> +#include <os.h> /* Changed by uml_initrd_setup, which is a setup */ static char *initrd __initdata = NULL; +static int load_initrd(char *filename, void *buf, int size); static int __init read_initrd(void) { @@ -21,18 +20,27 @@ static int __init read_initrd(void) long long size; int err; - if(initrd == NULL) + if (initrd == NULL) return 0; err = os_file_size(initrd, &size); - if(err) + if (err) return 0; + /* + * This is necessary because alloc_bootmem craps out if you + * ask for no memory. + */ + if (size == 0) { + printk(KERN_ERR "\"%s\" is a zero-size initrd\n", initrd); + return 0; + } + area = alloc_bootmem(size); - if(area == NULL) + if (area == NULL) return 0; - if(load_initrd(initrd, area, size) == -1) + if (load_initrd(initrd, area, size) == -1) return 0; initrd_start = (unsigned long) area; @@ -54,18 +62,20 @@ __uml_setup("initrd=", uml_initrd_setup, " name of the file containing the image.\n\n" ); -int load_initrd(char *filename, void *buf, int size) +static int load_initrd(char *filename, void *buf, int size) { int fd, n; fd = os_open_file(filename, of_read(OPENFLAGS()), 0); - if(fd < 0){ - printk("Opening '%s' failed - err = %d\n", filename, -fd); + if (fd < 0) { + printk(KERN_ERR "Opening '%s' failed - err = %d\n", filename, + -fd); return -1; } n = os_read_file(fd, buf, size); - if(n != size){ - printk("Read of %d bytes from '%s' failed, err = %d\n", size, + if (n != size) { + printk(KERN_ERR "Read of %d bytes from '%s' failed, " + "err = %d\n", size, filename, -n); return -1; } diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index ba11ccd6a8a..1d8505b1e29 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -5,59 +5,17 @@ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar */ -#include "linux/cpumask.h" -#include "linux/hardirq.h" -#include "linux/interrupt.h" -#include "linux/kernel_stat.h" -#include "linux/module.h" -#include "linux/seq_file.h" -#include "as-layout.h" -#include "kern_util.h" -#include "os.h" - -/* - * Generic, controller-independent functions: - */ - -int show_interrupts(struct seq_file *p, void *v) -{ - int i = *(loff_t *) v, j; - struct irqaction * action; - unsigned long flags; - - if (i == 0) { - seq_printf(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%d ",j); - seq_putc(p, '\n'); - } - - if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); - action = irq_desc[i].action; - if (!action) - goto skip; - seq_printf(p, "%3d: ",i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); -#endif - seq_printf(p, " %14s", irq_desc[i].chip->typename); - seq_printf(p, " %s", action->name); - - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } else if (i == NR_IRQS) - seq_putc(p, '\n'); - - return 0; -} +#include <linux/cpumask.h> +#include <linux/hardirq.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <as-layout.h> +#include <kern_util.h> +#include <os.h> /* * This list is accessed under irq_lock, except in sigio_handler, @@ -72,7 +30,7 @@ static struct irq_fd **last_irq_ptr = &active_fds; extern void free_irqs(void); -void sigio_handler(int sig, struct uml_pt_regs *regs) +void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { struct irq_fd *irq_fd; int n; @@ -102,15 +60,14 @@ void sigio_handler(int sig, struct uml_pt_regs *regs) static DEFINE_SPINLOCK(irq_lock); -int activate_fd(int irq, int fd, int type, void *dev_id) +static int activate_fd(int irq, int fd, int type, void *dev_id) { struct pollfd *tmp_pfd; struct irq_fd *new_fd, *irq_fd; unsigned long flags; - int pid, events, err, n; + int events, err, n; - pid = os_getpid(); - err = os_set_fd_async(fd, pid); + err = os_set_fd_async(fd); if (err < 0) goto out; @@ -127,7 +84,6 @@ int activate_fd(int irq, int fd, int type, void *dev_id) .fd = fd, .type = type, .irq = irq, - .pid = pid, .events = events, .current_events = 0 } ); @@ -218,7 +174,7 @@ static int same_irq_and_dev(struct irq_fd *irq, void *d) return ((irq->irq == data->irq) && (irq->id == data->dev)); } -void free_irq_by_irq_and_dev(unsigned int irq, void *dev) +static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) { struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq, .dev = dev }); @@ -302,6 +258,7 @@ void deactivate_fd(int fd, int irqnum) ignore_sigio_fd(fd); } +EXPORT_SYMBOL(deactivate_fd); /* * Called just before shutdown in order to provide a clean exec @@ -334,12 +291,19 @@ unsigned int do_IRQ(int irq, struct uml_pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs); irq_enter(); - __do_IRQ(irq); + generic_handle_irq(irq); irq_exit(); set_irq_regs(old_regs); return 1; } +void um_free_irq(unsigned int irq, void *dev) +{ + free_irq_by_irq_and_dev(irq, dev); + free_irq(irq, dev); +} +EXPORT_SYMBOL(um_free_irq); + int um_request_irq(unsigned int irq, int fd, int type, irq_handler_t handler, unsigned long irqflags, const char * devname, @@ -360,80 +324,40 @@ EXPORT_SYMBOL(um_request_irq); EXPORT_SYMBOL(reactivate_fd); /* - * hw_interrupt_type must define (startup || enable) && - * (shutdown || disable) && end + * irq_chip must define at least enable/disable and ack when + * the edge handler is used. */ -static void dummy(unsigned int irq) +static void dummy(struct irq_data *d) { } /* This is used for everything else than the timer. */ -static struct hw_interrupt_type normal_irq_type = { - .typename = "SIGIO", - .release = free_irq_by_irq_and_dev, - .disable = dummy, - .enable = dummy, - .ack = dummy, - .end = dummy +static struct irq_chip normal_irq_type = { + .name = "SIGIO", + .irq_disable = dummy, + .irq_enable = dummy, + .irq_ack = dummy, + .irq_mask = dummy, + .irq_unmask = dummy, }; -static struct hw_interrupt_type SIGVTALRM_irq_type = { - .typename = "SIGVTALRM", - .release = free_irq_by_irq_and_dev, - .shutdown = dummy, /* never called */ - .disable = dummy, - .enable = dummy, - .ack = dummy, - .end = dummy +static struct irq_chip SIGVTALRM_irq_type = { + .name = "SIGVTALRM", + .irq_disable = dummy, + .irq_enable = dummy, + .irq_ack = dummy, + .irq_mask = dummy, + .irq_unmask = dummy, }; void __init init_IRQ(void) { int i; - irq_desc[TIMER_IRQ].status = IRQ_DISABLED; - irq_desc[TIMER_IRQ].action = NULL; - irq_desc[TIMER_IRQ].depth = 1; - irq_desc[TIMER_IRQ].chip = &SIGVTALRM_irq_type; - enable_irq(TIMER_IRQ); - for (i = 1; i < NR_IRQS; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = NULL; - irq_desc[i].depth = 1; - irq_desc[i].chip = &normal_irq_type; - enable_irq(i); - } -} + irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq); -int init_aio_irq(int irq, char *name, irq_handler_t handler) -{ - int fds[2], err; - - err = os_pipe(fds, 1, 1); - if (err) { - printk(KERN_ERR "init_aio_irq - os_pipe failed, err = %d\n", - -err); - goto out; - } - - err = um_request_irq(irq, fds[0], IRQ_READ, handler, - IRQF_DISABLED | IRQF_SAMPLE_RANDOM, name, - (void *) (long) fds[0]); - if (err) { - printk(KERN_ERR "init_aio_irq - : um_request_irq failed, " - "err = %d\n", - err); - goto out_close; - } - - err = fds[1]; - goto out; - - out_close: - os_close_file(fds[0]); - os_close_file(fds[1]); - out: - return err; + for (i = 1; i < NR_IRQS; i++) + irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); } /* diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index 7c7142ba3bd..543c0475693 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -3,44 +3,15 @@ * Licensed under the GPL */ -#include "linux/module.h" -#include "linux/syscalls.h" -#include "asm/a.out.h" -#include "asm/tlbflush.h" -#include "asm/uaccess.h" -#include "as-layout.h" -#include "kern_util.h" -#include "mem_user.h" -#include "os.h" +#include <linux/module.h> +#include <os.h> -EXPORT_SYMBOL(uml_physmem); EXPORT_SYMBOL(set_signals); EXPORT_SYMBOL(get_signals); -EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(sys_waitpid); -EXPORT_SYMBOL(task_size); -EXPORT_SYMBOL(flush_tlb_range); -EXPORT_SYMBOL(host_task_size); -EXPORT_SYMBOL(arch_validate); -EXPORT_SYMBOL(get_kmem_end); - -EXPORT_SYMBOL(high_physmem); -EXPORT_SYMBOL(empty_zero_page); -EXPORT_SYMBOL(um_virt_to_phys); -EXPORT_SYMBOL(handle_page_fault); -EXPORT_SYMBOL(find_iomem); - -EXPORT_SYMBOL(strnlen_user); -EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(copy_to_user); -EXPORT_SYMBOL(copy_from_user); -EXPORT_SYMBOL(clear_user); -EXPORT_SYMBOL(uml_strdup); EXPORT_SYMBOL(os_stat_fd); EXPORT_SYMBOL(os_stat_file); EXPORT_SYMBOL(os_access); -EXPORT_SYMBOL(os_get_exec_close); EXPORT_SYMBOL(os_set_exec_close); EXPORT_SYMBOL(os_getpid); EXPORT_SYMBOL(os_open_file); @@ -64,17 +35,10 @@ EXPORT_SYMBOL(os_connect_socket); EXPORT_SYMBOL(os_accept_connection); EXPORT_SYMBOL(os_rcv_fd); EXPORT_SYMBOL(run_helper); -EXPORT_SYMBOL(start_thread); -EXPORT_SYMBOL(dump_thread); - -#ifdef CONFIG_SMP - -/* required for SMP */ - -extern void __write_lock_failed(rwlock_t *rw); -EXPORT_SYMBOL(__write_lock_failed); - -extern void __read_lock_failed(rwlock_t *rw); -EXPORT_SYMBOL(__read_lock_failed); +EXPORT_SYMBOL(os_major); +EXPORT_SYMBOL(os_minor); +EXPORT_SYMBOL(os_makedev); -#endif +EXPORT_SYMBOL(add_sigio_fd); +EXPORT_SYMBOL(ignore_sigio_fd); +EXPORT_SYMBOL(sigio_broken); diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c new file mode 100644 index 00000000000..1f3d5c4910d --- /dev/null +++ b/arch/um/kernel/maccess.c @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2013 Richard Weinberger <richrd@nod.at> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <os.h> + +long probe_kernel_read(void *dst, const void *src, size_t size) +{ + void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE); + + if ((unsigned long)src < PAGE_SIZE || size <= 0) + return -EFAULT; + + if (os_mincore(psrc, size + src - psrc) <= 0) + return -EFAULT; + + return __probe_kernel_read(dst, src, size); +} diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 59822dee438..8636e905426 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -1,97 +1,78 @@ /* - * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "linux/stddef.h" -#include "linux/kernel.h" -#include "linux/mm.h" -#include "linux/bootmem.h" -#include "linux/swap.h" -#include "linux/highmem.h" -#include "linux/gfp.h" -#include "asm/page.h" -#include "asm/fixmap.h" -#include "asm/pgalloc.h" -#include "kern_util.h" -#include "as-layout.h" -#include "kern.h" -#include "mem_user.h" -#include "um_uaccess.h" -#include "os.h" -#include "linux/types.h" -#include "linux/string.h" -#include "init.h" -#include "kern_constants.h" +#include <linux/stddef.h> +#include <linux/module.h> +#include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/slab.h> +#include <asm/fixmap.h> +#include <asm/page.h> +#include <as-layout.h> +#include <init.h> +#include <kern.h> +#include <kern_util.h> +#include <mem_user.h> +#include <os.h> /* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */ unsigned long *empty_zero_page = NULL; +EXPORT_SYMBOL(empty_zero_page); /* allocated in paging_init and unchanged thereafter */ -unsigned long *empty_bad_page = NULL; +static unsigned long *empty_bad_page = NULL; + +/* + * Initialized during boot, and readonly for initializing page tables + * afterwards + */ pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +/* Initialized at boot time, and readonly after that */ unsigned long long highmem; int kmalloc_ok = 0; +/* Used during early boot */ static unsigned long brk_end; -void unmap_physmem(void) -{ - os_unmap_memory((void *) brk_end, uml_reserved - brk_end); -} - -static void map_cb(void *unused) -{ - map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); -} - #ifdef CONFIG_HIGHMEM static void setup_highmem(unsigned long highmem_start, unsigned long highmem_len) { - struct page *page; unsigned long highmem_pfn; int i; highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT; - for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){ - page = &mem_map[highmem_pfn + i]; - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - } + for (i = 0; i < highmem_len >> PAGE_SHIFT; i++) + free_highmem_page(&mem_map[highmem_pfn + i]); } #endif void __init mem_init(void) { /* clear the zero-page */ - memset((void *) empty_zero_page, 0, PAGE_SIZE); + memset(empty_zero_page, 0, PAGE_SIZE); /* Map in the area just after the brk now that kmalloc is about * to be turned on. */ brk_end = (unsigned long) UML_ROUND_UP(sbrk(0)); - map_cb(NULL); - initial_thread_cb(map_cb, NULL); + map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); free_bootmem(__pa(brk_end), uml_reserved - brk_end); uml_reserved = brk_end; /* this will put all low memory onto the freelists */ - totalram_pages = free_all_bootmem(); + free_all_bootmem(); max_low_pfn = totalram_pages; #ifdef CONFIG_HIGHMEM - totalhigh_pages = highmem >> PAGE_SHIFT; - totalram_pages += totalhigh_pages; + setup_highmem(end_iomem, highmem); #endif - num_physpages = totalram_pages; max_pfn = totalram_pages; - printk(KERN_INFO "Memory: %luk available\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); kmalloc_ok = 1; - -#ifdef CONFIG_HIGHMEM - setup_highmem(end_iomem, highmem); -#endif } /* @@ -119,7 +100,7 @@ static void __init one_md_table_init(pud_t *pud) #endif } -static void __init fixrange_init(unsigned long start, unsigned long end, +static void __init fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base) { pgd_t *pgd; @@ -138,7 +119,7 @@ static void __init fixrange_init(unsigned long start, unsigned long end, if (pud_none(*pud)) one_md_table_init(pud); pmd = pmd_offset(pud, vaddr); - for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { + for (; (j < PTRS_PER_PMD) && (vaddr < end); pmd++, j++) { one_page_table_init(pmd); vaddr += PMD_SIZE; } @@ -152,7 +133,7 @@ pgprot_t kmap_prot; #define kmap_get_fixmap_pte(vaddr) \ pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)),\ - (vaddr)), (vaddr)) + (vaddr)), (vaddr)) static void __init kmap_init(void) { @@ -197,21 +178,23 @@ static void __init fixaddr_user_init( void) pud_t *pud; pmd_t *pmd; pte_t *pte; - unsigned long paddr, vaddr = FIXADDR_USER_START; + phys_t p; + unsigned long v, vaddr = FIXADDR_USER_START; - if ( ! size ) + if (!size) return; fixrange_init( FIXADDR_USER_START, FIXADDR_USER_END, swapper_pg_dir); - paddr = (unsigned long)alloc_bootmem_low_pages( size); - memcpy( (void *)paddr, (void *)FIXADDR_USER_START, size); - paddr = __pa(paddr); - for ( ; size > 0; size-=PAGE_SIZE, vaddr+=PAGE_SIZE, paddr+=PAGE_SIZE){ + v = (unsigned long) alloc_bootmem_low_pages(size); + memcpy((void *) v , (void *) FIXADDR_USER_START, size); + p = __pa(v); + for ( ; size > 0; size -= PAGE_SIZE, vaddr += PAGE_SIZE, + p += PAGE_SIZE) { pgd = swapper_pg_dir + pgd_index(vaddr); pud = pud_offset(pgd, vaddr); pmd = pmd_offset(pud, vaddr); pte = pte_offset_kernel(pmd, vaddr); - pte_set_val( (*pte), paddr, PAGE_READONLY); + pte_set_val(*pte, p, PAGE_READONLY); } #endif } @@ -223,7 +206,7 @@ void __init paging_init(void) empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); - for(i = 0; i < ARRAY_SIZE(zones_size); i++) + for (i = 0; i < ARRAY_SIZE(zones_size); i++) zones_size[i] = 0; zones_size[ZONE_NORMAL] = (end_iomem >> PAGE_SHIFT) - @@ -247,38 +230,8 @@ void __init paging_init(void) #endif } -struct page *arch_validate(struct page *page, gfp_t mask, int order) -{ - unsigned long addr, zero = 0; - int i; - - again: - if(page == NULL) - return page; - if(PageHighMem(page)) - return page; - - addr = (unsigned long) page_address(page); - for(i = 0; i < (1 << order); i++){ - current->thread.fault_addr = (void *) addr; - if(__do_copy_to_user((void __user *) addr, &zero, - sizeof(zero), - ¤t->thread.fault_addr, - ¤t->thread.fault_catcher)){ - if(!(mask & __GFP_WAIT)) - return NULL; - else break; - } - addr += PAGE_SIZE; - } - - if(i == (1 << order)) - return page; - page = alloc_pages(mask, order); - goto again; -} - -/* This can't do anything because nothing in the kernel image can be freed +/* + * This can't do anything because nothing in the kernel image can be freed * since it's not in kernel physical memory. */ @@ -289,51 +242,11 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - if (start < end) - printk ("Freeing initrd memory: %ldk freed\n", - (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - init_page_count(virt_to_page(start)); - free_page(start); - totalram_pages++; - } + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif -void show_mem(void) -{ - int pfn, total = 0, reserved = 0; - int shared = 0, cached = 0; - int highmem = 0; - struct page *page; - - printk("Mem-info:\n"); - show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - pfn = max_mapnr; - while(pfn-- > 0) { - page = pfn_to_page(pfn); - total++; - if(PageHighMem(page)) - highmem++; - if(PageReserved(page)) - reserved++; - else if(PageSwapCache(page)) - cached++; - else if(page_count(page)) - shared += page_count(page) - 1; - } - printk("%d pages of RAM\n", total); - printk("%d pages of HIGHMEM\n", highmem); - printk("%d reserved pages\n", reserved); - printk("%d pages shared\n", shared); - printk("%d pages swap cached\n", cached); -} - -/* - * Allocate and free page tables. - */ +/* Allocate and free page tables. */ pgd_t *pgd_alloc(struct mm_struct *mm) { @@ -341,14 +254,14 @@ pgd_t *pgd_alloc(struct mm_struct *mm) if (pgd) { memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); - memcpy(pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, + memcpy(pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); } return pgd; } -void pgd_free(pgd_t *pgd) +void pgd_free(struct mm_struct *mm, pgd_t *pgd) { free_page((unsigned long) pgd); } @@ -361,10 +274,33 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) return pte; } -struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + if (!pte) + return NULL; + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } + +#ifdef CONFIG_3_LEVEL_PGTABLES +pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL); + + if (pmd) + memset(pmd, 0, PAGE_SIZE); + + return pmd; +} +#endif + +void *uml_kmalloc(int size, int flags) +{ + return kmalloc(size, flags); +} diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index e66432f4248..30fdd5d0067 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -3,20 +3,22 @@ * Licensed under the GPL */ -#include "linux/bootmem.h" -#include "linux/mm.h" -#include "linux/pfn.h" -#include "asm/page.h" -#include "as-layout.h" -#include "init.h" -#include "kern.h" -#include "mem_user.h" -#include "os.h" +#include <linux/module.h> +#include <linux/bootmem.h> +#include <linux/mm.h> +#include <linux/pfn.h> +#include <asm/page.h> +#include <as-layout.h> +#include <init.h> +#include <kern.h> +#include <mem_user.h> +#include <os.h> static int physmem_fd = -1; /* Changed during early boot */ unsigned long high_physmem; +EXPORT_SYMBOL(high_physmem); extern unsigned long long physmem_size; @@ -55,16 +57,6 @@ int __init init_maps(unsigned long physmem, unsigned long iomem, return 0; } -/* Changed during early boot */ -static unsigned long kmem_top = 0; - -unsigned long get_kmem_end(void) -{ - if (kmem_top == 0) - kmem_top = host_task_size - 1024 * 1024; - return kmem_top; -} - void map_memory(unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x) { @@ -111,6 +103,7 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end, */ os_seek_file(physmem_fd, __pa(&__syscall_stub_start)); os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE); + os_fsync_file(physmem_fd); bootmap_size = init_bootmem(pfn, pfn + delta); free_bootmem(__pa(reserve_end) + bootmap_size, @@ -174,10 +167,10 @@ __uml_setup("iomem=", parse_iomem, * setup_iomem, both of which run during early boot. Afterwards, it's * unchanged. */ -struct iomem_region *iomem_regions = NULL; +struct iomem_region *iomem_regions; -/* Initialized in parse_iomem */ -int iomem_size = 0; +/* Initialized in parse_iomem and unchanged thereafter */ +int iomem_size; unsigned long find_iomem(char *driver, unsigned long *len_out) { @@ -194,8 +187,9 @@ unsigned long find_iomem(char *driver, unsigned long *len_out) return 0; } +EXPORT_SYMBOL(find_iomem); -int setup_iomem(void) +static int setup_iomem(void) { struct iomem_region *region = iomem_regions; unsigned long iomem_start = high_physmem + PAGE_SIZE; diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 0eae00b3e58..f17bca8ed2c 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -4,24 +4,29 @@ * Licensed under the GPL */ -#include "linux/stddef.h" -#include "linux/err.h" -#include "linux/hardirq.h" -#include "linux/mm.h" -#include "linux/personality.h" -#include "linux/proc_fs.h" -#include "linux/ptrace.h" -#include "linux/random.h" -#include "linux/sched.h" -#include "linux/tick.h" -#include "linux/threads.h" -#include "asm/pgtable.h" -#include "asm/uaccess.h" -#include "as-layout.h" -#include "kern_util.h" -#include "os.h" -#include "skas.h" -#include "tlb.h" +#include <linux/stddef.h> +#include <linux/err.h> +#include <linux/hardirq.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/personality.h> +#include <linux/proc_fs.h> +#include <linux/ptrace.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/tick.h> +#include <linux/threads.h> +#include <linux/tracehook.h> +#include <asm/current.h> +#include <asm/pgtable.h> +#include <asm/mmu_context.h> +#include <asm/uaccess.h> +#include <as-layout.h> +#include <kern_util.h> +#include <os.h> +#include <skas.h> /* * This is a per-cpu array. A processor only modifies its entry and it only @@ -30,7 +35,7 @@ */ struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } }; -static inline int external_pid(struct task_struct *task) +static inline int external_pid(void) { /* FIXME: Need to look up userspace_pid by cpu */ return userspace_pid[0]; @@ -40,7 +45,7 @@ int pid_to_processor_id(int pid) { int i; - for(i = 0; i < ncpus; i++) { + for (i = 0; i < ncpus; i++) { if (cpu_tasks[i].pid == pid) return i; } @@ -60,76 +65,48 @@ unsigned long alloc_stack(int order, int atomic) if (atomic) flags = GFP_ATOMIC; page = __get_free_pages(flags, order); - if (page == 0) - return 0; return page; } -int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -{ - int pid; - - current->thread.request.u.thread.proc = fn; - current->thread.request.u.thread.arg = arg; - pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, - ¤t->thread.regs, 0, NULL, NULL); - return pid; -} - static inline void set_current(struct task_struct *task) { cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task) - { external_pid(task), task }); + { external_pid(), task }); } -extern void arch_switch_to(struct task_struct *from, struct task_struct *to); +extern void arch_switch_to(struct task_struct *to); -void *_switch_to(void *prev, void *next, void *last) +void *__switch_to(struct task_struct *from, struct task_struct *to) { - struct task_struct *from = prev; - struct task_struct *to= next; - to->thread.prev_sched = from; set_current(to); - do { - current->thread.saved_task = NULL; - - switch_threads(&from->thread.switch_buf, - &to->thread.switch_buf); - - arch_switch_to(current->thread.prev_sched, current); - - if (current->thread.saved_task) - show_regs(&(current->thread.regs)); - next= current->thread.saved_task; - prev= current; - } while(current->thread.saved_task); + switch_threads(&from->thread.switch_buf, &to->thread.switch_buf); + arch_switch_to(current); return current->thread.prev_sched; - } void interrupt_end(void) { if (need_resched()) schedule(); - if (test_tsk_thread_flag(current, TIF_SIGPENDING)) + if (test_thread_flag(TIF_SIGPENDING)) do_signal(); + if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) + tracehook_notify_resume(¤t->thread.regs); } void exit_thread(void) { } -void *get_current(void) +int get_current_pid(void) { - return current; + return task_pid_nr(current); } -extern void schedule_tail(struct task_struct *prev); - /* * This is called magically, by its address being stuffed in a jmp_buf * and being longjmp-d to. @@ -147,24 +124,16 @@ void new_thread_handler(void) arg = current->thread.request.u.thread.arg; /* - * The return value is 1 if the kernel thread execs a process, - * 0 if it just exits + * callback returns only if the kernel thread execs a process */ - n = run_kernel_thread(fn, arg, ¤t->thread.exec_buf); - if (n == 1) { - /* Handle any immediate reschedules or signals */ - interrupt_end(); - userspace(¤t->thread.regs.regs); - } - else do_exit(0); + n = fn(arg); + userspace(¤t->thread.regs.regs); } /* Called magically, see new_thread_handler above */ void fork_handler(void) { force_flush_all(); - if (current->thread.prev_sched == NULL) - panic("blech"); schedule_tail(current->thread.prev_sched); @@ -173,45 +142,42 @@ void fork_handler(void) * arch_switch_to isn't needed. We could want to apply this to * improve performance. -bb */ - arch_switch_to(current->thread.prev_sched, current); + arch_switch_to(current); current->thread.prev_sched = NULL; - /* Handle any immediate reschedules or signals */ - interrupt_end(); - userspace(¤t->thread.regs.regs); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, - unsigned long stack_top, struct task_struct * p, - struct pt_regs *regs) +int copy_thread(unsigned long clone_flags, unsigned long sp, + unsigned long arg, struct task_struct * p) { void (*handler)(void); + int kthread = current->flags & PF_KTHREAD; int ret = 0; p->thread = (struct thread_struct) INIT_THREAD; - if (current->thread.forking) { - memcpy(&p->thread.regs.regs, ®s->regs, + if (!kthread) { + memcpy(&p->thread.regs.regs, current_pt_regs(), sizeof(p->thread.regs.regs)); - REGS_SET_SYSCALL_RETURN(p->thread.regs.regs.gp, 0); + PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0); if (sp != 0) REGS_SP(p->thread.regs.regs.gp) = sp; handler = fork_handler; arch_copy_thread(¤t->thread.arch, &p->thread.arch); - } - else { - init_thread_registers(&p->thread.regs.regs); - p->thread.request.u.thread = current->thread.request.u.thread; + } else { + get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp); + p->thread.request.u.thread.proc = (int (*)(void *))sp; + p->thread.request.u.thread.arg = (void *)arg; handler = new_thread_handler; } new_thread(task_stack_page(p), &p->thread.switch_buf, handler); - if (current->thread.forking) { + if (!kthread) { clear_flushed_tls(p); /* @@ -233,78 +199,14 @@ void initial_thread_cb(void (*proc)(void *), void *arg) kmalloc_ok = save_kmalloc_ok; } -void default_idle(void) +void arch_cpu_idle(void) { unsigned long long nsecs; - while(1) { - /* endless idle loop with no priority at all */ - - /* - * although we are an idle CPU, we do not want to - * get into the scheduler unnecessarily. - */ - if (need_resched()) - schedule(); - - tick_nohz_stop_sched_tick(); - nsecs = disable_timer(); - idle_sleep(nsecs); - tick_nohz_restart_sched_tick(); - } -} - -void cpu_idle(void) -{ - cpu_tasks[current_thread->cpu].pid = os_getpid(); - default_idle(); -} - -void *um_virt_to_phys(struct task_struct *task, unsigned long addr, - pte_t *pte_out) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - pte_t ptent; - - if (task->mm == NULL) - return ERR_PTR(-EINVAL); - pgd = pgd_offset(task->mm, addr); - if (!pgd_present(*pgd)) - return ERR_PTR(-EINVAL); - - pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) - return ERR_PTR(-EINVAL); - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) - return ERR_PTR(-EINVAL); - - pte = pte_offset_kernel(pmd, addr); - ptent = *pte; - if (!pte_present(ptent)) - return ERR_PTR(-EINVAL); - - if (pte_out != NULL) - *pte_out = ptent; - return (void *) (pte_val(ptent) & PAGE_MASK) + (addr & ~PAGE_MASK); -} - -char *current_cmd(void) -{ -#if defined(CONFIG_SMP) || defined(CONFIG_HIGHMEM) - return "(Unknown)"; -#else - void *addr = um_virt_to_phys(current, current->mm->arg_start, NULL); - return IS_ERR(addr) ? "(Unknown)": __va((unsigned long) addr); -#endif -} - -void dump_thread(struct pt_regs *regs, struct user *u) -{ + cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); + nsecs = disable_timer(); + idle_sleep(nsecs); + local_irq_enable(); } int __cant_sleep(void) { @@ -317,7 +219,7 @@ int user_context(unsigned long sp) unsigned long stack; stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER); - return stack != (unsigned long) current_thread; + return stack != (unsigned long) current_thread_info(); } extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end; @@ -331,10 +233,11 @@ void do_uml_exitcalls(void) (*call)(); } -char *uml_strdup(char *string) +char *uml_strdup(const char *string) { return kstrdup(string, GFP_KERNEL); } +EXPORT_SYMBOL(uml_strdup); int copy_to_user_proc(void __user *to, void *from, int size) { @@ -359,7 +262,7 @@ int strlen_user_proc(char __user *str) int smp_sigio_handler(void) { #ifdef CONFIG_SMP - int cpu = current_thread->cpu; + int cpu = current_thread_info()->cpu; IPI_handler(cpu); if (cpu != 0) return 1; @@ -369,7 +272,7 @@ int smp_sigio_handler(void) int cpu(void) { - return current_thread->cpu; + return current_thread_info()->cpu; } static atomic_t using_sysemu = ATOMIC_INIT(0); @@ -387,16 +290,19 @@ int get_using_sysemu(void) return atomic_read(&using_sysemu); } -static int proc_read_sysemu(char *buf, char **start, off_t offset, int size,int *eof, void *data) +static int sysemu_proc_show(struct seq_file *m, void *v) { - if (snprintf(buf, size, "%d\n", get_using_sysemu()) < size) - /* No overflow */ - *eof = 1; + seq_printf(m, "%d\n", get_using_sysemu()); + return 0; +} - return strlen(buf); +static int sysemu_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, sysemu_proc_show, NULL); } -static int proc_write_sysemu(struct file *file,const char __user *buf, unsigned long count,void *data) +static ssize_t sysemu_proc_write(struct file *file, const char __user *buf, + size_t count, loff_t *pos) { char tmp[2]; @@ -409,13 +315,22 @@ static int proc_write_sysemu(struct file *file,const char __user *buf, unsigned return count; } +static const struct file_operations sysemu_proc_fops = { + .owner = THIS_MODULE, + .open = sysemu_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = sysemu_proc_write, +}; + int __init make_proc_sysemu(void) { struct proc_dir_entry *ent; if (!sysemu_supported) return 0; - ent = create_proc_entry("sysemu", 0600, &proc_root); + ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_fops); if (ent == NULL) { @@ -423,9 +338,6 @@ int __init make_proc_sysemu(void) return 0; } - ent->read_proc = proc_read_sysemu; - ent->write_proc = proc_write_sysemu; - return 0; } @@ -435,7 +347,7 @@ int singlestepping(void * t) { struct task_struct *task = t ? t : current; - if ( ! (task->ptrace & PT_DTRACE) ) + if (!(task->ptrace & PT_DTRACE)) return 0; if (task->thread.singlestep_syscall) @@ -447,7 +359,7 @@ int singlestepping(void * t) /* * Only x86 and x86_64 have an arch_align_stack(). * All other arches have "#define arch_align_stack(x) (x)" - * in their asm/system.h + * in their asm/exec.h * As this is included in UML from asm-um/system-generic.h, * we can use it to behave as the subarch does. */ @@ -459,3 +371,46 @@ unsigned long arch_align_stack(unsigned long sp) return sp & ~0xf; } #endif + +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long stack_page, sp, ip; + bool seen_sched = 0; + + if ((p == NULL) || (p == current) || (p->state == TASK_RUNNING)) + return 0; + + stack_page = (unsigned long) task_stack_page(p); + /* Bail if the process has no kernel stack for some reason */ + if (stack_page == 0) + return 0; + + sp = p->thread.switch_buf->JB_SP; + /* + * Bail if the stack pointer is below the bottom of the kernel + * stack for some reason + */ + if (sp < stack_page) + return 0; + + while (sp < stack_page + THREAD_SIZE) { + ip = *((unsigned long *) sp); + if (in_sched_functions(ip)) + /* Ignore everything until we're above the scheduler */ + seen_sched = 1; + else if (kernel_text_address(ip) && seen_sched) + return ip; + + sp += sizeof(unsigned long); + } + + return 0; +} + +int elf_core_copy_fpregs(struct task_struct *t, elf_fpregset_t *fpu) +{ + int cpu = current_thread_info()->cpu; + + return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu); +} + diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 47b57b497d5..694d551c889 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -3,25 +3,32 @@ * Licensed under the GPL */ -#include "linux/audit.h" -#include "linux/ptrace.h" -#include "linux/sched.h" -#include "asm/uaccess.h" -#ifdef CONFIG_PROC_MM -#include "proc_mm.h" +#include <linux/audit.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/tracehook.h> +#include <asm/uaccess.h> +#include <skas_ptrace.h> + + + +void user_enable_single_step(struct task_struct *child) +{ + child->ptrace |= PT_DTRACE; + child->thread.singlestep_syscall = 0; + +#ifdef SUBARCH_SET_SINGLESTEPPING + SUBARCH_SET_SINGLESTEPPING(child, 1); #endif -#include "skas_ptrace.h" +} -static inline void set_singlestepping(struct task_struct *child, int on) +void user_disable_single_step(struct task_struct *child) { - if (on) - child->ptrace |= PT_DTRACE; - else - child->ptrace &= ~PT_DTRACE; + child->ptrace &= ~PT_DTRACE; child->thread.singlestep_syscall = 0; #ifdef SUBARCH_SET_SINGLESTEPPING - SUBARCH_SET_SINGLESTEPPING(child, on); + SUBARCH_SET_SINGLESTEPPING(child, 0); #endif } @@ -30,86 +37,34 @@ static inline void set_singlestepping(struct task_struct *child, int on) */ void ptrace_disable(struct task_struct *child) { - set_singlestepping(child,0); + user_disable_single_step(child); } extern int peek_user(struct task_struct * child, long addr, long data); extern int poke_user(struct task_struct * child, long addr, long data); -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int i, ret; - unsigned long __user *p = (void __user *)(unsigned long)data; + unsigned long __user *p = (void __user *)data; + void __user *vp = p; switch (request) { - /* read word at location addr. */ - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - ret = generic_ptrace_peekdata(child, addr, data); - break; - /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: ret = peek_user(child, addr, data); break; - /* write the word at location addr. */ - case PTRACE_POKETEXT: - case PTRACE_POKEDATA: - ret = generic_ptrace_pokedata(child, addr, data); - break; - /* write the word at location addr in the USER area */ case PTRACE_POKEUSR: ret = poke_user(child, addr, data); break; - /* continue and stop at next (return from) syscall */ - case PTRACE_SYSCALL: - /* restart after signal. */ - case PTRACE_CONT: { + case PTRACE_SYSEMU: + case PTRACE_SYSEMU_SINGLESTEP: ret = -EIO; - if (!valid_signal(data)) - break; - - set_singlestepping(child, 0); - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - wake_up_process(child); - ret = 0; - break; - } - -/* - * make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - case PTRACE_KILL: { - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - - set_singlestepping(child, 0); - child->exit_code = SIGKILL; - wake_up_process(child); - break; - } - - case PTRACE_SINGLESTEP: { /* set the trap flag. */ - ret = -EIO; - if (!valid_signal(data)) - break; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - set_singlestepping(child, 1); - child->exit_code = data; - /* give it a chance to run. */ - wake_up_process(child); - ret = 0; break; - } #ifdef PTRACE_GETREGS case PTRACE_GETREGS: { /* Get all gp regs from the child. */ @@ -141,26 +96,12 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } #endif -#ifdef PTRACE_GETFPREGS - case PTRACE_GETFPREGS: /* Get the child FPU state. */ - ret = get_fpregs((struct user_i387_struct __user *) data, - child); - break; -#endif -#ifdef PTRACE_SETFPREGS - case PTRACE_SETFPREGS: /* Set the child FPU state. */ - ret = set_fpregs((struct user_i387_struct __user *) data, - child); - break; -#endif case PTRACE_GET_THREAD_AREA: - ret = ptrace_get_thread_area(child, addr, - (struct user_desc __user *) data); + ret = ptrace_get_thread_area(child, addr, vp); break; case PTRACE_SET_THREAD_AREA: - ret = ptrace_set_thread_area(child, addr, - (struct user_desc __user *) data); + ret = ptrace_set_thread_area(child, addr, vp); break; case PTRACE_FAULTINFO: { @@ -170,7 +111,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) * On i386, ptrace_faultinfo is smaller! */ ret = copy_to_user(p, &child->thread.arch.faultinfo, - sizeof(struct ptrace_faultinfo)); + sizeof(struct ptrace_faultinfo)) ? + -EIO : 0; break; } @@ -191,30 +133,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } #endif -#ifdef CONFIG_PROC_MM - case PTRACE_SWITCH_MM: { - struct mm_struct *old = child->mm; - struct mm_struct *new = proc_mm_get_mm(data); - - if (IS_ERR(new)) { - ret = PTR_ERR(new); - break; - } - - atomic_inc(&new->mm_users); - child->mm = new; - child->active_mm = new; - mmput(old); - ret = 0; - break; - } -#endif -#ifdef PTRACE_ARCH_PRCTL - case PTRACE_ARCH_PRCTL: - /* XXX Calls ptrace on the host - needs some SMP thinking */ - ret = arch_prctl(child, data, (void *) addr); - break; -#endif default: ret = ptrace_request(child, request, addr, data); if (ret == -EIO) @@ -225,7 +143,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) return ret; } -void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs, +static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs, int error_code) { struct siginfo info; @@ -245,50 +163,36 @@ void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs, * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check */ -void syscall_trace(struct uml_pt_regs *regs, int entryexit) +void syscall_trace_enter(struct pt_regs *regs) { - int is_singlestep = (current->ptrace & PT_DTRACE) && entryexit; - int tracesysgood; - - if (unlikely(current->audit_context)) { - if (!entryexit) - audit_syscall_entry(HOST_AUDIT_ARCH, - UPT_SYSCALL_NR(regs), - UPT_SYSCALL_ARG1(regs), - UPT_SYSCALL_ARG2(regs), - UPT_SYSCALL_ARG3(regs), - UPT_SYSCALL_ARG4(regs)); - else audit_syscall_exit(AUDITSC_RESULT(UPT_SYSCALL_RET(regs)), - UPT_SYSCALL_RET(regs)); - } - - /* Fake a debug trap */ - if (is_singlestep) - send_sigtrap(current, regs, 0); + audit_syscall_entry(HOST_AUDIT_ARCH, + UPT_SYSCALL_NR(®s->regs), + UPT_SYSCALL_ARG1(®s->regs), + UPT_SYSCALL_ARG2(®s->regs), + UPT_SYSCALL_ARG3(®s->regs), + UPT_SYSCALL_ARG4(®s->regs)); if (!test_thread_flag(TIF_SYSCALL_TRACE)) return; - if (!(current->ptrace & PT_PTRACED)) - return; + tracehook_report_syscall_entry(regs); +} - /* - * the 0x80 provides a way for the tracing parent to distinguish - * between a syscall stop and SIGTRAP delivery - */ - tracesysgood = (current->ptrace & PT_TRACESYSGOOD); - ptrace_notify(SIGTRAP | (tracesysgood ? 0x80 : 0)); +void syscall_trace_leave(struct pt_regs *regs) +{ + int ptraced = current->ptrace; - if (entryexit) /* force do_signal() --> is_syscall() */ - set_thread_flag(TIF_SIGPENDING); + audit_syscall_exit(regs); - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } + /* Fake a debug trap */ + if (ptraced & PT_DTRACE) + send_sigtrap(current, ®s->regs, 0); + + if (!test_thread_flag(TIF_SYSCALL_TRACE)) + return; + + tracehook_report_syscall_exit(regs, 0); + /* force do_signal() --> is_syscall() */ + if (ptraced & PT_PTRACED) + set_thread_flag(TIF_SIGPENDING); } diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 04cebcf0679..ced8903921a 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -3,31 +3,39 @@ * Licensed under the GPL */ -#include "linux/sched.h" -#include "os.h" -#include "skas.h" +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/oom.h> +#include <kern_util.h> +#include <os.h> +#include <skas.h> void (*pm_power_off)(void); static void kill_off_processes(void) { - if(proc_mm) + if (proc_mm) /* * FIXME: need to loop over userspace_pids */ os_kill_ptraced_process(userspace_pid[0], 1); else { struct task_struct *p; - int pid, me; + int pid; - me = os_getpid(); - for_each_process(p){ - if(p->mm == NULL) - continue; + read_lock(&tasklist_lock); + for_each_process(p) { + struct task_struct *t; - pid = p->mm->context.id.u.pid; + t = find_lock_task_mm(p); + if (!t) + continue; + pid = t->mm->context.id.u.pid; + task_unlock(t); os_kill_ptraced_process(pid, 1); } + read_unlock(&tasklist_lock); } } diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c index 89f9866a135..b5e0cbb3438 100644 --- a/arch/um/kernel/sigio.c +++ b/arch/um/kernel/sigio.c @@ -1,18 +1,12 @@ /* - * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) * Licensed under the GPL */ -#include "linux/kernel.h" -#include "linux/list.h" -#include "linux/slab.h" -#include "linux/signal.h" -#include "linux/interrupt.h" -#include "init.h" -#include "sigio.h" -#include "irq_user.h" -#include "irq_kern.h" -#include "os.h" +#include <linux/interrupt.h> +#include <irq_kern.h> +#include <os.h> +#include <sigio.h> /* Protected by sigio_lock() called from write_sigio_workaround */ static int sigio_irq_fd = -1; @@ -31,11 +25,10 @@ int write_sigio_irq(int fd) int err; err = um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt, - IRQF_DISABLED|IRQF_SAMPLE_RANDOM, "write sigio", - NULL); - if(err){ - printk("write_sigio_irq : um_request_irq failed, err = %d\n", - err); + 0, "write sigio", NULL); + if (err) { + printk(KERN_ERR "write_sigio_irq : um_request_irq failed, " + "err = %d\n", err); return -1; } sigio_irq_fd = fd; diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 19cb9773393..f57e02e7910 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -3,40 +3,36 @@ * Licensed under the GPL */ -#include "linux/module.h" -#include "linux/ptrace.h" -#include "linux/sched.h" -#include "asm/siginfo.h" -#include "asm/signal.h" -#include "asm/unistd.h" -#include "frame_kern.h" -#include "kern_util.h" -#include "sigcontext.h" +#include <linux/module.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <asm/siginfo.h> +#include <asm/signal.h> +#include <asm/unistd.h> +#include <frame_kern.h> +#include <kern_util.h> EXPORT_SYMBOL(block_signals); EXPORT_SYMBOL(unblock_signals); -#define _S(nr) (1<<((nr)-1)) - -#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP))) - /* * OK, we're invoking a handler */ -static int handle_signal(struct pt_regs *regs, unsigned long signr, - struct k_sigaction *ka, siginfo_t *info, - sigset_t *oldset) +static void handle_signal(struct pt_regs *regs, unsigned long signr, + struct k_sigaction *ka, struct siginfo *info) { + sigset_t *oldset = sigmask_to_save(); + int singlestep = 0; unsigned long sp; int err; - /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + singlestep = 1; /* Did we come from a system call? */ if (PT_REGS_SYSCALL_NR(regs) >= 0) { /* If so, check system call restarting.. */ - switch(PT_REGS_SYSCALL_RET(regs)) { + switch (PT_REGS_SYSCALL_RET(regs)) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: PT_REGS_SYSCALL_RET(regs) = -EINTR; @@ -66,57 +62,28 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr, #endif err = setup_signal_stack_si(sp, signr, ka, regs, info, oldset); - if (err) { - spin_lock_irq(¤t->sighand->siglock); - current->blocked = *oldset; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + if (err) force_sigsegv(signr, current); - } else { - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked, ¤t->blocked, - &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked, signr); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - } - - return err; + else + signal_delivered(signr, info, ka, regs, singlestep); } static int kern_do_signal(struct pt_regs *regs) { struct k_sigaction ka_copy; - siginfo_t info; - sigset_t *oldset; + struct siginfo info; int sig, handled_sig = 0; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - oldset = ¤t->saved_sigmask; - else - oldset = ¤t->blocked; - while ((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0) { handled_sig = 1; /* Whee! Actually deliver the signal. */ - if (!handle_signal(regs, sig, &ka_copy, &info, oldset)) { - /* - * a signal was successfully delivered; the saved - * sigmask will have been stored in the signal frame, - * and will be restored by sigreturn, so we can simply - * clear the TIF_RESTORE_SIGMASK flag - */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); - break; - } + handle_signal(regs, sig, &ka_copy, &info); } /* Did we come from a system call? */ if (!handled_sig && (PT_REGS_SYSCALL_NR(regs) >= 0)) { /* Restart the system call - no handlers present */ - switch(PT_REGS_SYSCALL_RET(regs)) { + switch (PT_REGS_SYSCALL_RET(regs)) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: @@ -146,10 +113,8 @@ static int kern_do_signal(struct pt_regs *regs) * if there's no signal to deliver, we just put the saved sigmask * back */ - if (!handled_sig && test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); - sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); - } + if (!handled_sig) + restore_saved_sigmask(); return handled_sig; } @@ -157,26 +122,3 @@ int do_signal(void) { return kern_do_signal(¤t->thread.regs); } - -/* - * Atomically swap in the new signal mask, and wait for a signal. - */ -long sys_sigsuspend(int history0, int history1, old_sigset_t mask) -{ - mask &= _BLOCKABLE; - spin_lock_irq(¤t->sighand->siglock); - current->saved_sigmask = current->blocked; - siginitset(¤t->blocked, mask); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - current->state = TASK_INTERRUPTIBLE; - schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); - return -ERESTARTNOHAND; -} - -long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss) -{ - return do_sigaltstack(uss, uoss, PT_REGS_SP(¤t->thread.regs)); -} diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index 8d07a7acb90..289771dadf8 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -1,17 +1,19 @@ -#include <sched.h> +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + #include <signal.h> -#include <sys/mman.h> -#include <sys/time.h> +#include <sched.h> #include <asm/unistd.h> -#include "as-layout.h" -#include "ptrace_user.h" -#include "skas.h" -#include "stub-data.h" -#include "uml-config.h" -#include "sysdep/stub.h" -#include "kern_constants.h" - -/* This is in a separate file because it needs to be compiled with any +#include <sys/time.h> +#include <as-layout.h> +#include <ptrace_user.h> +#include <stub-data.h> +#include <sysdep/stub.h> + +/* + * This is in a separate file because it needs to be compiled with any * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled * * Use UM_KERN_PAGE_SIZE instead of PAGE_SIZE because that calls getpagesize @@ -26,25 +28,26 @@ stub_clone_handler(void) err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, STUB_DATA + UM_KERN_PAGE_SIZE / 2 - sizeof(void *)); - if(err != 0) + if (err != 0) goto out; err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); - if(err) + if (err) goto out; - err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, + err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, (long) &data->timer, 0); - if(err) + if (err) goto out; remap_stack(data->fd, data->offset); goto done; out: - /* save current result. - * Parent: pid; - * child: retcode of mmap already saved and it jumps around this + /* + * save current result. + * Parent: pid; + * child: retcode of mmap already saved and it jumps around this * assignment */ data->err = err; diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index f859ec306cd..007d5503f49 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -3,13 +3,14 @@ * Licensed under the GPL */ -#include "linux/mm.h" -#include "linux/sched.h" -#include "asm/pgalloc.h" -#include "asm/pgtable.h" -#include "as-layout.h" -#include "os.h" -#include "skas.h" +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <as-layout.h> +#include <os.h> +#include <skas.h> extern int __syscall_stub_start; @@ -30,37 +31,18 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, if (!pmd) goto out_pmd; - pte = pte_alloc_map(mm, pmd, proc); + pte = pte_alloc_map(mm, NULL, pmd, proc); if (!pte) goto out_pte; - /* - * There's an interaction between the skas0 stub pages, stack - * randomization, and the BUG at the end of exit_mmap. exit_mmap - * checks that the number of page tables freed is the same as had - * been allocated. If the stack is on the last page table page, - * then the stack pte page will be freed, and if not, it won't. To - * avoid having to know where the stack is, or if the process mapped - * something at the top of its address space for some other reason, - * we set TASK_SIZE to end at the start of the last page table. - * This keeps exit_mmap off the last page, but introduces a leak - * of that page. So, we hang onto it here and free it in - * destroy_context_skas. - */ - - mm->context.last_page_table = pmd_page_vaddr(*pmd); -#ifdef CONFIG_3_LEVEL_PGTABLES - mm->context.last_pmd = (unsigned long) __va(pud_val(*pud)); -#endif - *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); *pte = pte_mkread(*pte); return 0; - out_pmd: - pud_free(pud); out_pte: - pmd_free(pmd); + pmd_free(mm, pmd); + out_pmd: + pud_free(mm, pud); out: return -ENOMEM; } @@ -76,24 +58,6 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) stack = get_zeroed_page(GFP_KERNEL); if (stack == 0) goto out; - - /* - * This zeros the entry that pgd_alloc didn't, needed since - * we are about to reinitialize it, and want mm.nr_ptes to - * be accurate. - */ - mm->pgd[USER_PTRS_PER_PGD] = __pgd(0); - - ret = init_stub_pte(mm, STUB_CODE, - (unsigned long) &__syscall_stub_start); - if (ret) - goto out_free; - - ret = init_stub_pte(mm, STUB_DATA, stack); - if (ret) - goto out_free; - - mm->nr_ptes--; } to_mm->id.stack = stack; @@ -114,6 +78,11 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) to_mm->id.u.pid = copy_context_skas0(stack, from_mm->id.u.pid); else to_mm->id.u.pid = start_userspace(stack); + + if (to_mm->id.u.pid < 0) { + ret = to_mm->id.u.pid; + goto out_free; + } } ret = init_new_ldt(to_mm, from_mm); @@ -132,24 +101,78 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) return ret; } +void uml_setup_stubs(struct mm_struct *mm) +{ + int err, ret; + + if (!skas_needs_stub) + return; + + ret = init_stub_pte(mm, STUB_CODE, + (unsigned long) &__syscall_stub_start); + if (ret) + goto out; + + ret = init_stub_pte(mm, STUB_DATA, mm->context.id.stack); + if (ret) + goto out; + + mm->context.stub_pages[0] = virt_to_page(&__syscall_stub_start); + mm->context.stub_pages[1] = virt_to_page(mm->context.id.stack); + + /* dup_mmap already holds mmap_sem */ + err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START, + VM_READ | VM_MAYREAD | VM_EXEC | + VM_MAYEXEC | VM_DONTCOPY | VM_PFNMAP, + mm->context.stub_pages); + if (err) { + printk(KERN_ERR "install_special_mapping returned %d\n", err); + goto out; + } + return; + +out: + force_sigsegv(SIGSEGV, current); +} + +void arch_exit_mmap(struct mm_struct *mm) +{ + pte_t *pte; + + pte = virt_to_pte(mm, STUB_CODE); + if (pte != NULL) + pte_clear(mm, STUB_CODE, pte); + + pte = virt_to_pte(mm, STUB_DATA); + if (pte == NULL) + return; + + pte_clear(mm, STUB_DATA, pte); +} + void destroy_context(struct mm_struct *mm) { struct mm_context *mmu = &mm->context; if (proc_mm) os_close_file(mmu->id.u.mm_fd); - else + else { + /* + * If init_new_context wasn't called, this will be + * zero, resulting in a kill(0), which will result in the + * whole UML suddenly dying. Also, cover negative and + * 1 cases, since they shouldn't happen either. + */ + if (mmu->id.u.pid < 2) { + printk(KERN_ERR "corrupt mm_context - pid = %d\n", + mmu->id.u.pid); + return; + } os_kill_ptraced_process(mmu->id.u.pid, 1); + } - if (!proc_mm || !ptrace_faultinfo) { + if (skas_needs_stub) free_page(mmu->id.stack); - pte_lock_deinit(virt_to_page(mmu->last_page_table)); - pte_free_kernel((pte_t *) mmu->last_page_table); - dec_zone_page_state(virt_to_page(mmu->last_page_table), NR_PAGETABLE); -#ifdef CONFIG_3_LEVEL_PGTABLES - pmd_free((pmd_t *) mmu->last_pmd); -#endif - } free_ldt(mmu); } diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index fce389c2342..4da11b3c8dd 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -3,22 +3,28 @@ * Licensed under the GPL */ -#include "linux/init.h" -#include "linux/sched.h" -#include "as-layout.h" -#include "os.h" -#include "skas.h" +#include <linux/init.h> +#include <linux/sched.h> +#include <as-layout.h> +#include <kern.h> +#include <os.h> +#include <skas.h> int new_mm(unsigned long stack) { - int fd; + int fd, err; fd = os_open_file("/proc/mm", of_cloexec(of_write(OPENFLAGS())), 0); if (fd < 0) return fd; - if (skas_needs_stub) - map_stub_pages(fd, STUB_CODE, STUB_DATA, stack); + if (skas_needs_stub) { + err = map_stub_pages(fd, STUB_CODE, STUB_DATA, stack); + if (err) { + os_close_file(fd); + return err; + } + } return fd; } @@ -35,7 +41,7 @@ static int __init start_kernel_proc(void *unused) cpu_tasks[0].pid = pid; cpu_tasks[0].task = current; #ifdef CONFIG_SMP - cpu_online_map = cpumask_of_cpu(0); + init_cpu_online(get_cpu_mask(0)); #endif start_kernel(); return 0; @@ -49,8 +55,14 @@ int __init start_uml(void) { stack_protections((unsigned long) &cpu0_irqstack); set_sigstack(cpu0_irqstack, THREAD_SIZE); - if (proc_mm) + if (proc_mm) { userspace_pid[0] = start_userspace(0); + if (userspace_pid[0] < 0) { + printf("start_uml - start_userspace returned %d\n", + userspace_pid[0]); + exit(1); + } + } init_new_thread_signals(); diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 50b476f2b38..c0681e09743 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -3,11 +3,14 @@ * Licensed under the GPL */ -#include "linux/kernel.h" -#include "linux/ptrace.h" -#include "kern_util.h" -#include "sysdep/ptrace.h" -#include "sysdep/syscalls.h" +#include <linux/kernel.h> +#include <linux/ptrace.h> +#include <kern_util.h> +#include <sysdep/ptrace.h> +#include <sysdep/syscalls.h> + +extern int syscall_table_size; +#define NR_SYSCALLS (syscall_table_size / sizeof(void *)) void handle_syscall(struct uml_pt_regs *r) { @@ -15,10 +18,7 @@ void handle_syscall(struct uml_pt_regs *r) long result; int syscall; - syscall_trace(r, 0); - - current->thread.nsyscalls++; - nsyscalls++; + syscall_trace_enter(regs); /* * This should go in the declaration of syscall, but when I do that, @@ -30,11 +30,11 @@ void handle_syscall(struct uml_pt_regs *r) * in case it's a compiler bug. */ syscall = UPT_SYSCALL_NR(r); - if ((syscall >= NR_syscalls) || (syscall < 0)) + if ((syscall >= NR_SYSCALLS) || (syscall < 0)) result = -ENOSYS; else result = EXECUTE_SYSCALL(syscall, regs); - REGS_SET_SYSCALL_RETURN(r->gp, result); + PT_REGS_SET_SYSCALL_RETURN(regs, result); - syscall_trace(r, 1); + syscall_trace_leave(regs); } diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c index 1d8b119f2d0..4ffb644d6c0 100644 --- a/arch/um/kernel/skas/uaccess.c +++ b/arch/um/kernel/skas/uaccess.c @@ -3,128 +3,131 @@ * Licensed under the GPL */ -#include "linux/err.h" -#include "linux/highmem.h" -#include "linux/mm.h" -#include "asm/current.h" -#include "asm/page.h" -#include "asm/pgtable.h" -#include "kern_util.h" -#include "os.h" - -extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr, - pte_t *pte_out); - -static unsigned long maybe_map(unsigned long virt, int is_write) +#include <linux/err.h> +#include <linux/highmem.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <asm/current.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <kern_util.h> +#include <os.h> + +pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr) { - pte_t pte; - int err; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; - void *phys = um_virt_to_phys(current, virt, &pte); - int dummy_code; + if (mm == NULL) + return NULL; - if (IS_ERR(phys) || (is_write && !pte_write(pte))) { + pgd = pgd_offset(mm, addr); + if (!pgd_present(*pgd)) + return NULL; + + pud = pud_offset(pgd, addr); + if (!pud_present(*pud)) + return NULL; + + pmd = pmd_offset(pud, addr); + if (!pmd_present(*pmd)) + return NULL; + + return pte_offset_kernel(pmd, addr); +} + +static pte_t *maybe_map(unsigned long virt, int is_write) +{ + pte_t *pte = virt_to_pte(current->mm, virt); + int err, dummy_code; + + if ((pte == NULL) || !pte_present(*pte) || + (is_write && !pte_write(*pte))) { err = handle_page_fault(virt, 0, is_write, 1, &dummy_code); if (err) - return -1UL; - phys = um_virt_to_phys(current, virt, NULL); + return NULL; + pte = virt_to_pte(current->mm, virt); } - if (IS_ERR(phys)) - phys = (void *) -1; + if (!pte_present(*pte)) + pte = NULL; - return (unsigned long) phys; + return pte; } static int do_op_one_page(unsigned long addr, int len, int is_write, int (*op)(unsigned long addr, int len, void *arg), void *arg) { + jmp_buf buf; struct page *page; - int n; + pte_t *pte; + int n, faulted; - addr = maybe_map(addr, is_write); - if (addr == -1UL) + pte = maybe_map(addr, is_write); + if (pte == NULL) return -1; - page = phys_to_page(addr); - addr = (unsigned long) kmap_atomic(page, KM_UML_USERCOPY) + + page = pte_page(*pte); + addr = (unsigned long) kmap_atomic(page) + (addr & ~PAGE_MASK); - n = (*op)(addr, len, arg); + current->thread.fault_catcher = &buf; - kunmap_atomic(page, KM_UML_USERCOPY); + faulted = UML_SETJMP(&buf); + if (faulted == 0) + n = (*op)(addr, len, arg); + else + n = -1; + + current->thread.fault_catcher = NULL; + + kunmap_atomic((void *)addr); return n; } -static void do_buffer_op(void *jmpbuf, void *arg_ptr) +static int buffer_op(unsigned long addr, int len, int is_write, + int (*op)(unsigned long, int, void *), void *arg) { - va_list args; - unsigned long addr; - int len, is_write, size, remain, n; - int (*op)(unsigned long, int, void *); - void *arg; - int *res; - - va_copy(args, *(va_list *)arg_ptr); - addr = va_arg(args, unsigned long); - len = va_arg(args, int); - is_write = va_arg(args, int); - op = va_arg(args, void *); - arg = va_arg(args, void *); - res = va_arg(args, int *); - va_end(args); + int size, remain, n; + size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); remain = len; - current->thread.fault_catcher = jmpbuf; n = do_op_one_page(addr, size, is_write, op, arg); if (n != 0) { - *res = (n < 0 ? remain : 0); + remain = (n < 0 ? remain : 0); goto out; } addr += size; remain -= size; - if (remain == 0) { - *res = 0; + if (remain == 0) goto out; - } - while(addr < ((addr + remain) & PAGE_MASK)) { + while (addr < ((addr + remain) & PAGE_MASK)) { n = do_op_one_page(addr, PAGE_SIZE, is_write, op, arg); if (n != 0) { - *res = (n < 0 ? remain : 0); + remain = (n < 0 ? remain : 0); goto out; } addr += PAGE_SIZE; remain -= PAGE_SIZE; } - if (remain == 0) { - *res = 0; + if (remain == 0) goto out; - } n = do_op_one_page(addr, remain, is_write, op, arg); - if (n != 0) - *res = (n < 0 ? remain : 0); - else *res = 0; - out: - current->thread.fault_catcher = NULL; -} - -static int buffer_op(unsigned long addr, int len, int is_write, - int (*op)(unsigned long addr, int len, void *arg), - void *arg) -{ - int faulted, res; - - faulted = setjmp_wrapper(do_buffer_op, addr, len, is_write, op, arg, - &res); - if (!faulted) - return res; + if (n != 0) { + remain = (n < 0 ? remain : 0); + goto out; + } - return addr + len - (unsigned long) current->thread.fault_addr; + return 0; + out: + return remain; } static int copy_chunk_from_user(unsigned long from, int len, void *arg) @@ -147,6 +150,7 @@ int copy_from_user(void *to, const void __user *from, int n) buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to): n; } +EXPORT_SYMBOL(copy_from_user); static int copy_chunk_to_user(unsigned long to, int len, void *arg) { @@ -168,6 +172,7 @@ int copy_to_user(void __user *to, const void *from, int n) buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) : n; } +EXPORT_SYMBOL(copy_to_user); static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) { @@ -202,6 +207,7 @@ int strncpy_from_user(char *dst, const char __user *src, int count) return -EFAULT; return strnlen(dst, count); } +EXPORT_SYMBOL(strncpy_from_user); static int clear_chunk(unsigned long addr, int len, void *unused) { @@ -224,6 +230,7 @@ int clear_user(void __user *mem, int len) return access_ok(VERIFY_WRITE, mem, len) ? buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len; } +EXPORT_SYMBOL(clear_user); static int strnlen_chunk(unsigned long str, int len, void *arg) { @@ -247,5 +254,6 @@ int strnlen_user(const void __user *str, int len) n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count); if (n == 0) return count + 1; - return -EFAULT; + return 0; } +EXPORT_SYMBOL(strnlen_user); diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index 36d89cf8d20..5c8c3ea7db7 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -3,35 +3,24 @@ * Licensed under the GPL */ -#include "linux/percpu.h" -#include "asm/pgalloc.h" -#include "asm/tlb.h" - -/* For some reason, mmu_gathers are referenced when CONFIG_SMP is off. */ -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +#include <linux/percpu.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> #ifdef CONFIG_SMP -#include "linux/sched.h" -#include "linux/module.h" -#include "linux/threads.h" -#include "linux/interrupt.h" -#include "linux/err.h" -#include "linux/hardirq.h" -#include "asm/smp.h" -#include "asm/processor.h" -#include "asm/spinlock.h" -#include "kern_util.h" -#include "kern.h" -#include "irq_user.h" -#include "os.h" - -/* CPU online map, set by smp_boot_cpus */ -cpumask_t cpu_online_map = CPU_MASK_NONE; -cpumask_t cpu_possible_map = CPU_MASK_NONE; - -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(cpu_possible_map); +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/threads.h> +#include <linux/interrupt.h> +#include <linux/err.h> +#include <linux/hardirq.h> +#include <asm/smp.h> +#include <asm/processor.h> +#include <asm/spinlock.h> +#include <kern.h> +#include <irq_user.h> +#include <os.h> /* Per CPU bogomips and other parameters * The only piece used here is the ipi pipe, which is set before SMP is @@ -61,7 +50,7 @@ void smp_send_stop(void) continue; os_write_file(cpu_data[i].ipi_pipe[1], "S", 1); } - printk(KERN_INFO "done\n"); + printk(KERN_CONT "done\n"); } static cpumask_t smp_commenced_mask = CPU_MASK_NONE; @@ -75,8 +64,7 @@ static int idle_proc(void *cpup) if (err < 0) panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err); - os_set_fd_async(cpu_data[cpu].ipi_pipe[0], - current->thread.mode.tt.extern_pid); + os_set_fd_async(cpu_data[cpu].ipi_pipe[0]); wmb(); if (cpu_test_and_set(cpu, cpu_callin_map)) { @@ -87,7 +75,8 @@ static int idle_proc(void *cpup) while (!cpu_isset(cpu, smp_commenced_mask)) cpu_relax(); - cpu_set(cpu, cpu_online_map); + notify_cpu_starting(cpu); + set_cpu_online(cpu, true); default_idle(); return 0; } @@ -119,18 +108,16 @@ void smp_prepare_cpus(unsigned int maxcpus) int i; for (i = 0; i < ncpus; ++i) - cpu_set(i, cpu_possible_map); + set_cpu_possible(i, true); - cpu_clear(me, cpu_online_map); - cpu_set(me, cpu_online_map); + set_cpu_online(me, true); cpu_set(me, cpu_callin_map); err = os_pipe(cpu_data[me].ipi_pipe, 1, 1); if (err < 0) panic("CPU#0 failed to create IPI pipe, errno = %d", -err); - os_set_fd_async(cpu_data[me].ipi_pipe[0], - current->thread.mode.tt.extern_pid); + os_set_fd_async(cpu_data[me].ipi_pipe[0]); for (cpu = 1; cpu < ncpus; cpu++) { printk(KERN_INFO "Booting processor %d...\n", cpu); @@ -143,21 +130,20 @@ void smp_prepare_cpus(unsigned int maxcpus) while (waittime-- && !cpu_isset(cpu, cpu_callin_map)) cpu_relax(); - if (cpu_isset(cpu, cpu_callin_map)) - printk(KERN_INFO "done\n"); - else printk(KERN_INFO "failed\n"); + printk(KERN_INFO "%s\n", + cpu_isset(cpu, cpu_calling_map) ? "done" : "failed"); } } void smp_prepare_boot_cpu(void) { - cpu_set(smp_processor_id(), cpu_online_map); + set_cpu_online(smp_processor_id(), true); } -int __cpu_up(unsigned int cpu) +int __cpu_up(unsigned int cpu, struct task_struct *tidle) { cpu_set(cpu, smp_commenced_mask); - while (!cpu_isset(cpu, cpu_online_map)) + while (!cpu_online(cpu)) mb(); return 0; } @@ -183,7 +169,7 @@ void IPI_handler(int cpu) break; case 'R': - set_tsk_need_resched(current); + scheduler_ipi(); break; case 'S': @@ -218,8 +204,7 @@ void smp_call_function_slave(int cpu) atomic_inc(&scf_finished); } -int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic, - int wait) +int smp_call_function(void (*_func)(void *info), void *_info, int wait) { int cpus = num_online_cpus() - 1; int i; diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index b9d92b2089a..c1d0ae069b5 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -3,66 +3,16 @@ * Licensed under the GPL */ -#include "linux/file.h" -#include "linux/fs.h" -#include "linux/mm.h" -#include "linux/sched.h" -#include "linux/utsname.h" -#include "asm/current.h" -#include "asm/mman.h" -#include "asm/uaccess.h" -#include "asm/unistd.h" - -/* Unlocked, I don't care if this is a bit off */ -int nsyscalls = 0; - -long sys_fork(void) -{ - long ret; - - current->thread.forking = 1; - ret = do_fork(SIGCHLD, UPT_SP(¤t->thread.regs.regs), - ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; -} - -long sys_vfork(void) -{ - long ret; - - current->thread.forking = 1; - ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, - UPT_SP(¤t->thread.regs.regs), - ¤t->thread.regs, 0, NULL, NULL); - current->thread.forking = 0; - return ret; -} - -/* common code for old and new mmaps */ -long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - long error = -EBADF; - struct file * file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); - out: - return error; -} +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/utsname.h> +#include <linux/syscalls.h> +#include <asm/current.h> +#include <asm/mman.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> long old_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -72,82 +22,7 @@ long old_mmap(unsigned long addr, unsigned long len, if (offset & ~PAGE_MASK) goto out; - err = sys_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); + err = sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); out: return err; } -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way unix traditionally does this, though. - */ -long sys_pipe(unsigned long __user * fildes) -{ - int fd[2]; - long error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, sizeof(fd))) - error = -EFAULT; - } - return error; -} - - -long sys_uname(struct old_utsname __user * name) -{ - long err; - if (!name) - return -EFAULT; - down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof (*name)); - up_read(&uts_sem); - return err?-EFAULT:0; -} - -long sys_olduname(struct oldold_utsname __user * name) -{ - long error; - - if (!name) - return -EFAULT; - if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) - return -EFAULT; - - down_read(&uts_sem); - - error = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - error |= __put_user(0, name->sysname + __OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - error |= __put_user(0, name->nodename + __OLD_UTS_LEN); - error |= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - error |= __put_user(0, name->release + __OLD_UTS_LEN); - error |= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - error |= __put_user(0, name->version + __OLD_UTS_LEN); - error |= __copy_to_user(&name->machine, &utsname()->machine, - __OLD_UTS_LEN); - error |= __put_user(0, name->machine + __OLD_UTS_LEN); - - up_read(&uts_sem); - - error = error ? -EFAULT : 0; - - return error; -} - -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) -{ - mm_segment_t fs; - int ret; - - fs = get_fs(); - set_fs(KERNEL_DS); - ret = um_execve(filename, argv, envp); - set_fs(fs); - - return ret; -} diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 93263571d81..799d7e413bf 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -1,80 +1,98 @@ -/* - * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2013 Richard Weinberger <richrd@nod.at> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. */ -#include "linux/sched.h" -#include "linux/kernel.h" -#include "linux/module.h" -#include "linux/kallsyms.h" -#include "asm/page.h" -#include "asm/processor.h" -#include "sysrq.h" +#include <linux/kallsyms.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <asm/sysrq.h> +#include <os.h> -/* Catch non-i386 SUBARCH's. */ -#if !defined(CONFIG_UML_X86) || defined(CONFIG_64BIT) -void show_trace(struct task_struct *task, unsigned long * stack) -{ - unsigned long addr; +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; - if (!stack) { - stack = (unsigned long*) &stack; - WARN_ON(1); - } +static void do_stack_trace(unsigned long *sp, unsigned long bp) +{ + int reliable; + unsigned long addr; + struct stack_frame *frame = (struct stack_frame *)bp; - printk("Call Trace: \n"); - while (((long) stack & (THREAD_SIZE-1)) != 0) { - addr = *stack; + printk(KERN_INFO "Call Trace:\n"); + while (((long) sp & (THREAD_SIZE-1)) != 0) { + addr = *sp; if (__kernel_text_address(addr)) { - printk("%08lx: [<%08lx>]", (unsigned long) stack, addr); - print_symbol(" %s", addr); - printk("\n"); - } - stack++; - } - printk("\n"); + reliable = 0; + if ((unsigned long) sp == bp + sizeof(long)) { + frame = frame ? frame->next_frame : NULL; + bp = (unsigned long)frame; + reliable = 1; + } + + printk(KERN_INFO " [<%08lx>]", addr); + printk(KERN_CONT " %s", reliable ? "" : "? "); + print_symbol(KERN_CONT "%s", addr); + printk(KERN_CONT "\n"); + } + sp++; + } + printk(KERN_INFO "\n"); } -#endif -/* - * stack dumps generator - this is used by arch-independent code. - * And this is identical to i386 currently. - */ -void dump_stack(void) +static unsigned long get_frame_pointer(struct task_struct *task, + struct pt_regs *segv_regs) { - unsigned long stack; - - show_trace(current, &stack); + if (!task || task == current) + return segv_regs ? PT_REGS_BP(segv_regs) : current_bp(); + else + return KSTK_EBP(task); } -EXPORT_SYMBOL(dump_stack); -/*Stolen from arch/i386/kernel/traps.c */ -static const int kstack_depth_to_print = 24; +static unsigned long *get_stack_pointer(struct task_struct *task, + struct pt_regs *segv_regs) +{ + if (!task || task == current) + return segv_regs ? (unsigned long *)PT_REGS_SP(segv_regs) : current_sp(); + else + return (unsigned long *)KSTK_ESP(task); +} -/* This recently started being used in arch-independent code too, as in - * kernel/sched.c.*/ -void show_stack(struct task_struct *task, unsigned long *esp) +void show_stack(struct task_struct *task, unsigned long *stack) { - unsigned long *stack; + unsigned long *sp = stack, bp = 0; + struct pt_regs *segv_regs = current->thread.segv_regs; int i; - if (esp == NULL) { - if (task != current && task != NULL) { - esp = (unsigned long *) KSTK_ESP(task); - } else { - esp = (unsigned long *) &esp; - } + if (!segv_regs && os_is_signal_stack()) { + printk(KERN_ERR "Received SIGSEGV in SIGSEGV handler," + " aborting stack trace!\n"); + return; } - stack = esp; - for(i = 0; i < kstack_depth_to_print; i++) { +#ifdef CONFIG_FRAME_POINTER + bp = get_frame_pointer(task, segv_regs); +#endif + + if (!stack) + sp = get_stack_pointer(task, segv_regs); + + printk(KERN_INFO "Stack:\n"); + stack = sp; + for (i = 0; i < 3 * STACKSLOTS_PER_LINE; i++) { if (kstack_end(stack)) break; - if (i && ((i % 8) == 0)) - printk("\n "); - printk("%08lx ", *stack++); + if (i && ((i % STACKSLOTS_PER_LINE) == 0)) + printk(KERN_CONT "\n"); + printk(KERN_CONT " %08lx", *stack++); } + printk(KERN_CONT "\n"); - printk("Call Trace: \n"); - show_trace(task, esp); + do_stack_trace(sp, bp); } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 1ac746a9eae..117568d4f64 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -3,24 +3,17 @@ * Licensed under the GPL */ -#include "linux/clockchips.h" -#include "linux/interrupt.h" -#include "linux/jiffies.h" -#include "linux/threads.h" -#include "asm/irq.h" -#include "asm/param.h" -#include "kern_util.h" -#include "os.h" - -/* - * Scheduler clock - returns current time in nanosec units. - */ -unsigned long long sched_clock(void) -{ - return (unsigned long long)jiffies_64 * (NSEC_PER_SEC / HZ); -} - -void timer_handler(int sig, struct uml_pt_regs *regs) +#include <linux/clockchips.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/jiffies.h> +#include <linux/threads.h> +#include <asm/irq.h> +#include <asm/param.h> +#include <kern_util.h> +#include <os.h> + +void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { unsigned long flags; @@ -32,7 +25,7 @@ void timer_handler(int sig, struct uml_pt_regs *regs) static void itimer_set_mode(enum clock_event_mode mode, struct clock_event_device *evt) { - switch(mode) { + switch (mode) { case CLOCK_EVT_MODE_PERIODIC: set_interval(); break; @@ -57,7 +50,7 @@ static int itimer_next_event(unsigned long delta, static struct clock_event_device itimer_clockevent = { .name = "itimer", .rating = 250, - .cpumask = CPU_MASK_ALL, + .cpumask = cpu_all_mask, .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_mode = itimer_set_mode, .set_next_event = itimer_next_event, @@ -72,9 +65,9 @@ static irqreturn_t um_timer(int irq, void *dev) return IRQ_HANDLED; } -static cycle_t itimer_read(void) +static cycle_t itimer_read(struct clocksource *cs) { - return os_nsecs(); + return os_nsecs() / 1000; } static struct clocksource itimer_clocksource = { @@ -82,8 +75,6 @@ static struct clocksource itimer_clocksource = { .rating = 300, .read = itimer_read, .mask = CLOCKSOURCE_MASK(64), - .mult = 1, - .shift = 0, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -91,7 +82,7 @@ static void __init setup_itimer(void) { int err; - err = request_irq(TIMER_IRQ, um_timer, IRQF_DISABLED, "timer", NULL); + err = request_irq(TIMER_IRQ, um_timer, 0, "timer", NULL); if (err != 0) printk(KERN_ERR "register_timer : request_irq failed - " "errno = %d\n", -err); @@ -101,26 +92,24 @@ static void __init setup_itimer(void) clockevent_delta2ns(60 * HZ, &itimer_clockevent); itimer_clockevent.min_delta_ns = clockevent_delta2ns(1, &itimer_clockevent); - err = clocksource_register(&itimer_clocksource); + err = clocksource_register_hz(&itimer_clocksource, USEC_PER_SEC); if (err) { - printk(KERN_ERR "clocksource_register returned %d\n", err); + printk(KERN_ERR "clocksource_register_hz returned %d\n", err); return; } clockevents_register_device(&itimer_clockevent); } -extern void (*late_time_init)(void); +void read_persistent_clock(struct timespec *ts) +{ + long long nsecs = os_nsecs(); + + set_normalized_timespec(ts, nsecs / NSEC_PER_SEC, + nsecs % NSEC_PER_SEC); +} void __init time_init(void) { - long long nsecs; - timer_init(); - - nsecs = os_nsecs(); - set_normalized_timespec(&wall_to_monotonic, -nsecs / NSEC_PER_SEC, - -nsecs % NSEC_PER_SEC); - set_normalized_timespec(&xtime, nsecs / NSEC_PER_SEC, - nsecs % NSEC_PER_SEC); late_time_init = setup_itimer; } diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index f4a0e407eee..f1b3eb14b85 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -3,14 +3,16 @@ * Licensed under the GPL */ -#include "linux/mm.h" -#include "asm/pgtable.h" -#include "asm/tlbflush.h" -#include "as-layout.h" -#include "mem_user.h" -#include "os.h" -#include "skas.h" -#include "tlb.h" +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <as-layout.h> +#include <mem_user.h> +#include <os.h> +#include <skas.h> +#include <kern_util.h> struct host_vm_change { struct host_vm_op { @@ -56,7 +58,7 @@ static int do_ops(struct host_vm_change *hvc, int end, for (i = 0; i < end && !ret; i++) { op = &hvc->ops[i]; - switch(op->type) { + switch (op->type) { case MMAP: ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len, op->u.mmap.prot, op->u.mmap.fd, @@ -74,6 +76,7 @@ static int do_ops(struct host_vm_change *hvc, int end, default: printk(KERN_ERR "Unknown op type %d in do_ops\n", op->type); + BUG(); break; } } @@ -122,6 +125,9 @@ static int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *last; int ret = 0; + if ((addr >= STUB_START) && (addr < STUB_END)) + return -EINVAL; + if (hvc->index != 0) { last = &hvc->ops[hvc->index - 1]; if ((last->type == MUNMAP) && @@ -183,27 +189,30 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr, pte = pte_offset_kernel(pmd, addr); do { + if ((addr >= STUB_START) && (addr < STUB_END)) + continue; + r = pte_read(*pte); w = pte_write(*pte); x = pte_exec(*pte); if (!pte_young(*pte)) { r = 0; w = 0; - } else if (!pte_dirty(*pte)) { + } else if (!pte_dirty(*pte)) w = 0; - } + prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) | (x ? UM_PROT_EXEC : 0)); if (hvc->force || pte_newpage(*pte)) { if (pte_present(*pte)) ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK, PAGE_SIZE, prot, hvc); - else ret = add_munmap(addr, PAGE_SIZE, hvc); - } - else if (pte_newprot(*pte)) + else + ret = add_munmap(addr, PAGE_SIZE, hvc); + } else if (pte_newprot(*pte)) ret = add_mprotect(addr, PAGE_SIZE, prot, hvc); *pte = pte_mkuptodate(*pte); - } while (pte++, addr += PAGE_SIZE, ((addr != end) && !ret)); + } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret)); return ret; } @@ -225,7 +234,7 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr, } } else ret = update_pte_range(pmd, addr, next, hvc); - } while (pmd++, addr = next, ((addr != end) && !ret)); + } while (pmd++, addr = next, ((addr < end) && !ret)); return ret; } @@ -247,7 +256,7 @@ static inline int update_pud_range(pgd_t *pgd, unsigned long addr, } } else ret = update_pmd_range(pud, addr, next, hvc); - } while (pud++, addr = next, ((addr != end) && !ret)); + } while (pud++, addr = next, ((addr < end) && !ret)); return ret; } @@ -270,7 +279,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, } } else ret = update_pud_range(pgd, addr, next, &hvc); - } while (pgd++, addr = next, ((addr != end_addr) && !ret)); + } while (pgd++, addr = next, ((addr < end_addr) && !ret)); if (!ret) ret = do_ops(&hvc, hvc.index, 1); @@ -278,12 +287,15 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, /* This is not an else because ret is modified above */ if (ret) { printk(KERN_ERR "fix_range_common: failed, killing current " - "process\n"); + "process: %d\n", task_tgid_vnr(current)); + /* We are under mmap_sem, release it such that current can terminate */ + up_write(¤t->mm->mmap_sem); force_sig(SIGKILL, current); + do_signal(); } } -int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) +static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) { struct mm_struct *mm; pgd_t *pgd; @@ -485,9 +497,6 @@ void __flush_tlb_one(unsigned long addr) static void fix_range(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force) { - if (!proc_mm && (end_addr > STUB_START)) - end_addr = STUB_START; - fix_range_common(mm, start_addr, end_addr, force); } @@ -498,11 +507,11 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, flush_tlb_kernel_range_common(start, end); else fix_range(vma->vm_mm, start, end, 0); } +EXPORT_SYMBOL(flush_tlb_range); -void flush_tlb_mm(struct mm_struct *mm) +void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, + unsigned long end) { - unsigned long end; - /* * Don't bother flushing if this address space is about to be * destroyed. @@ -510,8 +519,17 @@ void flush_tlb_mm(struct mm_struct *mm) if (atomic_read(&mm->mm_users) == 0) return; - end = proc_mm ? task_size : STUB_START; - fix_range(mm, 0, end, 0); + fix_range(mm, start, end, 0); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + struct vm_area_struct *vma = mm->mmap; + + while (vma != NULL) { + fix_range(mm, vma->vm_start, vma->vm_end, 0); + vma = vma->vm_next; + } } void force_flush_all(void) diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index cb3321f8e0a..5678c3571e7 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -6,14 +6,15 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/hardirq.h> +#include <linux/module.h> #include <asm/current.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> -#include "arch.h" -#include "as-layout.h" -#include "kern_util.h" -#include "os.h" -#include "sysdep/sigcontext.h" +#include <arch.h> +#include <as-layout.h> +#include <kern_util.h> +#include <os.h> +#include <skas.h> /* * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by @@ -29,6 +30,7 @@ int handle_page_fault(unsigned long address, unsigned long ip, pmd_t *pmd; pte_t *pte; int err = -EFAULT; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; *code_out = SEGV_MAPERR; @@ -39,6 +41,9 @@ int handle_page_fault(unsigned long address, unsigned long ip, if (in_atomic()) goto out_nosemaphore; + if (is_user) + flags |= FAULT_FLAG_USER; +retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) @@ -54,20 +59,26 @@ int handle_page_fault(unsigned long address, unsigned long ip, good_area: *code_out = SEGV_ACCERR; - if (is_write && !(vma->vm_flags & VM_WRITE)) - goto out; - - /* Don't require VM_READ|VM_EXEC for write faults! */ - if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC))) - goto out; + if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto out; + flags |= FAULT_FLAG_WRITE; + } else { + /* Don't require VM_READ|VM_EXEC for write faults! */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto out; + } do { int fault; -survive: - fault = handle_mm_fault(mm, vma, address, is_write); + + fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + goto out_nosemaphore; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) { - err = -ENOMEM; goto out_of_memory; } else if (fault & VM_FAULT_SIGBUS) { err = -EACCES; @@ -75,10 +86,18 @@ survive: } BUG(); } - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + + goto retry; + } + } pgd = pgd_offset(mm, address); pud = pud_offset(pgd, address); @@ -103,18 +122,38 @@ out: out_nosemaphore: return err; -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ out_of_memory: - if (is_global_init(current)) { - up_read(&mm->mmap_sem); - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - goto out; + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ + up_read(&mm->mmap_sem); + if (!is_user) + goto out_nosemaphore; + pagefault_out_of_memory(); + return 0; +} +EXPORT_SYMBOL(handle_page_fault); + +static void show_segv_info(struct uml_pt_regs *regs) +{ + struct task_struct *tsk = current; + struct faultinfo *fi = UPT_FAULTINFO(regs); + + if (!unhandled_signal(tsk, SIGSEGV)) + return; + + if (!printk_ratelimit()) + return; + + printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x", + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi), + (void *)UPT_IP(regs), (void *)UPT_SP(regs), + fi->error_code); + + print_vma_addr(KERN_CONT " in ", UPT_IP(regs)); + printk(KERN_CONT "\n"); } static void bad_segv(struct faultinfo fi, unsigned long ip) @@ -128,11 +167,24 @@ static void bad_segv(struct faultinfo fi, unsigned long ip) force_sig_info(SIGSEGV, &si, current); } -static void segv_handler(int sig, struct uml_pt_regs *regs) +void fatal_sigsegv(void) +{ + force_sigsegv(SIGSEGV, current); + do_signal(); + /* + * This is to tell gcc that we're not returning - do_signal + * can, in general, return, but in this case, it's not, since + * we just got a fatal SIGSEGV queued. + */ + os_dump_core(); +} + +void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { struct faultinfo * fi = UPT_FAULTINFO(regs); if (UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)) { + show_segv_info(regs); bad_segv(*fi, UPT_IP(regs)); return; } @@ -154,9 +206,12 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, int is_write = FAULT_WRITE(fi); unsigned long address = FAULT_ADDRESS(fi); + if (!is_user && regs) + current->thread.segv_regs = container_of(regs, struct pt_regs, regs); + if (!is_user && (address >= start_vm) && (address < end_vm)) { flush_tlb_kernel_vm(); - return 0; + goto out; } else if (current->mm == NULL) { show_regs(container_of(regs, struct pt_regs, regs)); @@ -178,7 +233,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, catcher = current->thread.fault_catcher; if (!err) - return 0; + goto out; else if (catcher != NULL) { current->thread.fault_addr = (void *) address; UML_LONGJMP(catcher, 1); @@ -186,7 +241,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, else if (current->thread.fault_addr != NULL) panic("fault_addr set but no fault catcher"); else if (!is_user && arch_fixup(ip, regs)) - return 0; + goto out; if (!is_user) { show_regs(container_of(regs, struct pt_regs, regs)); @@ -194,6 +249,8 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, address, ip); } + show_segv_info(regs); + if (err == -EACCES) { si.si_signo = SIGBUS; si.si_errno = 0; @@ -201,9 +258,6 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, si.si_addr = (void __user *)address; current->thread.arch.faultinfo = fi; force_sig_info(SIGBUS, &si, current); - } else if (err == -ENOMEM) { - printk(KERN_INFO "VM: killing process %s\n", current->comm); - do_exit(SIGKILL); } else { BUG_ON(err != -EFAULT); si.si_signo = SIGSEGV; @@ -211,13 +265,18 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, current->thread.arch.faultinfo = fi; force_sig_info(SIGSEGV, &si, current); } + +out: + if (regs) + current->thread.segv_regs = NULL; + return 0; } -void relay_signal(int sig, struct uml_pt_regs *regs) +void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) { - if (arch_handle_signal(sig, regs)) - return; + struct faultinfo *fi; + struct siginfo clean_si; if (!UPT_IS_USER(regs)) { if (sig == SIGBUS) @@ -226,31 +285,46 @@ void relay_signal(int sig, struct uml_pt_regs *regs) panic("Kernel mode signal %d", sig); } - current->thread.arch.faultinfo = *UPT_FAULTINFO(regs); - force_sig(sig, current); + arch_examine_signal(sig, regs); + + memset(&clean_si, 0, sizeof(clean_si)); + clean_si.si_signo = si->si_signo; + clean_si.si_errno = si->si_errno; + clean_si.si_code = si->si_code; + switch (sig) { + case SIGILL: + case SIGFPE: + case SIGSEGV: + case SIGBUS: + case SIGTRAP: + fi = UPT_FAULTINFO(regs); + clean_si.si_addr = (void __user *) FAULT_ADDRESS(*fi); + current->thread.arch.faultinfo = *fi; +#ifdef __ARCH_SI_TRAPNO + clean_si.si_trapno = si->si_trapno; +#endif + break; + default: + printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d)\n", + sig, si->si_code); + } + + force_sig_info(sig, &clean_si, current); } -static void bus_handler(int sig, struct uml_pt_regs *regs) +void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs) { if (current->thread.fault_catcher != NULL) UML_LONGJMP(current->thread.fault_catcher, 1); - else relay_signal(sig, regs); + else + relay_signal(sig, si, regs); } -static void winch(int sig, struct uml_pt_regs *regs) +void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { do_IRQ(WINCH_IRQ, regs); } -const struct kern_handlers handlinfo_kern = { - .relay_signal = relay_signal, - .winch = winch, - .bus_handler = bus_handler, - .page_fault = segv_handler, - .sigio_handler = sigio_handler, - .timer_handler = timer_handler -}; - void trap_init(void) { } diff --git a/arch/um/kernel/uaccess.c b/arch/um/kernel/uaccess.c deleted file mode 100644 index d7436aacd26..00000000000 --- a/arch/um/kernel/uaccess.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -/* These are here rather than tt/uaccess.c because skas mode needs them in - * order to do SIGBUS recovery when a tmpfs mount runs out of room. - */ - -#include <linux/string.h> -#include "os.h" - -void __do_copy(void *to, const void *from, int n) -{ - memcpy(to, from, n); -} - - -int __do_copy_to_user(void *to, const void *from, int n, - void **fault_addr, jmp_buf **fault_catcher) -{ - unsigned long fault; - int faulted; - - fault = __do_user_copy(to, from, n, fault_addr, fault_catcher, - __do_copy, &faulted); - if(!faulted) return(0); - else return(n - (fault - (unsigned long) to)); -} diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index f1c71393f57..016adf0985d 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -3,22 +3,25 @@ * Licensed under the GPL */ -#include "linux/delay.h" -#include "linux/mm.h" -#include "linux/module.h" -#include "linux/seq_file.h" -#include "linux/string.h" -#include "linux/utsname.h" -#include "asm/pgtable.h" -#include "asm/processor.h" -#include "asm/setup.h" -#include "arch.h" -#include "as-layout.h" -#include "init.h" -#include "kern.h" -#include "mem_user.h" -#include "os.h" -#include "skas.h" +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/string.h> +#include <linux/utsname.h> +#include <linux/sched.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <as-layout.h> +#include <arch.h> +#include <init.h> +#include <kern.h> +#include <kern_util.h> +#include <mem_user.h> +#include <os.h> #define DEFAULT_COMMAND_LINE "root=98:0" @@ -46,6 +49,10 @@ struct cpuinfo_um boot_cpu_data = { .ipi_pipe = { -1, -1 } }; +union thread_union cpu0_irqstack + __attribute__((__section__(".data..init_irqstack"))) = + { INIT_THREAD_INFO(init_task) }; + unsigned long thread_saved_pc(struct task_struct *task) { /* FIXME: Need to look up userspace_pid by cpu */ @@ -100,9 +107,9 @@ const struct seq_operations cpuinfo_op = { }; /* Set in linux_main */ -unsigned long host_task_size; -unsigned long task_size; unsigned long uml_physmem; +EXPORT_SYMBOL(uml_physmem); + unsigned long uml_reserved; /* Also modified in mem_init */ unsigned long start_vm; unsigned long end_vm; @@ -116,7 +123,7 @@ static int have_root __initdata = 0; /* Set in uml_mem_setup and modified in linux_main */ long long physmem_size = 32 * 1024 * 1024; -static char *usage_string = +static const char *usage_string = "User Mode Linux v%s\n" " available at http://user-mode-linux.sourceforge.net/\n\n"; @@ -151,7 +158,7 @@ __uml_setup("root=", uml_root_setup, static int __init no_skas_debug_setup(char *line, int *add) { printf("'debug' is not necessary to gdb UML in skas mode - run \n"); - printf("'gdb linux'"); + printf("'gdb linux'\n"); return 0; } @@ -197,20 +204,19 @@ __uml_setup("--help", Usage, " Prints this message.\n\n" ); -static int __init uml_checksetup(char *line, int *add) +static void __init uml_checksetup(char *line, int *add) { struct uml_param *p; p = &__uml_setup_start; - while(p < &__uml_setup_end) { - int n; + while (p < &__uml_setup_end) { + size_t n; n = strlen(p->str); if (!strncmp(line, p->str, n) && p->setup_func(line + n, add)) - return 1; + return; p++; } - return 0; } static void __init uml_postsetup(void) @@ -218,14 +224,35 @@ static void __init uml_postsetup(void) initcall_t *p; p = &__uml_postsetup_start; - while(p < &__uml_postsetup_end) { + while (p < &__uml_postsetup_end) { (*p)(); p++; } return; } +static int panic_exit(struct notifier_block *self, unsigned long unused1, + void *unused2) +{ + bust_spinlocks(1); + bust_spinlocks(0); + uml_exitcode = 1; + os_dump_core(); + return 0; +} + +static struct notifier_block panic_exit_notifier = { + .notifier_call = panic_exit, + .next = NULL, + .priority = 0 +}; + /* Set during early boot */ +unsigned long task_size; +EXPORT_SYMBOL(task_size); + +unsigned long host_task_size; + unsigned long brk_start; unsigned long end_iomem; EXPORT_SYMBOL(end_iomem); @@ -234,25 +261,13 @@ EXPORT_SYMBOL(end_iomem); extern char __binary_start; -static unsigned long set_task_sizes_skas(unsigned long *task_size_out) -{ - /* Round up to the nearest 4M */ - unsigned long host_task_size = ROUND_4M((unsigned long) - &host_task_size); - - if (!skas_needs_stub) - *task_size_out = host_task_size; - else - *task_size_out = STUB_START & PGDIR_MASK; - - return host_task_size; -} - int __init linux_main(int argc, char **argv) { unsigned long avail, diff; unsigned long virtmem_size, max_physmem; - unsigned int i, add; + unsigned long stack; + unsigned int i; + int add; char * mode; for (i = 1; i < argc; i++) { @@ -266,6 +281,13 @@ int __init linux_main(int argc, char **argv) if (have_root == 0) add_arg(DEFAULT_COMMAND_LINE); + host_task_size = os_get_top_address(); + /* + * TASK_SIZE needs to be PGDIR_SIZE aligned or else exit_mmap craps + * out + */ + task_size = host_task_size & PGDIR_MASK; + /* OS sanity checks that need to happen before the kernel runs */ os_early_checks(); @@ -278,13 +300,6 @@ int __init linux_main(int argc, char **argv) printf("UML running in %s mode\n", mode); - host_task_size = set_task_sizes_skas(&task_size); - - /* - * Setting up handlers to 'sig_info' struct - */ - os_fill_handlinfo(handlinfo_kern); - brk_start = (unsigned long) sbrk(0); /* @@ -309,7 +324,7 @@ int __init linux_main(int argc, char **argv) highmem = 0; iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; - max_physmem = get_kmem_end() - uml_physmem - iomem_size - MIN_VMALLOC; + max_physmem = TASK_SIZE - uml_physmem - iomem_size - MIN_VMALLOC; /* * Zones have to begin on a 1 << MAX_ORDER page boundary, @@ -341,7 +356,9 @@ int __init linux_main(int argc, char **argv) } virtmem_size = physmem_size; - avail = get_kmem_end() - start_vm; + stack = (unsigned long) argv; + stack &= ~(1024 * 1024 - 1); + avail = stack - start_vm; if (physmem_size > avail) virtmem_size = avail; end_vm = start_vm + virtmem_size; @@ -350,6 +367,9 @@ int __init linux_main(int argc, char **argv) printf("Kernel virtual memory size shrunk to %lu bytes\n", virtmem_size); + atomic_notifier_chain_register(&panic_notifier_list, + &panic_exit_notifier); + uml_postsetup(); stack_protections((unsigned long) &init_thread_info); @@ -358,29 +378,8 @@ int __init linux_main(int argc, char **argv) return start_uml(); } -extern int uml_exitcode; - -static int panic_exit(struct notifier_block *self, unsigned long unused1, - void *unused2) -{ - bust_spinlocks(1); - show_regs(&(current->thread.regs)); - bust_spinlocks(0); - uml_exitcode = 1; - os_dump_core(); - return 0; -} - -static struct notifier_block panic_exit_notifier = { - .notifier_call = panic_exit, - .next = NULL, - .priority = 0 -}; - void __init setup_arch(char **cmdline_p) { - atomic_notifier_chain_register(&panic_notifier_list, - &panic_exit_notifier); paging_init(); strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; diff --git a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c index 039e16efcd5..f6cc3bd6178 100644 --- a/arch/um/kernel/umid.c +++ b/arch/um/kernel/umid.c @@ -1,13 +1,12 @@ -/* - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "asm/errno.h" -#include "init.h" -#include "os.h" -#include "kern.h" -#include "linux/kernel.h" +#include <asm/errno.h> +#include <init.h> +#include <kern.h> +#include <os.h> /* Changed by set_umid_arg */ static int umid_inited = 0; @@ -16,16 +15,16 @@ static int __init set_umid_arg(char *name, int *add) { int err; - if(umid_inited){ + if (umid_inited) { printf("umid already set\n"); return 0; } *add = 0; err = set_umid(name); - if(err == -EEXIST) + if (err == -EEXIST) printf("umid '%s' already in use\n", name); - else if(!err) + else if (!err) umid_inited = 1; return 0; diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 5828c1d5450..6899195602b 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -1,4 +1,5 @@ #include <asm-generic/vmlinux.lds.h> +#include <asm/page.h> OUTPUT_FORMAT(ELF_FORMAT) OUTPUT_ARCH(ELF_ARCH) @@ -19,17 +20,12 @@ SECTIONS . = START + SIZEOF_HEADERS; _text = .; - _stext = .; - __init_begin = .; - .init.text : { - _sinittext = .; - INIT_TEXT - _einittext = .; - } - . = ALIGN(4096); + INIT_TEXT_SECTION(0) + . = ALIGN(PAGE_SIZE); .text : { + _stext = .; TEXT_TEXT SCHED_TEXT LOCK_TEXT @@ -39,22 +35,41 @@ SECTIONS *(.gnu.linkonce.t*) } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); .syscall_stub : { __syscall_stub_start = .; *(.__syscall_stub*) __syscall_stub_end = .; } - #include "asm/common.lds.S" + /* + * These are needed even in a static link, even if they wind up being empty. + * Newer glibc needs these __rel{,a}_iplt_{start,end} symbols. + */ + .rel.plt : { + *(.rel.plt) + PROVIDE_HIDDEN(__rel_iplt_start = .); + *(.rel.iplt) + PROVIDE_HIDDEN(__rel_iplt_end = .); + } + .rela.plt : { + *(.rela.plt) + PROVIDE_HIDDEN(__rela_iplt_start = .); + *(.rela.iplt) + PROVIDE_HIDDEN(__rela_iplt_end = .); + } + + #include <asm/common.lds.S> + __init_begin = .; init.data : { INIT_DATA } + __init_end = .; + .data : { - . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ - *(.data.init_task) + INIT_TASK_DATA(KERNEL_STACK_SIZE) . = ALIGN(KERNEL_STACK_SIZE); - *(.data.init_irqstack) + *(.data..init_irqstack) DATA_DATA *(.gnu.linkonce.d*) CONSTRUCTORS @@ -79,24 +94,18 @@ SECTIONS .sdata : { *(.sdata) } _edata = .; PROVIDE (edata = .); - . = ALIGN(0x1000); - .sbss : - { - __bss_start = .; - PROVIDE(_bss_start = .); - *(.sbss) - *(.scommon) - } - .bss : - { - *(.dynbss) - *(.bss) - *(COMMON) - } + . = ALIGN(PAGE_SIZE); + __bss_start = .; + PROVIDE(_bss_start = .); + SBSS(0) + BSS(0) + __bss_stop = .; _end = .; PROVIDE (end = .); STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/um/kernel/vmlinux.lds.S b/arch/um/kernel/vmlinux.lds.S index f8aeb448aab..16e49bfa2b4 100644 --- a/arch/um/kernel/vmlinux.lds.S +++ b/arch/um/kernel/vmlinux.lds.S @@ -1,3 +1,6 @@ + +KERNEL_STACK_SIZE = 4096 * (1 << CONFIG_KERNEL_STACK_ORDER); + #ifdef CONFIG_LD_SCRIPT_STATIC #include "uml.lds.S" #else |
