diff options
Diffstat (limited to 'arch/um/os-Linux')
36 files changed, 4581 insertions, 2099 deletions
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index b83ac8e21c3..08ff5094fcd 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -1,19 +1,17 @@ # -# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) +# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) # Licensed under the GPL # -obj-y = aio.o elf_aux.o file.o helper.o main.o mem.o process.o signal.o \ - start_up.o time.o tt.o tty.o uaccess.o user_syms.o drivers/ \ - sys-$(SUBARCH)/ +obj-y = aio.o execvp.o file.o helper.o irq.o main.o mem.o process.o \ + registers.o sigio.o signal.o start_up.o time.o tty.o \ + umid.o user_syms.o util.o drivers/ skas/ -USER_OBJS := aio.o elf_aux.o file.o helper.o main.o mem.o process.o signal.o \ - start_up.o time.o tt.o tty.o uaccess.o +obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o -elf_aux.o: $(ARCH_DIR)/kernel-offsets.h -CFLAGS_elf_aux.o += -I$(objtree)/arch/um - -CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH) +USER_OBJS := $(user-objs-y) aio.o elf_aux.o execvp.o file.o helper.o irq.o \ + main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \ + tty.o umid.o util.o HAVE_AIO_ABI := $(shell [ -r /usr/include/linux/aio_abi.h ] && \ echo -DHAVE_AIO_ABI ) diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c index ffa759addd3..014eb35fd13 100644 --- a/arch/um/os-Linux/aio.c +++ b/arch/um/os-Linux/aio.c @@ -1,36 +1,33 @@ /* - * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include <stdlib.h> #include <unistd.h> +#include <sched.h> #include <signal.h> #include <errno.h> -#include <sched.h> -#include <sys/syscall.h> -#include "os.h" -#include "aio.h" -#include "init.h" -#include "user.h" -#include "mode.h" +#include <sys/time.h> +#include <asm/unistd.h> +#include <aio.h> +#include <init.h> +#include <kern_util.h> +#include <os.h> struct aio_thread_req { - enum aio_type type; - int io_fd; - unsigned long long offset; - char *buf; - int len; - struct aio_context *aio; + enum aio_type type; + int io_fd; + unsigned long long offset; + char *buf; + int len; + struct aio_context *aio; }; -static int aio_req_fd_r = -1; -static int aio_req_fd_w = -1; - #if defined(HAVE_AIO_ABI) #include <linux/aio_abi.h> -/* If we have the headers, we are going to build with AIO enabled. +/* + * If we have the headers, we are going to build with AIO enabled. * If we don't have aio in libc, we define the necessary stubs here. */ @@ -38,23 +35,24 @@ static int aio_req_fd_w = -1; static long io_setup(int n, aio_context_t *ctxp) { - return syscall(__NR_io_setup, n, ctxp); + return syscall(__NR_io_setup, n, ctxp); } static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp) { - return syscall(__NR_io_submit, ctx, nr, iocbpp); + return syscall(__NR_io_submit, ctx, nr, iocbpp); } static long io_getevents(aio_context_t ctx_id, long min_nr, long nr, - struct io_event *events, struct timespec *timeout) + struct io_event *events, struct timespec *timeout) { - return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout); + return syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout); } #endif -/* The AIO_MMAP cases force the mmapped page into memory here +/* + * The AIO_MMAP cases force the mmapped page into memory here * rather than in whatever place first touches the data. I used * to do this by touching the page, but that's delicate because * gcc is prone to optimizing that away. So, what's done here @@ -66,252 +64,247 @@ static long io_getevents(aio_context_t ctx_id, long min_nr, long nr, */ static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, - int len, unsigned long long offset, struct aio_context *aio) + int len, unsigned long long offset, struct aio_context *aio) { - struct iocb iocb, *iocbp = &iocb; - char c; - int err; - - iocb = ((struct iocb) { .aio_data = (unsigned long) aio, - .aio_reqprio = 0, - .aio_fildes = fd, - .aio_buf = (unsigned long) buf, - .aio_nbytes = len, - .aio_offset = offset, - .aio_reserved1 = 0, - .aio_reserved2 = 0, - .aio_reserved3 = 0 }); - - switch(type){ - case AIO_READ: - iocb.aio_lio_opcode = IOCB_CMD_PREAD; - err = io_submit(ctx, 1, &iocbp); - break; - case AIO_WRITE: - iocb.aio_lio_opcode = IOCB_CMD_PWRITE; - err = io_submit(ctx, 1, &iocbp); - break; - case AIO_MMAP: - iocb.aio_lio_opcode = IOCB_CMD_PREAD; - iocb.aio_buf = (unsigned long) &c; - iocb.aio_nbytes = sizeof(c); - err = io_submit(ctx, 1, &iocbp); - break; - default: - printk("Bogus op in do_aio - %d\n", type); - err = -EINVAL; - break; - } - - if(err > 0) - err = 0; - else - err = -errno; - - return err; + struct iocb *iocbp = & ((struct iocb) { + .aio_data = (unsigned long) aio, + .aio_fildes = fd, + .aio_buf = (unsigned long) buf, + .aio_nbytes = len, + .aio_offset = offset + }); + char c; + + switch (type) { + case AIO_READ: + iocbp->aio_lio_opcode = IOCB_CMD_PREAD; + break; + case AIO_WRITE: + iocbp->aio_lio_opcode = IOCB_CMD_PWRITE; + break; + case AIO_MMAP: + iocbp->aio_lio_opcode = IOCB_CMD_PREAD; + iocbp->aio_buf = (unsigned long) &c; + iocbp->aio_nbytes = sizeof(c); + break; + default: + printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type); + return -EINVAL; + } + + return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno; } +/* Initialized in an initcall and unchanged thereafter */ static aio_context_t ctx = 0; static int aio_thread(void *arg) { - struct aio_thread_reply reply; - struct io_event event; - int err, n, reply_fd; - - signal(SIGWINCH, SIG_IGN); - - while(1){ - n = io_getevents(ctx, 1, 1, &event, NULL); - if(n < 0){ - if(errno == EINTR) - continue; - printk("aio_thread - io_getevents failed, " - "errno = %d\n", errno); - } - else { - reply = ((struct aio_thread_reply) - { .data = (void *) (long) event.data, - .err = event.res }); + struct aio_thread_reply reply; + struct io_event event; + int err, n, reply_fd; + + os_fix_helper_signals(); + while (1) { + n = io_getevents(ctx, 1, 1, &event, NULL); + if (n < 0) { + if (errno == EINTR) + continue; + printk(UM_KERN_ERR "aio_thread - io_getevents failed, " + "errno = %d\n", errno); + } + else { + reply = ((struct aio_thread_reply) + { .data = (void *) (long) event.data, + .err = event.res }); reply_fd = ((struct aio_context *) reply.data)->reply_fd; - err = os_write_file(reply_fd, &reply, sizeof(reply)); - if(err != sizeof(reply)) - printk("aio_thread - write failed, fd = %d, " - "err = %d\n", aio_req_fd_r, -err); - } - } - return 0; + err = write(reply_fd, &reply, sizeof(reply)); + if (err != sizeof(reply)) + printk(UM_KERN_ERR "aio_thread - write failed, " + "fd = %d, err = %d\n", reply_fd, errno); + } + } + return 0; } #endif static int do_not_aio(struct aio_thread_req *req) { - char c; - int err; - - switch(req->type){ - case AIO_READ: - err = os_seek_file(req->io_fd, req->offset); - if(err) - goto out; - - err = os_read_file(req->io_fd, req->buf, req->len); - break; - case AIO_WRITE: - err = os_seek_file(req->io_fd, req->offset); - if(err) - goto out; - - err = os_write_file(req->io_fd, req->buf, req->len); - break; - case AIO_MMAP: - err = os_seek_file(req->io_fd, req->offset); - if(err) - goto out; - - err = os_read_file(req->io_fd, &c, sizeof(c)); - break; - default: - printk("do_not_aio - bad request type : %d\n", req->type); - err = -EINVAL; - break; - } - - out: - return err; + char c; + unsigned long long actual; + int n; + + actual = lseek64(req->io_fd, req->offset, SEEK_SET); + if (actual != req->offset) + return -errno; + + switch (req->type) { + case AIO_READ: + n = read(req->io_fd, req->buf, req->len); + break; + case AIO_WRITE: + n = write(req->io_fd, req->buf, req->len); + break; + case AIO_MMAP: + n = read(req->io_fd, &c, sizeof(c)); + break; + default: + printk(UM_KERN_ERR "do_not_aio - bad request type : %d\n", + req->type); + return -EINVAL; + } + + if (n < 0) + return -errno; + return 0; } +/* These are initialized in initcalls and not changed */ +static int aio_req_fd_r = -1; +static int aio_req_fd_w = -1; +static int aio_pid = -1; +static unsigned long aio_stack; + static int not_aio_thread(void *arg) { - struct aio_thread_req req; - struct aio_thread_reply reply; - int err; - - signal(SIGWINCH, SIG_IGN); - while(1){ - err = os_read_file(aio_req_fd_r, &req, sizeof(req)); - if(err != sizeof(req)){ - if(err < 0) - printk("not_aio_thread - read failed, " - "fd = %d, err = %d\n", aio_req_fd_r, - -err); - else { - printk("not_aio_thread - short read, fd = %d, " - "length = %d\n", aio_req_fd_r, err); - } - continue; - } - err = do_not_aio(&req); - reply = ((struct aio_thread_reply) { .data = req.aio, - .err = err }); - err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply)); - if(err != sizeof(reply)) - printk("not_aio_thread - write failed, fd = %d, " - "err = %d\n", aio_req_fd_r, -err); - } + struct aio_thread_req req; + struct aio_thread_reply reply; + int err; + + os_fix_helper_signals(); + while (1) { + err = read(aio_req_fd_r, &req, sizeof(req)); + if (err != sizeof(req)) { + if (err < 0) + printk(UM_KERN_ERR "not_aio_thread - " + "read failed, fd = %d, err = %d\n", + aio_req_fd_r, + errno); + else { + printk(UM_KERN_ERR "not_aio_thread - short " + "read, fd = %d, length = %d\n", + aio_req_fd_r, err); + } + continue; + } + err = do_not_aio(&req); + reply = ((struct aio_thread_reply) { .data = req.aio, + .err = err }); + err = write(req.aio->reply_fd, &reply, sizeof(reply)); + if (err != sizeof(reply)) + printk(UM_KERN_ERR "not_aio_thread - write failed, " + "fd = %d, err = %d\n", req.aio->reply_fd, errno); + } + + return 0; } -static int aio_pid = -1; - static int init_aio_24(void) { - unsigned long stack; - int fds[2], err; - - err = os_pipe(fds, 1, 1); - if(err) - goto out; - - aio_req_fd_w = fds[0]; - aio_req_fd_r = fds[1]; - err = run_helper_thread(not_aio_thread, NULL, - CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0); - if(err < 0) - goto out_close_pipe; - - aio_pid = err; - goto out; - - out_close_pipe: - os_close_file(fds[0]); - os_close_file(fds[1]); - aio_req_fd_w = -1; - aio_req_fd_r = -1; - out: + int fds[2], err; + + err = os_pipe(fds, 1, 1); + if (err) + goto out; + + aio_req_fd_w = fds[0]; + aio_req_fd_r = fds[1]; + + err = os_set_fd_block(aio_req_fd_w, 0); + if (err) + goto out_close_pipe; + + err = run_helper_thread(not_aio_thread, NULL, + CLONE_FILES | CLONE_VM, &aio_stack); + if (err < 0) + goto out_close_pipe; + + aio_pid = err; + goto out; + +out_close_pipe: + close(fds[0]); + close(fds[1]); + aio_req_fd_w = -1; + aio_req_fd_r = -1; +out: #ifndef HAVE_AIO_ABI - printk("/usr/include/linux/aio_abi.h not present during build\n"); + printk(UM_KERN_INFO "/usr/include/linux/aio_abi.h not present during " + "build\n"); #endif - printk("2.6 host AIO support not used - falling back to I/O " - "thread\n"); - return 0; + printk(UM_KERN_INFO "2.6 host AIO support not used - falling back to " + "I/O thread\n"); + return 0; } #ifdef HAVE_AIO_ABI #define DEFAULT_24_AIO 0 static int init_aio_26(void) { - unsigned long stack; - int err; + int err; - if(io_setup(256, &ctx)){ + if (io_setup(256, &ctx)) { err = -errno; - printk("aio_thread failed to initialize context, err = %d\n", - errno); - return err; - } + printk(UM_KERN_ERR "aio_thread failed to initialize context, " + "err = %d\n", errno); + return err; + } - err = run_helper_thread(aio_thread, NULL, - CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0); - if(err < 0) - return err; + err = run_helper_thread(aio_thread, NULL, + CLONE_FILES | CLONE_VM, &aio_stack); + if (err < 0) + return err; - aio_pid = err; + aio_pid = err; - printk("Using 2.6 host AIO\n"); - return 0; + printk(UM_KERN_INFO "Using 2.6 host AIO\n"); + return 0; } static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, unsigned long long offset, struct aio_context *aio) { - struct aio_thread_reply reply; - int err; - - err = do_aio(ctx, type, io_fd, buf, len, offset, aio); - if(err){ - reply = ((struct aio_thread_reply) { .data = aio, - .err = err }); - err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); - if(err != sizeof(reply)) - printk("submit_aio_26 - write failed, " - "fd = %d, err = %d\n", aio->reply_fd, -err); - else err = 0; - } - - return err; + struct aio_thread_reply reply; + int err; + + err = do_aio(ctx, type, io_fd, buf, len, offset, aio); + if (err) { + reply = ((struct aio_thread_reply) { .data = aio, + .err = err }); + err = write(aio->reply_fd, &reply, sizeof(reply)); + if (err != sizeof(reply)) { + err = -errno; + printk(UM_KERN_ERR "submit_aio_26 - write failed, " + "fd = %d, err = %d\n", aio->reply_fd, -err); + } + else err = 0; + } + + return err; } #else #define DEFAULT_24_AIO 1 static int init_aio_26(void) { - return -ENOSYS; + return -ENOSYS; } static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, unsigned long long offset, struct aio_context *aio) { - return -ENOSYS; + return -ENOSYS; } #endif +/* Initialized in an initcall and unchanged thereafter */ static int aio_24 = DEFAULT_24_AIO; static int __init set_aio_24(char *name, int *add) { - aio_24 = 1; - return 0; + aio_24 = 1; + return 0; } __uml_setup("aio=2.4", set_aio_24, @@ -328,31 +321,26 @@ __uml_setup("aio=2.4", set_aio_24, static int init_aio(void) { - int err; - - CHOOSE_MODE(({ - if(!aio_24){ - printk("Disabling 2.6 AIO in tt mode\n"); - aio_24 = 1; - } }), (void) 0); - - if(!aio_24){ - err = init_aio_26(); - if(err && (errno == ENOSYS)){ - printk("2.6 AIO not supported on the host - " - "reverting to 2.4 AIO\n"); - aio_24 = 1; - } - else return err; - } - - if(aio_24) - return init_aio_24(); - - return 0; + int err; + + if (!aio_24) { + err = init_aio_26(); + if (err && (errno == ENOSYS)) { + printk(UM_KERN_INFO "2.6 AIO not supported on the " + "host - reverting to 2.4 AIO\n"); + aio_24 = 1; + } + else return err; + } + + if (aio_24) + return init_aio_24(); + + return 0; } -/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio +/* + * The reason for the __initcall/__uml_exitcall asymmetry is that init_aio * needs to be called when the kernel is running because it calls run_helper, * which needs get_free_page. exit_aio is a __uml_exitcall because the generic * kernel does not run __exitcalls on shutdown, and can't because many of them @@ -362,8 +350,10 @@ __initcall(init_aio); static void exit_aio(void) { - if(aio_pid != -1) - os_kill_process(aio_pid, 1); + if (aio_pid != -1) { + os_kill_process(aio_pid, 1); + free_stack(aio_stack, 0); + } } __uml_exitcall(exit_aio); @@ -371,30 +361,30 @@ __uml_exitcall(exit_aio); static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len, unsigned long long offset, struct aio_context *aio) { - struct aio_thread_req req = { .type = type, - .io_fd = io_fd, - .offset = offset, - .buf = buf, - .len = len, - .aio = aio, - }; - int err; - - err = os_write_file(aio_req_fd_w, &req, sizeof(req)); - if(err == sizeof(req)) - err = 0; - - return err; + struct aio_thread_req req = { .type = type, + .io_fd = io_fd, + .offset = offset, + .buf = buf, + .len = len, + .aio = aio, + }; + int err; + + err = write(aio_req_fd_w, &req, sizeof(req)); + if (err == sizeof(req)) + err = 0; + else err = -errno; + + return err; } int submit_aio(enum aio_type type, int io_fd, char *buf, int len, - unsigned long long offset, int reply_fd, - struct aio_context *aio) + unsigned long long offset, int reply_fd, + struct aio_context *aio) { - aio->reply_fd = reply_fd; - if(aio_24) - return submit_aio_24(type, io_fd, buf, len, offset, aio); - else { - return submit_aio_26(type, io_fd, buf, len, offset, aio); - } + aio->reply_fd = reply_fd; + if (aio_24) + return submit_aio_24(type, io_fd, buf, len, offset, aio); + else + return submit_aio_26(type, io_fd, buf, len, offset, aio); } diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h index b84f6c4740f..54183a679fd 100644 --- a/arch/um/os-Linux/drivers/etap.h +++ b/arch/um/os-Linux/drivers/etap.h @@ -1,9 +1,12 @@ /* - * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "net_user.h" +#ifndef __DRIVERS_ETAP_H +#define __DRIVERS_ETAP_H + +#include <net_user.h> struct ethertap_data { char *dev_name; @@ -13,15 +16,6 @@ struct ethertap_data { void *dev; }; -extern struct net_user_info ethertap_user_info; +extern const struct net_user_info ethertap_user_info; -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +#endif diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c index 6ae4b19d9f5..f424600a583 100644 --- a/arch/um/os-Linux/drivers/ethertap_kern.c +++ b/arch/um/os-Linux/drivers/ethertap_kern.c @@ -1,16 +1,15 @@ /* - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and * James Leu (jleu@mindspring.net). + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Copyright (C) 2001 by various other people who didn't put their name here. * Licensed under the GPL. */ -#include "linux/init.h" -#include "linux/netdevice.h" -#include "linux/etherdevice.h" -#include "net_kern.h" -#include "net_user.h" +#include <linux/init.h> +#include <linux/netdevice.h> #include "etap.h" +#include <net_kern.h> struct ethertap_init { char *dev_name; @@ -23,7 +22,7 @@ static void etap_init(struct net_device *dev, void *data) struct ethertap_data *epri; struct ethertap_init *init = data; - pri = dev->priv; + pri = netdev_priv(dev); epri = (struct ethertap_data *) pri->user; epri->dev_name = init->dev_name; epri->gate_addr = init->gate_addr; @@ -31,41 +30,33 @@ static void etap_init(struct net_device *dev, void *data) epri->control_fd = -1; epri->dev = dev; - printk("ethertap backend - %s", epri->dev_name); + printk(KERN_INFO "ethertap backend - %s", epri->dev_name); if (epri->gate_addr != NULL) - printk(", IP = %s", epri->gate_addr); - printk("\n"); + printk(KERN_CONT ", IP = %s", epri->gate_addr); + printk(KERN_CONT "\n"); } -static int etap_read(int fd, struct sk_buff **skb, struct uml_net_private *lp) +static int etap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) { int len; - *skb = ether_adjust_skb(*skb, ETH_HEADER_ETHERTAP); - if(*skb == NULL) return(-ENOMEM); - len = net_recvfrom(fd, (*skb)->mac.raw, - (*skb)->dev->mtu + 2 * ETH_HEADER_ETHERTAP); - if(len <= 0) return(len); - skb_pull(*skb, 2); + len = net_recvfrom(fd, skb_mac_header(skb), + skb->dev->mtu + 2 + ETH_HEADER_ETHERTAP); + if (len <= 0) + return(len); + + skb_pull(skb, 2); len -= 2; - return(len); + return len; } -static int etap_write(int fd, struct sk_buff **skb, struct uml_net_private *lp) +static int etap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) { - if(skb_headroom(*skb) < 2){ - struct sk_buff *skb2; - - skb2 = skb_realloc_headroom(*skb, 2); - dev_kfree_skb(*skb); - if (skb2 == NULL) return(-ENOMEM); - *skb = skb2; - } - skb_push(*skb, 2); - return(net_send(fd, (*skb)->data, (*skb)->len)); + skb_push(skb, 2); + return net_send(fd, skb->data, skb->len); } -struct net_kern_info ethertap_kern_info = { +const struct net_kern_info ethertap_kern_info = { .init = etap_init, .protocol = eth_protocol, .read = etap_read, @@ -79,15 +70,15 @@ int ethertap_setup(char *str, char **mac_out, void *data) *init = ((struct ethertap_init) { .dev_name = NULL, .gate_addr = NULL }); - if(tap_setup_common(str, "ethertap", &init->dev_name, mac_out, + if (tap_setup_common(str, "ethertap", &init->dev_name, mac_out, &init->gate_addr)) - return(0); - if(init->dev_name == NULL){ - printk("ethertap_setup : Missing tap device name\n"); - return(0); + return 0; + if (init->dev_name == NULL) { + printk(KERN_ERR "ethertap_setup : Missing tap device name\n"); + return 0; } - return(1); + return 1; } static struct transport ethertap_transport = { @@ -97,23 +88,13 @@ static struct transport ethertap_transport = { .user = ðertap_user_info, .kern = ðertap_kern_info, .private_size = sizeof(struct ethertap_data), + .setup_size = sizeof(struct ethertap_init), }; static int register_ethertap(void) { register_transport(ðertap_transport); - return(1); + return 0; } -__initcall(register_ethertap); - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +late_initcall(register_ethertap); diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c index 901b85e8a1c..b39b6696ac5 100644 --- a/arch/um/os-Linux/drivers/ethertap_user.c +++ b/arch/um/os-Linux/drivers/ethertap_user.c @@ -1,5 +1,6 @@ /* - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and * James Leu (jleu@mindspring.net). * Copyright (C) 2001 by various other people who didn't put their name here. * Licensed under the GPL. @@ -7,27 +8,23 @@ #include <stdio.h> #include <unistd.h> -#include <stddef.h> -#include <stdlib.h> -#include <sys/errno.h> +#include <errno.h> +#include <string.h> #include <sys/socket.h> #include <sys/wait.h> -#include <sys/un.h> -#include <net/if.h> -#include "user.h" -#include "kern_util.h" -#include "user_util.h" -#include "net_user.h" #include "etap.h" -#include "os.h" +#include <os.h> +#include <net_user.h> +#include <um_malloc.h> #define MAX_PACKET ETH_MAX_PACKET -void etap_user_init(void *data, void *dev) +static int etap_user_init(void *data, void *dev) { struct ethertap_data *pri = data; pri->dev = dev; + return 0; } struct addr_change { @@ -40,20 +37,25 @@ static void etap_change(int op, unsigned char *addr, unsigned char *netmask, int fd) { struct addr_change change; - void *output; + char *output; int n; change.what = op; memcpy(change.addr, addr, sizeof(change.addr)); memcpy(change.netmask, netmask, sizeof(change.netmask)); - n = os_write_file(fd, &change, sizeof(change)); - if(n != sizeof(change)) - printk("etap_change - request failed, err = %d\n", -n); - output = um_kmalloc(page_size()); - if(output == NULL) - printk("etap_change : Failed to allocate output buffer\n"); - read_output(fd, output, page_size()); - if(output != NULL){ + CATCH_EINTR(n = write(fd, &change, sizeof(change))); + if (n != sizeof(change)) { + printk(UM_KERN_ERR "etap_change - request failed, err = %d\n", + errno); + return; + } + + output = uml_kmalloc(UM_KERN_PAGE_SIZE, UM_GFP_KERNEL); + if (output == NULL) + printk(UM_KERN_ERR "etap_change : Failed to allocate output " + "buffer\n"); + read_output(fd, output, UM_KERN_PAGE_SIZE); + if (output != NULL) { printk("%s", output); kfree(output); } @@ -82,27 +84,27 @@ static void etap_pre_exec(void *arg) struct etap_pre_exec_data *data = arg; dup2(data->control_remote, 1); - os_close_file(data->data_me); - os_close_file(data->control_me); + close(data->data_me); + close(data->control_me); } -static int etap_tramp(char *dev, char *gate, int control_me, +static int etap_tramp(char *dev, char *gate, int control_me, int control_remote, int data_me, int data_remote) { struct etap_pre_exec_data pe_data; - int pid, status, err, n; + int pid, err, n; char version_buf[sizeof("nnnnn\0")]; char data_fd_buf[sizeof("nnnnnn\0")]; char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; char *setup_args[] = { "uml_net", version_buf, "ethertap", dev, data_fd_buf, gate_buf, NULL }; - char *nosetup_args[] = { "uml_net", version_buf, "ethertap", + char *nosetup_args[] = { "uml_net", version_buf, "ethertap", dev, data_fd_buf, NULL }; char **args, c; sprintf(data_fd_buf, "%d", data_remote); sprintf(version_buf, "%d", UML_NET_VERSION); - if(gate != NULL){ + if (gate != NULL) { strcpy(gate_buf, gate); args = setup_args; } @@ -112,26 +114,24 @@ static int etap_tramp(char *dev, char *gate, int control_me, pe_data.control_remote = control_remote; pe_data.control_me = control_me; pe_data.data_me = data_me; - pid = run_helper(etap_pre_exec, &pe_data, args, NULL); - - if(pid < 0) err = pid; - os_close_file(data_remote); - os_close_file(control_remote); - n = os_read_file(control_me, &c, sizeof(c)); - if(n != sizeof(c)){ - printk("etap_tramp : read of status failed, err = %d\n", -n); - return(-EINVAL); + pid = run_helper(etap_pre_exec, &pe_data, args); + + if (pid < 0) + err = pid; + close(data_remote); + close(control_remote); + CATCH_EINTR(n = read(control_me, &c, sizeof(c))); + if (n != sizeof(c)) { + err = -errno; + printk(UM_KERN_ERR "etap_tramp : read of status failed, " + "err = %d\n", -err); + return err; } - if(c != 1){ - printk("etap_tramp : uml_net failed\n"); - err = -EINVAL; - CATCH_EINTR(n = waitpid(pid, &status, 0)); - if(n < 0) - err = -errno; - else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)) - printk("uml_net didn't exit with status 1\n"); + if (c != 1) { + printk(UM_KERN_ERR "etap_tramp : uml_net failed\n"); + err = helper_wait(pid); } - return(err); + return err; } static int etap_open(void *data) @@ -141,42 +141,56 @@ static int etap_open(void *data) int data_fds[2], control_fds[2], err, output_len; err = tap_open_common(pri->dev, pri->gate_addr); - if(err) return(err); - - err = os_pipe(data_fds, 0, 0); - if(err < 0){ - printk("data os_pipe failed - err = %d\n", -err); - return(err); + if (err) + return err; + + err = socketpair(AF_UNIX, SOCK_DGRAM, 0, data_fds); + if (err) { + err = -errno; + printk(UM_KERN_ERR "etap_open - data socketpair failed - " + "err = %d\n", errno); + return err; } - err = os_pipe(control_fds, 1, 0); - if(err < 0){ - printk("control os_pipe failed - err = %d\n", -err); - return(err); + err = socketpair(AF_UNIX, SOCK_STREAM, 0, control_fds); + if (err) { + err = -errno; + printk(UM_KERN_ERR "etap_open - control socketpair failed - " + "err = %d\n", errno); + goto out_close_data; } - - err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0], + + err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0], control_fds[1], data_fds[0], data_fds[1]); - output_len = page_size(); - output = um_kmalloc(output_len); + output_len = UM_KERN_PAGE_SIZE; + output = uml_kmalloc(output_len, UM_GFP_KERNEL); read_output(control_fds[0], output, output_len); - if(output == NULL) - printk("etap_open : failed to allocate output buffer\n"); + if (output == NULL) + printk(UM_KERN_ERR "etap_open : failed to allocate output " + "buffer\n"); else { printk("%s", output); kfree(output); } - if(err < 0){ - printk("etap_tramp failed - err = %d\n", -err); - return(err); + if (err < 0) { + printk(UM_KERN_ERR "etap_tramp failed - err = %d\n", -err); + goto out_close_control; } pri->data_fd = data_fds[0]; pri->control_fd = control_fds[0]; iter_addresses(pri->dev, etap_open_addr, &pri->control_fd); - return(data_fds[0]); + return data_fds[0]; + +out_close_control: + close(control_fds[0]); + close(control_fds[1]); +out_close_data: + close(data_fds[0]); + close(data_fds[1]); + return err; } static void etap_close(int fd, void *data) @@ -184,56 +198,51 @@ static void etap_close(int fd, void *data) struct ethertap_data *pri = data; iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); - os_close_file(fd); - os_shutdown_socket(pri->data_fd, 1, 1); - os_close_file(pri->data_fd); + close(fd); + + if (shutdown(pri->data_fd, SHUT_RDWR) < 0) + printk(UM_KERN_ERR "etap_close - shutdown data socket failed, " + "errno = %d\n", errno); + + if (shutdown(pri->control_fd, SHUT_RDWR) < 0) + printk(UM_KERN_ERR "etap_close - shutdown control socket " + "failed, errno = %d\n", errno); + + close(pri->data_fd); pri->data_fd = -1; - os_close_file(pri->control_fd); + close(pri->control_fd); pri->control_fd = -1; } -static int etap_set_mtu(int mtu, void *data) -{ - return(mtu); -} - static void etap_add_addr(unsigned char *addr, unsigned char *netmask, void *data) { struct ethertap_data *pri = data; tap_check_ips(pri->gate_addr, addr); - if(pri->control_fd == -1) return; + if (pri->control_fd == -1) + return; etap_open_addr(addr, netmask, &pri->control_fd); } -static void etap_del_addr(unsigned char *addr, unsigned char *netmask, +static void etap_del_addr(unsigned char *addr, unsigned char *netmask, void *data) { struct ethertap_data *pri = data; - if(pri->control_fd == -1) return; + if (pri->control_fd == -1) + return; + etap_close_addr(addr, netmask, &pri->control_fd); } -struct net_user_info ethertap_user_info = { +const struct net_user_info ethertap_user_info = { .init = etap_user_init, .open = etap_open, .close = etap_close, .remove = NULL, - .set_mtu = etap_set_mtu, .add_address = etap_add_addr, .delete_address = etap_del_addr, - .max_packet = MAX_PACKET - ETH_HEADER_ETHERTAP + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_ETHERTAP, }; - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h index 25d4a286881..7367354ac8d 100644 --- a/arch/um/os-Linux/drivers/tuntap.h +++ b/arch/um/os-Linux/drivers/tuntap.h @@ -1,12 +1,12 @@ /* - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ #ifndef __UM_TUNTAP_H #define __UM_TUNTAP_H -#include "net_user.h" +#include <net_user.h> struct tuntap_data { char *dev_name; @@ -16,17 +16,6 @@ struct tuntap_data { void *dev; }; -extern struct net_user_info tuntap_user_info; +extern const struct net_user_info tuntap_user_info; #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c index 4202b9ebad4..d9d56e5810f 100644 --- a/arch/um/os-Linux/drivers/tuntap_kern.c +++ b/arch/um/os-Linux/drivers/tuntap_kern.c @@ -1,16 +1,13 @@ -/* - * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) +/* + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "linux/stddef.h" -#include "linux/netdevice.h" -#include "linux/etherdevice.h" -#include "linux/skbuff.h" -#include "linux/init.h" -#include "asm/errno.h" -#include "net_kern.h" -#include "net_user.h" +#include <linux/netdevice.h> +#include <linux/init.h> +#include <linux/skbuff.h> +#include <asm/errno.h> +#include <net_kern.h> #include "tuntap.h" struct tuntap_init { @@ -24,7 +21,7 @@ static void tuntap_init(struct net_device *dev, void *data) struct tuntap_data *tpri; struct tuntap_init *init = data; - pri = dev->priv; + pri = netdev_priv(dev); tpri = (struct tuntap_data *) pri->user; tpri->dev_name = init->dev_name; tpri->fixed_config = (init->dev_name != NULL); @@ -32,28 +29,24 @@ static void tuntap_init(struct net_device *dev, void *data) tpri->fd = -1; tpri->dev = dev; - printk("TUN/TAP backend - "); + printk(KERN_INFO "TUN/TAP backend - "); if (tpri->gate_addr != NULL) - printk("IP = %s", tpri->gate_addr); - printk("\n"); + printk(KERN_CONT "IP = %s", tpri->gate_addr); + printk(KERN_CONT "\n"); } -static int tuntap_read(int fd, struct sk_buff **skb, - struct uml_net_private *lp) +static int tuntap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) { - *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); - if(*skb == NULL) return(-ENOMEM); - return(net_read(fd, (*skb)->mac.raw, - (*skb)->dev->mtu + ETH_HEADER_OTHER)); + return net_read(fd, skb_mac_header(skb), + skb->dev->mtu + ETH_HEADER_OTHER); } -static int tuntap_write(int fd, struct sk_buff **skb, - struct uml_net_private *lp) +static int tuntap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) { - return(net_write(fd, (*skb)->data, (*skb)->len)); + return net_write(fd, skb->data, skb->len); } -struct net_kern_info tuntap_kern_info = { +const struct net_kern_info tuntap_kern_info = { .init = tuntap_init, .protocol = eth_protocol, .read = tuntap_read, @@ -67,11 +60,11 @@ int tuntap_setup(char *str, char **mac_out, void *data) *init = ((struct tuntap_init) { .dev_name = NULL, .gate_addr = NULL }); - if(tap_setup_common(str, "tuntap", &init->dev_name, mac_out, + if (tap_setup_common(str, "tuntap", &init->dev_name, mac_out, &init->gate_addr)) - return(0); + return 0; - return(1); + return 1; } static struct transport tuntap_transport = { @@ -87,18 +80,7 @@ static struct transport tuntap_transport = { static int register_tuntap(void) { register_transport(&tuntap_transport); - return(1); + return 0; } -__initcall(register_tuntap); - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +late_initcall(register_tuntap); diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c index 52945338b64..14126d9176a 100644 --- a/arch/um/os-Linux/drivers/tuntap_user.c +++ b/arch/um/os-Linux/drivers/tuntap_user.c @@ -1,34 +1,28 @@ /* - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ #include <stdio.h> -#include <stddef.h> -#include <stdlib.h> #include <unistd.h> #include <errno.h> -#include <sys/wait.h> +#include <string.h> +#include <linux/if_tun.h> +#include <net/if.h> +#include <sys/ioctl.h> #include <sys/socket.h> -#include <sys/un.h> +#include <sys/wait.h> #include <sys/uio.h> -#include <sys/ioctl.h> -#include <net/if.h> -#include <linux/if_tun.h> -#include "net_user.h" +#include <kern_util.h> +#include <os.h> #include "tuntap.h" -#include "kern_util.h" -#include "user_util.h" -#include "user.h" -#include "os.h" -#define MAX_PACKET ETH_MAX_PACKET - -void tuntap_user_init(void *data, void *dev) +static int tuntap_user_init(void *data, void *dev) { struct tuntap_data *pri = data; pri->dev = dev; + return 0; } static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask, @@ -37,7 +31,8 @@ static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask, struct tuntap_data *pri = data; tap_check_ips(pri->gate_addr, addr); - if((pri->fd == -1) || pri->fixed_config) return; + if ((pri->fd == -1) || pri->fixed_config) + return; open_addr(addr, netmask, pri->dev_name); } @@ -46,7 +41,8 @@ static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask, { struct tuntap_data *pri = data; - if((pri->fd == -1) || pri->fixed_config) return; + if ((pri->fd == -1) || pri->fixed_config) + return; close_addr(addr, netmask, pri->dev_name); } @@ -58,9 +54,9 @@ struct tuntap_pre_exec_data { static void tuntap_pre_exec(void *arg) { struct tuntap_pre_exec_data *data = arg; - + dup2(data->stdout, 1); - os_close_file(data->close_me); + close(data->close_me); } static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, @@ -81,15 +77,16 @@ static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, data.stdout = remote; data.close_me = me; - pid = run_helper(tuntap_pre_exec, &data, argv, NULL); + pid = run_helper(tuntap_pre_exec, &data, argv); - if(pid < 0) return(-pid); + if (pid < 0) + return -pid; - os_close_file(remote); + close(remote); msg.msg_name = NULL; msg.msg_namelen = 0; - if(buffer != NULL){ + if (buffer != NULL) { iov = ((struct iovec) { buffer, buffer_len }); msg.msg_iov = &iov; msg.msg_iovlen = 1; @@ -103,26 +100,29 @@ static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, msg.msg_flags = 0; n = recvmsg(me, &msg, 0); *used_out = n; - if(n < 0){ + if (n < 0) { err = -errno; - printk("tuntap_open_tramp : recvmsg failed - errno = %d\n", - errno); + printk(UM_KERN_ERR "tuntap_open_tramp : recvmsg failed - " + "errno = %d\n", errno); return err; } - CATCH_EINTR(waitpid(pid, NULL, 0)); + helper_wait(pid); cmsg = CMSG_FIRSTHDR(&msg); - if(cmsg == NULL){ - printk("tuntap_open_tramp : didn't receive a message\n"); - return(-EINVAL); + if (cmsg == NULL) { + printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " + "message\n"); + return -EINVAL; } - if((cmsg->cmsg_level != SOL_SOCKET) || - (cmsg->cmsg_type != SCM_RIGHTS)){ - printk("tuntap_open_tramp : didn't receive a descriptor\n"); - return(-EINVAL); + if ((cmsg->cmsg_level != SOL_SOCKET) || + (cmsg->cmsg_type != SCM_RIGHTS)) { + printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " + "descriptor\n"); + return -EINVAL; } *fd_out = ((int *) CMSG_DATA(cmsg))[0]; - return(0); + os_set_exec_close(*fd_out); + return 0; } static int tuntap_open(void *data) @@ -133,47 +133,52 @@ static int tuntap_open(void *data) int err, fds[2], len, used; err = tap_open_common(pri->dev, pri->gate_addr); - if(err < 0) - return(err); - - if(pri->fixed_config){ - pri->fd = os_open_file("/dev/net/tun", of_rdwr(OPENFLAGS()), 0); - if(pri->fd < 0){ - printk("Failed to open /dev/net/tun, err = %d\n", - -pri->fd); - return(pri->fd); + if (err < 0) + return err; + + if (pri->fixed_config) { + pri->fd = os_open_file("/dev/net/tun", + of_cloexec(of_rdwr(OPENFLAGS())), 0); + if (pri->fd < 0) { + printk(UM_KERN_ERR "Failed to open /dev/net/tun, " + "err = %d\n", -pri->fd); + return pri->fd; } memset(&ifr, 0, sizeof(ifr)); ifr.ifr_flags = IFF_TAP | IFF_NO_PI; strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name)); - if(ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0){ + if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) { err = -errno; - printk("TUNSETIFF failed, errno = %d\n", errno); - os_close_file(pri->fd); + printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n", + errno); + close(pri->fd); return err; } } else { - err = os_pipe(fds, 0, 0); - if(err < 0){ - printk("tuntap_open : os_pipe failed - err = %d\n", - -err); - return(err); + err = socketpair(AF_UNIX, SOCK_DGRAM, 0, fds); + if (err) { + err = -errno; + printk(UM_KERN_ERR "tuntap_open : socketpair failed - " + "errno = %d\n", errno); + return err; } buffer = get_output_buffer(&len); - if(buffer != NULL) len--; + if (buffer != NULL) + len--; used = 0; err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0], fds[1], buffer, len, &used); output = buffer; - if(err < 0) { + if (err < 0) { printk("%s", output); free_output_buffer(buffer); - printk("tuntap_open_tramp failed - err = %d\n", -err); - return(err); + printk(UM_KERN_ERR "tuntap_open_tramp failed - " + "err = %d\n", -err); + return err; } pri->dev_name = uml_strdup(buffer); @@ -181,46 +186,30 @@ static int tuntap_open(void *data) printk("%s", output); free_output_buffer(buffer); - os_close_file(fds[0]); + close(fds[0]); iter_addresses(pri->dev, open_addr, pri->dev_name); } - return(pri->fd); + return pri->fd; } static void tuntap_close(int fd, void *data) { struct tuntap_data *pri = data; - if(!pri->fixed_config) + if (!pri->fixed_config) iter_addresses(pri->dev, close_addr, pri->dev_name); - os_close_file(fd); + close(fd); pri->fd = -1; } -static int tuntap_set_mtu(int mtu, void *data) -{ - return(mtu); -} - -struct net_user_info tuntap_user_info = { +const struct net_user_info tuntap_user_info = { .init = tuntap_user_init, .open = tuntap_open, .close = tuntap_close, .remove = NULL, - .set_mtu = tuntap_set_mtu, .add_address = tuntap_add_addr, .delete_address = tuntap_del_addr, - .max_packet = MAX_PACKET + .mtu = ETH_MAX_PACKET, + .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, }; - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index 5a99dd3fbed..1a365ddc4d0 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -9,24 +9,17 @@ */ #include <elf.h> #include <stddef.h> -#include "init.h" -#include "elf_user.h" -#include "mem_user.h" -#include <kern_constants.h> +#include <init.h> +#include <elf_user.h> +#include <mem_user.h> -/* Use the one from the kernel - the host may miss it, if having old headers. */ -#if UM_ELF_CLASS == UM_ELFCLASS32 typedef Elf32_auxv_t elf_auxv_t; -#else -typedef Elf64_auxv_t elf_auxv_t; -#endif +/* These are initialized very early in boot and never changed */ char * elf_aux_platform; -long elf_aux_hwcap; - +extern long elf_aux_hwcap; unsigned long vsyscall_ehdr; unsigned long vsyscall_end; - unsigned long __kernel_vsyscall; __init void scan_elf_aux( char **envp) @@ -40,6 +33,9 @@ __init void scan_elf_aux( char **envp) switch ( auxv->a_type ) { case AT_SYSINFO: __kernel_vsyscall = auxv->a_un.a_val; + /* See if the page is under TASK_SIZE */ + if (__kernel_vsyscall < (unsigned long) envp) + __kernel_vsyscall = 0; break; case AT_SYSINFO_EHDR: vsyscall_ehdr = auxv->a_un.a_val; diff --git a/arch/um/os-Linux/execvp.c b/arch/um/os-Linux/execvp.c new file mode 100644 index 00000000000..8fb25ca07c4 --- /dev/null +++ b/arch/um/os-Linux/execvp.c @@ -0,0 +1,149 @@ +/* Copyright (C) 2006 by Paolo Giarrusso - modified from glibc' execvp.c. + Original copyright notice follows: + + Copyright (C) 1991,92,1995-99,2002,2004 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ +#include <unistd.h> + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <limits.h> + +#ifndef TEST +#include <um_malloc.h> +#else +#include <stdio.h> +#define um_kmalloc malloc +#endif +#include <os.h> + +/* Execute FILE, searching in the `PATH' environment variable if it contains + no slashes, with arguments ARGV and environment from `environ'. */ +int execvp_noalloc(char *buf, const char *file, char *const argv[]) +{ + if (*file == '\0') { + return -ENOENT; + } + + if (strchr (file, '/') != NULL) { + /* Don't search when it contains a slash. */ + execv(file, argv); + } else { + int got_eacces; + size_t len, pathlen; + char *name, *p; + char *path = getenv("PATH"); + if (path == NULL) + path = ":/bin:/usr/bin"; + + len = strlen(file) + 1; + pathlen = strlen(path); + /* Copy the file name at the top. */ + name = memcpy(buf + pathlen + 1, file, len); + /* And add the slash. */ + *--name = '/'; + + got_eacces = 0; + p = path; + do { + char *startp; + + path = p; + //Let's avoid this GNU extension. + //p = strchrnul (path, ':'); + p = strchr(path, ':'); + if (!p) + p = strchr(path, '\0'); + + if (p == path) + /* Two adjacent colons, or a colon at the beginning or the end + of `PATH' means to search the current directory. */ + startp = name + 1; + else + startp = memcpy(name - (p - path), path, p - path); + + /* Try to execute this name. If it works, execv will not return. */ + execv(startp, argv); + + /* + if (errno == ENOEXEC) { + } + */ + + switch (errno) { + case EACCES: + /* Record the we got a `Permission denied' error. If we end + up finding no executable we can use, we want to diagnose + that we did find one but were denied access. */ + got_eacces = 1; + case ENOENT: + case ESTALE: + case ENOTDIR: + /* Those errors indicate the file is missing or not executable + by us, in which case we want to just try the next path + directory. */ + case ENODEV: + case ETIMEDOUT: + /* Some strange filesystems like AFS return even + stranger error numbers. They cannot reasonably mean + anything else so ignore those, too. */ + case ENOEXEC: + /* We won't go searching for the shell + * if it is not executable - the Linux + * kernel already handles this enough, + * for us. */ + break; + + default: + /* Some other error means we found an executable file, but + something went wrong executing it; return the error to our + caller. */ + return -errno; + } + } while (*p++ != '\0'); + + /* We tried every element and none of them worked. */ + if (got_eacces) + /* At least one failure was due to permissions, so report that + error. */ + return -EACCES; + } + + /* Return the error from the last attempt (probably ENOENT). */ + return -errno; +} +#ifdef TEST +int main(int argc, char**argv) +{ + char buf[PATH_MAX]; + int ret; + argc--; + if (!argc) { + fprintf(stderr, "Not enough arguments\n"); + return 1; + } + argv++; + if (ret = execvp_noalloc(buf, argv[0], argv)) { + errno = -ret; + perror("execvp_noalloc"); + } + return 0; +} +#endif diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index f55773c819e..08d90fba952 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -1,5 +1,5 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -8,18 +8,14 @@ #include <errno.h> #include <fcntl.h> #include <signal.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/socket.h> -#include <sys/un.h> #include <sys/ioctl.h> #include <sys/mount.h> -#include <sys/uio.h> -#include "os.h" -#include "user.h" -#include "kern_util.h" +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/un.h> +#include <os.h> -static void copy_stat(struct uml_stat *dst, struct stat64 *src) +static void copy_stat(struct uml_stat *dst, const struct stat64 *src) { *dst = ((struct uml_stat) { .ust_dev = src->st_dev, /* device */ @@ -42,16 +38,13 @@ int os_stat_fd(const int fd, struct uml_stat *ubuf) struct stat64 sbuf; int err; - do { - err = fstat64(fd, &sbuf); - } while((err < 0) && (errno == EINTR)) ; - - if(err < 0) - return(-errno); + CATCH_EINTR(err = fstat64(fd, &sbuf)); + if (err < 0) + return -errno; - if(ubuf != NULL) + if (ubuf != NULL) copy_stat(ubuf, &sbuf); - return(err); + return err; } int os_stat_file(const char *file_name, struct uml_stat *ubuf) @@ -59,37 +52,29 @@ int os_stat_file(const char *file_name, struct uml_stat *ubuf) struct stat64 sbuf; int err; - do { - err = stat64(file_name, &sbuf); - } while((err < 0) && (errno == EINTR)) ; - - if(err < 0) - return(-errno); + CATCH_EINTR(err = stat64(file_name, &sbuf)); + if (err < 0) + return -errno; - if(ubuf != NULL) + if (ubuf != NULL) copy_stat(ubuf, &sbuf); - return(err); + return err; } -int os_access(const char* file, int mode) +int os_access(const char *file, int mode) { int amode, err; - amode=(mode&OS_ACC_R_OK ? R_OK : 0) | (mode&OS_ACC_W_OK ? W_OK : 0) | - (mode&OS_ACC_X_OK ? X_OK : 0) | (mode&OS_ACC_F_OK ? F_OK : 0) ; + amode = (mode & OS_ACC_R_OK ? R_OK : 0) | + (mode & OS_ACC_W_OK ? W_OK : 0) | + (mode & OS_ACC_X_OK ? X_OK : 0) | + (mode & OS_ACC_F_OK ? F_OK : 0); err = access(file, amode); - if(err < 0) - return(-errno); - - return(0); -} - -void os_print_error(int error, const char* str) -{ - errno = error < 0 ? -error : error; + if (err < 0) + return -errno; - perror(str); + return 0; } /* FIXME? required only by hostaudio (because it passes ioctls verbatim) */ @@ -98,43 +83,19 @@ int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg) int err; err = ioctl(fd, cmd, arg); - if(err < 0) - return(-errno); - - return(err); -} - -int os_window_size(int fd, int *rows, int *cols) -{ - struct winsize size; - - if(ioctl(fd, TIOCGWINSZ, &size) < 0) - return(-errno); - - *rows = size.ws_row; - *cols = size.ws_col; - - return(0); -} - -int os_new_tty_pgrp(int fd, int pid) -{ - if(ioctl(fd, TIOCSCTTY, 0) < 0) + if (err < 0) return -errno; - if(tcsetpgrp(fd, pid) < 0) - return -errno; - - return(0); + return err; } /* FIXME: ensure namebuf in os_get_if_name is big enough */ int os_get_ifname(int fd, char* namebuf) { - if(ioctl(fd, SIOCGIFNAME, namebuf) < 0) - return(-errno); + if (ioctl(fd, SIOCGIFNAME, namebuf) < 0) + return -errno; - return(0); + return 0; } int os_set_slip(int fd) @@ -142,59 +103,25 @@ int os_set_slip(int fd) int disc, sencap; disc = N_SLIP; - if(ioctl(fd, TIOCSETD, &disc) < 0) + if (ioctl(fd, TIOCSETD, &disc) < 0) return -errno; sencap = 0; - if(ioctl(fd, SIOCSIFENCAP, &sencap) < 0) - return -errno; - - return(0); -} - -int os_set_owner(int fd, int pid) -{ - if(fcntl(fd, F_SETOWN, pid) < 0){ - int save_errno = errno; - - if(fcntl(fd, F_GETOWN, 0) != pid) - return(-save_errno); - } - - return(0); -} - -/* FIXME? moved wholesale from sigio_user.c to get fcntls out of that file */ -int os_sigio_async(int master, int slave) -{ - int flags; - - flags = fcntl(master, F_GETFL); - if(flags < 0) - return errno; - - if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || - (fcntl(master, F_SETOWN, os_getpid()) < 0)) - return -errno; - - if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)) + if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0) return -errno; - return(0); + return 0; } int os_mode_fd(int fd, int mode) { int err; - do { - err = fchmod(fd, mode); - } while((err < 0) && (errno==EINTR)) ; - - if(err < 0) - return(-errno); + CATCH_EINTR(err = fchmod(fd, mode)); + if (err < 0) + return -errno; - return(0); + return 0; } int os_file_type(char *file) @@ -203,67 +130,82 @@ int os_file_type(char *file) int err; err = os_stat_file(file, &buf); - if(err < 0) - return(err); + if (err < 0) + return err; - if(S_ISDIR(buf.ust_mode)) return(OS_TYPE_DIR); - else if(S_ISLNK(buf.ust_mode)) return(OS_TYPE_SYMLINK); - else if(S_ISCHR(buf.ust_mode)) return(OS_TYPE_CHARDEV); - else if(S_ISBLK(buf.ust_mode)) return(OS_TYPE_BLOCKDEV); - else if(S_ISFIFO(buf.ust_mode)) return(OS_TYPE_FIFO); - else if(S_ISSOCK(buf.ust_mode)) return(OS_TYPE_SOCK); - else return(OS_TYPE_FILE); + if (S_ISDIR(buf.ust_mode)) + return OS_TYPE_DIR; + else if (S_ISLNK(buf.ust_mode)) + return OS_TYPE_SYMLINK; + else if (S_ISCHR(buf.ust_mode)) + return OS_TYPE_CHARDEV; + else if (S_ISBLK(buf.ust_mode)) + return OS_TYPE_BLOCKDEV; + else if (S_ISFIFO(buf.ust_mode)) + return OS_TYPE_FIFO; + else if (S_ISSOCK(buf.ust_mode)) + return OS_TYPE_SOCK; + else return OS_TYPE_FILE; } -int os_file_mode(char *file, struct openflags *mode_out) +int os_file_mode(const char *file, struct openflags *mode_out) { int err; *mode_out = OPENFLAGS(); - err = os_access(file, OS_ACC_W_OK); - if((err < 0) && (err != -EACCES)) - return(err); - - *mode_out = of_write(*mode_out); - - err = os_access(file, OS_ACC_R_OK); - if((err < 0) && (err != -EACCES)) - return(err); + err = access(file, W_OK); + if (err && (errno != EACCES)) + return -errno; + else if (!err) + *mode_out = of_write(*mode_out); - *mode_out = of_read(*mode_out); + err = access(file, R_OK); + if (err && (errno != EACCES)) + return -errno; + else if (!err) + *mode_out = of_read(*mode_out); - return(0); + return err; } -int os_open_file(char *file, struct openflags flags, int mode) +int os_open_file(const char *file, struct openflags flags, int mode) { int fd, err, f = 0; - if(flags.r && flags.w) f = O_RDWR; - else if(flags.r) f = O_RDONLY; - else if(flags.w) f = O_WRONLY; + if (flags.r && flags.w) + f = O_RDWR; + else if (flags.r) + f = O_RDONLY; + else if (flags.w) + f = O_WRONLY; else f = 0; - if(flags.s) f |= O_SYNC; - if(flags.c) f |= O_CREAT; - if(flags.t) f |= O_TRUNC; - if(flags.e) f |= O_EXCL; + if (flags.s) + f |= O_SYNC; + if (flags.c) + f |= O_CREAT; + if (flags.t) + f |= O_TRUNC; + if (flags.e) + f |= O_EXCL; + if (flags.a) + f |= O_APPEND; fd = open64(file, f, mode); - if(fd < 0) - return(-errno); + if (fd < 0) + return -errno; - if(flags.cl && fcntl(fd, F_SETFD, 1)){ + if (flags.cl && fcntl(fd, F_SETFD, 1)) { err = -errno; - os_close_file(fd); + close(fd); return err; } - return(fd); + return fd; } -int os_connect_socket(char *name) +int os_connect_socket(const char *name) { struct sockaddr_un sock; int fd, err; @@ -272,159 +214,136 @@ int os_connect_socket(char *name) snprintf(sock.sun_path, sizeof(sock.sun_path), "%s", name); fd = socket(AF_UNIX, SOCK_STREAM, 0); - if(fd < 0) - return(fd); + if (fd < 0) { + err = -errno; + goto out; + } err = connect(fd, (struct sockaddr *) &sock, sizeof(sock)); - if(err) - return(-errno); + if (err) { + err = -errno; + goto out_close; + } + + return fd; - return(fd); +out_close: + close(fd); +out: + return err; } void os_close_file(int fd) { close(fd); } +int os_fsync_file(int fd) +{ + if (fsync(fd) < 0) + return -errno; + return 0; +} -int os_seek_file(int fd, __u64 offset) +int os_seek_file(int fd, unsigned long long offset) { - __u64 actual; + unsigned long long actual; actual = lseek64(fd, offset, SEEK_SET); - if(actual != offset) - return(-errno); - return(0); + if (actual != offset) + return -errno; + return 0; } -static int fault_buffer(void *start, int len, - int (*copy_proc)(void *addr, void *buf, int len)) +int os_read_file(int fd, void *buf, int len) { - int page = getpagesize(), i; - char c; + int n = read(fd, buf, len); - for(i = 0; i < len; i += page){ - if((*copy_proc)(start + i, &c, sizeof(c))) - return(-EFAULT); - } - if((len % page) != 0){ - if((*copy_proc)(start + len - 1, &c, sizeof(c))) - return(-EFAULT); - } - return(0); + if (n < 0) + return -errno; + return n; } -static int file_io(int fd, void *buf, int len, - int (*io_proc)(int fd, void *buf, int len), - int (*copy_user_proc)(void *addr, void *buf, int len)) +int os_write_file(int fd, const void *buf, int len) { - int n, err; + int n = write(fd, (void *) buf, len); - do { - n = (*io_proc)(fd, buf, len); - if((n < 0) && (errno == EFAULT)){ - err = fault_buffer(buf, len, copy_user_proc); - if(err) - return(err); - n = (*io_proc)(fd, buf, len); - } - } while((n < 0) && (errno == EINTR)); - - if(n < 0) - return(-errno); - return(n); + if (n < 0) + return -errno; + return n; } -int os_read_file(int fd, void *buf, int len) +int os_sync_file(int fd) { - return(file_io(fd, buf, len, (int (*)(int, void *, int)) read, - copy_from_user_proc)); -} + int n = fsync(fd); -int os_write_file(int fd, const void *buf, int len) -{ - return(file_io(fd, (void *) buf, len, - (int (*)(int, void *, int)) write, copy_to_user_proc)); + if (n < 0) + return -errno; + return n; } -int os_file_size(char *file, unsigned long long *size_out) +int os_file_size(const char *file, unsigned long long *size_out) { struct uml_stat buf; int err; err = os_stat_file(file, &buf); - if(err < 0){ - printk("Couldn't stat \"%s\" : err = %d\n", file, -err); - return(err); + if (err < 0) { + printk(UM_KERN_ERR "Couldn't stat \"%s\" : err = %d\n", file, + -err); + return err; } - if(S_ISBLK(buf.ust_mode)){ - int fd, blocks; + if (S_ISBLK(buf.ust_mode)) { + int fd; + long blocks; - fd = os_open_file(file, of_read(OPENFLAGS()), 0); - if(fd < 0){ - printk("Couldn't open \"%s\", errno = %d\n", file, -fd); - return(fd); - } - if(ioctl(fd, BLKGETSIZE, &blocks) < 0){ + fd = open(file, O_RDONLY, 0); + if (fd < 0) { err = -errno; - printk("Couldn't get the block size of \"%s\", " + printk(UM_KERN_ERR "Couldn't open \"%s\", " "errno = %d\n", file, errno); - os_close_file(fd); - return(err); + return err; + } + if (ioctl(fd, BLKGETSIZE, &blocks) < 0) { + err = -errno; + printk(UM_KERN_ERR "Couldn't get the block size of " + "\"%s\", errno = %d\n", file, errno); + close(fd); + return err; } *size_out = ((long long) blocks) * 512; - os_close_file(fd); - return(0); + close(fd); } - *size_out = buf.ust_size; - return(0); + else *size_out = buf.ust_size; + + return 0; } -int os_file_modtime(char *file, unsigned long *modtime) +int os_file_modtime(const char *file, unsigned long *modtime) { struct uml_stat buf; int err; err = os_stat_file(file, &buf); - if(err < 0){ - printk("Couldn't stat \"%s\" : err = %d\n", file, -err); - return(err); + if (err < 0) { + printk(UM_KERN_ERR "Couldn't stat \"%s\" : err = %d\n", file, + -err); + return err; } *modtime = buf.ust_mtime; - return(0); + return 0; } -int os_get_exec_close(int fd, int* close_on_exec) +int os_set_exec_close(int fd) { - int ret; - - do { - ret = fcntl(fd, F_GETFD); - } while((ret < 0) && (errno == EINTR)) ; - - if(ret < 0) - return(-errno); - - *close_on_exec = (ret&FD_CLOEXEC) ? 1 : 0; - return(ret); -} - -int os_set_exec_close(int fd, int close_on_exec) -{ - int flag, err; - - if(close_on_exec) flag = FD_CLOEXEC; - else flag = 0; + int err; - do { - err = fcntl(fd, F_SETFD, flag); - } while((err < 0) && (errno == EINTR)) ; + CATCH_EINTR(err = fcntl(fd, F_SETFD, FD_CLOEXEC)); - if(err < 0) - return(-errno); - return(err); + if (err < 0) + return -errno; + return err; } int os_pipe(int *fds, int stream, int close_on_exec) @@ -432,67 +351,69 @@ int os_pipe(int *fds, int stream, int close_on_exec) int err, type = stream ? SOCK_STREAM : SOCK_DGRAM; err = socketpair(AF_UNIX, type, 0, fds); - if(err < 0) - return(-errno); + if (err < 0) + return -errno; - if(!close_on_exec) - return(0); + if (!close_on_exec) + return 0; - err = os_set_exec_close(fds[0], 1); - if(err < 0) + err = os_set_exec_close(fds[0]); + if (err < 0) goto error; - err = os_set_exec_close(fds[1], 1); - if(err < 0) + err = os_set_exec_close(fds[1]); + if (err < 0) goto error; - return(0); + return 0; error: - printk("os_pipe : Setting FD_CLOEXEC failed, err = %d\n", -err); - os_close_file(fds[1]); - os_close_file(fds[0]); - return(err); + printk(UM_KERN_ERR "os_pipe : Setting FD_CLOEXEC failed, err = %d\n", + -err); + close(fds[1]); + close(fds[0]); + return err; } -int os_set_fd_async(int fd, int owner) +int os_set_fd_async(int fd) { - int err; + int err, flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; - /* XXX This should do F_GETFL first */ - if(fcntl(fd, F_SETFL, O_ASYNC | O_NONBLOCK) < 0){ + flags |= O_ASYNC | O_NONBLOCK; + if (fcntl(fd, F_SETFL, flags) < 0) { err = -errno; - printk("os_set_fd_async : failed to set O_ASYNC and " - "O_NONBLOCK on fd # %d, errno = %d\n", fd, errno); + printk(UM_KERN_ERR "os_set_fd_async : failed to set O_ASYNC " + "and O_NONBLOCK on fd # %d, errno = %d\n", fd, errno); return err; } -#ifdef notdef - if(fcntl(fd, F_SETFD, 1) < 0){ - printk("os_set_fd_async : Setting FD_CLOEXEC failed, " - "errno = %d\n", errno); - } -#endif - if((fcntl(fd, F_SETSIG, SIGIO) < 0) || - (fcntl(fd, F_SETOWN, owner) < 0)){ + if ((fcntl(fd, F_SETSIG, SIGIO) < 0) || + (fcntl(fd, F_SETOWN, os_getpid()) < 0)) { err = -errno; - printk("os_set_fd_async : Failed to fcntl F_SETOWN " - "(or F_SETSIG) fd %d to pid %d, errno = %d\n", fd, - owner, errno); + printk(UM_KERN_ERR "os_set_fd_async : Failed to fcntl F_SETOWN " + "(or F_SETSIG) fd %d, errno = %d\n", fd, errno); return err; } - return(0); + return 0; } int os_clear_fd_async(int fd) { - int flags = fcntl(fd, F_GETFL); + int flags; + + flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; flags &= ~(O_ASYNC | O_NONBLOCK); - if(fcntl(fd, F_SETFL, flags) < 0) - return(-errno); - return(0); + if (fcntl(fd, F_SETFL, flags) < 0) + return -errno; + return 0; } int os_set_fd_block(int fd, int blocking) @@ -500,14 +421,18 @@ int os_set_fd_block(int fd, int blocking) int flags; flags = fcntl(fd, F_GETFL); + if (flags < 0) + return -errno; - if(blocking) flags &= ~O_NONBLOCK; - else flags |= O_NONBLOCK; + if (blocking) + flags &= ~O_NONBLOCK; + else + flags |= O_NONBLOCK; - if(fcntl(fd, F_SETFL, flags) < 0) + if (fcntl(fd, F_SETFL, flags) < 0) return -errno; - return(0); + return 0; } int os_accept_connection(int fd) @@ -515,9 +440,9 @@ int os_accept_connection(int fd) int new; new = accept(fd, NULL, 0); - if(new < 0) - return(-errno); - return(new); + if (new < 0) + return -errno; + return new; } #ifndef SHUT_RD @@ -536,17 +461,19 @@ int os_shutdown_socket(int fd, int r, int w) { int what, err; - if(r && w) what = SHUT_RDWR; - else if(r) what = SHUT_RD; - else if(w) what = SHUT_WR; - else { - printk("os_shutdown_socket : neither r or w was set\n"); - return(-EINVAL); - } + if (r && w) + what = SHUT_RDWR; + else if (r) + what = SHUT_RD; + else if (w) + what = SHUT_WR; + else + return -EINVAL; + err = shutdown(fd, what); - if(err < 0) - return(-errno); - return(0); + if (err < 0) + return -errno; + return 0; } int os_rcv_fd(int fd, int *helper_pid_out) @@ -568,53 +495,52 @@ int os_rcv_fd(int fd, int *helper_pid_out) msg.msg_flags = 0; n = recvmsg(fd, &msg, 0); - if(n < 0) - return(-errno); - - else if(n != sizeof(iov.iov_len)) + if (n < 0) + return -errno; + else if (n != iov.iov_len) *helper_pid_out = -1; cmsg = CMSG_FIRSTHDR(&msg); - if(cmsg == NULL){ - printk("rcv_fd didn't receive anything, error = %d\n", errno); - return(-1); + if (cmsg == NULL) { + printk(UM_KERN_ERR "rcv_fd didn't receive anything, " + "error = %d\n", errno); + return -1; } - if((cmsg->cmsg_level != SOL_SOCKET) || - (cmsg->cmsg_type != SCM_RIGHTS)){ - printk("rcv_fd didn't receive a descriptor\n"); - return(-1); + if ((cmsg->cmsg_level != SOL_SOCKET) || + (cmsg->cmsg_type != SCM_RIGHTS)) { + printk(UM_KERN_ERR "rcv_fd didn't receive a descriptor\n"); + return -1; } new = ((int *) CMSG_DATA(cmsg))[0]; - return(new); + return new; } -int os_create_unix_socket(char *file, int len, int close_on_exec) +int os_create_unix_socket(const char *file, int len, int close_on_exec) { struct sockaddr_un addr; int sock, err; sock = socket(PF_UNIX, SOCK_DGRAM, 0); - if(sock < 0) + if (sock < 0) return -errno; - if(close_on_exec) { - err = os_set_exec_close(sock, 1); - if(err < 0) - printk("create_unix_socket : close_on_exec failed, " - "err = %d", -err); + if (close_on_exec) { + err = os_set_exec_close(sock); + if (err < 0) + printk(UM_KERN_ERR "create_unix_socket : " + "close_on_exec failed, err = %d", -err); } addr.sun_family = AF_UNIX; - /* XXX Be more careful about overflow */ snprintf(addr.sun_path, len, "%s", file); err = bind(sock, (struct sockaddr *) &addr, sizeof(addr)); - if(err < 0) + if (err < 0) return -errno; - return(sock); + return sock; } void os_flush_stdout(void) @@ -632,29 +558,34 @@ int os_lock_file(int fd, int excl) int err, save; err = fcntl(fd, F_SETLK, &lock); - if(!err) + if (!err) goto out; save = -errno; err = fcntl(fd, F_GETLK, &lock); - if(err){ + if (err) { err = -errno; goto out; } - printk("F_SETLK failed, file already locked by pid %d\n", lock.l_pid); + printk(UM_KERN_ERR "F_SETLK failed, file already locked by pid %d\n", + lock.l_pid); err = save; out: - return(err); + return err; } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +unsigned os_major(unsigned long long dev) +{ + return major(dev); +} + +unsigned os_minor(unsigned long long dev) +{ + return minor(dev); +} + +unsigned long long os_makedev(unsigned major, unsigned minor) +{ + return makedev(major, minor); +} diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c index 36cc8475bcd..e3ee4a51ef6 100644 --- a/arch/um/os-Linux/helper.c +++ b/arch/um/os-Linux/helper.c @@ -1,165 +1,164 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <errno.h> #include <sched.h> -#include <sys/signal.h> +#include <linux/limits.h> +#include <sys/socket.h> #include <sys/wait.h> -#include "user.h" -#include "kern_util.h" -#include "user_util.h" -#include "os.h" +#include <kern_util.h> +#include <os.h> +#include <um_malloc.h> struct helper_data { void (*pre_exec)(void*); void *pre_data; char **argv; int fd; + char *buf; }; -/* Debugging aid, changed only from gdb */ -int helper_pause = 0; - -static void helper_hup(int sig) -{ -} - static int helper_child(void *arg) { struct helper_data *data = arg; char **argv = data->argv; - int errval; + int err, ret; - if(helper_pause){ - signal(SIGHUP, helper_hup); - pause(); - } - if(data->pre_exec != NULL) + if (data->pre_exec != NULL) (*data->pre_exec)(data->pre_data); - execvp(argv[0], argv); - errval = errno; - printk("execvp of '%s' failed - errno = %d\n", argv[0], errno); - os_write_file(data->fd, &errval, sizeof(errval)); - kill(os_getpid(), SIGKILL); - return(0); + err = execvp_noalloc(data->buf, argv[0], argv); + + /* If the exec succeeds, we don't get here */ + CATCH_EINTR(ret = write(data->fd, &err, sizeof(err))); + + return 0; } -/* Returns either the pid of the child process we run or -E* on failure. - * XXX The alloc_stack here breaks if this is called in the tracing thread */ -int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv, - unsigned long *stack_out) +/* Returns either the pid of the child process we run or -E* on failure. */ +int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv) { struct helper_data data; unsigned long stack, sp; int pid, fds[2], ret, n; - if((stack_out != NULL) && (*stack_out != 0)) - stack = *stack_out; - else stack = alloc_stack(0, um_in_interrupt()); - if(stack == 0) - return(-ENOMEM); + stack = alloc_stack(0, __cant_sleep()); + if (stack == 0) + return -ENOMEM; - ret = os_pipe(fds, 1, 0); - if(ret < 0){ - printk("run_helper : pipe failed, ret = %d\n", -ret); + ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds); + if (ret < 0) { + ret = -errno; + printk(UM_KERN_ERR "run_helper : pipe failed, errno = %d\n", + errno); goto out_free; } - ret = os_set_exec_close(fds[1], 1); - if(ret < 0){ - printk("run_helper : setting FD_CLOEXEC failed, ret = %d\n", - -ret); + ret = os_set_exec_close(fds[1]); + if (ret < 0) { + printk(UM_KERN_ERR "run_helper : setting FD_CLOEXEC failed, " + "ret = %d\n", -ret); goto out_close; } - sp = stack + page_size() - sizeof(void *); + sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *); data.pre_exec = pre_exec; data.pre_data = pre_data; data.argv = argv; data.fd = fds[1]; - pid = clone(helper_child, (void *) sp, CLONE_VM | SIGCHLD, &data); - if(pid < 0){ + data.buf = __cant_sleep() ? uml_kmalloc(PATH_MAX, UM_GFP_ATOMIC) : + uml_kmalloc(PATH_MAX, UM_GFP_KERNEL); + pid = clone(helper_child, (void *) sp, CLONE_VM, &data); + if (pid < 0) { ret = -errno; - printk("run_helper : clone failed, errno = %d\n", errno); - goto out_close; + printk(UM_KERN_ERR "run_helper : clone failed, errno = %d\n", + errno); + goto out_free2; } close(fds[1]); fds[1] = -1; - /*Read the errno value from the child.*/ - n = os_read_file(fds[0], &ret, sizeof(ret)); - if(n < 0){ - printk("run_helper : read on pipe failed, ret = %d\n", -n); - ret = n; - kill(pid, SIGKILL); - CATCH_EINTR(waitpid(pid, NULL, 0)); - } - else if(n != 0){ - CATCH_EINTR(n = waitpid(pid, NULL, 0)); - ret = -errno; - } else { + /* + * Read the errno value from the child, if the exec failed, or get 0 if + * the exec succeeded because the pipe fd was set as close-on-exec. + */ + n = read(fds[0], &ret, sizeof(ret)); + if (n == 0) { ret = pid; + } else { + if (n < 0) { + n = -errno; + printk(UM_KERN_ERR "run_helper : read on pipe failed, " + "ret = %d\n", -n); + ret = n; + } + CATCH_EINTR(waitpid(pid, NULL, __WCLONE)); } +out_free2: + kfree(data.buf); out_close: if (fds[1] != -1) close(fds[1]); close(fds[0]); out_free: - if(stack_out == NULL) - free_stack(stack, 0); - else *stack_out = stack; - return(ret); + free_stack(stack, 0); + return ret; } int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, - unsigned long *stack_out, int stack_order) + unsigned long *stack_out) { unsigned long stack, sp; int pid, status, err; - stack = alloc_stack(stack_order, um_in_interrupt()); - if(stack == 0) return(-ENOMEM); + stack = alloc_stack(0, __cant_sleep()); + if (stack == 0) + return -ENOMEM; - sp = stack + (page_size() << stack_order) - sizeof(void *); - pid = clone(proc, (void *) sp, flags | SIGCHLD, arg); - if(pid < 0){ + sp = stack + UM_KERN_PAGE_SIZE - sizeof(void *); + pid = clone(proc, (void *) sp, flags, arg); + if (pid < 0) { err = -errno; - printk("run_helper_thread : clone failed, errno = %d\n", - errno); + printk(UM_KERN_ERR "run_helper_thread : clone failed, " + "errno = %d\n", errno); return err; } - if(stack_out == NULL){ - CATCH_EINTR(pid = waitpid(pid, &status, 0)); - if(pid < 0){ + if (stack_out == NULL) { + CATCH_EINTR(pid = waitpid(pid, &status, __WCLONE)); + if (pid < 0) { err = -errno; - printk("run_helper_thread - wait failed, errno = %d\n", - errno); + printk(UM_KERN_ERR "run_helper_thread - wait failed, " + "errno = %d\n", errno); pid = err; } - if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) - printk("run_helper_thread - thread returned status " - "0x%x\n", status); - free_stack(stack, stack_order); - } - else *stack_out = stack; - return(pid); + if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) + printk(UM_KERN_ERR "run_helper_thread - thread " + "returned status 0x%x\n", status); + free_stack(stack, 0); + } else + *stack_out = stack; + return pid; } int helper_wait(int pid) { - int ret; - - CATCH_EINTR(ret = waitpid(pid, NULL, WNOHANG)); - if(ret < 0){ - ret = -errno; - printk("helper_wait : waitpid failed, errno = %d\n", errno); - } - return(ret); + int ret, status; + int wflags = __WCLONE; + + CATCH_EINTR(ret = waitpid(pid, &status, wflags)); + if (ret < 0) { + printk(UM_KERN_ERR "helper_wait : waitpid process %d failed, " + "errno = %d\n", pid, errno); + return -errno; + } else if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + printk(UM_KERN_ERR "helper_wait : process %d exited with " + "status 0x%x\n", pid, status); + return -ECHILD; + } else + return 0; } diff --git a/arch/um/os-Linux/include/file.h b/arch/um/os-Linux/include/file.h deleted file mode 100644 index d82711efacf..00000000000 --- a/arch/um/os-Linux/include/file.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#ifndef __OS_FILE_H__ -#define __OS_FILE_H__ - -#define DEV_NULL "/dev/null" - -#endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h new file mode 100644 index 00000000000..0dc2c9f135f --- /dev/null +++ b/arch/um/os-Linux/internal.h @@ -0,0 +1 @@ +void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc); diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c new file mode 100644 index 00000000000..b9afb74b79a --- /dev/null +++ b/arch/um/os-Linux/irq.c @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdlib.h> +#include <errno.h> +#include <poll.h> +#include <signal.h> +#include <string.h> +#include <irq_user.h> +#include <os.h> +#include <um_malloc.h> + +/* + * Locked by irq_lock in arch/um/kernel/irq.c. Changed by os_create_pollfd + * and os_free_irq_by_cb, which are called under irq_lock. + */ +static struct pollfd *pollfds = NULL; +static int pollfds_num = 0; +static int pollfds_size = 0; + +int os_waiting_for_events(struct irq_fd *active_fds) +{ + struct irq_fd *irq_fd; + int i, n, err; + + n = poll(pollfds, pollfds_num, 0); + if (n < 0) { + err = -errno; + if (errno != EINTR) + printk(UM_KERN_ERR "os_waiting_for_events:" + " poll returned %d, errno = %d\n", n, errno); + return err; + } + + if (n == 0) + return 0; + + irq_fd = active_fds; + + for (i = 0; i < pollfds_num; i++) { + if (pollfds[i].revents != 0) { + irq_fd->current_events = pollfds[i].revents; + pollfds[i].fd = -1; + } + irq_fd = irq_fd->next; + } + return n; +} + +int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds) +{ + if (pollfds_num == pollfds_size) { + if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) { + /* return min size needed for new pollfds area */ + return (pollfds_size + 1) * sizeof(pollfds[0]); + } + + if (pollfds != NULL) { + memcpy(tmp_pfd, pollfds, + sizeof(pollfds[0]) * pollfds_size); + /* remove old pollfds */ + kfree(pollfds); + } + pollfds = tmp_pfd; + pollfds_size++; + } else + kfree(tmp_pfd); /* remove not used tmp_pfd */ + + pollfds[pollfds_num] = ((struct pollfd) { .fd = fd, + .events = events, + .revents = 0 }); + pollfds_num++; + + return 0; +} + +void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, + struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2) +{ + struct irq_fd **prev; + int i = 0; + + prev = &active_fds; + while (*prev != NULL) { + if ((*test)(*prev, arg)) { + struct irq_fd *old_fd = *prev; + if ((pollfds[i].fd != -1) && + (pollfds[i].fd != (*prev)->fd)) { + printk(UM_KERN_ERR "os_free_irq_by_cb - " + "mismatch between active_fds and " + "pollfds, fd %d vs %d\n", + (*prev)->fd, pollfds[i].fd); + goto out; + } + + pollfds_num--; + + /* + * This moves the *whole* array after pollfds[i] + * (though it doesn't spot as such)! + */ + memmove(&pollfds[i], &pollfds[i + 1], + (pollfds_num - i) * sizeof(pollfds[0])); + if (*last_irq_ptr2 == &old_fd->next) + *last_irq_ptr2 = prev; + + *prev = (*prev)->next; + if (old_fd->type == IRQ_WRITE) + ignore_sigio_fd(old_fd->fd); + kfree(old_fd); + continue; + } + prev = &(*prev)->next; + i++; + } + out: + return; +} + +int os_get_pollfd(int i) +{ + return pollfds[i].fd; +} + +void os_set_pollfd(int i, int fd) +{ + pollfds[i].fd = fd; +} + +void os_set_ioignore(void) +{ + signal(SIGIO, SIG_IGN); +} diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 23da27d2256..df9191acd92 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -1,59 +1,42 @@ /* - * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include <unistd.h> #include <stdio.h> #include <stdlib.h> -#include <string.h> -#include <signal.h> +#include <unistd.h> #include <errno.h> +#include <signal.h> +#include <string.h> #include <sys/resource.h> -#include <sys/mman.h> -#include <sys/user.h> -#include <asm/page.h> -#include "user_util.h" -#include "kern_util.h" -#include "mem_user.h" -#include "signal_user.h" -#include "time_user.h" -#include "irq_user.h" -#include "user.h" -#include "init.h" -#include "mode.h" -#include "choose-mode.h" -#include "uml-config.h" -#include "os.h" - -/* Set in set_stklim, which is called from main and __wrap_malloc. - * __wrap_malloc only calls it if main hasn't started. - */ -unsigned long stacksizelim; - -/* Set in main */ -char *linux_prog; +#include <as-layout.h> +#include <init.h> +#include <kern_util.h> +#include <os.h> +#include <um_malloc.h> #define PGD_BOUND (4 * 1024 * 1024) #define STACKSIZE (8 * 1024 * 1024) #define THREAD_NAME_LEN (256) +long elf_aux_hwcap; + static void set_stklim(void) { struct rlimit lim; - if(getrlimit(RLIMIT_STACK, &lim) < 0){ + if (getrlimit(RLIMIT_STACK, &lim) < 0) { perror("getrlimit"); exit(1); } - if((lim.rlim_cur == RLIM_INFINITY) || (lim.rlim_cur > STACKSIZE)){ + if ((lim.rlim_cur == RLIM_INFINITY) || (lim.rlim_cur > STACKSIZE)) { lim.rlim_cur = STACKSIZE; - if(setrlimit(RLIMIT_STACK, &lim) < 0){ + if (setrlimit(RLIMIT_STACK, &lim) < 0) { perror("setrlimit"); exit(1); } } - stacksizelim = (lim.rlim_cur + PGD_BOUND - 1) & ~(PGD_BOUND - 1); } static __init void do_uml_initcalls(void) @@ -61,7 +44,7 @@ static __init void do_uml_initcalls(void) initcall_t *call; call = &__uml_initcall_start; - while (call < &__uml_initcall_end){; + while (call < &__uml_initcall_end) { (*call)(); call++; } @@ -69,156 +52,179 @@ static __init void do_uml_initcalls(void) static void last_ditch_exit(int sig) { - signal(SIGINT, SIG_DFL); - signal(SIGTERM, SIG_DFL); - signal(SIGHUP, SIG_DFL); uml_cleanup(); exit(1); } -extern int uml_exitcode; - -extern void scan_elf_aux( char **envp); - -int main(int argc, char **argv, char **envp) +static void install_fatal_handler(int sig) { - char **new_argv; - sigset_t mask; - int ret, i, err; + struct sigaction action; - /* Enable all signals except SIGIO - in some environments, we can - * enter with some signals blocked - */ + /* All signals are enabled in this handler ... */ + sigemptyset(&action.sa_mask); - sigemptyset(&mask); - sigaddset(&mask, SIGIO); - if(sigprocmask(SIG_SETMASK, &mask, NULL) < 0){ - perror("sigprocmask"); + /* + * ... including the signal being handled, plus we want the + * handler reset to the default behavior, so that if an exit + * handler is hanging for some reason, the UML will just die + * after this signal is sent a second time. + */ + action.sa_flags = SA_RESETHAND | SA_NODEFER; + action.sa_restorer = NULL; + action.sa_handler = last_ditch_exit; + if (sigaction(sig, &action, NULL) < 0) { + printf("failed to install handler for signal %d - errno = %d\n", + sig, errno); exit(1); } +} -#ifdef UML_CONFIG_CMDLINE_ON_HOST - /* Allocate memory for thread command lines */ - if(argc < 2 || strlen(argv[1]) < THREAD_NAME_LEN - 1){ +#define UML_LIB_PATH ":" OS_LIB_PATH "/uml" - char padding[THREAD_NAME_LEN] = { - [ 0 ... THREAD_NAME_LEN - 2] = ' ', '\0' - }; +static void setup_env_path(void) +{ + char *new_path = NULL; + char *old_path = NULL; + int path_len = 0; + + old_path = getenv("PATH"); + /* + * if no PATH variable is set or it has an empty value + * just use the default + /usr/lib/uml + */ + if (!old_path || (path_len = strlen(old_path)) == 0) { + if (putenv("PATH=:/bin:/usr/bin/" UML_LIB_PATH)) + perror("couldn't putenv"); + return; + } - new_argv = malloc((argc + 2) * sizeof(char*)); - if(!new_argv) { - perror("Allocating extended argv"); - exit(1); - } + /* append /usr/lib/uml to the existing path */ + path_len += strlen("PATH=" UML_LIB_PATH) + 1; + new_path = malloc(path_len); + if (!new_path) { + perror("couldn't malloc to set a new PATH"); + return; + } + snprintf(new_path, path_len, "PATH=%s" UML_LIB_PATH, old_path); + if (putenv(new_path)) { + perror("couldn't putenv to set a new PATH"); + free(new_path); + } +} - new_argv[0] = argv[0]; - new_argv[1] = padding; +extern void scan_elf_aux( char **envp); - for(i = 2; i <= argc; i++) - new_argv[i] = argv[i - 1]; - new_argv[argc + 1] = NULL; +int __init main(int argc, char **argv, char **envp) +{ + char **new_argv; + int ret, i, err; - execvp(new_argv[0], new_argv); - perror("execing with extended args"); - exit(1); - } -#endif + set_stklim(); - linux_prog = argv[0]; + setup_env_path(); - set_stklim(); + setsid(); new_argv = malloc((argc + 1) * sizeof(char *)); - if(new_argv == NULL){ + if (new_argv == NULL) { perror("Mallocing argv"); exit(1); } - for(i=0;i<argc;i++){ + for (i = 0; i < argc; i++) { new_argv[i] = strdup(argv[i]); - if(new_argv[i] == NULL){ + if (new_argv[i] == NULL) { perror("Mallocing an arg"); exit(1); } } new_argv[argc] = NULL; - set_handler(SIGINT, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); - set_handler(SIGTERM, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); - set_handler(SIGHUP, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); + /* + * Allow these signals to bring down a UML if all other + * methods of control fail. + */ + install_fatal_handler(SIGINT); + install_fatal_handler(SIGTERM); - scan_elf_aux( envp); +#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA + scan_elf_aux(envp); +#endif do_uml_initcalls(); + change_sig(SIGPIPE, 0); ret = linux_main(argc, argv); - /* Disable SIGPROF - I have no idea why libc doesn't do this or turn + /* + * Disable SIGPROF - I have no idea why libc doesn't do this or turn * off the profiling time, but UML dies with a SIGPROF just before * exiting when profiling is active. */ change_sig(SIGPROF, 0); - /* This signal stuff used to be in the reboot case. However, + /* + * This signal stuff used to be in the reboot case. However, * sometimes a SIGVTALRM can come in when we're halting (reproducably * when writing out gcov information, presumably because that takes * some time) and cause a segfault. */ - /* stop timers and set SIG*ALRM to be ignored */ + /* stop timers and set SIGVTALRM to be ignored */ disable_timer(); /* disable SIGIO for the fds and set SIGIO to be ignored */ err = deactivate_all_fds(); - if(err) + if (err) printf("deactivate_all_fds failed, errno = %d\n", -err); - /* Let any pending signals fire now. This ensures + /* + * Let any pending signals fire now. This ensures * that they won't be delivered after the exec, when * they are definitely not expected. */ unblock_signals(); /* Reboot */ - if(ret){ + if (ret) { printf("\n"); execvp(new_argv[0], new_argv); perror("Failed to exec kernel"); ret = 1; } printf("\n"); - return(uml_exitcode); + return uml_exitcode; } -#define CAN_KMALLOC() \ - (kmalloc_ok && CHOOSE_MODE((os_getpid() != tracing_pid), 1)) - extern void *__real_malloc(int); void *__wrap_malloc(int size) { void *ret; - if(!CAN_KMALLOC()) - return(__real_malloc(size)); - else if(size <= PAGE_SIZE) /* finding contiguos pages can be hard*/ - ret = um_kmalloc(size); - else ret = um_vmalloc(size); + if (!kmalloc_ok) + return __real_malloc(size); + else if (size <= UM_KERN_PAGE_SIZE) + /* finding contiguous pages can be hard*/ + ret = uml_kmalloc(size, UM_GFP_KERNEL); + else ret = vmalloc(size); - /* glibc people insist that if malloc fails, errno should be + /* + * glibc people insist that if malloc fails, errno should be * set by malloc as well. So we do. */ - if(ret == NULL) + if (ret == NULL) errno = ENOMEM; - return(ret); + return ret; } void *__wrap_calloc(int n, int size) { void *ptr = __wrap_malloc(n * size); - if(ptr == NULL) return(NULL); + if (ptr == NULL) + return NULL; memset(ptr, 0, n * size); - return(ptr); + return ptr; } extern void __real_free(void *); @@ -229,7 +235,8 @@ void __wrap_free(void *ptr) { unsigned long addr = (unsigned long) ptr; - /* We need to know how the allocation happened, so it can be correctly + /* + * We need to know how the allocation happened, so it can be correctly * freed. This is done by seeing what region of memory the pointer is * in - * physical memory - kmalloc/kfree @@ -247,12 +254,12 @@ void __wrap_free(void *ptr) * there is a possibility for memory leaks. */ - if((addr >= uml_physmem) && (addr < high_physmem)){ - if(CAN_KMALLOC()) + if ((addr >= uml_physmem) && (addr < high_physmem)) { + if (kmalloc_ok) kfree(ptr); } - else if((addr >= start_vm) && (addr < end_vm)){ - if(CAN_KMALLOC()) + else if ((addr >= start_vm) && (addr < end_vm)) { + if (kmalloc_ok) vfree(ptr); } else __real_free(ptr); diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c index 9d7d69a523b..897e9ad0c10 100644 --- a/arch/um/os-Linux/mem.c +++ b/arch/um/os-Linux/mem.c @@ -1,161 +1,205 @@ +/* + * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + #include <stdio.h> -#include <stdlib.h> #include <stddef.h> -#include <stdarg.h> +#include <stdlib.h> #include <unistd.h> #include <errno.h> -#include <string.h> #include <fcntl.h> -#include <sys/types.h> +#include <string.h> +#include <sys/stat.h> #include <sys/mman.h> -#include "kern_util.h" -#include "user.h" -#include "user_util.h" -#include "mem_user.h" -#include "init.h" -#include "os.h" -#include "tempfile.h" -#include "kern_constants.h" - -#include <sys/param.h> +#include <sys/vfs.h> +#include <linux/magic.h> +#include <init.h> +#include <os.h> +/* Set by make_tempfile() during early boot. */ static char *tempdir = NULL; -static void __init find_tempdir(void) +/* Check if dir is on tmpfs. Return 0 if yes, -1 if no or error. */ +static int __init check_tmpfs(const char *dir) { - char *dirs[] = { "TMP", "TEMP", "TMPDIR", NULL }; - int i; - char *dir = NULL; + struct statfs st; + + printf("Checking if %s is on tmpfs...", dir); + if (statfs(dir, &st) < 0) { + printf("%s\n", strerror(errno)); + } else if (st.f_type != TMPFS_MAGIC) { + printf("no\n"); + } else { + printf("OK\n"); + return 0; + } + return -1; +} - if(tempdir != NULL) return; /* We've already been called */ - for(i = 0; dirs[i]; i++){ - dir = getenv(dirs[i]); - if((dir != NULL) && (*dir != '\0')) - break; +/* + * Choose the tempdir to use. We want something on tmpfs so that our memory is + * not subject to the host's vm.dirty_ratio. If a tempdir is specified in the + * environment, we use that even if it's not on tmpfs, but we warn the user. + * Otherwise, we try common tmpfs locations, and if no tmpfs directory is found + * then we fall back to /tmp. + */ +static char * __init choose_tempdir(void) +{ + static const char * const vars[] = { + "TMPDIR", + "TMP", + "TEMP", + NULL + }; + static const char fallback_dir[] = "/tmp"; + static const char * const tmpfs_dirs[] = { + "/dev/shm", + fallback_dir, + NULL + }; + int i; + const char *dir; + + printf("Checking environment variables for a tempdir..."); + for (i = 0; vars[i]; i++) { + dir = getenv(vars[i]); + if ((dir != NULL) && (*dir != '\0')) { + printf("%s\n", dir); + if (check_tmpfs(dir) >= 0) + goto done; + else + goto warn; + } } - if((dir == NULL) || (*dir == '\0')) - dir = "/tmp"; - - tempdir = malloc(strlen(dir) + 2); - if(tempdir == NULL){ - fprintf(stderr, "Failed to malloc tempdir, " - "errno = %d\n", errno); - return; + printf("none found\n"); + + for (i = 0; tmpfs_dirs[i]; i++) { + dir = tmpfs_dirs[i]; + if (check_tmpfs(dir) >= 0) + goto done; } - strcpy(tempdir, dir); - strcat(tempdir, "/"); + + dir = fallback_dir; +warn: + printf("Warning: tempdir %s is not on tmpfs\n", dir); +done: + /* Make a copy since getenv results may not remain valid forever. */ + return strdup(dir); } /* - * This proc still used in tt-mode - * (file: kernel/tt/ptproxy/proxy.c, proc: start_debugger). - * So it isn't 'static' yet. + * Create an unlinked tempfile in a suitable tempdir. template must be the + * basename part of the template with a leading '/'. */ -int make_tempfile(const char *template, char **out_tempname, int do_unlink) +static int __init make_tempfile(const char *template) { - char tempname[MAXPATHLEN]; + char *tempname; int fd; - find_tempdir(); - if (*template != '/') - strcpy(tempname, tempdir); - else - *tempname = 0; + if (tempdir == NULL) { + tempdir = choose_tempdir(); + if (tempdir == NULL) { + fprintf(stderr, "Failed to choose tempdir: %s\n", + strerror(errno)); + return -1; + } + } + + tempname = malloc(strlen(tempdir) + strlen(template) + 1); + if (tempname == NULL) + return -1; + + strcpy(tempname, tempdir); strcat(tempname, template); fd = mkstemp(tempname); - if(fd < 0){ + if (fd < 0) { fprintf(stderr, "open - cannot create %s: %s\n", tempname, strerror(errno)); - return -1; + goto out; } - if(do_unlink && (unlink(tempname) < 0)){ + if (unlink(tempname) < 0) { perror("unlink"); - return -1; + goto close; } - if(out_tempname){ - *out_tempname = strdup(tempname); - if(*out_tempname == NULL){ - perror("strdup"); - return -1; - } - } - return(fd); + free(tempname); + return fd; +close: + close(fd); +out: + free(tempname); + return -1; } -#define TEMPNAME_TEMPLATE "vm_file-XXXXXX" +#define TEMPNAME_TEMPLATE "/vm_file-XXXXXX" -/* - * This proc is used in start_up.c - * So it isn't 'static'. - */ -int create_tmp_file(unsigned long long len) +static int __init create_tmp_file(unsigned long long len) { int fd, err; char zero; - fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1); - if(fd < 0) { + fd = make_tempfile(TEMPNAME_TEMPLATE); + if (fd < 0) exit(1); - } err = fchmod(fd, 0777); - if(err < 0){ - perror("os_mode_fd"); + if (err < 0) { + perror("fchmod"); exit(1); } - if (lseek64(fd, len, SEEK_SET) < 0) { - perror("os_seek_file"); + /* + * Seek to len - 1 because writing a character there will + * increase the file size by one byte, to the desired length. + */ + if (lseek64(fd, len - 1, SEEK_SET) < 0) { + perror("lseek64"); exit(1); } zero = 0; - err = os_write_file(fd, &zero, 1); - if(err != 1){ - errno = -err; - perror("os_write_file"); + err = write(fd, &zero, 1); + if (err != 1) { + perror("write"); exit(1); } - return(fd); + return fd; } -static int create_anon_file(unsigned long long len) +int __init create_mem_file(unsigned long long len) { - void *addr; - int fd; + int err, fd; - fd = open("/dev/anon", O_RDWR); - if(fd < 0) { - perror("opening /dev/anon"); - exit(1); - } + fd = create_tmp_file(len); - addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); - if(addr == MAP_FAILED){ - perror("mapping physmem file"); - exit(1); + err = os_set_exec_close(fd); + if (err < 0) { + errno = -err; + perror("exec_close"); } - munmap(addr, len); - - return(fd); + return fd; } -extern int have_devanon; - -int create_mem_file(unsigned long long len) +void __init check_tmpexec(void) { - int err, fd; - - if(have_devanon) - fd = create_anon_file(len); - else fd = create_tmp_file(len); - - err = os_set_exec_close(fd, 1); - if(err < 0){ - errno = -err; - perror("exec_close"); + void *addr; + int err, fd = create_tmp_file(UM_KERN_PAGE_SIZE); + + addr = mmap(NULL, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0); + printf("Checking PROT_EXEC mmap in %s...", tempdir); + if (addr == MAP_FAILED) { + err = errno; + printf("%s\n", strerror(err)); + close(fd); + if (err == EPERM) + printf("%s must be not mounted noexec\n", tempdir); + exit(1); } - return(fd); + printf("OK\n"); + munmap(addr, UM_KERN_PAGE_SIZE); + + close(fd); } diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index d9c52387c4a..33496fe2bb5 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -1,24 +1,22 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@addtoit.com) +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include <unistd.h> #include <stdio.h> +#include <stdlib.h> +#include <unistd.h> #include <errno.h> #include <signal.h> -#include <setjmp.h> -#include <linux/unistd.h> +#include <fcntl.h> #include <sys/mman.h> +#include <sys/ptrace.h> #include <sys/wait.h> -#include "ptrace_user.h" -#include "os.h" -#include "user.h" -#include "user_util.h" -#include "signal_user.h" -#include "process.h" -#include "irq_user.h" -#include "kern_util.h" +#include <asm/unistd.h> +#include <init.h> +#include <longjmp.h> +#include <os.h> +#include <skas_ptrace.h> #define ARBITRARY_ADDR -1 #define FAILURE_PID -1 @@ -29,62 +27,67 @@ unsigned long os_process_pc(int pid) { char proc_stat[STAT_PATH_LEN], buf[256]; - unsigned long pc; + unsigned long pc = ARBITRARY_ADDR; int fd, err; sprintf(proc_stat, "/proc/%d/stat", pid); - fd = os_open_file(proc_stat, of_read(OPENFLAGS()), 0); - if(fd < 0){ - printk("os_process_pc - couldn't open '%s', err = %d\n", - proc_stat, -fd); - return(ARBITRARY_ADDR); + fd = open(proc_stat, O_RDONLY, 0); + if (fd < 0) { + printk(UM_KERN_ERR "os_process_pc - couldn't open '%s', " + "errno = %d\n", proc_stat, errno); + goto out; } - err = os_read_file(fd, buf, sizeof(buf)); - if(err < 0){ - printk("os_process_pc - couldn't read '%s', err = %d\n", - proc_stat, -err); - os_close_file(fd); - return(ARBITRARY_ADDR); + CATCH_EINTR(err = read(fd, buf, sizeof(buf))); + if (err < 0) { + printk(UM_KERN_ERR "os_process_pc - couldn't read '%s', " + "err = %d\n", proc_stat, errno); + goto out_close; } os_close_file(fd); pc = ARBITRARY_ADDR; - if(sscanf(buf, "%*d " COMM_SCANF " %*c %*d %*d %*d %*d %*d %*d %*d " - "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " - "%*d %*d %*d %*d %*d %lu", &pc) != 1){ - printk("os_process_pc - couldn't find pc in '%s'\n", buf); - } - return(pc); + if (sscanf(buf, "%*d " COMM_SCANF " %*c %*d %*d %*d %*d %*d %*d %*d " + "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " + "%*d %*d %*d %*d %*d %lu", &pc) != 1) + printk(UM_KERN_ERR "os_process_pc - couldn't find pc in '%s'\n", + buf); + out_close: + close(fd); + out: + return pc; } int os_process_parent(int pid) { char stat[STAT_PATH_LEN]; char data[256]; - int parent, n, fd; + int parent = FAILURE_PID, n, fd; - if(pid == -1) return(-1); + if (pid == -1) + return parent; snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); - fd = os_open_file(stat, of_read(OPENFLAGS()), 0); - if(fd < 0){ - printk("Couldn't open '%s', err = %d\n", stat, -fd); - return(FAILURE_PID); + fd = open(stat, O_RDONLY, 0); + if (fd < 0) { + printk(UM_KERN_ERR "Couldn't open '%s', errno = %d\n", stat, + errno); + return parent; } - n = os_read_file(fd, data, sizeof(data)); - os_close_file(fd); + CATCH_EINTR(n = read(fd, data, sizeof(data))); + close(fd); - if(n < 0){ - printk("Couldn't read '%s', err = %d\n", stat, -n); - return(FAILURE_PID); + if (n < 0) { + printk(UM_KERN_ERR "Couldn't read '%s', errno = %d\n", stat, + errno); + return parent; } parent = FAILURE_PID; n = sscanf(data, "%*d " COMM_SCANF " %*c %d", &parent); - if(n != 1) - printk("Failed to scan '%s'\n", data); + if (n != 1) + printk(UM_KERN_ERR "Failed to scan '%s'\n", data); - return(parent); + return parent; } void os_stop_process(int pid) @@ -95,9 +98,23 @@ void os_stop_process(int pid) void os_kill_process(int pid, int reap_child) { kill(pid, SIGKILL); - if(reap_child) - CATCH_EINTR(waitpid(pid, NULL, 0)); - + if (reap_child) + CATCH_EINTR(waitpid(pid, NULL, __WALL)); +} + +/* This is here uniquely to have access to the userspace errno, i.e. the one + * used by ptrace in case of error. + */ + +long os_ptrace_ldt(long pid, long addr, long data) +{ + int ret; + + ret = ptrace(PTRACE_LDT, pid, addr, data); + + if (ret < 0) + return -errno; + return ret; } /* Kill off a ptraced child by all means available. kill it normally first, @@ -110,24 +127,17 @@ void os_kill_ptraced_process(int pid, int reap_child) kill(pid, SIGKILL); ptrace(PTRACE_KILL, pid); ptrace(PTRACE_CONT, pid); - if(reap_child) - CATCH_EINTR(waitpid(pid, NULL, 0)); -} - -void os_usr1_process(int pid) -{ - kill(pid, SIGUSR1); + if (reap_child) + CATCH_EINTR(waitpid(pid, NULL, __WALL)); } /* Don't use the glibc version, which caches the result in TLS. It misses some * syscalls, and also breaks with clone(), which does not unshare the TLS. */ -inline _syscall0(pid_t, getpid) - int os_getpid(void) { - return(getpid()); + return syscall(__NR_getpid); } int os_getpgrp(void) @@ -141,89 +151,147 @@ int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, void *loc; int prot; - prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | (x ? PROT_EXEC : 0); loc = mmap64((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, fd, off); - if(loc == MAP_FAILED) - return(-errno); - return(0); + if (loc == MAP_FAILED) + return -errno; + return 0; } int os_protect_memory(void *addr, unsigned long len, int r, int w, int x) { - int prot = ((r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + int prot = ((r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | (x ? PROT_EXEC : 0)); - if(mprotect(addr, len, prot) < 0) - return(-errno); - return(0); + if (mprotect(addr, len, prot) < 0) + return -errno; + + return 0; } int os_unmap_memory(void *addr, int len) { - int err; + int err; - err = munmap(addr, len); - if(err < 0) - return(-errno); - return(0); + err = munmap(addr, len); + if (err < 0) + return -errno; + return 0; } -void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) +#ifndef MADV_REMOVE +#define MADV_REMOVE KERNEL_MADV_REMOVE +#endif + +int os_drop_memory(void *addr, int length) { - int flags = 0, pages; + int err; - if(sig_stack != NULL){ - pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER); - set_sigstack(sig_stack, pages * page_size()); - flags = SA_ONSTACK; - } - if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1); + err = madvise(addr, length, MADV_REMOVE); + if (err < 0) + err = -errno; + return err; } -void init_new_thread_signals(int altstack) +int __init can_drop_memory(void) { - int flags = altstack ? SA_ONSTACK : 0; + void *addr; + int fd, ok = 0; + + printk(UM_KERN_INFO "Checking host MADV_REMOVE support..."); + fd = create_mem_file(UM_KERN_PAGE_SIZE); + if (fd < 0) { + printk(UM_KERN_ERR "Creating test memory file failed, " + "err = %d\n", -fd); + goto out; + } - set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGFPE, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGILL, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGBUS, (__sighandler_t) sig_handler, flags, - SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - set_handler(SIGUSR2, (__sighandler_t) sig_handler, - flags, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); - signal(SIGHUP, SIG_IGN); + addr = mmap64(NULL, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + printk(UM_KERN_ERR "Mapping test memory file failed, " + "err = %d\n", -errno); + goto out_close; + } + + if (madvise(addr, UM_KERN_PAGE_SIZE, MADV_REMOVE) != 0) { + printk(UM_KERN_ERR "MADV_REMOVE failed, err = %d\n", -errno); + goto out_unmap; + } - init_irq_signals(altstack); + printk(UM_KERN_CONT "OK\n"); + ok = 1; + +out_unmap: + munmap(addr, UM_KERN_PAGE_SIZE); +out_close: + close(fd); +out: + return ok; } -int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr) +static int os_page_mincore(void *addr) { - sigjmp_buf buf; - int n; + char vec[2]; + int ret; + + ret = mincore(addr, UM_KERN_PAGE_SIZE, vec); + if (ret < 0) { + if (errno == ENOMEM || errno == EINVAL) + return 0; + else + return -errno; + } - *jmp_ptr = &buf; - n = sigsetjmp(buf, 1); - if(n != 0) - return(n); - (*fn)(arg); - return(0); + return vec[0] & 1; } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +int os_mincore(void *addr, unsigned long len) +{ + char *vec; + int ret, i; + + if (len <= UM_KERN_PAGE_SIZE) + return os_page_mincore(addr); + + vec = calloc(1, (len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); + if (!vec) + return -ENOMEM; + + ret = mincore(addr, UM_KERN_PAGE_SIZE, vec); + if (ret < 0) { + if (errno == ENOMEM || errno == EINVAL) + ret = 0; + else + ret = -errno; + + goto out; + } + + for (i = 0; i < ((len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); i++) { + if (!(vec[i] & 1)) { + ret = 0; + goto out; + } + } + + ret = 1; +out: + free(vec); + return ret; +} + +void init_new_thread_signals(void) +{ + set_handler(SIGSEGV); + set_handler(SIGTRAP); + set_handler(SIGFPE); + set_handler(SIGILL); + set_handler(SIGBUS); + signal(SIGHUP, SIG_IGN); + set_handler(SIGIO); + signal(SIGWINCH, SIG_IGN); +} diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c new file mode 100644 index 00000000000..2ff8d4fe83c --- /dev/null +++ b/arch/um/os-Linux/registers.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2004 PathScale, Inc + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <errno.h> +#include <string.h> +#include <sys/ptrace.h> +#include <sysdep/ptrace.h> +#include <sysdep/ptrace_user.h> +#include <registers.h> + +int save_registers(int pid, struct uml_pt_regs *regs) +{ + int err; + + err = ptrace(PTRACE_GETREGS, pid, 0, regs->gp); + if (err < 0) + return -errno; + return 0; +} + +int restore_registers(int pid, struct uml_pt_regs *regs) +{ + int err; + + err = ptrace(PTRACE_SETREGS, pid, 0, regs->gp); + if (err < 0) + return -errno; + return 0; +} + +/* This is set once at boot time and not changed thereafter */ + +static unsigned long exec_regs[MAX_REG_NR]; +static unsigned long exec_fp_regs[FP_SIZE]; + +int init_registers(int pid) +{ + int err; + + err = ptrace(PTRACE_GETREGS, pid, 0, exec_regs); + if (err < 0) + return -errno; + + arch_init_registers(pid); + get_fp_registers(pid, exec_fp_regs); + return 0; +} + +void get_safe_registers(unsigned long *regs, unsigned long *fp_regs) +{ + memcpy(regs, exec_regs, sizeof(exec_regs)); + + if (fp_regs) + memcpy(fp_regs, exec_fp_regs, sizeof(exec_fp_regs)); +} diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c new file mode 100644 index 00000000000..46e762f926e --- /dev/null +++ b/arch/um/os-Linux/sigio.c @@ -0,0 +1,546 @@ +/* + * Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <pty.h> +#include <sched.h> +#include <signal.h> +#include <string.h> +#include <kern_util.h> +#include <init.h> +#include <os.h> +#include <sigio.h> +#include <um_malloc.h> + +/* + * Protected by sigio_lock(), also used by sigio_cleanup, which is an + * exitcall. + */ +static int write_sigio_pid = -1; +static unsigned long write_sigio_stack; + +/* + * These arrays are initialized before the sigio thread is started, and + * the descriptors closed after it is killed. So, it can't see them change. + * On the UML side, they are changed under the sigio_lock. + */ +#define SIGIO_FDS_INIT {-1, -1} + +static int write_sigio_fds[2] = SIGIO_FDS_INIT; +static int sigio_private[2] = SIGIO_FDS_INIT; + +struct pollfds { + struct pollfd *poll; + int size; + int used; +}; + +/* + * Protected by sigio_lock(). Used by the sigio thread, but the UML thread + * synchronizes with it. + */ +static struct pollfds current_poll; +static struct pollfds next_poll; +static struct pollfds all_sigio_fds; + +static int write_sigio_thread(void *unused) +{ + struct pollfds *fds, tmp; + struct pollfd *p; + int i, n, respond_fd; + char c; + + os_fix_helper_signals(); + fds = ¤t_poll; + while (1) { + n = poll(fds->poll, fds->used, -1); + if (n < 0) { + if (errno == EINTR) + continue; + printk(UM_KERN_ERR "write_sigio_thread : poll returned " + "%d, errno = %d\n", n, errno); + } + for (i = 0; i < fds->used; i++) { + p = &fds->poll[i]; + if (p->revents == 0) + continue; + if (p->fd == sigio_private[1]) { + CATCH_EINTR(n = read(sigio_private[1], &c, + sizeof(c))); + if (n != sizeof(c)) + printk(UM_KERN_ERR + "write_sigio_thread : " + "read on socket failed, " + "err = %d\n", errno); + tmp = current_poll; + current_poll = next_poll; + next_poll = tmp; + respond_fd = sigio_private[1]; + } + else { + respond_fd = write_sigio_fds[1]; + fds->used--; + memmove(&fds->poll[i], &fds->poll[i + 1], + (fds->used - i) * sizeof(*fds->poll)); + } + + CATCH_EINTR(n = write(respond_fd, &c, sizeof(c))); + if (n != sizeof(c)) + printk(UM_KERN_ERR "write_sigio_thread : " + "write on socket failed, err = %d\n", + errno); + } + } + + return 0; +} + +static int need_poll(struct pollfds *polls, int n) +{ + struct pollfd *new; + + if (n <= polls->size) + return 0; + + new = uml_kmalloc(n * sizeof(struct pollfd), UM_GFP_ATOMIC); + if (new == NULL) { + printk(UM_KERN_ERR "need_poll : failed to allocate new " + "pollfds\n"); + return -ENOMEM; + } + + memcpy(new, polls->poll, polls->used * sizeof(struct pollfd)); + kfree(polls->poll); + + polls->poll = new; + polls->size = n; + return 0; +} + +/* + * Must be called with sigio_lock held, because it's needed by the marked + * critical section. + */ +static void update_thread(void) +{ + unsigned long flags; + int n; + char c; + + flags = set_signals(0); + CATCH_EINTR(n = write(sigio_private[0], &c, sizeof(c))); + if (n != sizeof(c)) { + printk(UM_KERN_ERR "update_thread : write failed, err = %d\n", + errno); + goto fail; + } + + CATCH_EINTR(n = read(sigio_private[0], &c, sizeof(c))); + if (n != sizeof(c)) { + printk(UM_KERN_ERR "update_thread : read failed, err = %d\n", + errno); + goto fail; + } + + set_signals(flags); + return; + fail: + /* Critical section start */ + if (write_sigio_pid != -1) { + os_kill_process(write_sigio_pid, 1); + free_stack(write_sigio_stack, 0); + } + write_sigio_pid = -1; + close(sigio_private[0]); + close(sigio_private[1]); + close(write_sigio_fds[0]); + close(write_sigio_fds[1]); + /* Critical section end */ + set_signals(flags); +} + +int add_sigio_fd(int fd) +{ + struct pollfd *p; + int err = 0, i, n; + + sigio_lock(); + for (i = 0; i < all_sigio_fds.used; i++) { + if (all_sigio_fds.poll[i].fd == fd) + break; + } + if (i == all_sigio_fds.used) + goto out; + + p = &all_sigio_fds.poll[i]; + + for (i = 0; i < current_poll.used; i++) { + if (current_poll.poll[i].fd == fd) + goto out; + } + + n = current_poll.used; + err = need_poll(&next_poll, n + 1); + if (err) + goto out; + + memcpy(next_poll.poll, current_poll.poll, + current_poll.used * sizeof(struct pollfd)); + next_poll.poll[n] = *p; + next_poll.used = n + 1; + update_thread(); + out: + sigio_unlock(); + return err; +} + +int ignore_sigio_fd(int fd) +{ + struct pollfd *p; + int err = 0, i, n = 0; + + /* + * This is called from exitcalls elsewhere in UML - if + * sigio_cleanup has already run, then update_thread will hang + * or fail because the thread is no longer running. + */ + if (write_sigio_pid == -1) + return -EIO; + + sigio_lock(); + for (i = 0; i < current_poll.used; i++) { + if (current_poll.poll[i].fd == fd) + break; + } + if (i == current_poll.used) + goto out; + + err = need_poll(&next_poll, current_poll.used - 1); + if (err) + goto out; + + for (i = 0; i < current_poll.used; i++) { + p = ¤t_poll.poll[i]; + if (p->fd != fd) + next_poll.poll[n++] = *p; + } + next_poll.used = current_poll.used - 1; + + update_thread(); + out: + sigio_unlock(); + return err; +} + +static struct pollfd *setup_initial_poll(int fd) +{ + struct pollfd *p; + + p = uml_kmalloc(sizeof(struct pollfd), UM_GFP_KERNEL); + if (p == NULL) { + printk(UM_KERN_ERR "setup_initial_poll : failed to allocate " + "poll\n"); + return NULL; + } + *p = ((struct pollfd) { .fd = fd, + .events = POLLIN, + .revents = 0 }); + return p; +} + +static void write_sigio_workaround(void) +{ + struct pollfd *p; + int err; + int l_write_sigio_fds[2]; + int l_sigio_private[2]; + int l_write_sigio_pid; + + /* We call this *tons* of times - and most ones we must just fail. */ + sigio_lock(); + l_write_sigio_pid = write_sigio_pid; + sigio_unlock(); + + if (l_write_sigio_pid != -1) + return; + + err = os_pipe(l_write_sigio_fds, 1, 1); + if (err < 0) { + printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 1 failed, " + "err = %d\n", -err); + return; + } + err = os_pipe(l_sigio_private, 1, 1); + if (err < 0) { + printk(UM_KERN_ERR "write_sigio_workaround - os_pipe 2 failed, " + "err = %d\n", -err); + goto out_close1; + } + + p = setup_initial_poll(l_sigio_private[1]); + if (!p) + goto out_close2; + + sigio_lock(); + + /* + * Did we race? Don't try to optimize this, please, it's not so likely + * to happen, and no more than once at the boot. + */ + if (write_sigio_pid != -1) + goto out_free; + + current_poll = ((struct pollfds) { .poll = p, + .used = 1, + .size = 1 }); + + if (write_sigio_irq(l_write_sigio_fds[0])) + goto out_clear_poll; + + memcpy(write_sigio_fds, l_write_sigio_fds, sizeof(l_write_sigio_fds)); + memcpy(sigio_private, l_sigio_private, sizeof(l_sigio_private)); + + write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, + CLONE_FILES | CLONE_VM, + &write_sigio_stack); + + if (write_sigio_pid < 0) + goto out_clear; + + sigio_unlock(); + return; + +out_clear: + write_sigio_pid = -1; + write_sigio_fds[0] = -1; + write_sigio_fds[1] = -1; + sigio_private[0] = -1; + sigio_private[1] = -1; +out_clear_poll: + current_poll = ((struct pollfds) { .poll = NULL, + .size = 0, + .used = 0 }); +out_free: + sigio_unlock(); + kfree(p); +out_close2: + close(l_sigio_private[0]); + close(l_sigio_private[1]); +out_close1: + close(l_write_sigio_fds[0]); + close(l_write_sigio_fds[1]); +} + +void sigio_broken(int fd, int read) +{ + int err; + + write_sigio_workaround(); + + sigio_lock(); + err = need_poll(&all_sigio_fds, all_sigio_fds.used + 1); + if (err) { + printk(UM_KERN_ERR "maybe_sigio_broken - failed to add pollfd " + "for descriptor %d\n", fd); + goto out; + } + + all_sigio_fds.poll[all_sigio_fds.used++] = + ((struct pollfd) { .fd = fd, + .events = read ? POLLIN : POLLOUT, + .revents = 0 }); +out: + sigio_unlock(); +} + +/* Changed during early boot */ +static int pty_output_sigio; +static int pty_close_sigio; + +void maybe_sigio_broken(int fd, int read) +{ + if (!isatty(fd)) + return; + + if ((read || pty_output_sigio) && (!read || pty_close_sigio)) + return; + + sigio_broken(fd, read); +} + +static void sigio_cleanup(void) +{ + if (write_sigio_pid == -1) + return; + + os_kill_process(write_sigio_pid, 1); + free_stack(write_sigio_stack, 0); + write_sigio_pid = -1; +} + +__uml_exitcall(sigio_cleanup); + +/* Used as a flag during SIGIO testing early in boot */ +static int got_sigio; + +static void __init handler(int sig) +{ + got_sigio = 1; +} + +struct openpty_arg { + int master; + int slave; + int err; +}; + +static void openpty_cb(void *arg) +{ + struct openpty_arg *info = arg; + + info->err = 0; + if (openpty(&info->master, &info->slave, NULL, NULL, NULL)) + info->err = -errno; +} + +static int async_pty(int master, int slave) +{ + int flags; + + flags = fcntl(master, F_GETFL); + if (flags < 0) + return -errno; + + if ((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || + (fcntl(master, F_SETOWN, os_getpid()) < 0)) + return -errno; + + if ((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)) + return -errno; + + return 0; +} + +static void __init check_one_sigio(void (*proc)(int, int)) +{ + struct sigaction old, new; + struct openpty_arg pty = { .master = -1, .slave = -1 }; + int master, slave, err; + + initial_thread_cb(openpty_cb, &pty); + if (pty.err) { + printk(UM_KERN_ERR "check_one_sigio failed, errno = %d\n", + -pty.err); + return; + } + + master = pty.master; + slave = pty.slave; + + if ((master == -1) || (slave == -1)) { + printk(UM_KERN_ERR "check_one_sigio failed to allocate a " + "pty\n"); + return; + } + + /* Not now, but complain so we now where we failed. */ + err = raw(master); + if (err < 0) { + printk(UM_KERN_ERR "check_one_sigio : raw failed, errno = %d\n", + -err); + return; + } + + err = async_pty(master, slave); + if (err < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigio_async failed, " + "err = %d\n", -err); + return; + } + + if (sigaction(SIGIO, NULL, &old) < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigaction 1 failed, " + "errno = %d\n", errno); + return; + } + + new = old; + new.sa_handler = handler; + if (sigaction(SIGIO, &new, NULL) < 0) { + printk(UM_KERN_ERR "check_one_sigio : sigaction 2 failed, " + "errno = %d\n", errno); + return; + } + + got_sigio = 0; + (*proc)(master, slave); + + close(master); + close(slave); + + if (sigaction(SIGIO, &old, NULL) < 0) + printk(UM_KERN_ERR "check_one_sigio : sigaction 3 failed, " + "errno = %d\n", errno); +} + +static void tty_output(int master, int slave) +{ + int n; + char buf[512]; + + printk(UM_KERN_INFO "Checking that host ptys support output SIGIO..."); + + memset(buf, 0, sizeof(buf)); + + while (write(master, buf, sizeof(buf)) > 0) ; + if (errno != EAGAIN) + printk(UM_KERN_ERR "tty_output : write failed, errno = %d\n", + errno); + while (((n = read(slave, buf, sizeof(buf))) > 0) && + !({ barrier(); got_sigio; })) + ; + + if (got_sigio) { + printk(UM_KERN_CONT "Yes\n"); + pty_output_sigio = 1; + } else if (n == -EAGAIN) + printk(UM_KERN_CONT "No, enabling workaround\n"); + else + printk(UM_KERN_CONT "tty_output : read failed, err = %d\n", n); +} + +static void tty_close(int master, int slave) +{ + printk(UM_KERN_INFO "Checking that host ptys support SIGIO on " + "close..."); + + close(slave); + if (got_sigio) { + printk(UM_KERN_CONT "Yes\n"); + pty_close_sigio = 1; + } else + printk(UM_KERN_CONT "No, enabling workaround\n"); +} + +static void __init check_sigio(void) +{ + if ((access("/dev/ptmx", R_OK) < 0) && + (access("/dev/ptyp0", R_OK) < 0)) { + printk(UM_KERN_WARNING "No pseudo-terminals available - " + "skipping pty SIGIO check\n"); + return; + } + check_one_sigio(tty_output); + check_one_sigio(tty_close); +} + +/* Here because it only does the SIGIO testing for now */ +void __init os_check_bugs(void) +{ + check_sigio(); +} diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index c7bfd5ee392..7b605e4dfff 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -1,48 +1,314 @@ /* * Copyright (C) 2004 PathScale, Inc + * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ +#include <stdlib.h> +#include <stdarg.h> +#include <errno.h> #include <signal.h> -#include "time_user.h" -#include "mode.h" -#include "sysdep/signal.h" +#include <strings.h> +#include <as-layout.h> +#include <kern_util.h> +#include <os.h> +#include <sysdep/mcontext.h> +#include "internal.h" -void sig_handler(ARCH_SIGHDLR_PARAM) +void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { + [SIGTRAP] = relay_signal, + [SIGFPE] = relay_signal, + [SIGILL] = relay_signal, + [SIGWINCH] = winch, + [SIGBUS] = bus_handler, + [SIGSEGV] = segv_handler, + [SIGIO] = sigio_handler, + [SIGVTALRM] = timer_handler }; + +static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) { - struct sigcontext *sc; + struct uml_pt_regs r; + int save_errno = errno; + + r.is_user = 0; + if (sig == SIGSEGV) { + /* For segfaults, we want the data from the sigcontext. */ + get_regs_from_mc(&r, mc); + GET_FAULTINFO_FROM_MC(r.faultinfo, mc); + } + + /* enable signals if sig isn't IRQ signal */ + if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM)) + unblock_signals(); - ARCH_GET_SIGCONTEXT(sc, sig); - CHOOSE_MODE_PROC(sig_handler_common_tt, sig_handler_common_skas, - sig, sc); + (*sig_info[sig])(sig, si, &r); + + errno = save_errno; } -extern int timer_irq_inited; +/* + * These are the asynchronous signals. SIGPROF is excluded because we want to + * be able to profile all of UML, not just the non-critical sections. If + * profiling is not thread-safe, then that is not my problem. We can disable + * profiling when SMP is enabled in that case. + */ +#define SIGIO_BIT 0 +#define SIGIO_MASK (1 << SIGIO_BIT) + +#define SIGVTALRM_BIT 1 +#define SIGVTALRM_MASK (1 << SIGVTALRM_BIT) + +static int signals_enabled; +static unsigned int signals_pending; -void alarm_handler(ARCH_SIGHDLR_PARAM) +void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) { - struct sigcontext *sc; + int enabled; - ARCH_GET_SIGCONTEXT(sc, sig); - if(!timer_irq_inited) return; + enabled = signals_enabled; + if (!enabled && (sig == SIGIO)) { + signals_pending |= SIGIO_MASK; + return; + } - if(sig == SIGALRM) - switch_timers(0); + block_signals(); - CHOOSE_MODE_PROC(sig_handler_common_tt, sig_handler_common_skas, - sig, sc); + sig_handler_common(sig, si, mc); - if(sig == SIGALRM) - switch_timers(1); + set_signals(enabled); } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +static void real_alarm_handler(mcontext_t *mc) +{ + struct uml_pt_regs regs; + + if (mc != NULL) + get_regs_from_mc(®s, mc); + regs.is_user = 0; + unblock_signals(); + timer_handler(SIGVTALRM, NULL, ®s); +} + +void alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) +{ + int enabled; + + enabled = signals_enabled; + if (!signals_enabled) { + signals_pending |= SIGVTALRM_MASK; + return; + } + + block_signals(); + + real_alarm_handler(mc); + set_signals(enabled); +} + +void timer_init(void) +{ + set_handler(SIGVTALRM); +} + +void set_sigstack(void *sig_stack, int size) +{ + stack_t stack = ((stack_t) { .ss_flags = 0, + .ss_sp = (__ptr_t) sig_stack, + .ss_size = size - sizeof(void *) }); + + if (sigaltstack(&stack, NULL) != 0) + panic("enabling signal stack failed, errno = %d\n", errno); +} + +static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { + [SIGSEGV] = sig_handler, + [SIGBUS] = sig_handler, + [SIGILL] = sig_handler, + [SIGFPE] = sig_handler, + [SIGTRAP] = sig_handler, + + [SIGIO] = sig_handler, + [SIGWINCH] = sig_handler, + [SIGVTALRM] = alarm_handler +}; + + +static void hard_handler(int sig, siginfo_t *si, void *p) +{ + struct ucontext *uc = p; + mcontext_t *mc = &uc->uc_mcontext; + unsigned long pending = 1UL << sig; + + do { + int nested, bail; + + /* + * pending comes back with one bit set for each + * interrupt that arrived while setting up the stack, + * plus a bit for this interrupt, plus the zero bit is + * set if this is a nested interrupt. + * If bail is true, then we interrupted another + * handler setting up the stack. In this case, we + * have to return, and the upper handler will deal + * with this interrupt. + */ + bail = to_irq_stack(&pending); + if (bail) + return; + + nested = pending & 1; + pending &= ~1; + + while ((sig = ffs(pending)) != 0){ + sig--; + pending &= ~(1 << sig); + (*handlers[sig])(sig, (struct siginfo *)si, mc); + } + + /* + * Again, pending comes back with a mask of signals + * that arrived while tearing down the stack. If this + * is non-zero, we just go back, set up the stack + * again, and handle the new interrupts. + */ + if (!nested) + pending = from_irq_stack(nested); + } while (pending); +} + +void set_handler(int sig) +{ + struct sigaction action; + int flags = SA_SIGINFO | SA_ONSTACK; + sigset_t sig_mask; + + action.sa_sigaction = hard_handler; + + /* block irq ones */ + sigemptyset(&action.sa_mask); + sigaddset(&action.sa_mask, SIGVTALRM); + sigaddset(&action.sa_mask, SIGIO); + sigaddset(&action.sa_mask, SIGWINCH); + + if (sig == SIGSEGV) + flags |= SA_NODEFER; + + if (sigismember(&action.sa_mask, sig)) + flags |= SA_RESTART; /* if it's an irq signal */ + + action.sa_flags = flags; + action.sa_restorer = NULL; + if (sigaction(sig, &action, NULL) < 0) + panic("sigaction failed - errno = %d\n", errno); + + sigemptyset(&sig_mask); + sigaddset(&sig_mask, sig); + if (sigprocmask(SIG_UNBLOCK, &sig_mask, NULL) < 0) + panic("sigprocmask failed - errno = %d\n", errno); +} + +int change_sig(int signal, int on) +{ + sigset_t sigset; + + sigemptyset(&sigset); + sigaddset(&sigset, signal); + if (sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, NULL) < 0) + return -errno; + + return 0; +} + +void block_signals(void) +{ + signals_enabled = 0; + /* + * This must return with signals disabled, so this barrier + * ensures that writes are flushed out before the return. + * This might matter if gcc figures out how to inline this and + * decides to shuffle this code into the caller. + */ + barrier(); +} + +void unblock_signals(void) +{ + int save_pending; + + if (signals_enabled == 1) + return; + + /* + * We loop because the IRQ handler returns with interrupts off. So, + * interrupts may have arrived and we need to re-enable them and + * recheck signals_pending. + */ + while (1) { + /* + * Save and reset save_pending after enabling signals. This + * way, signals_pending won't be changed while we're reading it. + */ + signals_enabled = 1; + + /* + * Setting signals_enabled and reading signals_pending must + * happen in this order. + */ + barrier(); + + save_pending = signals_pending; + if (save_pending == 0) + return; + + signals_pending = 0; + + /* + * We have pending interrupts, so disable signals, as the + * handlers expect them off when they are called. They will + * be enabled again above. + */ + + signals_enabled = 0; + + /* + * Deal with SIGIO first because the alarm handler might + * schedule, leaving the pending SIGIO stranded until we come + * back here. + * + * SIGIO's handler doesn't use siginfo or mcontext, + * so they can be NULL. + */ + if (save_pending & SIGIO_MASK) + sig_handler_common(SIGIO, NULL, NULL); + + if (save_pending & SIGVTALRM_MASK) + real_alarm_handler(NULL); + } +} + +int get_signals(void) +{ + return signals_enabled; +} + +int set_signals(int enable) +{ + int ret; + if (signals_enabled == enable) + return enable; + + ret = signals_enabled; + if (enable) + unblock_signals(); + else block_signals(); + + return ret; +} + +int os_is_signal_stack(void) +{ + stack_t ss; + sigaltstack(NULL, &ss); + + return ss.ss_flags & SS_ONSTACK; +} diff --git a/arch/um/os-Linux/skas/Makefile b/arch/um/os-Linux/skas/Makefile new file mode 100644 index 00000000000..d2ea3409e07 --- /dev/null +++ b/arch/um/os-Linux/skas/Makefile @@ -0,0 +1,10 @@ +# +# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com) +# Licensed under the GPL +# + +obj-y := mem.o process.o + +USER_OBJS := $(obj-y) + +include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c new file mode 100644 index 00000000000..689b18db798 --- /dev/null +++ b/arch/um/os-Linux/skas/mem.c @@ -0,0 +1,283 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stddef.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <sys/mman.h> +#include <init.h> +#include <as-layout.h> +#include <mm_id.h> +#include <os.h> +#include <proc_mm.h> +#include <ptrace_user.h> +#include <registers.h> +#include <skas.h> +#include <sysdep/ptrace.h> +#include <sysdep/stub.h> + +extern unsigned long batch_syscall_stub, __syscall_stub_start; + +extern void wait_stub_done(int pid); + +static inline unsigned long *check_init_stack(struct mm_id * mm_idp, + unsigned long *stack) +{ + if (stack == NULL) { + stack = (unsigned long *) mm_idp->stack + 2; + *stack = 0; + } + return stack; +} + +static unsigned long syscall_regs[MAX_REG_NR]; + +static int __init init_syscall_regs(void) +{ + get_safe_registers(syscall_regs, NULL); + syscall_regs[REGS_IP_INDEX] = STUB_CODE + + ((unsigned long) &batch_syscall_stub - + (unsigned long) &__syscall_stub_start); + return 0; +} + +__initcall(init_syscall_regs); + +extern int proc_mm; + +static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr) +{ + int n, i; + long ret, offset; + unsigned long * data; + unsigned long * syscall; + int err, pid = mm_idp->u.pid; + + if (proc_mm) + /* FIXME: Need to look up userspace_pid by cpu */ + pid = userspace_pid[0]; + + n = ptrace_setregs(pid, syscall_regs); + if (n < 0) { + printk(UM_KERN_ERR "Registers - \n"); + for (i = 0; i < MAX_REG_NR; i++) + printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]); + panic("do_syscall_stub : PTRACE_SETREGS failed, errno = %d\n", + -n); + } + + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) + panic("Failed to continue stub, pid = %d, errno = %d\n", pid, + errno); + + wait_stub_done(pid); + + /* + * When the stub stops, we find the following values on the + * beginning of the stack: + * (long )return_value + * (long )offset to failed sycall-data (0, if no error) + */ + ret = *((unsigned long *) mm_idp->stack); + offset = *((unsigned long *) mm_idp->stack + 1); + if (offset) { + data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA); + printk(UM_KERN_ERR "do_syscall_stub : ret = %ld, offset = %ld, " + "data = %p\n", ret, offset, data); + syscall = (unsigned long *)((unsigned long)data + data[0]); + printk(UM_KERN_ERR "do_syscall_stub: syscall %ld failed, " + "return value = 0x%lx, expected return value = 0x%lx\n", + syscall[0], ret, syscall[7]); + printk(UM_KERN_ERR " syscall parameters: " + "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + syscall[1], syscall[2], syscall[3], + syscall[4], syscall[5], syscall[6]); + for (n = 1; n < data[0]/sizeof(long); n++) { + if (n == 1) + printk(UM_KERN_ERR " additional syscall " + "data:"); + if (n % 4 == 1) + printk("\n" UM_KERN_ERR " "); + printk(" 0x%lx", data[n]); + } + if (n > 1) + printk("\n"); + } + else ret = 0; + + *addr = check_init_stack(mm_idp, NULL); + + return ret; +} + +long run_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args, long expected, void **addr, + int done) +{ + unsigned long *stack = check_init_stack(mm_idp, *addr); + + *stack += sizeof(long); + stack += *stack / sizeof(long); + + *stack++ = syscall; + *stack++ = args[0]; + *stack++ = args[1]; + *stack++ = args[2]; + *stack++ = args[3]; + *stack++ = args[4]; + *stack++ = args[5]; + *stack++ = expected; + *stack = 0; + + if (!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) < + UM_KERN_PAGE_SIZE - 10 * sizeof(long))) { + *addr = stack; + return 0; + } + + return do_syscall_stub(mm_idp, addr); +} + +long syscall_stub_data(struct mm_id * mm_idp, + unsigned long *data, int data_count, + void **addr, void **stub_addr) +{ + unsigned long *stack; + int ret = 0; + + /* + * If *addr still is uninitialized, it *must* contain NULL. + * Thus in this case do_syscall_stub correctly won't be called. + */ + if ((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >= + UM_KERN_PAGE_SIZE - (10 + data_count) * sizeof(long)) { + ret = do_syscall_stub(mm_idp, addr); + /* in case of error, don't overwrite data on stack */ + if (ret) + return ret; + } + + stack = check_init_stack(mm_idp, *addr); + *addr = stack; + + *stack = data_count * sizeof(long); + + memcpy(stack + 1, data, data_count * sizeof(long)); + + *stub_addr = (void *)(((unsigned long)(stack + 1) & + ~UM_KERN_PAGE_MASK) + STUB_DATA); + + return 0; +} + +int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot, + int phys_fd, unsigned long long offset, int done, void **data) +{ + int ret; + + if (proc_mm) { + struct proc_mm_op map; + int fd = mm_idp->u.mm_fd; + + map = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = virt, + .len = len, + .prot = prot, + .flags = MAP_SHARED | + MAP_FIXED, + .fd = phys_fd, + .offset= offset + } } } ); + CATCH_EINTR(ret = write(fd, &map, sizeof(map))); + if (ret != sizeof(map)) { + ret = -errno; + printk(UM_KERN_ERR "map : /proc/mm map failed, " + "err = %d\n", -ret); + } + else ret = 0; + } + else { + unsigned long args[] = { virt, len, prot, + MAP_SHARED | MAP_FIXED, phys_fd, + MMAP_OFFSET(offset) }; + + ret = run_syscall_stub(mm_idp, STUB_MMAP_NR, args, virt, + data, done); + } + + return ret; +} + +int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + int done, void **data) +{ + int ret; + + if (proc_mm) { + struct proc_mm_op unmap; + int fd = mm_idp->u.mm_fd; + + unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, + .u = + { .munmap = + { .addr = + (unsigned long) addr, + .len = len } } } ); + CATCH_EINTR(ret = write(fd, &unmap, sizeof(unmap))); + if (ret != sizeof(unmap)) { + ret = -errno; + printk(UM_KERN_ERR "unmap - proc_mm write returned " + "%d\n", ret); + } + else ret = 0; + } + else { + unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, + 0 }; + + ret = run_syscall_stub(mm_idp, __NR_munmap, args, 0, + data, done); + } + + return ret; +} + +int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len, + unsigned int prot, int done, void **data) +{ + struct proc_mm_op protect; + int ret; + + if (proc_mm) { + int fd = mm_idp->u.mm_fd; + + protect = ((struct proc_mm_op) { .op = MM_MPROTECT, + .u = + { .mprotect = + { .addr = + (unsigned long) addr, + .len = len, + .prot = prot } } } ); + + CATCH_EINTR(ret = write(fd, &protect, sizeof(protect))); + if (ret != sizeof(protect)) { + ret = -errno; + printk(UM_KERN_ERR "protect failed, err = %d", -ret); + } + else ret = 0; + } + else { + unsigned long args[] = { addr, len, prot, 0, 0, 0 }; + + ret = run_syscall_stub(mm_idp, __NR_mprotect, args, 0, + data, done); + } + + return ret; +} diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c new file mode 100644 index 00000000000..908579f2b0a --- /dev/null +++ b/arch/um/os-Linux/skas/process.c @@ -0,0 +1,744 @@ +/* + * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdlib.h> +#include <unistd.h> +#include <sched.h> +#include <errno.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/wait.h> +#include <asm/unistd.h> +#include <as-layout.h> +#include <init.h> +#include <kern_util.h> +#include <mem.h> +#include <os.h> +#include <proc_mm.h> +#include <ptrace_user.h> +#include <registers.h> +#include <skas.h> +#include <skas_ptrace.h> +#include <sysdep/stub.h> + +int is_skas_winch(int pid, int fd, void *data) +{ + return pid == getpgrp(); +} + +static int ptrace_dump_regs(int pid) +{ + unsigned long regs[MAX_REG_NR]; + int i; + + if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) + return -errno; + + printk(UM_KERN_ERR "Stub registers -\n"); + for (i = 0; i < ARRAY_SIZE(regs); i++) + printk(UM_KERN_ERR "\t%d - %lx\n", i, regs[i]); + + return 0; +} + +/* + * Signals that are OK to receive in the stub - we'll just continue it. + * SIGWINCH will happen when UML is inside a detached screen. + */ +#define STUB_SIG_MASK ((1 << SIGVTALRM) | (1 << SIGWINCH)) + +/* Signals that the stub will finish with - anything else is an error */ +#define STUB_DONE_MASK (1 << SIGTRAP) + +void wait_stub_done(int pid) +{ + int n, status, err; + + while (1) { + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); + if ((n < 0) || !WIFSTOPPED(status)) + goto bad_wait; + + if (((1 << WSTOPSIG(status)) & STUB_SIG_MASK) == 0) + break; + + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) { + printk(UM_KERN_ERR "wait_stub_done : continue failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } + } + + if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) + return; + +bad_wait: + err = ptrace_dump_regs(pid); + if (err) + printk(UM_KERN_ERR "Failed to get registers from stub, " + "errno = %d\n", -err); + printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, " + "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno, + status); + fatal_sigsegv(); +} + +extern unsigned long current_stub_stack(void); + +static void get_skas_faultinfo(int pid, struct faultinfo *fi) +{ + int err; + + if (ptrace_faultinfo) { + err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); + if (err) { + printk(UM_KERN_ERR "get_skas_faultinfo - " + "PTRACE_FAULTINFO failed, errno = %d\n", errno); + fatal_sigsegv(); + } + + /* Special handling for i386, which has different structs */ + if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) + memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, + sizeof(struct faultinfo) - + sizeof(struct ptrace_faultinfo)); + } + else { + unsigned long fpregs[FP_SIZE]; + + err = get_fp_registers(pid, fpregs); + if (err < 0) { + printk(UM_KERN_ERR "save_fp_registers returned %d\n", + err); + fatal_sigsegv(); + } + err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV); + if (err) { + printk(UM_KERN_ERR "Failed to continue stub, pid = %d, " + "errno = %d\n", pid, errno); + fatal_sigsegv(); + } + wait_stub_done(pid); + + /* + * faultinfo is prepared by the stub-segv-handler at start of + * the stub stack page. We just have to copy it. + */ + memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); + + err = put_fp_registers(pid, fpregs); + if (err < 0) { + printk(UM_KERN_ERR "put_fp_registers returned %d\n", + err); + fatal_sigsegv(); + } + } +} + +static void handle_segv(int pid, struct uml_pt_regs * regs) +{ + get_skas_faultinfo(pid, ®s->faultinfo); + segv(regs->faultinfo, 0, 1, NULL); +} + +/* + * To use the same value of using_sysemu as the caller, ask it that value + * (in local_using_sysemu + */ +static void handle_trap(int pid, struct uml_pt_regs *regs, + int local_using_sysemu) +{ + int err, status; + + if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END)) + fatal_sigsegv(); + + /* Mark this as a syscall */ + UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp); + + if (!local_using_sysemu) + { + err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, + __NR_getpid); + if (err < 0) { + printk(UM_KERN_ERR "handle_trap - nullifying syscall " + "failed, errno = %d\n", errno); + fatal_sigsegv(); + } + + err = ptrace(PTRACE_SYSCALL, pid, 0, 0); + if (err < 0) { + printk(UM_KERN_ERR "handle_trap - continuing to end of " + "syscall failed, errno = %d\n", errno); + fatal_sigsegv(); + } + + CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); + if ((err < 0) || !WIFSTOPPED(status) || + (WSTOPSIG(status) != SIGTRAP + 0x80)) { + err = ptrace_dump_regs(pid); + if (err) + printk(UM_KERN_ERR "Failed to get registers " + "from process, errno = %d\n", -err); + printk(UM_KERN_ERR "handle_trap - failed to wait at " + "end of syscall, errno = %d, status = %d\n", + errno, status); + fatal_sigsegv(); + } + } + + handle_syscall(regs); +} + +extern int __syscall_stub_start; + +static int userspace_tramp(void *stack) +{ + void *addr; + int err; + + ptrace(PTRACE_TRACEME, 0, 0, 0); + + signal(SIGTERM, SIG_DFL); + signal(SIGWINCH, SIG_IGN); + err = set_interval(); + if (err) { + printk(UM_KERN_ERR "userspace_tramp - setting timer failed, " + "errno = %d\n", err); + exit(1); + } + + if (!proc_mm) { + /* + * This has a pte, but it can't be mapped in with the usual + * tlb_flush mechanism because this is part of that mechanism + */ + int fd; + unsigned long long offset; + fd = phys_mapping(to_phys(&__syscall_stub_start), &offset); + addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE, + PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); + if (addr == MAP_FAILED) { + printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, " + "errno = %d\n", STUB_CODE, errno); + exit(1); + } + + if (stack != NULL) { + fd = phys_mapping(to_phys(stack), &offset); + addr = mmap((void *) STUB_DATA, + UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fd, offset); + if (addr == MAP_FAILED) { + printk(UM_KERN_ERR "mapping segfault stack " + "at 0x%lx failed, errno = %d\n", + STUB_DATA, errno); + exit(1); + } + } + } + if (!ptrace_faultinfo && (stack != NULL)) { + struct sigaction sa; + + unsigned long v = STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) &__syscall_stub_start; + + set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE); + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO; + sa.sa_sigaction = (void *) v; + sa.sa_restorer = NULL; + if (sigaction(SIGSEGV, &sa, NULL) < 0) { + printk(UM_KERN_ERR "userspace_tramp - setting SIGSEGV " + "handler failed - errno = %d\n", errno); + exit(1); + } + } + + kill(os_getpid(), SIGSTOP); + return 0; +} + +/* Each element set once, and only accessed by a single processor anyway */ +#undef NR_CPUS +#define NR_CPUS 1 +int userspace_pid[NR_CPUS]; + +int start_userspace(unsigned long stub_stack) +{ + void *stack; + unsigned long sp; + int pid, status, n, flags, err; + + stack = mmap(NULL, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (stack == MAP_FAILED) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : mmap failed, " + "errno = %d\n", errno); + return err; + } + + sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *); + + flags = CLONE_FILES; + if (proc_mm) + flags |= CLONE_VM; + else + flags |= SIGCHLD; + + pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); + if (pid < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : clone failed, " + "errno = %d\n", errno); + return err; + } + + do { + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); + if (n < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : wait failed, " + "errno = %d\n", errno); + goto out_kill; + } + } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM)); + + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { + err = -EINVAL; + printk(UM_KERN_ERR "start_userspace : expected SIGSTOP, got " + "status = %d\n", status); + goto out_kill; + } + + if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, + (void *) PTRACE_O_TRACESYSGOOD) < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : PTRACE_OLDSETOPTIONS " + "failed, errno = %d\n", errno); + goto out_kill; + } + + if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) { + err = -errno; + printk(UM_KERN_ERR "start_userspace : munmap failed, " + "errno = %d\n", errno); + goto out_kill; + } + + return pid; + + out_kill: + os_kill_ptraced_process(pid, 1); + return err; +} + +void userspace(struct uml_pt_regs *regs) +{ + struct itimerval timer; + unsigned long long nsecs, now; + int err, status, op, pid = userspace_pid[0]; + /* To prevent races if using_sysemu changes under us.*/ + int local_using_sysemu; + siginfo_t si; + + /* Handle any immediate reschedules or signals */ + interrupt_end(); + + if (getitimer(ITIMER_VIRTUAL, &timer)) + printk(UM_KERN_ERR "Failed to get itimer, errno = %d\n", errno); + nsecs = timer.it_value.tv_sec * UM_NSEC_PER_SEC + + timer.it_value.tv_usec * UM_NSEC_PER_USEC; + nsecs += os_nsecs(); + + while (1) { + /* + * This can legitimately fail if the process loads a + * bogus value into a segment register. It will + * segfault and PTRACE_GETREGS will read that value + * out of the process. However, PTRACE_SETREGS will + * fail. In this case, there is nothing to do but + * just kill the process. + */ + if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) + fatal_sigsegv(); + + if (put_fp_registers(pid, regs->fp)) + fatal_sigsegv(); + + /* Now we set local_using_sysemu to be used for one loop */ + local_using_sysemu = get_using_sysemu(); + + op = SELECT_PTRACE_OPERATION(local_using_sysemu, + singlestepping(NULL)); + + if (ptrace(op, pid, 0, 0)) { + printk(UM_KERN_ERR "userspace - ptrace continue " + "failed, op = %d, errno = %d\n", op, errno); + fatal_sigsegv(); + } + + CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); + if (err < 0) { + printk(UM_KERN_ERR "userspace - wait failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } + + regs->is_user = 1; + if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { + printk(UM_KERN_ERR "userspace - PTRACE_GETREGS failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } + + if (get_fp_registers(pid, regs->fp)) { + printk(UM_KERN_ERR "userspace - get_fp_registers failed, " + "errno = %d\n", errno); + fatal_sigsegv(); + } + + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ + + if (WIFSTOPPED(status)) { + int sig = WSTOPSIG(status); + + ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si); + + switch (sig) { + case SIGSEGV: + if (PTRACE_FULL_FAULTINFO || + !ptrace_faultinfo) { + get_skas_faultinfo(pid, + ®s->faultinfo); + (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, + regs); + } + else handle_segv(pid, regs); + break; + case SIGTRAP + 0x80: + handle_trap(pid, regs, local_using_sysemu); + break; + case SIGTRAP: + relay_signal(SIGTRAP, (struct siginfo *)&si, regs); + break; + case SIGVTALRM: + now = os_nsecs(); + if (now < nsecs) + break; + block_signals(); + (*sig_info[sig])(sig, (struct siginfo *)&si, regs); + unblock_signals(); + nsecs = timer.it_value.tv_sec * + UM_NSEC_PER_SEC + + timer.it_value.tv_usec * + UM_NSEC_PER_USEC; + nsecs += os_nsecs(); + break; + case SIGIO: + case SIGILL: + case SIGBUS: + case SIGFPE: + case SIGWINCH: + block_signals(); + (*sig_info[sig])(sig, (struct siginfo *)&si, regs); + unblock_signals(); + break; + default: + printk(UM_KERN_ERR "userspace - child stopped " + "with signal %d\n", sig); + fatal_sigsegv(); + } + pid = userspace_pid[0]; + interrupt_end(); + + /* Avoid -ERESTARTSYS handling in host */ + if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET) + PT_SYSCALL_NR(regs->gp) = -1; + } + } +} + +static unsigned long thread_regs[MAX_REG_NR]; +static unsigned long thread_fp_regs[FP_SIZE]; + +static int __init init_thread_regs(void) +{ + get_safe_registers(thread_regs, thread_fp_regs); + /* Set parent's instruction pointer to start of clone-stub */ + thread_regs[REGS_IP_INDEX] = STUB_CODE + + (unsigned long) stub_clone_handler - + (unsigned long) &__syscall_stub_start; + thread_regs[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE - + sizeof(void *); +#ifdef __SIGNAL_FRAMESIZE + thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE; +#endif + return 0; +} + +__initcall(init_thread_regs); + +int copy_context_skas0(unsigned long new_stack, int pid) +{ + struct timeval tv = { .tv_sec = 0, .tv_usec = UM_USEC_PER_SEC / UM_HZ }; + int err; + unsigned long current_stack = current_stub_stack(); + struct stub_data *data = (struct stub_data *) current_stack; + struct stub_data *child_data = (struct stub_data *) new_stack; + unsigned long long new_offset; + int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); + + /* + * prepare offset and fd of child's stack as argument for parent's + * and child's mmap2 calls + */ + *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), + .fd = new_fd, + .timer = ((struct itimerval) + { .it_value = tv, + .it_interval = tv }) }); + + err = ptrace_setregs(pid, thread_regs); + if (err < 0) { + err = -errno; + printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_SETREGS " + "failed, pid = %d, errno = %d\n", pid, -err); + return err; + } + + err = put_fp_registers(pid, thread_fp_regs); + if (err < 0) { + printk(UM_KERN_ERR "copy_context_skas0 : put_fp_registers " + "failed, pid = %d, err = %d\n", pid, err); + return err; + } + + /* set a well known return code for detection of child write failure */ + child_data->err = 12345678; + + /* + * Wait, until parent has finished its work: read child's pid from + * parent's stack, and check, if bad result. + */ + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) { + err = -errno; + printk(UM_KERN_ERR "Failed to continue new process, pid = %d, " + "errno = %d\n", pid, errno); + return err; + } + + wait_stub_done(pid); + + pid = data->err; + if (pid < 0) { + printk(UM_KERN_ERR "copy_context_skas0 - stub-parent reports " + "error %d\n", -pid); + return pid; + } + + /* + * Wait, until child has finished too: read child's result from + * child's stack and check it. + */ + wait_stub_done(pid); + if (child_data->err != STUB_DATA) { + printk(UM_KERN_ERR "copy_context_skas0 - stub-child reports " + "error %ld\n", child_data->err); + err = child_data->err; + goto out_kill; + } + + if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, + (void *)PTRACE_O_TRACESYSGOOD) < 0) { + err = -errno; + printk(UM_KERN_ERR "copy_context_skas0 : PTRACE_OLDSETOPTIONS " + "failed, errno = %d\n", errno); + goto out_kill; + } + + return pid; + + out_kill: + os_kill_ptraced_process(pid, 1); + return err; +} + +/* + * This is used only, if stub pages are needed, while proc_mm is + * available. Opening /proc/mm creates a new mm_context, which lacks + * the stub-pages. Thus, we map them using /proc/mm-fd + */ +int map_stub_pages(int fd, unsigned long code, unsigned long data, + unsigned long stack) +{ + struct proc_mm_op mmop; + int n; + unsigned long long code_offset; + int code_fd = phys_mapping(to_phys((void *) &__syscall_stub_start), + &code_offset); + + mmop = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = code, + .len = UM_KERN_PAGE_SIZE, + .prot = PROT_EXEC, + .flags = MAP_FIXED | MAP_PRIVATE, + .fd = code_fd, + .offset = code_offset + } } }); + CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop))); + if (n != sizeof(mmop)) { + n = errno; + printk(UM_KERN_ERR "mmap args - addr = 0x%lx, fd = %d, " + "offset = %llx\n", code, code_fd, + (unsigned long long) code_offset); + printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for code " + "failed, err = %d\n", n); + return -n; + } + + if (stack) { + unsigned long long map_offset; + int map_fd = phys_mapping(to_phys((void *)stack), &map_offset); + mmop = ((struct proc_mm_op) + { .op = MM_MMAP, + .u = + { .mmap = + { .addr = data, + .len = UM_KERN_PAGE_SIZE, + .prot = PROT_READ | PROT_WRITE, + .flags = MAP_FIXED | MAP_SHARED, + .fd = map_fd, + .offset = map_offset + } } }); + CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop))); + if (n != sizeof(mmop)) { + n = errno; + printk(UM_KERN_ERR "map_stub_pages : /proc/mm map for " + "data failed, err = %d\n", n); + return -n; + } + } + + return 0; +} + +void new_thread(void *stack, jmp_buf *buf, void (*handler)(void)) +{ + (*buf)[0].JB_IP = (unsigned long) handler; + (*buf)[0].JB_SP = (unsigned long) stack + UM_THREAD_SIZE - + sizeof(void *); +} + +#define INIT_JMP_NEW_THREAD 0 +#define INIT_JMP_CALLBACK 1 +#define INIT_JMP_HALT 2 +#define INIT_JMP_REBOOT 3 + +void switch_threads(jmp_buf *me, jmp_buf *you) +{ + if (UML_SETJMP(me) == 0) + UML_LONGJMP(you, 1); +} + +static jmp_buf initial_jmpbuf; + +/* XXX Make these percpu */ +static void (*cb_proc)(void *arg); +static void *cb_arg; +static jmp_buf *cb_back; + +int start_idle_thread(void *stack, jmp_buf *switch_buf) +{ + int n; + + set_handler(SIGWINCH); + + /* + * Can't use UML_SETJMP or UML_LONGJMP here because they save + * and restore signals, with the possible side-effect of + * trying to handle any signals which came when they were + * blocked, which can't be done on this stack. + * Signals must be blocked when jumping back here and restored + * after returning to the jumper. + */ + n = setjmp(initial_jmpbuf); + switch (n) { + case INIT_JMP_NEW_THREAD: + (*switch_buf)[0].JB_IP = (unsigned long) new_thread_handler; + (*switch_buf)[0].JB_SP = (unsigned long) stack + + UM_THREAD_SIZE - sizeof(void *); + break; + case INIT_JMP_CALLBACK: + (*cb_proc)(cb_arg); + longjmp(*cb_back, 1); + break; + case INIT_JMP_HALT: + kmalloc_ok = 0; + return 0; + case INIT_JMP_REBOOT: + kmalloc_ok = 0; + return 1; + default: + printk(UM_KERN_ERR "Bad sigsetjmp return in " + "start_idle_thread - %d\n", n); + fatal_sigsegv(); + } + longjmp(*switch_buf, 1); +} + +void initial_thread_cb_skas(void (*proc)(void *), void *arg) +{ + jmp_buf here; + + cb_proc = proc; + cb_arg = arg; + cb_back = &here; + + block_signals(); + if (UML_SETJMP(&here) == 0) + UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK); + unblock_signals(); + + cb_proc = NULL; + cb_arg = NULL; + cb_back = NULL; +} + +void halt_skas(void) +{ + block_signals(); + UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); +} + +void reboot_skas(void) +{ + block_signals(); + UML_LONGJMP(&initial_jmpbuf, INIT_JMP_REBOOT); +} + +void __switch_mm(struct mm_id *mm_idp) +{ + int err; + + /* FIXME: need cpu pid in __switch_mm */ + if (proc_mm) { + err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, + mm_idp->u.mm_fd); + if (err) { + printk(UM_KERN_ERR "__switch_mm - PTRACE_SWITCH_MM " + "failed, errno = %d\n", errno); + fatal_sigsegv(); + } + } + else userspace_pid[0] = mm_idp->u.pid; +} diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 37517d49c4a..337518c5042 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -1,102 +1,114 @@ /* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ #include <stdio.h> -#include <stddef.h> -#include <stdarg.h> #include <stdlib.h> -#include <string.h> +#include <stdarg.h> #include <unistd.h> -#include <signal.h> -#include <sched.h> -#include <fcntl.h> #include <errno.h> -#include <setjmp.h> -#include <sys/time.h> -#include <sys/wait.h> +#include <fcntl.h> +#include <sched.h> +#include <signal.h> +#include <string.h> #include <sys/mman.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <sys/time.h> +#include <sys/resource.h> #include <asm/unistd.h> -#include <asm/page.h> -#include <sys/types.h> -#include "user_util.h" -#include "kern_util.h" -#include "user.h" -#include "signal_kern.h" -#include "signal_user.h" -#include "sysdep/ptrace.h" -#include "sysdep/sigcontext.h" -#include "irq_user.h" -#include "ptrace_user.h" -#include "mem_user.h" -#include "time_user.h" -#include "init.h" -#include "os.h" -#include "uml-config.h" -#include "choose-mode.h" -#include "mode.h" -#include "tempfile.h" -#include "kern_constants.h" - -#ifdef UML_CONFIG_MODE_SKAS -#include "skas.h" -#include "skas_ptrace.h" -#include "registers.h" -#endif - -static int ptrace_child(void *arg) +#include <init.h> +#include <os.h> +#include <mem_user.h> +#include <ptrace_user.h> +#include <registers.h> +#include <skas.h> +#include <skas_ptrace.h> + +static void ptrace_child(void) { int ret; + /* Calling os_getpid because some libcs cached getpid incorrectly */ int pid = os_getpid(), ppid = getppid(); int sc_result; - if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ + if (change_sig(SIGWINCH, 0) < 0 || + ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) { perror("ptrace"); - os_kill_process(pid, 0); + kill(pid, SIGKILL); } - os_stop_process(pid); + kill(pid, SIGSTOP); - /*This syscall will be intercepted by the parent. Don't call more than - * once, please.*/ + /* + * This syscall will be intercepted by the parent. Don't call more than + * once, please. + */ sc_result = os_getpid(); if (sc_result == pid) - ret = 1; /*Nothing modified by the parent, we are running - normally.*/ + /* Nothing modified by the parent, we are running normally. */ + ret = 1; else if (sc_result == ppid) - ret = 0; /*Expected in check_ptrace and check_sysemu when they - succeed in modifying the stack frame*/ + /* + * Expected in check_ptrace and check_sysemu when they succeed + * in modifying the stack frame + */ + ret = 0; else - ret = 2; /*Serious trouble! This could be caused by a bug in - host 2.6 SKAS3/2.6 patch before release -V6, together - with a bug in the UML code itself.*/ - _exit(ret); + /* Serious trouble! This could be caused by a bug in host 2.6 + * SKAS3/2.6 patch before release -V6, together with a bug in + * the UML code itself. + */ + ret = 2; + + exit(ret); +} + +static void fatal_perror(const char *str) +{ + perror(str); + exit(1); +} + +static void fatal(char *fmt, ...) +{ + va_list list; + + va_start(list, fmt); + vfprintf(stderr, fmt, list); + va_end(list); + + exit(1); } -static int start_ptraced_child(void **stack_out) +static void non_fatal(char *fmt, ...) +{ + va_list list; + + va_start(list, fmt); + vfprintf(stderr, fmt, list); + va_end(list); +} + +static int start_ptraced_child(void) { - void *stack; - unsigned long sp; int pid, n, status; - stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if(stack == MAP_FAILED) - panic("check_ptrace : mmap failed, errno = %d", errno); - sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); - pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL); - if(pid < 0) - panic("start_ptraced_child : clone failed, errno = %d", errno); + pid = fork(); + if (pid == 0) + ptrace_child(); + else if (pid < 0) + fatal_perror("start_ptraced_child : fork failed"); + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : clone failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) - panic("check_ptrace : expected SIGSTOP, got status = %d", + if (n < 0) + fatal_perror("check_ptrace : waitpid failed"); + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) + fatal("check_ptrace : expected SIGSTOP, got status = %d", status); - *stack_out = stack; - return(pid); + return pid; } /* When testing for SYSEMU support, if it is one of the broken versions, we @@ -105,43 +117,53 @@ static int start_ptraced_child(void **stack_out) * So only for SYSEMU features we test mustpanic, while normal host features * must work anyway! */ -static int stop_ptraced_child(int pid, void *stack, int exitcode, - int mustpanic) +static int stop_ptraced_child(int pid, int exitcode, int mustexit) { int status, n, ret = 0; - if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) - panic("check_ptrace : ptrace failed, errno = %d", errno); + if (ptrace(PTRACE_CONT, pid, 0, 0) < 0) { + perror("stop_ptraced_child : ptrace failed"); + return -1; + } CATCH_EINTR(n = waitpid(pid, &status, 0)); - if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) { + if (!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) { int exit_with = WEXITSTATUS(status); if (exit_with == 2) - printk("check_ptrace : child exited with status 2. " - "Serious trouble happening! Try updating your " - "host skas patch!\nDisabling SYSEMU support."); - printk("check_ptrace : child exited with exitcode %d, while " - "expecting %d; status 0x%x", exit_with, - exitcode, status); - if (mustpanic) - panic("\n"); - else - printk("\n"); + non_fatal("check_ptrace : child exited with status 2. " + "\nDisabling SYSEMU support.\n"); + non_fatal("check_ptrace : child exited with exitcode %d, while " + "expecting %d; status 0x%x\n", exit_with, + exitcode, status); + if (mustexit) + exit(1); ret = -1; } - if(munmap(stack, PAGE_SIZE) < 0) - panic("check_ptrace : munmap failed, errno = %d", errno); return ret; } -int ptrace_faultinfo = 1; -int ptrace_ldt = 1; -int proc_mm = 1; -int skas_needs_stub = 0; +/* Changed only during early boot */ +int ptrace_faultinfo; +static int disable_ptrace_faultinfo; + +int ptrace_ldt; +static int disable_ptrace_ldt; + +int proc_mm; +static int disable_proc_mm; + +int have_switch_mm; +static int disable_switch_mm; + +int skas_needs_stub; static int __init skas0_cmd_param(char *str, int* add) { - ptrace_faultinfo = proc_mm = 0; + disable_ptrace_faultinfo = 1; + disable_ptrace_ldt = 1; + disable_proc_mm = 1; + disable_switch_mm = 1; + return 0; } @@ -151,16 +173,14 @@ static int __init mode_skas0_cmd_param(char *str, int* add) __attribute__((alias("skas0_cmd_param"))); __uml_setup("skas0", skas0_cmd_param, - "skas0\n" - " Disables SKAS3 usage, so that SKAS0 is used, unless \n" - " you specify mode=tt.\n\n"); +"skas0\n" +" Disables SKAS3 and SKAS4 usage, so that SKAS0 is used\n\n"); __uml_setup("mode=skas0", mode_skas0_cmd_param, - "mode=skas0\n" - " Disables SKAS3 usage, so that SKAS0 is used, unless you \n" - " specify mode=tt. Note that this was recently added - on \n" - " older kernels you must use simply \"skas0\".\n\n"); +"mode=skas0\n" +" Disables SKAS3 and SKAS4 usage, so that SKAS0 is used.\n\n"); +/* Changed only during early boot */ static int force_sysemu_disabled = 0; static int __init nosysemu_cmd_param(char *str, int* add) @@ -180,159 +200,185 @@ __uml_setup("nosysemu", nosysemu_cmd_param, static void __init check_sysemu(void) { - void *stack; - int pid, n, status, count=0; + unsigned long regs[MAX_REG_NR]; + int pid, n, status, count=0; - printk("Checking syscall emulation patch for ptrace..."); + non_fatal("Checking syscall emulation patch for ptrace..."); sysemu_supported = 0; - pid = start_ptraced_child(&stack); + pid = start_ptraced_child(); - if(ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0) + if (ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0) goto fail; CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); if (n < 0) - panic("check_sysemu : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) - panic("check_sysemu : expected SIGTRAP, " - "got status = %d", status); - - n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, - os_getpid()); - if(n < 0) - panic("check_sysemu : failed to modify system " - "call return, errno = %d", errno); - - if (stop_ptraced_child(pid, stack, 0, 0) < 0) + fatal_perror("check_sysemu : wait failed"); + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) + fatal("check_sysemu : expected SIGTRAP, got status = %d\n", + status); + + if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0) + fatal_perror("check_sysemu : PTRACE_GETREGS failed"); + if (PT_SYSCALL_NR(regs) != __NR_getpid) { + non_fatal("check_sysemu got system call number %d, " + "expected %d...", PT_SYSCALL_NR(regs), __NR_getpid); + goto fail; + } + + n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, os_getpid()); + if (n < 0) { + non_fatal("check_sysemu : failed to modify system call " + "return"); + goto fail; + } + + if (stop_ptraced_child(pid, 0, 0) < 0) goto fail_stopped; sysemu_supported = 1; - printk("OK\n"); + non_fatal("OK\n"); set_using_sysemu(!force_sysemu_disabled); - printk("Checking advanced syscall emulation patch for ptrace..."); - pid = start_ptraced_child(&stack); + non_fatal("Checking advanced syscall emulation patch for ptrace..."); + pid = start_ptraced_child(); - if(ptrace(PTRACE_OLDSETOPTIONS, pid, 0, - (void *) PTRACE_O_TRACESYSGOOD) < 0) - panic("check_ptrace: PTRACE_OLDSETOPTIONS failed, errno = %d", - errno); + if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0, + (void *) PTRACE_O_TRACESYSGOOD) < 0)) + fatal_perror("check_sysemu: PTRACE_OLDSETOPTIONS failed"); - while(1){ + while (1) { count++; - if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0) + if (ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0) goto fail; CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : wait failed, errno = %d", errno); - if(WIFSTOPPED(status) && (WSTOPSIG(status) == (SIGTRAP|0x80))){ - if (!count) - panic("check_ptrace : SYSEMU_SINGLESTEP " - "doesn't singlestep"); - n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, + if (n < 0) + fatal_perror("check_sysemu: wait failed"); + + if (WIFSTOPPED(status) && + (WSTOPSIG(status) == (SIGTRAP|0x80))) { + if (!count) { + non_fatal("check_sysemu: SYSEMU_SINGLESTEP " + "doesn't singlestep"); + goto fail; + } + n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, os_getpid()); - if(n < 0) - panic("check_sysemu : failed to modify system " - "call return, errno = %d", errno); + if (n < 0) + fatal_perror("check_sysemu : failed to modify " + "system call return"); break; } - else if(WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP)) + else if (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP)) count++; - else - panic("check_ptrace : expected SIGTRAP or " - "(SIGTRAP|0x80), got status = %d", status); + else { + non_fatal("check_sysemu: expected SIGTRAP or " + "(SIGTRAP | 0x80), got status = %d\n", + status); + goto fail; + } } - if (stop_ptraced_child(pid, stack, 0, 0) < 0) + if (stop_ptraced_child(pid, 0, 0) < 0) goto fail_stopped; sysemu_supported = 2; - printk("OK\n"); + non_fatal("OK\n"); - if ( !force_sysemu_disabled ) + if (!force_sysemu_disabled) set_using_sysemu(sysemu_supported); return; fail: - stop_ptraced_child(pid, stack, 1, 0); + stop_ptraced_child(pid, 1, 0); fail_stopped: - printk("missing\n"); + non_fatal("missing\n"); } static void __init check_ptrace(void) { - void *stack; int pid, syscall, n, status; - printk("Checking that ptrace can change system call numbers..."); - pid = start_ptraced_child(&stack); + non_fatal("Checking that ptrace can change system call numbers..."); + pid = start_ptraced_child(); + + if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0, + (void *) PTRACE_O_TRACESYSGOOD) < 0)) + fatal_perror("check_ptrace: PTRACE_OLDSETOPTIONS failed"); - if(ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0) - panic("check_ptrace: PTRACE_OLDSETOPTIONS failed, errno = %d", errno); + while (1) { + if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) + fatal_perror("check_ptrace : ptrace failed"); - while(1){ - if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) - panic("check_ptrace : ptrace failed, errno = %d", - errno); CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); - if(n < 0) - panic("check_ptrace : wait failed, errno = %d", errno); - if(!WIFSTOPPED(status) || (WSTOPSIG(status) != (SIGTRAP|0x80))) - panic("check_ptrace : expected (SIGTRAP|0x80), " - "got status = %d", status); + if (n < 0) + fatal_perror("check_ptrace : wait failed"); - syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, + if (!WIFSTOPPED(status) || + (WSTOPSIG(status) != (SIGTRAP | 0x80))) + fatal("check_ptrace : expected (SIGTRAP|0x80), " + "got status = %d", status); + + syscall = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_NR_OFFSET, 0); - if(syscall == __NR_getpid){ - n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, + if (syscall == __NR_getpid) { + n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, __NR_getppid); - if(n < 0) - panic("check_ptrace : failed to modify system " - "call, errno = %d", errno); + if (n < 0) + fatal_perror("check_ptrace : failed to modify " + "system call"); break; } } - stop_ptraced_child(pid, stack, 0, 1); - printk("OK\n"); + stop_ptraced_child(pid, 0, 1); + non_fatal("OK\n"); check_sysemu(); } -extern int create_tmp_file(unsigned long long len); +extern void check_tmpexec(void); -static void check_tmpexec(void) +static void __init check_coredump_limit(void) { - void *addr; - int err, fd = create_tmp_file(UM_KERN_PAGE_SIZE); - - addr = mmap(NULL, UM_KERN_PAGE_SIZE, - PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0); - printf("Checking PROT_EXEC mmap in /tmp..."); - fflush(stdout); - if(addr == MAP_FAILED){ - err = errno; - perror("failed"); - if(err == EPERM) - printf("/tmp must be not mounted noexec\n"); - exit(1); + struct rlimit lim; + int err = getrlimit(RLIMIT_CORE, &lim); + + if (err) { + perror("Getting core dump limit"); + return; } - printf("OK\n"); - munmap(addr, UM_KERN_PAGE_SIZE); - close(fd); + printf("Core dump limits :\n\tsoft - "); + if (lim.rlim_cur == RLIM_INFINITY) + printf("NONE\n"); + else printf("%lu\n", lim.rlim_cur); + + printf("\thard - "); + if (lim.rlim_max == RLIM_INFINITY) + printf("NONE\n"); + else printf("%lu\n", lim.rlim_max); } -void os_early_checks(void) +void __init os_early_checks(void) { + int pid; + + /* Print out the core dump limits early */ + check_coredump_limit(); + check_ptrace(); /* Need to check this early because mmapping happens before the * kernel is running. */ check_tmpexec(); + + pid = start_ptraced_child(); + if (init_registers(pid)) + fatal("Failed to initialize default registers"); + stop_ptraced_child(pid, 1, 1); } static int __init noprocmm_cmd_param(char *str, int* add) { - proc_mm = 0; + disable_proc_mm = 1; return 0; } @@ -344,7 +390,7 @@ __uml_setup("noprocmm", noprocmm_cmd_param, static int __init noptracefaultinfo_cmd_param(char *str, int* add) { - ptrace_faultinfo = 0; + disable_ptrace_faultinfo = 1; return 0; } @@ -356,7 +402,7 @@ __uml_setup("noptracefaultinfo", noptracefaultinfo_cmd_param, static int __init noptraceldt_cmd_param(char *str, int* add) { - ptrace_ldt = 0; + disable_ptrace_ldt = 1; return 0; } @@ -366,39 +412,33 @@ __uml_setup("noptraceldt", noptraceldt_cmd_param, " To support PTRACE_LDT, the host needs to be patched using\n" " the current skas3 patch.\n\n"); -#ifdef UML_CONFIG_MODE_SKAS static inline void check_skas3_ptrace_faultinfo(void) { struct ptrace_faultinfo fi; - void *stack; int pid, n; - printf(" - PTRACE_FAULTINFO..."); - pid = start_ptraced_child(&stack); + non_fatal(" - PTRACE_FAULTINFO..."); + pid = start_ptraced_child(); n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); if (n < 0) { - ptrace_faultinfo = 0; - if(errno == EIO) - printf("not found\n"); + if (errno == EIO) + non_fatal("not found\n"); else perror("not found"); - } + } else if (disable_ptrace_faultinfo) + non_fatal("found but disabled on command line\n"); else { - if (!ptrace_faultinfo) - printf("found but disabled on command line\n"); - else - printf("found\n"); + ptrace_faultinfo = 1; + non_fatal("found\n"); } - init_registers(pid); - stop_ptraced_child(pid, stack, 1, 1); + stop_ptraced_child(pid, 1, 1); } static inline void check_skas3_ptrace_ldt(void) { #ifdef PTRACE_LDT - void *stack; int pid, n; unsigned char ldtbuf[40]; struct ptrace_ldt ldt_op = (struct ptrace_ldt) { @@ -406,121 +446,84 @@ static inline void check_skas3_ptrace_ldt(void) .ptr = ldtbuf, .bytecount = sizeof(ldtbuf)}; - printf(" - PTRACE_LDT..."); - pid = start_ptraced_child(&stack); + non_fatal(" - PTRACE_LDT..."); + pid = start_ptraced_child(); n = ptrace(PTRACE_LDT, pid, 0, (unsigned long) &ldt_op); if (n < 0) { - if(errno == EIO) - printf("not found\n"); - else { + if (errno == EIO) + non_fatal("not found\n"); + else perror("not found"); - } - ptrace_ldt = 0; - } + } else if (disable_ptrace_ldt) + non_fatal("found, but use is disabled\n"); else { - if(ptrace_ldt) - printf("found\n"); - else - printf("found, but use is disabled\n"); + ptrace_ldt = 1; + non_fatal("found\n"); } - stop_ptraced_child(pid, stack, 1, 1); -#else - /* PTRACE_LDT might be disabled via cmdline option. - * We want to override this, else we might use the stub - * without real need - */ - ptrace_ldt = 1; + stop_ptraced_child(pid, 1, 1); #endif } static inline void check_skas3_proc_mm(void) { - printf(" - /proc/mm..."); - if (os_access("/proc/mm", OS_ACC_W_OK) < 0) { - proc_mm = 0; - printf("not found\n"); - } + non_fatal(" - /proc/mm..."); + if (access("/proc/mm", W_OK) < 0) + perror("not found"); + else if (disable_proc_mm) + non_fatal("found but disabled on command line\n"); else { - if (!proc_mm) - printf("found but disabled on command line\n"); - else - printf("found\n"); + proc_mm = 1; + non_fatal("found\n"); } } -int can_do_skas(void) +void can_do_skas(void) { - printf("Checking for the skas3 patch in the host:\n"); + non_fatal("Checking for the skas3 patch in the host:\n"); check_skas3_proc_mm(); check_skas3_ptrace_faultinfo(); check_skas3_ptrace_ldt(); - if(!proc_mm || !ptrace_faultinfo || !ptrace_ldt) + if (!proc_mm || !ptrace_faultinfo || !ptrace_ldt) skas_needs_stub = 1; - - return 1; -} -#else -int can_do_skas(void) -{ - return(0); -} -#endif - -int have_devanon = 0; - -void check_devanon(void) -{ - int fd; - - printk("Checking for /dev/anon on the host..."); - fd = open("/dev/anon", O_RDWR); - if(fd < 0){ - printk("Not available (open failed with errno %d)\n", errno); - return; - } - - printk("OK\n"); - have_devanon = 1; } int __init parse_iomem(char *str, int *add) { struct iomem_region *new; - struct uml_stat buf; + struct stat64 buf; char *file, *driver; - int fd, err, size; + int fd, size; driver = str; file = strchr(str,','); - if(file == NULL){ - printf("parse_iomem : failed to parse iomem\n"); + if (file == NULL) { + fprintf(stderr, "parse_iomem : failed to parse iomem\n"); goto out; } *file = '\0'; file++; - fd = os_open_file(file, of_rdwr(OPENFLAGS()), 0); - if(fd < 0){ - os_print_error(fd, "parse_iomem - Couldn't open io file"); + fd = open(file, O_RDWR, 0); + if (fd < 0) { + perror("parse_iomem - Couldn't open io file"); goto out; } - err = os_stat_fd(fd, &buf); - if(err < 0){ - os_print_error(err, "parse_iomem - cannot stat_fd file"); + if (fstat64(fd, &buf) < 0) { + perror("parse_iomem - cannot stat_fd file"); goto out_close; } new = malloc(sizeof(*new)); - if(new == NULL){ + if (new == NULL) { perror("Couldn't allocate iomem_region struct"); goto out_close; } - size = (buf.ust_size + UM_KERN_PAGE_SIZE) & ~(UM_KERN_PAGE_SIZE - 1); + size = (buf.st_size + UM_KERN_PAGE_SIZE) & ~(UM_KERN_PAGE_SIZE - 1); *new = ((struct iomem_region) { .next = iomem_regions, .driver = driver, @@ -531,10 +534,9 @@ int __init parse_iomem(char *str, int *add) iomem_regions = new; iomem_size += new->size + UM_KERN_PAGE_SIZE; - return(0); + return 0; out_close: - os_close_file(fd); + close(fd); out: - return(1); + return 1; } - diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile deleted file mode 100644 index 340ef26f594..00000000000 --- a/arch/um/os-Linux/sys-i386/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# -# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -# Licensed under the GPL -# - -obj-$(CONFIG_MODE_SKAS) = registers.o - -USER_OBJS := $(obj-y) - -include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c deleted file mode 100644 index aee4812333c..00000000000 --- a/arch/um/os-Linux/sys-i386/registers.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2004 PathScale, Inc - * Licensed under the GPL - */ - -#include <errno.h> -#include <string.h> -#include <setjmp.h> -#include "sysdep/ptrace_user.h" -#include "sysdep/ptrace.h" -#include "uml-config.h" -#include "skas_ptregs.h" -#include "registers.h" -#include "user.h" - -/* These are set once at boot time and not changed thereafter */ - -static unsigned long exec_regs[HOST_FRAME_SIZE]; -static unsigned long exec_fp_regs[HOST_FP_SIZE]; -static unsigned long exec_fpx_regs[HOST_XFP_SIZE]; -static int have_fpx_regs = 1; - -void init_thread_registers(union uml_pt_regs *to) -{ - memcpy(to->skas.regs, exec_regs, sizeof(to->skas.regs)); - memcpy(to->skas.fp, exec_fp_regs, sizeof(to->skas.fp)); - if(have_fpx_regs) - memcpy(to->skas.xfp, exec_fpx_regs, sizeof(to->skas.xfp)); -} - -/* XXX These need to use [GS]ETFPXREGS and copy_sc_{to,from}_user_skas needs - * to pass in a sufficiently large buffer - */ -int save_fp_registers(int pid, unsigned long *fp_regs) -{ - if(ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0) - return(-errno); - return(0); -} - -int restore_fp_registers(int pid, unsigned long *fp_regs) -{ - if(ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0) - return(-errno); - return(0); -} - -static int move_registers(int pid, int int_op, union uml_pt_regs *regs, - int fp_op, unsigned long *fp_regs) -{ - if(ptrace(int_op, pid, 0, regs->skas.regs) < 0) - return(-errno); - - if(ptrace(fp_op, pid, 0, fp_regs) < 0) - return(-errno); - - return(0); -} - -void save_registers(int pid, union uml_pt_regs *regs) -{ - unsigned long *fp_regs; - int err, fp_op; - - if(have_fpx_regs){ - fp_op = PTRACE_GETFPXREGS; - fp_regs = regs->skas.xfp; - } - else { - fp_op = PTRACE_GETFPREGS; - fp_regs = regs->skas.fp; - } - - err = move_registers(pid, PTRACE_GETREGS, regs, fp_op, fp_regs); - if(err) - panic("save_registers - saving registers failed, errno = %d\n", - -err); -} - -void restore_registers(int pid, union uml_pt_regs *regs) -{ - unsigned long *fp_regs; - int err, fp_op; - - if(have_fpx_regs){ - fp_op = PTRACE_SETFPXREGS; - fp_regs = regs->skas.xfp; - } - else { - fp_op = PTRACE_SETFPREGS; - fp_regs = regs->skas.fp; - } - - err = move_registers(pid, PTRACE_SETREGS, regs, fp_op, fp_regs); - if(err) - panic("restore_registers - saving registers failed, " - "errno = %d\n", -err); -} - -void init_registers(int pid) -{ - int err; - - err = ptrace(PTRACE_GETREGS, pid, 0, exec_regs); - if(err) - panic("check_ptrace : PTRACE_GETREGS failed, errno = %d", - err); - - errno = 0; - err = ptrace(PTRACE_GETFPXREGS, pid, 0, exec_fpx_regs); - if(!err) - return; - if(errno != EIO) - panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d", - errno); - - have_fpx_regs = 0; - - err = ptrace(PTRACE_GETFPREGS, pid, 0, exec_fp_regs); - if(err) - panic("check_ptrace : PTRACE_GETFPREGS failed, errno = %d", - err); -} - -void get_safe_registers(unsigned long *regs) -{ - memcpy(regs, exec_regs, HOST_FRAME_SIZE * sizeof(unsigned long)); -} - -void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer) -{ - struct __jmp_buf_tag *jmpbuf = buffer; - - UPT_SET(uml_regs, EIP, jmpbuf->__jmpbuf[JB_PC]); - UPT_SET(uml_regs, UESP, jmpbuf->__jmpbuf[JB_SP]); - UPT_SET(uml_regs, EBP, jmpbuf->__jmpbuf[JB_BP]); -} diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile deleted file mode 100644 index 340ef26f594..00000000000 --- a/arch/um/os-Linux/sys-x86_64/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# -# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -# Licensed under the GPL -# - -obj-$(CONFIG_MODE_SKAS) = registers.o - -USER_OBJS := $(obj-y) - -include arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c deleted file mode 100644 index 4b638dfb52b..00000000000 --- a/arch/um/os-Linux/sys-x86_64/registers.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (C) 2004 PathScale, Inc - * Licensed under the GPL - */ - -#include <errno.h> -#include <string.h> -#include <setjmp.h> -#include "ptrace_user.h" -#include "uml-config.h" -#include "skas_ptregs.h" -#include "registers.h" -#include "user.h" - -/* These are set once at boot time and not changed thereafter */ - -static unsigned long exec_regs[HOST_FRAME_SIZE]; -static unsigned long exec_fp_regs[HOST_FP_SIZE]; - -void init_thread_registers(union uml_pt_regs *to) -{ - memcpy(to->skas.regs, exec_regs, sizeof(to->skas.regs)); - memcpy(to->skas.fp, exec_fp_regs, sizeof(to->skas.fp)); -} - -static int move_registers(int pid, int int_op, int fp_op, - union uml_pt_regs *regs) -{ - if(ptrace(int_op, pid, 0, regs->skas.regs) < 0) - return(-errno); - - if(ptrace(fp_op, pid, 0, regs->skas.fp) < 0) - return(-errno); - - return(0); -} - -void save_registers(int pid, union uml_pt_regs *regs) -{ - int err; - - err = move_registers(pid, PTRACE_GETREGS, PTRACE_GETFPREGS, regs); - if(err) - panic("save_registers - saving registers failed, errno = %d\n", - -err); -} - -void restore_registers(int pid, union uml_pt_regs *regs) -{ - int err; - - err = move_registers(pid, PTRACE_SETREGS, PTRACE_SETFPREGS, regs); - if(err) - panic("restore_registers - saving registers failed, " - "errno = %d\n", -err); -} - -void init_registers(int pid) -{ - int err; - - err = ptrace(PTRACE_GETREGS, pid, 0, exec_regs); - if(err) - panic("check_ptrace : PTRACE_GETREGS failed, errno = %d", - err); - - err = ptrace(PTRACE_GETFPREGS, pid, 0, exec_fp_regs); - if(err) - panic("check_ptrace : PTRACE_GETFPREGS failed, errno = %d", - err); -} - -void get_safe_registers(unsigned long *regs) -{ - memcpy(regs, exec_regs, HOST_FRAME_SIZE * sizeof(unsigned long)); -} - -void get_thread_regs(union uml_pt_regs *uml_regs, void *buffer) -{ - struct __jmp_buf_tag *jmpbuf = buffer; - - UPT_SET(uml_regs, RIP, jmpbuf->__jmpbuf[JB_PC]); - UPT_SET(uml_regs, RSP, jmpbuf->__jmpbuf[JB_RSP]); - UPT_SET(uml_regs, RBP, jmpbuf->__jmpbuf[JB_RBP]); -} diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c index cf30a39bc48..e9824d5dd7d 100644 --- a/arch/um/os-Linux/time.c +++ b/arch/um/os-Linux/time.c @@ -1,21 +1,186 @@ -#include <stdlib.h> +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stddef.h> +#include <errno.h> +#include <signal.h> +#include <time.h> #include <sys/time.h> +#include <kern_util.h> +#include <os.h> +#include "internal.h" + +int set_interval(void) +{ + int usec = UM_USEC_PER_SEC / UM_HZ; + struct itimerval interval = ((struct itimerval) { { 0, usec }, + { 0, usec } }); + + if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) + return -errno; + + return 0; +} + +int timer_one_shot(int ticks) +{ + unsigned long usec = ticks * UM_USEC_PER_SEC / UM_HZ; + unsigned long sec = usec / UM_USEC_PER_SEC; + struct itimerval interval; + + usec %= UM_USEC_PER_SEC; + interval = ((struct itimerval) { { 0, 0 }, { sec, usec } }); + + if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) + return -errno; + + return 0; +} + +/** + * timeval_to_ns - Convert timeval to nanoseconds + * @ts: pointer to the timeval variable to be converted + * + * Returns the scalar nanosecond representation of the timeval + * parameter. + * + * Ripped from linux/time.h because it's a kernel header, and thus + * unusable from here. + */ +static inline long long timeval_to_ns(const struct timeval *tv) +{ + return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) + + tv->tv_usec * UM_NSEC_PER_USEC; +} + +long long disable_timer(void) +{ + struct itimerval time = ((struct itimerval) { { 0, 0 }, { 0, 0 } }); + long long remain, max = UM_NSEC_PER_SEC / UM_HZ; + + if (setitimer(ITIMER_VIRTUAL, &time, &time) < 0) + printk(UM_KERN_ERR "disable_timer - setitimer failed, " + "errno = %d\n", errno); + + remain = timeval_to_ns(&time.it_value); + if (remain > max) + remain = max; + + return remain; +} -unsigned long long os_usecs(void) +long long os_nsecs(void) { struct timeval tv; gettimeofday(&tv, NULL); - return((unsigned long long) tv.tv_sec * 1000000 + tv.tv_usec); + return timeval_to_ns(&tv); } -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +#ifdef UML_CONFIG_NO_HZ_COMMON +static int after_sleep_interval(struct timespec *ts) +{ + return 0; +} + +static void deliver_alarm(void) +{ + alarm_handler(SIGVTALRM, NULL, NULL); +} + +static unsigned long long sleep_time(unsigned long long nsecs) +{ + return nsecs; +} + +#else +unsigned long long last_tick; +unsigned long long skew; + +static void deliver_alarm(void) +{ + unsigned long long this_tick = os_nsecs(); + int one_tick = UM_NSEC_PER_SEC / UM_HZ; + + /* Protection against the host's time going backwards */ + if ((last_tick != 0) && (this_tick < last_tick)) + this_tick = last_tick; + + if (last_tick == 0) + last_tick = this_tick - one_tick; + + skew += this_tick - last_tick; + + while (skew >= one_tick) { + alarm_handler(SIGVTALRM, NULL, NULL); + skew -= one_tick; + } + + last_tick = this_tick; +} + +static unsigned long long sleep_time(unsigned long long nsecs) +{ + return nsecs > skew ? nsecs - skew : 0; +} + +static inline long long timespec_to_us(const struct timespec *ts) +{ + return ((long long) ts->tv_sec * UM_USEC_PER_SEC) + + ts->tv_nsec / UM_NSEC_PER_USEC; +} + +static int after_sleep_interval(struct timespec *ts) +{ + int usec = UM_USEC_PER_SEC / UM_HZ; + long long start_usecs = timespec_to_us(ts); + struct timeval tv; + struct itimerval interval; + + /* + * It seems that rounding can increase the value returned from + * setitimer to larger than the one passed in. Over time, + * this will cause the remaining time to be greater than the + * tick interval. If this happens, then just reduce the first + * tick to the interval value. + */ + if (start_usecs > usec) + start_usecs = usec; + + start_usecs -= skew / UM_NSEC_PER_USEC; + if (start_usecs < 0) + start_usecs = 0; + + tv = ((struct timeval) { .tv_sec = start_usecs / UM_USEC_PER_SEC, + .tv_usec = start_usecs % UM_USEC_PER_SEC }); + interval = ((struct itimerval) { { 0, usec }, tv }); + + if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1) + return -errno; + + return 0; +} +#endif + +void idle_sleep(unsigned long long nsecs) +{ + struct timespec ts; + + /* + * nsecs can come in as zero, in which case, this starts a + * busy loop. To prevent this, reset nsecs to the tick + * interval if it is zero. + */ + if (nsecs == 0) + nsecs = UM_NSEC_PER_SEC / UM_HZ; + + nsecs = sleep_time(nsecs); + ts = ((struct timespec) { .tv_sec = nsecs / UM_NSEC_PER_SEC, + .tv_nsec = nsecs % UM_NSEC_PER_SEC }); + + if (nanosleep(&ts, &ts) == 0) + deliver_alarm(); + after_sleep_interval(&ts); +} diff --git a/arch/um/os-Linux/tt.c b/arch/um/os-Linux/tt.c deleted file mode 100644 index a6db8877931..00000000000 --- a/arch/um/os-Linux/tt.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <stdio.h> -#include <unistd.h> -#include <signal.h> -#include <sched.h> -#include <errno.h> -#include <stdarg.h> -#include <stdlib.h> -#include <setjmp.h> -#include <sys/time.h> -#include <sys/ptrace.h> -#include <linux/ptrace.h> -#include <sys/wait.h> -#include <sys/mman.h> -#include <asm/ptrace.h> -#include <asm/unistd.h> -#include <asm/page.h> -#include "user_util.h" -#include "kern_util.h" -#include "user.h" -#include "signal_kern.h" -#include "signal_user.h" -#include "sysdep/ptrace.h" -#include "sysdep/sigcontext.h" -#include "irq_user.h" -#include "ptrace_user.h" -#include "time_user.h" -#include "init.h" -#include "os.h" -#include "uml-config.h" -#include "choose-mode.h" -#include "mode.h" -#include "tempfile.h" - -int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x, - int must_succeed) -{ - int err; - - err = os_protect_memory((void *) addr, len, r, w, x); - if(err < 0){ - if(must_succeed) - panic("protect failed, err = %d", -err); - else return(err); - } - return(0); -} - -/* - *------------------------- - * only for tt mode (will be deleted in future...) - *------------------------- - */ - -struct tramp { - int (*tramp)(void *); - void *tramp_data; - unsigned long temp_stack; - int flags; - int pid; -}; - -/* See above for why sigkill is here */ - -int sigkill = SIGKILL; - -int outer_tramp(void *arg) -{ - struct tramp *t; - int sig = sigkill; - - t = arg; - t->pid = clone(t->tramp, (void *) t->temp_stack + page_size()/2, - t->flags, t->tramp_data); - if(t->pid > 0) wait_for_stop(t->pid, SIGSTOP, PTRACE_CONT, NULL); - kill(os_getpid(), sig); - _exit(0); -} - -int start_fork_tramp(void *thread_arg, unsigned long temp_stack, - int clone_flags, int (*tramp)(void *)) -{ - struct tramp arg; - unsigned long sp; - int new_pid, status, err; - - /* The trampoline will run on the temporary stack */ - sp = stack_sp(temp_stack); - - clone_flags |= CLONE_FILES | SIGCHLD; - - arg.tramp = tramp; - arg.tramp_data = thread_arg; - arg.temp_stack = temp_stack; - arg.flags = clone_flags; - - /* Start the process and wait for it to kill itself */ - new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg); - if(new_pid < 0) - return(new_pid); - - CATCH_EINTR(err = waitpid(new_pid, &status, 0)); - if(err < 0) - panic("Waiting for outer trampoline failed - errno = %d", - errno); - - if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL)) - panic("outer trampoline didn't exit with SIGKILL, " - "status = %d", status); - - return(arg.pid); -} - -void forward_pending_sigio(int target) -{ - sigset_t sigs; - - if(sigpending(&sigs)) - panic("forward_pending_sigio : sigpending failed"); - if(sigismember(&sigs, SIGIO)) - kill(target, SIGIO); -} - diff --git a/arch/um/os-Linux/tty.c b/arch/um/os-Linux/tty.c index 4cfdd18ea1e..721d8afa329 100644 --- a/arch/um/os-Linux/tty.c +++ b/arch/um/os-Linux/tty.c @@ -1,13 +1,14 @@ -/* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ #include <stdlib.h> +#include <unistd.h> #include <errno.h> -#include "os.h" -#include "user.h" -#include "kern_util.h" +#include <fcntl.h> +#include <kern_util.h> +#include <os.h> struct grantpt_info { int fd; @@ -26,36 +27,34 @@ static void grantpt_cb(void *arg) int get_pty(void) { struct grantpt_info info; - int fd; - - fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0); - if(fd < 0){ - printk("get_pty : Couldn't open /dev/ptmx - err = %d\n", -fd); - return(fd); + int fd, err; + + fd = open("/dev/ptmx", O_RDWR); + if (fd < 0) { + err = -errno; + printk(UM_KERN_ERR "get_pty : Couldn't open /dev/ptmx - " + "err = %d\n", errno); + return err; } info.fd = fd; initial_thread_cb(grantpt_cb, &info); - if(info.res < 0){ - printk("get_pty : Couldn't grant pty - errno = %d\n", - -info.err); - return(-1); + if (info.res < 0) { + err = -info.err; + printk(UM_KERN_ERR "get_pty : Couldn't grant pty - " + "errno = %d\n", -info.err); + goto out; } - if(unlockpt(fd) < 0){ - printk("get_pty : Couldn't unlock pty - errno = %d\n", errno); - return(-1); + + if (unlockpt(fd) < 0) { + err = -errno; + printk(UM_KERN_ERR "get_pty : Couldn't unlock pty - " + "errno = %d\n", errno); + goto out; } - return(fd); + return fd; +out: + close(fd); + return err; } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/os-Linux/uaccess.c b/arch/um/os-Linux/uaccess.c deleted file mode 100644 index 38d710158c3..00000000000 --- a/arch/um/os-Linux/uaccess.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) - * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) - * Licensed under the GPL - */ - -#include <setjmp.h> -#include <string.h> - -unsigned long __do_user_copy(void *to, const void *from, int n, - void **fault_addr, void **fault_catcher, - void (*op)(void *to, const void *from, - int n), int *faulted_out) -{ - unsigned long *faddrp = (unsigned long *) fault_addr, ret; - - sigjmp_buf jbuf; - *fault_catcher = &jbuf; - if(sigsetjmp(jbuf, 1) == 0){ - (*op)(to, from, n); - ret = 0; - *faulted_out = 0; - } - else { - ret = *faddrp; - *faulted_out = 1; - } - *fault_addr = NULL; - *fault_catcher = NULL; - return ret; -} - diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c new file mode 100644 index 00000000000..c1dc89261f6 --- /dev/null +++ b/arch/um/os-Linux/umid.c @@ -0,0 +1,394 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stdlib.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <unistd.h> +#include <sys/stat.h> +#include <init.h> +#include <os.h> + +#define UML_DIR "~/.uml/" + +#define UMID_LEN 64 + +/* Changed by set_umid, which is run early in boot */ +static char umid[UMID_LEN] = { 0 }; + +/* Changed by set_uml_dir and make_uml_dir, which are run early in boot */ +static char *uml_dir = UML_DIR; + +static int __init make_uml_dir(void) +{ + char dir[512] = { '\0' }; + int len, err; + + if (*uml_dir == '~') { + char *home = getenv("HOME"); + + err = -ENOENT; + if (home == NULL) { + printk(UM_KERN_ERR "make_uml_dir : no value in " + "environment for $HOME\n"); + goto err; + } + strlcpy(dir, home, sizeof(dir)); + uml_dir++; + } + strlcat(dir, uml_dir, sizeof(dir)); + len = strlen(dir); + if (len > 0 && dir[len - 1] != '/') + strlcat(dir, "/", sizeof(dir)); + + err = -ENOMEM; + uml_dir = malloc(strlen(dir) + 1); + if (uml_dir == NULL) { + printf("make_uml_dir : malloc failed, errno = %d\n", errno); + goto err; + } + strcpy(uml_dir, dir); + + if ((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)) { + printf("Failed to mkdir '%s': %s\n", uml_dir, strerror(errno)); + err = -errno; + goto err_free; + } + return 0; + +err_free: + free(uml_dir); +err: + uml_dir = NULL; + return err; +} + +/* + * Unlinks the files contained in @dir and then removes @dir. + * Doesn't handle directory trees, so it's not like rm -rf, but almost such. We + * ignore ENOENT errors for anything (they happen, strangely enough - possibly + * due to races between multiple dying UML threads). + */ +static int remove_files_and_dir(char *dir) +{ + DIR *directory; + struct dirent *ent; + int len; + char file[256]; + int ret; + + directory = opendir(dir); + if (directory == NULL) { + if (errno != ENOENT) + return -errno; + else + return 0; + } + + while ((ent = readdir(directory)) != NULL) { + if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) + continue; + len = strlen(dir) + sizeof("/") + strlen(ent->d_name) + 1; + if (len > sizeof(file)) { + ret = -E2BIG; + goto out; + } + + sprintf(file, "%s/%s", dir, ent->d_name); + if (unlink(file) < 0 && errno != ENOENT) { + ret = -errno; + goto out; + } + } + + if (rmdir(dir) < 0 && errno != ENOENT) { + ret = -errno; + goto out; + } + + ret = 0; +out: + closedir(directory); + return ret; +} + +/* + * This says that there isn't already a user of the specified directory even if + * there are errors during the checking. This is because if these errors + * happen, the directory is unusable by the pre-existing UML, so we might as + * well take it over. This could happen either by + * the existing UML somehow corrupting its umid directory + * something other than UML sticking stuff in the directory + * this boot racing with a shutdown of the other UML + * In any of these cases, the directory isn't useful for anything else. + * + * Boolean return: 1 if in use, 0 otherwise. + */ +static inline int is_umdir_used(char *dir) +{ + char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; + char pid[sizeof("nnnnn\0")], *end; + int dead, fd, p, n, err; + + n = snprintf(file, sizeof(file), "%s/pid", dir); + if (n >= sizeof(file)) { + printk(UM_KERN_ERR "is_umdir_used - pid filename too long\n"); + err = -E2BIG; + goto out; + } + + dead = 0; + fd = open(file, O_RDONLY); + if (fd < 0) { + fd = -errno; + if (fd != -ENOENT) { + printk(UM_KERN_ERR "is_umdir_used : couldn't open pid " + "file '%s', err = %d\n", file, -fd); + } + goto out; + } + + err = 0; + n = read(fd, pid, sizeof(pid)); + if (n < 0) { + printk(UM_KERN_ERR "is_umdir_used : couldn't read pid file " + "'%s', err = %d\n", file, errno); + goto out_close; + } else if (n == 0) { + printk(UM_KERN_ERR "is_umdir_used : couldn't read pid file " + "'%s', 0-byte read\n", file); + goto out_close; + } + + p = strtoul(pid, &end, 0); + if (end == pid) { + printk(UM_KERN_ERR "is_umdir_used : couldn't parse pid file " + "'%s', errno = %d\n", file, errno); + goto out_close; + } + + if ((kill(p, 0) == 0) || (errno != ESRCH)) { + printk(UM_KERN_ERR "umid \"%s\" is already in use by pid %d\n", + umid, p); + return 1; + } + +out_close: + close(fd); +out: + return 0; +} + +/* + * Try to remove the directory @dir unless it's in use. + * Precondition: @dir exists. + * Returns 0 for success, < 0 for failure in removal or if the directory is in + * use. + */ +static int umdir_take_if_dead(char *dir) +{ + int ret; + if (is_umdir_used(dir)) + return -EEXIST; + + ret = remove_files_and_dir(dir); + if (ret) { + printk(UM_KERN_ERR "is_umdir_used - remove_files_and_dir " + "failed with err = %d\n", ret); + } + return ret; +} + +static void __init create_pid_file(void) +{ + char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; + char pid[sizeof("nnnnn\0")]; + int fd, n; + + if (umid_file_name("pid", file, sizeof(file))) + return; + + fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644); + if (fd < 0) { + printk(UM_KERN_ERR "Open of machine pid file \"%s\" failed: " + "%s\n", file, strerror(errno)); + return; + } + + snprintf(pid, sizeof(pid), "%d\n", getpid()); + n = write(fd, pid, strlen(pid)); + if (n != strlen(pid)) + printk(UM_KERN_ERR "Write of pid file failed - err = %d\n", + errno); + + close(fd); +} + +int __init set_umid(char *name) +{ + if (strlen(name) > UMID_LEN - 1) + return -E2BIG; + + strlcpy(umid, name, sizeof(umid)); + + return 0; +} + +/* Changed in make_umid, which is called during early boot */ +static int umid_setup = 0; + +static int __init make_umid(void) +{ + int fd, err; + char tmp[256]; + + if (umid_setup) + return 0; + + make_uml_dir(); + + if (*umid == '\0') { + strlcpy(tmp, uml_dir, sizeof(tmp)); + strlcat(tmp, "XXXXXX", sizeof(tmp)); + fd = mkstemp(tmp); + if (fd < 0) { + printk(UM_KERN_ERR "make_umid - mkstemp(%s) failed: " + "%s\n", tmp, strerror(errno)); + err = -errno; + goto err; + } + + close(fd); + + set_umid(&tmp[strlen(uml_dir)]); + + /* + * There's a nice tiny little race between this unlink and + * the mkdir below. It'd be nice if there were a mkstemp + * for directories. + */ + if (unlink(tmp)) { + err = -errno; + goto err; + } + } + + snprintf(tmp, sizeof(tmp), "%s%s", uml_dir, umid); + err = mkdir(tmp, 0777); + if (err < 0) { + err = -errno; + if (err != -EEXIST) + goto err; + + if (umdir_take_if_dead(tmp) < 0) + goto err; + + err = mkdir(tmp, 0777); + } + if (err) { + err = -errno; + printk(UM_KERN_ERR "Failed to create '%s' - err = %d\n", umid, + errno); + goto err; + } + + umid_setup = 1; + + create_pid_file(); + + err = 0; + err: + return err; +} + +static int __init make_umid_init(void) +{ + if (!make_umid()) + return 0; + + /* + * If initializing with the given umid failed, then try again with + * a random one. + */ + printk(UM_KERN_ERR "Failed to initialize umid \"%s\", trying with a " + "random umid\n", umid); + *umid = '\0'; + make_umid(); + + return 0; +} + +__initcall(make_umid_init); + +int __init umid_file_name(char *name, char *buf, int len) +{ + int n, err; + + err = make_umid(); + if (err) + return err; + + n = snprintf(buf, len, "%s%s/%s", uml_dir, umid, name); + if (n >= len) { + printk(UM_KERN_ERR "umid_file_name : buffer too short\n"); + return -E2BIG; + } + + return 0; +} + +char *get_umid(void) +{ + return umid; +} + +static int __init set_uml_dir(char *name, int *add) +{ + if (*name == '\0') { + printf("uml_dir can't be an empty string\n"); + return 0; + } + + if (name[strlen(name) - 1] == '/') { + uml_dir = name; + return 0; + } + + uml_dir = malloc(strlen(name) + 2); + if (uml_dir == NULL) { + printf("Failed to malloc uml_dir - error = %d\n", errno); + + /* + * Return 0 here because do_initcalls doesn't look at + * the return value. + */ + return 0; + } + sprintf(uml_dir, "%s/", name); + + return 0; +} + +__uml_setup("uml_dir=", set_uml_dir, +"uml_dir=<directory>\n" +" The location to place the pid and umid files.\n\n" +); + +static void remove_umid_dir(void) +{ + char dir[strlen(uml_dir) + UMID_LEN + 1], err; + + sprintf(dir, "%s%s", uml_dir, umid); + err = remove_files_and_dir(dir); + if (err) + printf("remove_umid_dir - remove_files_and_dir failed with " + "err = %d\n", err); +} + +__uml_exitcall(remove_umid_dir); diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c index 56d3f870926..db4a034aeee 100644 --- a/arch/um/os-Linux/user_syms.c +++ b/arch/um/os-Linux/user_syms.c @@ -1,11 +1,12 @@ -#include "linux/types.h" -#include "linux/module.h" +#include <linux/types.h> +#include <linux/module.h> /* Some of this are builtin function (some are not but could in the future), * so I *must* declare good prototypes for them and then EXPORT them. * The kernel code uses the macro defined by include/linux/string.h, * so I undef macros; the userspace code does not include that and I - * add an EXPORT for the glibc one.*/ + * add an EXPORT for the glibc one. + */ #undef strlen #undef strstr @@ -13,28 +14,38 @@ #undef memset extern size_t strlen(const char *); -extern void *memcpy(void *, const void *, size_t); extern void *memmove(void *, const void *, size_t); extern void *memset(void *, int, size_t); extern int printf(const char *, ...); -EXPORT_SYMBOL(strlen); +/* If it's not defined, the export is included in lib/string.c.*/ +#ifdef __HAVE_ARCH_STRSTR +EXPORT_SYMBOL(strstr); +#endif + +#ifndef __x86_64__ +extern void *memcpy(void *, const void *, size_t); EXPORT_SYMBOL(memcpy); +#endif + EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(printf); -EXPORT_SYMBOL(strstr); - /* Here, instead, I can provide a fake prototype. Yes, someone cares: genksyms. * However, the modules will use the CRC defined *here*, no matter if it is * good; so the versions of these symbols will always match */ #define EXPORT_SYMBOL_PROTO(sym) \ - int sym(void); \ - EXPORT_SYMBOL(sym); + int sym(void); \ + EXPORT_SYMBOL(sym); -#ifdef SUBARCH_i386 +extern void readdir64(void) __attribute__((weak)); +EXPORT_SYMBOL(readdir64); +extern void truncate64(void) __attribute__((weak)); +EXPORT_SYMBOL(truncate64); + +#ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA EXPORT_SYMBOL(vsyscall_ehdr); EXPORT_SYMBOL(vsyscall_end); #endif @@ -51,12 +62,18 @@ EXPORT_SYMBOL_PROTO(dup2); EXPORT_SYMBOL_PROTO(__xstat); EXPORT_SYMBOL_PROTO(__lxstat); EXPORT_SYMBOL_PROTO(__lxstat64); +EXPORT_SYMBOL_PROTO(__fxstat64); EXPORT_SYMBOL_PROTO(lseek); EXPORT_SYMBOL_PROTO(lseek64); EXPORT_SYMBOL_PROTO(chown); +EXPORT_SYMBOL_PROTO(fchown); EXPORT_SYMBOL_PROTO(truncate); +EXPORT_SYMBOL_PROTO(ftruncate64); EXPORT_SYMBOL_PROTO(utime); +EXPORT_SYMBOL_PROTO(utimes); +EXPORT_SYMBOL_PROTO(futimes); EXPORT_SYMBOL_PROTO(chmod); +EXPORT_SYMBOL_PROTO(fchmod); EXPORT_SYMBOL_PROTO(rename); EXPORT_SYMBOL_PROTO(__xmknod); @@ -86,13 +103,18 @@ EXPORT_SYMBOL_PROTO(getuid); EXPORT_SYMBOL_PROTO(fsync); EXPORT_SYMBOL_PROTO(fdatasync); -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ +EXPORT_SYMBOL_PROTO(lstat64); +EXPORT_SYMBOL_PROTO(fstat64); +EXPORT_SYMBOL_PROTO(mknod); + +/* Export symbols used by GCC for the stack protector. */ +extern void __stack_smash_handler(void *) __attribute__((weak)); +EXPORT_SYMBOL(__stack_smash_handler); + +extern long __guard __attribute__((weak)); +EXPORT_SYMBOL(__guard); + +#ifdef _FORTIFY_SOURCE +extern int __sprintf_chk(char *str, int flag, size_t strlen, const char *format); +EXPORT_SYMBOL(__sprintf_chk); +#endif diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c new file mode 100644 index 00000000000..faee55ef6d2 --- /dev/null +++ b/arch/um/os-Linux/util.c @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <signal.h> +#include <string.h> +#include <termios.h> +#include <wait.h> +#include <sys/mman.h> +#include <sys/utsname.h> +#include <os.h> + +void stack_protections(unsigned long address) +{ + if (mprotect((void *) address, UM_THREAD_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC) < 0) + panic("protecting stack failed, errno = %d", errno); +} + +int raw(int fd) +{ + struct termios tt; + int err; + + CATCH_EINTR(err = tcgetattr(fd, &tt)); + if (err < 0) + return -errno; + + cfmakeraw(&tt); + + CATCH_EINTR(err = tcsetattr(fd, TCSADRAIN, &tt)); + if (err < 0) + return -errno; + + /* + * XXX tcsetattr could have applied only some changes + * (and cfmakeraw() is a set of changes) + */ + return 0; +} + +void setup_machinename(char *machine_out) +{ + struct utsname host; + + uname(&host); +#ifdef UML_CONFIG_UML_X86 +# ifndef UML_CONFIG_64BIT + if (!strcmp(host.machine, "x86_64")) { + strcpy(machine_out, "i686"); + return; + } +# else + if (!strcmp(host.machine, "i686")) { + strcpy(machine_out, "x86_64"); + return; + } +# endif +#endif + strcpy(machine_out, host.machine); +} + +void setup_hostinfo(char *buf, int len) +{ + struct utsname host; + + uname(&host); + snprintf(buf, len, "%s %s %s %s %s", host.sysname, host.nodename, + host.release, host.version, host.machine); +} + +/* + * We cannot use glibc's abort(). It makes use of tgkill() which + * has no effect within UML's kernel threads. + * After that glibc would execute an invalid instruction to kill + * the calling process and UML crashes with SIGSEGV. + */ +static inline void __attribute__ ((noreturn)) uml_abort(void) +{ + sigset_t sig; + + fflush(NULL); + + if (!sigemptyset(&sig) && !sigaddset(&sig, SIGABRT)) + sigprocmask(SIG_UNBLOCK, &sig, 0); + + for (;;) + if (kill(getpid(), SIGABRT) < 0) + exit(127); +} + +/* + * UML helper threads must not handle SIGWINCH/INT/TERM + */ +void os_fix_helper_signals(void) +{ + signal(SIGWINCH, SIG_IGN); + signal(SIGINT, SIG_DFL); + signal(SIGTERM, SIG_DFL); +} + +void os_dump_core(void) +{ + int pid; + + signal(SIGSEGV, SIG_DFL); + + /* + * We are about to SIGTERM this entire process group to ensure that + * nothing is around to run after the kernel exits. The + * kernel wants to abort, not die through SIGTERM, so we + * ignore it here. + */ + + signal(SIGTERM, SIG_IGN); + kill(0, SIGTERM); + /* + * Most of the other processes associated with this UML are + * likely sTopped, so give them a SIGCONT so they see the + * SIGTERM. + */ + kill(0, SIGCONT); + + /* + * Now, having sent signals to everyone but us, make sure they + * die by ptrace. Processes can survive what's been done to + * them so far - the mechanism I understand is receiving a + * SIGSEGV and segfaulting immediately upon return. There is + * always a SIGSEGV pending, and (I'm guessing) signals are + * processed in numeric order so the SIGTERM (signal 15 vs + * SIGSEGV being signal 11) is never handled. + * + * Run a waitpid loop until we get some kind of error. + * Hopefully, it's ECHILD, but there's not a lot we can do if + * it's something else. Tell os_kill_ptraced_process not to + * wait for the child to report its death because there's + * nothing reasonable to do if that fails. + */ + + while ((pid = waitpid(-1, NULL, WNOHANG | __WALL)) > 0) + os_kill_ptraced_process(pid, 0); + + uml_abort(); +} + +void um_early_printk(const char *s, unsigned int n) +{ + printf("%.*s", n, s); +} |
