diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ia64/ia32/sys_ia32.c |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/ia64/ia32/sys_ia32.c')
-rw-r--r-- | arch/ia64/ia32/sys_ia32.c | 2747 |
1 files changed, 2747 insertions, 0 deletions
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c new file mode 100644 index 00000000000..247a21c64ae --- /dev/null +++ b/arch/ia64/ia32/sys_ia32.c @@ -0,0 +1,2747 @@ +/* + * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Derived from sys_sparc32.c. + * + * Copyright (C) 2000 VA Linux Co + * Copyright (C) 2000 Don Dugger <n0ano@valinux.com> + * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com> + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 2000-2003, 2005 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2004 Gordon Jin <gordon.jin@intel.com> + * + * These routines maintain argument size conversion between 32bit and 64bit + * environment. + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/syscalls.h> +#include <linux/sysctl.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/signal.h> +#include <linux/resource.h> +#include <linux/times.h> +#include <linux/utsname.h> +#include <linux/timex.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/mm.h> +#include <linux/shm.h> +#include <linux/slab.h> +#include <linux/uio.h> +#include <linux/nfs_fs.h> +#include <linux/quota.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr.h> +#include <linux/nfsd/syscall.h> +#include <linux/poll.h> +#include <linux/eventpoll.h> +#include <linux/personality.h> +#include <linux/ptrace.h> +#include <linux/stat.h> +#include <linux/ipc.h> +#include <linux/compat.h> +#include <linux/vfs.h> +#include <linux/mman.h> + +#include <asm/intrinsics.h> +#include <asm/semaphore.h> +#include <asm/types.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> + +#include "ia32priv.h" + +#include <net/scm.h> +#include <net/sock.h> + +#define DEBUG 0 + +#if DEBUG +# define DBG(fmt...) printk(KERN_DEBUG fmt) +#else +# define DBG(fmt...) +#endif + +#define ROUND_UP(x,a) ((__typeof__(x))(((unsigned long)(x) + ((a) - 1)) & ~((a) - 1))) + +#define OFFSET4K(a) ((a) & 0xfff) +#define PAGE_START(addr) ((addr) & PAGE_MASK) +#define MINSIGSTKSZ_IA32 2048 + +#define high2lowuid(uid) ((uid) > 65535 ? 65534 : (uid)) +#define high2lowgid(gid) ((gid) > 65535 ? 65534 : (gid)) + +/* + * Anything that modifies or inspects ia32 user virtual memory must hold this semaphore + * while doing so. + */ +/* XXX make per-mm: */ +static DECLARE_MUTEX(ia32_mmap_sem); + +asmlinkage long +sys32_execve (char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp, + struct pt_regs *regs) +{ + long error; + char *filename; + unsigned long old_map_base, old_task_size, tssd; + + filename = getname(name); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + return error; + + old_map_base = current->thread.map_base; + old_task_size = current->thread.task_size; + tssd = ia64_get_kr(IA64_KR_TSSD); + + /* we may be exec'ing a 64-bit process: reset map base, task-size, and io-base: */ + current->thread.map_base = DEFAULT_MAP_BASE; + current->thread.task_size = DEFAULT_TASK_SIZE; + ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob); + ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1); + + error = compat_do_execve(filename, argv, envp, regs); + putname(filename); + + if (error < 0) { + /* oops, execve failed, switch back to old values... */ + ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE); + ia64_set_kr(IA64_KR_TSSD, tssd); + current->thread.map_base = old_map_base; + current->thread.task_size = old_task_size; + } + + return error; +} + +int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) +{ + int err; + + if ((u64) stat->size > MAX_NON_LFS || + !old_valid_dev(stat->dev) || + !old_valid_dev(stat->rdev)) + return -EOVERFLOW; + + if (clear_user(ubuf, sizeof(*ubuf))) + return -EFAULT; + + err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev); + err |= __put_user(stat->ino, &ubuf->st_ino); + err |= __put_user(stat->mode, &ubuf->st_mode); + err |= __put_user(stat->nlink, &ubuf->st_nlink); + err |= __put_user(high2lowuid(stat->uid), &ubuf->st_uid); + err |= __put_user(high2lowgid(stat->gid), &ubuf->st_gid); + err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev); + err |= __put_user(stat->size, &ubuf->st_size); + err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime); + err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec); + err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime); + err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec); + err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime); + err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec); + err |= __put_user(stat->blksize, &ubuf->st_blksize); + err |= __put_user(stat->blocks, &ubuf->st_blocks); + return err; +} + +#if PAGE_SHIFT > IA32_PAGE_SHIFT + + +static int +get_page_prot (struct vm_area_struct *vma, unsigned long addr) +{ + int prot = 0; + + if (!vma || vma->vm_start > addr) + return 0; + + if (vma->vm_flags & VM_READ) + prot |= PROT_READ; + if (vma->vm_flags & VM_WRITE) + prot |= PROT_WRITE; + if (vma->vm_flags & VM_EXEC) + prot |= PROT_EXEC; + return prot; +} + +/* + * Map a subpage by creating an anonymous page that contains the union of the old page and + * the subpage. + */ +static unsigned long +mmap_subpage (struct file *file, unsigned long start, unsigned long end, int prot, int flags, + loff_t off) +{ + void *page = NULL; + struct inode *inode; + unsigned long ret = 0; + struct vm_area_struct *vma = find_vma(current->mm, start); + int old_prot = get_page_prot(vma, start); + + DBG("mmap_subpage(file=%p,start=0x%lx,end=0x%lx,prot=%x,flags=%x,off=0x%llx)\n", + file, start, end, prot, flags, off); + + + /* Optimize the case where the old mmap and the new mmap are both anonymous */ + if ((old_prot & PROT_WRITE) && (flags & MAP_ANONYMOUS) && !vma->vm_file) { + if (clear_user((void __user *) start, end - start)) { + ret = -EFAULT; + goto out; + } + goto skip_mmap; + } + + page = (void *) get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + if (old_prot) + copy_from_user(page, (void __user *) PAGE_START(start), PAGE_SIZE); + + down_write(¤t->mm->mmap_sem); + { + ret = do_mmap(NULL, PAGE_START(start), PAGE_SIZE, prot | PROT_WRITE, + flags | MAP_FIXED | MAP_ANONYMOUS, 0); + } + up_write(¤t->mm->mmap_sem); + + if (IS_ERR((void *) ret)) + goto out; + + if (old_prot) { + /* copy back the old page contents. */ + if (offset_in_page(start)) + copy_to_user((void __user *) PAGE_START(start), page, + offset_in_page(start)); + if (offset_in_page(end)) + copy_to_user((void __user *) end, page + offset_in_page(end), + PAGE_SIZE - offset_in_page(end)); + } + + if (!(flags & MAP_ANONYMOUS)) { + /* read the file contents */ + inode = file->f_dentry->d_inode; + if (!inode->i_fop || !file->f_op->read + || ((*file->f_op->read)(file, (char __user *) start, end - start, &off) < 0)) + { + ret = -EINVAL; + goto out; + } + } + + skip_mmap: + if (!(prot & PROT_WRITE)) + ret = sys_mprotect(PAGE_START(start), PAGE_SIZE, prot | old_prot); + out: + if (page) + free_page((unsigned long) page); + return ret; +} + +/* SLAB cache for partial_page structures */ +kmem_cache_t *partial_page_cachep; + +/* + * init partial_page_list. + * return 0 means kmalloc fail. + */ +struct partial_page_list* +ia32_init_pp_list(void) +{ + struct partial_page_list *p; + + if ((p = kmalloc(sizeof(*p), GFP_KERNEL)) == NULL) + return p; + p->pp_head = NULL; + p->ppl_rb = RB_ROOT; + p->pp_hint = NULL; + atomic_set(&p->pp_count, 1); + return p; +} + +/* + * Search for the partial page with @start in partial page list @ppl. + * If finds the partial page, return the found partial page. + * Else, return 0 and provide @pprev, @rb_link, @rb_parent to + * be used by later __ia32_insert_pp(). + */ +static struct partial_page * +__ia32_find_pp(struct partial_page_list *ppl, unsigned int start, + struct partial_page **pprev, struct rb_node ***rb_link, + struct rb_node **rb_parent) +{ + struct partial_page *pp; + struct rb_node **__rb_link, *__rb_parent, *rb_prev; + + pp = ppl->pp_hint; + if (pp && pp->base == start) + return pp; + + __rb_link = &ppl->ppl_rb.rb_node; + rb_prev = __rb_parent = NULL; + + while (*__rb_link) { + __rb_parent = *__rb_link; + pp = rb_entry(__rb_parent, struct partial_page, pp_rb); + + if (pp->base == start) { + ppl->pp_hint = pp; + return pp; + } else if (pp->base < start) { + rb_prev = __rb_parent; + __rb_link = &__rb_parent->rb_right; + } else { + __rb_link = &__rb_parent->rb_left; + } + } + + *rb_link = __rb_link; + *rb_parent = __rb_parent; + *pprev = NULL; + if (rb_prev) + *pprev = rb_entry(rb_prev, struct partial_page, pp_rb); + return NULL; +} + +/* + * insert @pp into @ppl. + */ +static void +__ia32_insert_pp(struct partial_page_list *ppl, struct partial_page *pp, + struct partial_page *prev, struct rb_node **rb_link, + struct rb_node *rb_parent) +{ + /* link list */ + if (prev) { + pp->next = prev->next; + prev->next = pp; + } else { + ppl->pp_head = pp; + if (rb_parent) + pp->next = rb_entry(rb_parent, + struct partial_page, pp_rb); + else + pp->next = NULL; + } + + /* link rb */ + rb_link_node(&pp->pp_rb, rb_parent, rb_link); + rb_insert_color(&pp->pp_rb, &ppl->ppl_rb); + + ppl->pp_hint = pp; +} + +/* + * delete @pp from partial page list @ppl. + */ +static void +__ia32_delete_pp(struct partial_page_list *ppl, struct partial_page *pp, + struct partial_page *prev) +{ + if (prev) { + prev->next = pp->next; + if (ppl->pp_hint == pp) + ppl->pp_hint = prev; + } else { + ppl->pp_head = pp->next; + if (ppl->pp_hint == pp) + ppl->pp_hint = pp->next; + } + rb_erase(&pp->pp_rb, &ppl->ppl_rb); + kmem_cache_free(partial_page_cachep, pp); +} + +static struct partial_page * +__pp_prev(struct partial_page *pp) +{ + struct rb_node *prev = rb_prev(&pp->pp_rb); + if (prev) + return rb_entry(prev, struct partial_page, pp_rb); + else + return NULL; +} + +/* + * Delete partial pages with address between @start and @end. + * @start and @end are page aligned. + */ +static void +__ia32_delete_pp_range(unsigned int start, unsigned int end) +{ + struct partial_page *pp, *prev; + struct rb_node **rb_link, *rb_parent; + + if (start >= end) + return; + + pp = __ia32_find_pp(current->thread.ppl, start, &prev, + &rb_link, &rb_parent); + if (pp) + prev = __pp_prev(pp); + else { + if (prev) + pp = prev->next; + else + pp = current->thread.ppl->pp_head; + } + + while (pp && pp->base < end) { + struct partial_page *tmp = pp->next; + __ia32_delete_pp(current->thread.ppl, pp, prev); + pp = tmp; + } +} + +/* + * Set the range between @start and @end in bitmap. + * @start and @end should be IA32 page aligned and in the same IA64 page. + */ +static int +__ia32_set_pp(unsigned int start, unsigned int end, int flags) +{ + struct partial_page *pp, *prev; + struct rb_node ** rb_link, *rb_parent; + unsigned int pstart, start_bit, end_bit, i; + + pstart = PAGE_START(start); + start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE; + end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE; + if (end_bit == 0) + end_bit = PAGE_SIZE / IA32_PAGE_SIZE; + pp = __ia32_find_pp(current->thread.ppl, pstart, &prev, + &rb_link, &rb_parent); + if (pp) { + for (i = start_bit; i < end_bit; i++) + set_bit(i, &pp->bitmap); + /* + * Check: if this partial page has been set to a full page, + * then delete it. + */ + if (find_first_zero_bit(&pp->bitmap, sizeof(pp->bitmap)*8) >= + PAGE_SIZE/IA32_PAGE_SIZE) { + __ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp)); + } + return 0; + } + + /* + * MAP_FIXED may lead to overlapping mmap. + * In this case, the requested mmap area may already mmaped as a full + * page. So check vma before adding a new partial page. + */ + if (flags & MAP_FIXED) { + struct vm_area_struct *vma = find_vma(current->mm, pstart); + if (vma && vma->vm_start <= pstart) + return 0; + } + + /* new a partial_page */ + pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL); + if (!pp) + return -ENOMEM; + pp->base = pstart; + pp->bitmap = 0; + for (i=start_bit; i<end_bit; i++) + set_bit(i, &(pp->bitmap)); + pp->next = NULL; + __ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent); + return 0; +} + +/* + * @start and @end should be IA32 page aligned, but don't need to be in the + * same IA64 page. Split @start and @end to make sure they're in the same IA64 + * page, then call __ia32_set_pp(). + */ +static void +ia32_set_pp(unsigned int start, unsigned int end, int flags) +{ + down_write(¤t->mm->mmap_sem); + if (flags & MAP_FIXED) { + /* + * MAP_FIXED may lead to overlapping mmap. When this happens, + * a series of complete IA64 pages results in deletion of + * old partial pages in that range. + */ + __ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end)); + } + + if (end < PAGE_ALIGN(start)) { + __ia32_set_pp(start, end, flags); + } else { + if (offset_in_page(start)) + __ia32_set_pp(start, PAGE_ALIGN(start), flags); + if (offset_in_page(end)) + __ia32_set_pp(PAGE_START(end), end, flags); + } + up_write(¤t->mm->mmap_sem); +} + +/* + * Unset the range between @start and @end in bitmap. + * @start and @end should be IA32 page aligned and in the same IA64 page. + * After doing that, if the bitmap is 0, then free the page and return 1, + * else return 0; + * If not find the partial page in the list, then + * If the vma exists, then the full page is set to a partial page; + * Else return -ENOMEM. + */ +static int +__ia32_unset_pp(unsigned int start, unsigned int end) +{ + struct partial_page *pp, *prev; + struct rb_node ** rb_link, *rb_parent; + unsigned int pstart, start_bit, end_bit, i; + struct vm_area_struct *vma; + + pstart = PAGE_START(start); + start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE; + end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE; + if (end_bit == 0) + end_bit = PAGE_SIZE / IA32_PAGE_SIZE; + + pp = __ia32_find_pp(current->thread.ppl, pstart, &prev, + &rb_link, &rb_parent); + if (pp) { + for (i = start_bit; i < end_bit; i++) + clear_bit(i, &pp->bitmap); + if (pp->bitmap == 0) { + __ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp)); + return 1; + } + return 0; + } + + vma = find_vma(current->mm, pstart); + if (!vma || vma->vm_start > pstart) { + return -ENOMEM; + } + + /* new a partial_page */ + pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL); + if (!pp) + return -ENOMEM; + pp->base = pstart; + pp->bitmap = 0; + for (i = 0; i < start_bit; i++) + set_bit(i, &(pp->bitmap)); + for (i = end_bit; i < PAGE_SIZE / IA32_PAGE_SIZE; i++) + set_bit(i, &(pp->bitmap)); + pp->next = NULL; + __ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent); + return 0; +} + +/* + * Delete pp between PAGE_ALIGN(start) and PAGE_START(end) by calling + * __ia32_delete_pp_range(). Unset possible partial pages by calling + * __ia32_unset_pp(). + * The returned value see __ia32_unset_pp(). + */ +static int +ia32_unset_pp(unsigned int *startp, unsigned int *endp) +{ + unsigned int start = *startp, end = *endp; + int ret = 0; + + down_write(¤t->mm->mmap_sem); + + __ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end)); + + if (end < PAGE_ALIGN(start)) { + ret = __ia32_unset_pp(start, end); + if (ret == 1) { + *startp = PAGE_START(start); + *endp = PAGE_ALIGN(end); + } + if (ret == 0) { + /* to shortcut sys_munmap() in sys32_munmap() */ + *startp = PAGE_START(start); + *endp = PAGE_START(end); + } + } else { + if (offset_in_page(start)) { + ret = __ia32_unset_pp(start, PAGE_ALIGN(start)); + if (ret == 1) + *startp = PAGE_START(start); + if (ret == 0) + *startp = PAGE_ALIGN(start); + if (ret < 0) + goto out; + } + if (offset_in_page(end)) { + ret = __ia32_unset_pp(PAGE_START(end), end); + if (ret == 1) + *endp = PAGE_ALIGN(end); + if (ret == 0) + *endp = PAGE_START(end); + } + } + + out: + up_write(¤t->mm->mmap_sem); + return ret; +} + +/* + * Compare the range between @start and @end with bitmap in partial page. + * @start and @end should be IA32 page aligned and in the same IA64 page. + */ +static int +__ia32_compare_pp(unsigned int start, unsigned int end) +{ + struct partial_page *pp, *prev; + struct rb_node ** rb_link, *rb_parent; + unsigned int pstart, start_bit, end_bit, size; + unsigned int first_bit, next_zero_bit; /* the first range in bitmap */ + + pstart = PAGE_START(start); + + pp = __ia32_find_pp(current->thread.ppl, pstart, &prev, + &rb_link, &rb_parent); + if (!pp) + return 1; + + start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE; + end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE; + size = sizeof(pp->bitmap) * 8; + first_bit = find_first_bit(&pp->bitmap, size); + next_zero_bit = find_next_zero_bit(&pp->bitmap, size, first_bit); + if ((start_bit < first_bit) || (end_bit > next_zero_bit)) { + /* exceeds the first range in bitmap */ + return -ENOMEM; + } else if ((start_bit == first_bit) && (end_bit == next_zero_bit)) { + first_bit = find_next_bit(&pp->bitmap, size, next_zero_bit); + if ((next_zero_bit < first_bit) && (first_bit < size)) + return 1; /* has next range */ + else + return 0; /* no next range */ + } else + return 1; +} + +/* + * @start and @end should be IA32 page aligned, but don't need to be in the + * same IA64 page. Split @start and @end to make sure they're in the same IA64 + * page, then call __ia32_compare_pp(). + * + * Take this as example: the range is the 1st and 2nd 4K page. + * Return 0 if they fit bitmap exactly, i.e. bitmap = 00000011; + * Return 1 if the range doesn't cover whole bitmap, e.g. bitmap = 00001111; + * Return -ENOMEM if the range exceeds the bitmap, e.g. bitmap = 00000001 or + * bitmap = 00000101. + */ +static int +ia32_compare_pp(unsigned int *startp, unsigned int *endp) +{ + unsigned int start = *startp, end = *endp; + int retval = 0; + + down_write(¤t->mm->mmap_sem); + + if (end < PAGE_ALIGN(start)) { + retval = __ia32_compare_pp(start, end); + if (retval == 0) { + *startp = PAGE_START(start); + *endp = PAGE_ALIGN(end); + } + } else { + if (offset_in_page(start)) { + retval = __ia32_compare_pp(start, + PAGE_ALIGN(start)); + if (retval == 0) + *startp = PAGE_START(start); + if (retval < 0) + goto out; + } + if (offset_in_page(end)) { + retval = __ia32_compare_pp(PAGE_START(end), end); + if (retval == 0) + *endp = PAGE_ALIGN(end); + } + } + + out: + up_write(¤t->mm->mmap_sem); + return retval; +} + +static void +__ia32_drop_pp_list(struct partial_page_list *ppl) +{ + struct partial_page *pp = ppl->pp_head; + + while (pp) { + struct partial_page *next = pp->next; + kmem_cache_free(partial_page_cachep, pp); + pp = next; + } + + kfree(ppl); +} + +void +ia32_drop_partial_page_list(struct task_struct *task) +{ + struct partial_page_list* ppl = task->thread.ppl; + + if (ppl && atomic_dec_and_test(&ppl->pp_count)) + __ia32_drop_pp_list(ppl); +} + +/* + * Copy current->thread.ppl to ppl (already initialized). + */ +static int +__ia32_copy_pp_list(struct partial_page_list *ppl) +{ + struct partial_page *pp, *tmp, *prev; + struct rb_node **rb_link, *rb_parent; + + ppl->pp_head = NULL; + ppl->pp_hint = NULL; + ppl->ppl_rb = RB_ROOT; + rb_link = &ppl->ppl_rb.rb_node; + rb_parent = NULL; + prev = NULL; + + for (pp = current->thread.ppl->pp_head; pp; pp = pp->next) { + tmp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + *tmp = *pp; + __ia32_insert_pp(ppl, tmp, prev, rb_link, rb_parent); + prev = tmp; + rb_link = &tmp->pp_rb.rb_right; + rb_parent = &tmp->pp_rb; + } + return 0; +} + +int +ia32_copy_partial_page_list(struct task_struct *p, unsigned long clone_flags) +{ + int retval = 0; + + if (clone_flags & CLONE_VM) { + atomic_inc(¤t->thread.ppl->pp_count); + p->thread.ppl = current->thread.ppl; + } else { + p->thread.ppl = ia32_init_pp_list(); + if (!p->thread.ppl) + return -ENOMEM; + down_write(¤t->mm->mmap_sem); + { + retval = __ia32_copy_pp_list(p->thread.ppl); + } + up_write(¤t->mm->mmap_sem); + } + + return retval; +} + +static unsigned long +emulate_mmap (struct file *file, unsigned long start, unsigned long len, int prot, int flags, + loff_t off) +{ + unsigned long tmp, end, pend, pstart, ret, is_congruent, fudge = 0; + struct inode *inode; + loff_t poff; + + end = start + len; + pstart = PAGE_START(start); + pend = PAGE_ALIGN(end); + + if (flags & MAP_FIXED) { + ia32_set_pp((unsigned int)start, (unsigned int)end, flags); + if (start > pstart) { + if (flags & MAP_SHARED) + printk(KERN_INFO + "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n", + current->comm, current->pid, start); + ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags, + off); + if (IS_ERR((void *) ret)) + return ret; + pstart += PAGE_SIZE; + if (pstart >= pend) + goto out; /* done */ + } + if (end < pend) { + if (flags & MAP_SHARED) + printk(KERN_INFO + "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n", + current->comm, current->pid, end); + ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags, + (off + len) - offset_in_page(end)); + if (IS_ERR((void *) ret)) + return ret; + pend -= PAGE_SIZE; + if (pstart >= pend) + goto out; /* done */ + } + } else { + /* + * If a start address was specified, use it if the entire rounded out area + * is available. + */ + if (start && !pstart) + fudge = 1; /* handle case of mapping to range (0,PAGE_SIZE) */ + tmp = arch_get_unmapped_area(file, pstart - fudge, pend - pstart, 0, flags); + if (tmp != pstart) { + pstart = tmp; + start = pstart + offset_in_page(off); /* make start congruent with off */ + end = start + len; + pend = PAGE_ALIGN(end); + } + } + + poff = off + (pstart - start); /* note: (pstart - start) may be negative */ + is_congruent = (flags & MAP_ANONYMOUS) || (offset_in_page(poff) == 0); + + if ((flags & MAP_SHARED) && !is_congruent) + printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap " + "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off); + + DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend, + is_congruent ? "congruent" : "not congruent", poff); + + down_write(¤t->mm->mmap_sem); + { + if (!(flags & MAP_ANONYMOUS) && is_congruent) + ret = do_mmap(file, pstart, pend - pstart, prot, flags | MAP_FIXED, poff); + else + ret = do_mmap(NULL, pstart, pend - pstart, + prot | ((flags & MAP_ANONYMOUS) ? 0 : PROT_WRITE), + flags | MAP_FIXED | MAP_ANONYMOUS, 0); + } + up_write(¤t->mm->mmap_sem); + + if (IS_ERR((void *) ret)) + return ret; + + if (!is_congruent) { + /* read the file contents */ + inode = file->f_dentry->d_inode; + if (!inode->i_fop || !file->f_op->read + || ((*file->f_op->read)(file, (char __user *) pstart, pend - pstart, &poff) + < 0)) + { + sys_munmap(pstart, pend - pstart); + return -EINVAL; + } + if (!(prot & PROT_WRITE) && sys_mprotect(pstart, pend - pstart, prot) < 0) + return -EINVAL; + } + + if (!(flags & MAP_FIXED)) + ia32_set_pp((unsigned int)start, (unsigned int)end, flags); +out: + return start; +} + +#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */ + +static inline unsigned int +get_prot32 (unsigned int prot) +{ + if (prot & PROT_WRITE) + /* on x86, PROT_WRITE implies PROT_READ which implies PROT_EEC */ + prot |= PROT_READ | PROT_WRITE | PROT_EXEC; + else if (prot & (PROT_READ | PROT_EXEC)) + /* on x86, there is no distinction between PROT_READ and PROT_EXEC */ + prot |= (PROT_READ | PROT_EXEC); + + return prot; +} + +unsigned long +ia32_do_mmap (struct file *file, unsigned long addr, unsigned long len, int prot, int flags, + loff_t offset) +{ + DBG("ia32_do_mmap(file=%p,addr=0x%lx,len=0x%lx,prot=%x,flags=%x,offset=0x%llx)\n", + file, addr, len, prot, flags, offset); + + if (file && (!file->f_op || !file->f_op->mmap)) + return -ENODEV; + + len = IA32_PAGE_ALIGN(len); + if (len == 0) + return addr; + + if (len > IA32_PAGE_OFFSET || addr > IA32_PAGE_OFFSET - len) + { + if (flags & MAP_FIXED) + return -ENOMEM; + else + return -EINVAL; + } + + if (OFFSET4K(offset)) + return -EINVAL; + + prot = get_prot32(prot); + +#if PAGE_SHIFT > IA32_PAGE_SHIFT + down(&ia32_mmap_sem); + { + addr = emulate_mmap(file, addr, len, prot, flags, offset); + } + up(&ia32_mmap_sem); +#else + down_write(¤t->mm->mmap_sem); + { + addr = do_mmap(file, addr, len, prot, flags, offset); + } + up_write(¤t->mm->mmap_sem); +#endif + DBG("ia32_do_mmap: returning 0x%lx\n", addr); + return addr; +} + +/* + * Linux/i386 didn't use to be able to handle more than 4 system call parameters, so these + * system calls used a memory block for parameter passing.. + */ + +struct mmap_arg_struct { + unsigned int addr; + unsigned int len; + unsigned int prot; + unsigned int flags; + unsigned int fd; + unsigned int offset; +}; + +asmlinkage long +sys32_mmap (struct mmap_arg_struct __user *arg) +{ + struct mmap_arg_struct a; + struct file *file = NULL; + unsigned long addr; + int flags; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + + if (OFFSET4K(a.offset)) + return -EINVAL; + + flags = a.flags; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(a.fd); + if (!file) + return -EBADF; + } + + addr = ia32_do_mmap(file, a.addr, a.len, a.prot, flags, a.offset); + + if (file) + fput(file); + return addr; +} + +asmlinkage long +sys32_mmap2 (unsigned int addr, unsigned int len, unsigned int prot, unsigned int flags, + unsigned int fd, unsigned int pgoff) +{ + struct file *file = NULL; + unsigned long retval; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + return -EBADF; + } + + retval = ia32_do_mmap(file, addr, len, prot, flags, + (unsigned long) pgoff << IA32_PAGE_SHIFT); + + if (file) + fput(file); + return retval; +} + +asmlinkage long +sys32_munmap (unsigned int start, unsigned int len) +{ + unsigned int end = start + len; + long ret; + +#if PAGE_SHIFT <= IA32_PAGE_SHIFT + ret = sys_munmap(start, end - start); +#else + if (OFFSET4K(start)) + return -EINVAL; + + end = IA32_PAGE_ALIGN(end); + if (start >= end) + return -EINVAL; + + ret = ia32_unset_pp(&start, &end); + if (ret < 0) + return ret; + + if (start >= end) + return 0; + + down(&ia32_mmap_sem); + { + ret = sys_munmap(start, end - start); + } + up(&ia32_mmap_sem); +#endif + return ret; +} + +#if PAGE_SHIFT > IA32_PAGE_SHIFT + +/* + * When mprotect()ing a partial page, we set the permission to the union of the old + * settings and the new settings. In other words, it's only possible to make access to a + * partial page less restrictive. + */ +static long +mprotect_subpage (unsigned long address, int new_prot) +{ + int old_prot; + struct vm_area_struct *vma; + + if (new_prot == PROT_NONE) + return 0; /* optimize case where nothing changes... */ + vma = find_vma(current->mm, address); + old_prot = get_page_prot(vma, address); + return sys_mprotect(address, PAGE_SIZE, new_prot | old_prot); +} + +#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */ + +asmlinkage long +sys32_mprotect (unsigned int start, unsigned int len, int prot) +{ + unsigned int end = start + len; +#if PAGE_SHIFT > IA32_PAGE_SHIFT + long retval = 0; +#endif + + prot = get_prot32(prot); + +#if PAGE_SHIFT <= IA32_PAGE_SHIFT + return sys_mprotect(start, end - start, prot); +#else + if (OFFSET4K(start)) + return -EINVAL; + + end = IA32_PAGE_ALIGN(end); + if (end < start) + return -EINVAL; + + retval = ia32_compare_pp(&start, &end); + + if (retval < 0) + return retval; + + down(&ia32_mmap_sem); + { + if (offset_in_page(start)) { + /* start address is 4KB aligned but not page aligned. */ + retval = mprotect_subpage(PAGE_START(start), prot); + if (retval < 0) + goto out; + + start = PAGE_ALIGN(start); + if (start >= end) + goto out; /* retval is already zero... */ + } + + if (offset_in_page(end)) { + /* end address is 4KB aligned but not page aligned. */ + retval = mprotect_subpage(PAGE_START(end), prot); + if (retval < 0) + goto out; + + end = PAGE_START(end); + } + retval = sys_mprotect(start, end - start, prot); + } + out: + up(&ia32_mmap_sem); + return retval; +#endif +} + +asmlinkage long +sys32_mremap (unsigned int addr, unsigned int old_len, unsigned int new_len, + unsigned int flags, unsigned int new_addr) +{ + long ret; + +#if PAGE_SHIFT <= IA32_PAGE_SHIFT + ret = sys_mremap(addr, old_len, new_len, flags, new_addr); +#else + unsigned int old_end, new_end; + + if (OFFSET4K(addr)) + return -EINVAL; + + old_len = IA32_PAGE_ALIGN(old_len); + new_len = IA32_PAGE_ALIGN(new_len); + old_end = addr + old_len; + new_end = addr + new_len; + + if (!new_len) + return -EINVAL; + + if ((flags & MREMAP_FIXED) && (OFFSET4K(new_addr))) + return -EINVAL; + + if (old_len >= new_len) { + ret = sys32_munmap(addr + new_len, old_len - new_len); + if (ret && old_len != new_len) + return ret; + ret = addr; + if (!(flags & MREMAP_FIXED) || (new_addr == addr)) + return ret; + old_len = new_len; + } + + addr = PAGE_START(addr); + old_len = PAGE_ALIGN(old_end) - addr; + new_len = PAGE_ALIGN(new_end) - addr; + + down(&ia32_mmap_sem); + { + ret = sys_mremap(addr, old_len, new_len, flags, new_addr); + } + up(&ia32_mmap_sem); + + if ((ret >= 0) && (old_len < new_len)) { + /* mremap expanded successfully */ + ia32_set_pp(old_end, new_end, flags); + } +#endif + return ret; +} + +asmlinkage long +sys32_pipe (int __user *fd) +{ + int retval; + int fds[2]; + + retval = do_pipe(fds); + if (retval) + goto out; + if (copy_to_user(fd, fds, sizeof(fds))) + retval = -EFAULT; + out: + return retval; +} + +static inline long +get_tv32 (struct timeval *o, struct compat_timeval __user *i) +{ + return (!access_ok(VERIFY_READ, i, sizeof(*i)) || + (__get_user(o->tv_sec, &i->tv_sec) | __get_user(o->tv_usec, &i->tv_usec))); +} + +static inline long +put_tv32 (struct compat_timeval __user *o, struct timeval *i) +{ + return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) || + (__put_user(i->tv_sec, &o->tv_sec) | __put_user(i->tv_usec, &o->tv_usec))); +} + +asmlinkage unsigned long +sys32_alarm (unsigned int seconds) +{ + struct itimerval it_new, it_old; + unsigned int oldalarm; + + it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; + it_new.it_value.tv_sec = seconds; + it_new.it_value.tv_usec = 0; + do_setitimer(ITIMER_REAL, &it_new, &it_old); + oldalarm = it_old.it_value.tv_sec; + /* ehhh.. We can't return |