aboutsummaryrefslogtreecommitdiff
path: root/arch/ia64/ia32/sys_ia32.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ia64/ia32/sys_ia32.c
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/ia64/ia32/sys_ia32.c')
-rw-r--r-- arch/ia64/ia32/sys_ia32.c 2747
1 file changed, 2747 insertions, 0 deletions
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
new file mode 100644
index 00000000000..247a21c64ae
--- /dev/null
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -0,0 +1,2747 @@
+/*
+ * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Derived from sys_sparc32.c.
+ *
+ * Copyright (C) 2000 VA Linux Co
+ * Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
+ * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 2000-2003, 2005 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2004 Gordon Jin <gordon.jin@intel.com>
+ *
+ * These routines maintain argument size conversion between 32bit and 64bit
+ * environment.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/sysctl.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/signal.h>
+#include <linux/resource.h>
+#include <linux/times.h>
+#include <linux/utsname.h>
+#include <linux/timex.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/mm.h>
+#include <linux/shm.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/nfs_fs.h>
+#include <linux/quota.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr.h>
+#include <linux/nfsd/syscall.h>
+#include <linux/poll.h>
+#include <linux/eventpoll.h>
+#include <linux/personality.h>
+#include <linux/ptrace.h>
+#include <linux/stat.h>
+#include <linux/ipc.h>
+#include <linux/compat.h>
+#include <linux/vfs.h>
+#include <linux/mman.h>
+
+#include <asm/intrinsics.h>
+#include <asm/semaphore.h>
+#include <asm/types.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+
+#include "ia32priv.h"
+
+#include <net/scm.h>
+#include <net/sock.h>
+
+#define DEBUG 0 /* set to a non-zero value to make DBG() emit printk(KERN_DEBUG ...) */
+
+#if DEBUG
+# define DBG(fmt...) printk(KERN_DEBUG fmt)
+#else
+# define DBG(fmt...) /* expands to nothing when DEBUG is 0 */
+#endif
+
+#define ROUND_UP(x,a) ((__typeof__(x))(((unsigned long)(x) + ((a) - 1)) & ~((a) - 1))) /* round x up to a multiple of a; the mask form is only exact when a is a power of two */
+
+#define OFFSET4K(a) ((a) & 0xfff) /* low 12 bits of a, i.e. its offset within a 4KB unit */
+#define PAGE_START(addr) ((addr) & PAGE_MASK) /* addr masked with PAGE_MASK */
+#define MINSIGSTKSZ_IA32 2048
+
+#define high2lowuid(uid) ((uid) > 65535 ? 65534 : (uid)) /* ids above 65535 become 65534; note uid is evaluated twice */
+#define high2lowgid(gid) ((gid) > 65535 ? 65534 : (gid)) /* ids above 65535 become 65534; note gid is evaluated twice */
+
+/*
+ * Anything that modifies or inspects ia32 user virtual memory must hold this semaphore
+ * while doing so.
+ *
+ * In this file it is taken with down()/up() around emulate_mmap(), sys_munmap(),
+ * sys_mprotect()/mprotect_subpage() and sys_mremap() in the configurations where
+ * PAGE_SHIFT > IA32_PAGE_SHIFT.
+ */
+/* XXX make per-mm: */
+static DECLARE_MUTEX(ia32_mmap_sem);
+
+/*
+ * execve() entry point for ia32 tasks.
+ *
+ * The image being exec'ed may be a native 64-bit program, so the ia32
+ * specific map base, task size, I/O base and TSSD kernel register are
+ * switched to their native values before compat_do_execve() runs.  If the
+ * exec fails, the previous ia32 settings are put back.
+ *
+ * Returns the getname() error if @name cannot be fetched, otherwise the
+ * result of compat_do_execve().
+ */
+asmlinkage long
+sys32_execve (char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp,
+	      struct pt_regs *regs)
+{
+	unsigned long saved_map_base, saved_task_size, saved_tssd;
+	char *path;
+	long rc;
+
+	path = getname(name);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	/* remember the ia32 view so it can be reinstated on failure */
+	saved_map_base = current->thread.map_base;
+	saved_task_size = current->thread.task_size;
+	saved_tssd = ia64_get_kr(IA64_KR_TSSD);
+
+	/* the new image may be 64-bit: reset map base, task-size, and io-base */
+	current->thread.map_base = DEFAULT_MAP_BASE;
+	current->thread.task_size = DEFAULT_TASK_SIZE;
+	ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob);
+	ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
+
+	rc = compat_do_execve(path, argv, envp, regs);
+	putname(path);
+	if (rc >= 0)
+		return rc;
+
+	/* execve failed: we are still the old ia32 task, switch back */
+	ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE);
+	ia64_set_kr(IA64_KR_TSSD, saved_tssd);
+	current->thread.map_base = saved_map_base;
+	current->thread.task_size = saved_task_size;
+	return rc;
+}
+
+/*
+ * Fill the 32-bit stat buffer @ubuf in user space from @stat.
+ *
+ * Returns -EOVERFLOW when the file size exceeds MAX_NON_LFS or when either
+ * device number fails old_valid_dev(), -EFAULT when the buffer cannot be
+ * cleared, and otherwise the OR of all the individual __put_user() results
+ * (0 when every store succeeded).
+ */
+int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
+{
+	int rc;
+
+	if ((u64) stat->size > MAX_NON_LFS)
+		return -EOVERFLOW;
+	if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
+		return -EOVERFLOW;
+
+	/* zero the whole structure first so that padding never leaks */
+	if (clear_user(ubuf, sizeof(*ubuf)) != 0)
+		return -EFAULT;
+
+	/* identity and ownership */
+	rc  = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev);
+	rc |= __put_user(stat->ino, &ubuf->st_ino);
+	rc |= __put_user(stat->mode, &ubuf->st_mode);
+	rc |= __put_user(stat->nlink, &ubuf->st_nlink);
+	rc |= __put_user(high2lowuid(stat->uid), &ubuf->st_uid);
+	rc |= __put_user(high2lowgid(stat->gid), &ubuf->st_gid);
+	rc |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev);
+	rc |= __put_user(stat->size, &ubuf->st_size);
+
+	/* timestamps */
+	rc |= __put_user(stat->atime.tv_sec, &ubuf->st_atime);
+	rc |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec);
+	rc |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime);
+	rc |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
+	rc |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
+	rc |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
+
+	/* block accounting */
+	rc |= __put_user(stat->blksize, &ubuf->st_blksize);
+	rc |= __put_user(stat->blocks, &ubuf->st_blocks);
+
+	return rc;
+}
+
+#if PAGE_SHIFT > IA32_PAGE_SHIFT
+
+
+/*
+ * Translate the VM_READ/VM_WRITE/VM_EXEC flags of @vma into PROT_* bits.
+ * Returns 0 when @vma is NULL or starts above @addr.
+ */
+static int
+get_page_prot (struct vm_area_struct *vma, unsigned long addr)
+{
+	if (vma == NULL || addr < vma->vm_start)
+		return 0;
+
+	return ((vma->vm_flags & VM_READ) ? PROT_READ : 0)
+	     | ((vma->vm_flags & VM_WRITE) ? PROT_WRITE : 0)
+	     | ((vma->vm_flags & VM_EXEC) ? PROT_EXEC : 0);
+}
+
+/*
+ * Map a subpage by creating an anonymous page that contains the union of the old page and
+ * the subpage.
+ *
+ * [@start, @end) is the sub-range being mapped; everything is done on the single
+ * PAGE_SIZE page at PAGE_START(start). If that page already has access bits, its
+ * contents are saved in a kernel page, the page is replaced by an anonymous writable
+ * MAP_FIXED mapping, and the bytes before @start and from @end onward are copied back.
+ * For a file mapping the data for [start, end) is then read in through the file's
+ * ->read method instead of being mapped. If @prot lacks PROT_WRITE the page is finally
+ * mprotect()ed to the union of @prot and the old protection.
+ *
+ * Returns a negative errno (stored in an unsigned long) on failure; otherwise the value
+ * left in ret by do_mmap() or sys_mprotect() (0 on the anonymous fast path when @prot
+ * includes PROT_WRITE). The results of copy_from_user()/copy_to_user() are not checked.
+ */
+static unsigned long
+mmap_subpage (struct file *file, unsigned long start, unsigned long end, int prot, int flags,
+ loff_t off)
+{
+ void *page = NULL;
+ struct inode *inode;
+ unsigned long ret = 0;
+ struct vm_area_struct *vma = find_vma(current->mm, start);
+ int old_prot = get_page_prot(vma, start); /* 0 when start is not inside a vma or the vma has no R/W/X bits */
+
+ DBG("mmap_subpage(file=%p,start=0x%lx,end=0x%lx,prot=%x,flags=%x,off=0x%llx)\n",
+ file, start, end, prot, flags, off);
+
+
+ /* Optimize the case where the old mmap and the new mmap are both anonymous */
+ if ((old_prot & PROT_WRITE) && (flags & MAP_ANONYMOUS) && !vma->vm_file) {
+ if (clear_user((void __user *) start, end - start)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ goto skip_mmap; /* the existing writable anonymous page was zeroed in place */
+ }
+
+ page = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ if (old_prot)
+ copy_from_user(page, (void __user *) PAGE_START(start), PAGE_SIZE); /* save the old page; return value not checked */
+
+ down_write(&current->mm->mmap_sem);
+ {
+ ret = do_mmap(NULL, PAGE_START(start), PAGE_SIZE, prot | PROT_WRITE,
+ flags | MAP_FIXED | MAP_ANONYMOUS, 0); /* PROT_WRITE is forced so the page can be filled below */
+ }
+ up_write(&current->mm->mmap_sem);
+
+ if (IS_ERR((void *) ret))
+ goto out;
+
+ if (old_prot) {
+ /* copy back the old page contents. */
+ if (offset_in_page(start))
+ copy_to_user((void __user *) PAGE_START(start), page,
+ offset_in_page(start)); /* bytes in front of the new sub-range */
+ if (offset_in_page(end))
+ copy_to_user((void __user *) end, page + offset_in_page(end),
+ PAGE_SIZE - offset_in_page(end)); /* bytes behind the new sub-range */
+ }
+
+ if (!(flags & MAP_ANONYMOUS)) {
+ /* read the file contents */
+ inode = file->f_dentry->d_inode;
+ if (!inode->i_fop || !file->f_op->read
+ || ((*file->f_op->read)(file, (char __user *) start, end - start, &off) < 0))
+ {
+ ret = -EINVAL; /* the anonymous page mapped above is left in place */
+ goto out;
+ }
+ }
+
+ skip_mmap:
+ if (!(prot & PROT_WRITE)) /* write access was only needed for filling the page */
+ ret = sys_mprotect(PAGE_START(start), PAGE_SIZE, prot | old_prot);
+ out:
+ if (page)
+ free_page((unsigned long) page);
+ return ret;
+}
+
+/* SLAB cache from which every struct partial_page is allocated */
+kmem_cache_t *partial_page_cachep;
+
+/*
+ * Allocate an empty partial_page_list: no list head, no lookup hint, an
+ * empty rb-tree and a reference count of 1.
+ * Returns NULL when kmalloc() fails.
+ */
+struct partial_page_list*
+ia32_init_pp_list(void)
+{
+	struct partial_page_list *ppl = kmalloc(sizeof(*ppl), GFP_KERNEL);
+
+	if (ppl) {
+		ppl->pp_head = NULL;
+		ppl->pp_hint = NULL;
+		ppl->ppl_rb = RB_ROOT;
+		atomic_set(&ppl->pp_count, 1);
+	}
+	return ppl;
+}
+
+/*
+ * Look up the partial page whose base is @start in @ppl.
+ *
+ * The cached hint is tried first, then the rb-tree is walked.  On a hit the
+ * partial page is returned (and becomes the new hint); @pprev, @rb_link and
+ * @rb_parent are NOT written in that case.
+ *
+ * On a miss NULL is returned and the three output arguments describe the
+ * insertion point for a later __ia32_insert_pp(): @pprev is the entry with
+ * the largest base below @start (or NULL), @rb_link the empty child slot
+ * and @rb_parent the node owning that slot.
+ */
+static struct partial_page *
+__ia32_find_pp(struct partial_page_list *ppl, unsigned int start,
+	struct partial_page **pprev, struct rb_node ***rb_link,
+	struct rb_node **rb_parent)
+{
+	struct rb_node **link = &ppl->ppl_rb.rb_node;
+	struct rb_node *parent = NULL;
+	struct rb_node *pred = NULL;
+	struct partial_page *cur = ppl->pp_hint;
+
+	/* fast path: same page as the previous lookup/insert */
+	if (cur && cur->base == start)
+		return cur;
+
+	while (*link) {
+		parent = *link;
+		cur = rb_entry(parent, struct partial_page, pp_rb);
+
+		if (cur->base == start) {
+			ppl->pp_hint = cur;
+			return cur;
+		}
+
+		if (cur->base < start) {
+			/* going right: this node precedes @start */
+			pred = parent;
+			link = &parent->rb_right;
+		} else
+			link = &parent->rb_left;
+	}
+
+	*rb_link = link;
+	*rb_parent = parent;
+	*pprev = pred ? rb_entry(pred, struct partial_page, pp_rb) : NULL;
+	return NULL;
+}
+
+/*
+ * Insert @pp into @ppl at the position found by __ia32_find_pp():
+ * @prev is the list predecessor (NULL if @pp becomes the list head) and
+ * @rb_link/@rb_parent give the rb-tree slot.  @pp becomes the lookup hint.
+ */
+static void
+__ia32_insert_pp(struct partial_page_list *ppl, struct partial_page *pp,
+	 struct partial_page *prev, struct rb_node **rb_link,
+	struct rb_node *rb_parent)
+{
+	/* singly linked list */
+	if (!prev) {
+		/* new head; its successor is the tree parent, if there is one */
+		ppl->pp_head = pp;
+		pp->next = rb_parent ?
+			rb_entry(rb_parent, struct partial_page, pp_rb) : NULL;
+	} else {
+		pp->next = prev->next;
+		prev->next = pp;
+	}
+
+	/* rb-tree */
+	rb_link_node(&pp->pp_rb, rb_parent, rb_link);
+	rb_insert_color(&pp->pp_rb, &ppl->ppl_rb);
+
+	ppl->pp_hint = pp;
+}
+
+/*
+ * Unlink @pp from the list and rb-tree of @ppl and free it.
+ * @prev is the list predecessor of @pp, or NULL when @pp is the head.
+ * If @pp was the lookup hint, the hint moves to @prev, or to the successor
+ * of @pp when there is no predecessor.
+ */
+static void
+__ia32_delete_pp(struct partial_page_list *ppl, struct partial_page *pp,
+	struct partial_page *prev)
+{
+	struct partial_page *succ = pp->next;
+
+	if (prev)
+		prev->next = succ;
+	else
+		ppl->pp_head = succ;
+
+	if (ppl->pp_hint == pp)
+		ppl->pp_hint = prev ? prev : succ;
+
+	rb_erase(&pp->pp_rb, &ppl->ppl_rb);
+	kmem_cache_free(partial_page_cachep, pp);
+}
+
+/*
+ * Return the partial page that precedes @pp in rb-tree order, or NULL if
+ * @pp is the first one.
+ */
+static struct partial_page *
+__pp_prev(struct partial_page *pp)
+{
+	struct rb_node *node = rb_prev(&pp->pp_rb);
+
+	return node ? rb_entry(node, struct partial_page, pp_rb) : NULL;
+}
+
+/*
+ * Remove every partial page of the current task whose base lies in
+ * [@start, @end).  @start and @end are page aligned; an empty range is a
+ * no-op.
+ */
+static void
+__ia32_delete_pp_range(unsigned int start, unsigned int end)
+{
+	struct partial_page_list *ppl;
+	struct partial_page *victim, *pred;
+	struct rb_node **link, *parent;
+
+	if (start >= end)
+		return;
+
+	ppl = current->thread.ppl;
+	victim = __ia32_find_pp(ppl, start, &pred, &link, &parent);
+	if (victim)
+		pred = __pp_prev(victim);
+	else
+		/* no exact hit: begin with the first entry above @start */
+		victim = pred ? pred->next : ppl->pp_head;
+
+	/* the predecessor stays the same while consecutive entries go away */
+	while (victim && victim->base < end) {
+		struct partial_page *following = victim->next;
+
+		__ia32_delete_pp(ppl, victim, pred);
+		victim = following;
+	}
+}
+
+/*
+ * Set the range between @start and @end in bitmap.
+ * @start and @end should be IA32 page aligned and in the same IA64 page.
+ *
+ * The partial page is looked up in current->thread.ppl by the native page base of
+ * @start. If it exists, the bits are set and the entry is deleted once all
+ * PAGE_SIZE/IA32_PAGE_SIZE slots are set. If it does not exist, a new entry is
+ * allocated and inserted, except for MAP_FIXED requests whose page base is already
+ * covered by a vma.
+ *
+ * Returns 0 on success (including the cases where nothing is recorded) and -ENOMEM
+ * when the partial_page allocation fails.
+ */
+static int
+__ia32_set_pp(unsigned int start, unsigned int end, int flags)
+{
+ struct partial_page *pp, *prev;
+ struct rb_node ** rb_link, *rb_parent;
+ unsigned int pstart, start_bit, end_bit, i;
+
+ pstart = PAGE_START(start);
+ start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
+ end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
+ if (end_bit == 0) /* end sits on a PAGE_SIZE boundary: the range runs through the last slot */
+ end_bit = PAGE_SIZE / IA32_PAGE_SIZE;
+ pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
+ &rb_link, &rb_parent);
+ if (pp) {
+ for (i = start_bit; i < end_bit; i++)
+ set_bit(i, &pp->bitmap);
+ /*
+ * Check: if this partial page has been set to a full page,
+ * then delete it.
+ */
+ if (find_first_zero_bit(&pp->bitmap, sizeof(pp->bitmap)*8) >=
+ PAGE_SIZE/IA32_PAGE_SIZE) {
+ __ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp));
+ }
+ return 0;
+ }
+
+ /*
+ * MAP_FIXED may lead to overlapping mmap.
+ * In this case, the requested mmap area may already mmaped as a full
+ * page. So check vma before adding a new partial page.
+ */
+ if (flags & MAP_FIXED) {
+ struct vm_area_struct *vma = find_vma(current->mm, pstart);
+ if (vma && vma->vm_start <= pstart)
+ return 0; /* pstart is inside an existing vma: no partial page is recorded */
+ }
+
+ /* new a partial_page */
+ pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
+ if (!pp)
+ return -ENOMEM;
+ pp->base = pstart;
+ pp->bitmap = 0;
+ for (i=start_bit; i<end_bit; i++)
+ set_bit(i, &(pp->bitmap));
+ pp->next = NULL; /* overwritten by __ia32_insert_pp() */
+ __ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent);
+ return 0;
+}
+
+/*
+ * Record [@start, @end) in the partial-page bitmaps of the current task.
+ *
+ * @start and @end are IA32 page aligned but may lie in different IA64
+ * pages, so the range is split: only the unaligned head and the unaligned
+ * tail are handed to __ia32_set_pp(), each confined to one IA64 page.
+ * Runs with current->mm->mmap_sem held for writing.  Errors from
+ * __ia32_set_pp() are not reported.
+ */
+static void
+ia32_set_pp(unsigned int start, unsigned int end, int flags)
+{
+	down_write(&current->mm->mmap_sem);
+
+	/*
+	 * MAP_FIXED may lead to overlapping mmap.  The complete IA64 pages
+	 * inside the new range replace whatever was there, so old partial
+	 * pages in that span are dropped.
+	 */
+	if (flags & MAP_FIXED)
+		__ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end));
+
+	if (end >= PAGE_ALIGN(start)) {
+		/* the range touches or crosses an IA64 page boundary */
+		if (offset_in_page(start))
+			__ia32_set_pp(start, PAGE_ALIGN(start), flags);
+		if (offset_in_page(end))
+			__ia32_set_pp(PAGE_START(end), end, flags);
+	} else {
+		/* the whole range sits inside one IA64 page */
+		__ia32_set_pp(start, end, flags);
+	}
+
+	up_write(&current->mm->mmap_sem);
+}
+
+/*
+ * Unset the range between @start and @end in bitmap.
+ * @start and @end should be IA32 page aligned and in the same IA64 page.
+ * After doing that, if the bitmap is 0, then free the page and return 1,
+ * else return 0;
+ * If not find the partial page in the list, then
+ * If the vma exists, then the full page is set to a partial page;
+ * Else return -ENOMEM.
+ *
+ * When a full page is turned into a partial page, the new entry has every slot set
+ * except [start_bit, end_bit) and 0 is returned. -ENOMEM is also returned when the
+ * partial_page allocation fails.
+ */
+static int
+__ia32_unset_pp(unsigned int start, unsigned int end)
+{
+ struct partial_page *pp, *prev;
+ struct rb_node ** rb_link, *rb_parent;
+ unsigned int pstart, start_bit, end_bit, i;
+ struct vm_area_struct *vma;
+
+ pstart = PAGE_START(start);
+ start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
+ end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
+ if (end_bit == 0) /* end sits on a PAGE_SIZE boundary: the range runs through the last slot */
+ end_bit = PAGE_SIZE / IA32_PAGE_SIZE;
+
+ pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
+ &rb_link, &rb_parent);
+ if (pp) {
+ for (i = start_bit; i < end_bit; i++)
+ clear_bit(i, &pp->bitmap);
+ if (pp->bitmap == 0) {
+ __ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp));
+ return 1; /* nothing left in this native page */
+ }
+ return 0;
+ }
+
+ vma = find_vma(current->mm, pstart);
+ if (!vma || vma->vm_start > pstart) {
+ return -ENOMEM; /* no partial page and no vma covering pstart */
+ }
+
+ /* new a partial_page */
+ pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
+ if (!pp)
+ return -ENOMEM;
+ pp->base = pstart;
+ pp->bitmap = 0;
+ for (i = 0; i < start_bit; i++) /* slots in front of the removed range stay in use */
+ set_bit(i, &(pp->bitmap));
+ for (i = end_bit; i < PAGE_SIZE / IA32_PAGE_SIZE; i++) /* slots behind the removed range stay in use */
+ set_bit(i, &(pp->bitmap));
+ pp->next = NULL; /* overwritten by __ia32_insert_pp() */
+ __ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent);
+ return 0;
+}
+
+/*
+ * Delete pp between PAGE_ALIGN(start) and PAGE_START(end) by calling
+ * __ia32_delete_pp_range(). Unset possible partial pages by calling
+ * __ia32_unset_pp().
+ * The returned value see __ia32_unset_pp().
+ *
+ * *startp and *endp are rewritten to the native-page range the caller should really
+ * unmap: a piece whose partial page became empty (return 1) is widened outward to the
+ * enclosing native page boundary, a piece whose page is still in use (return 0) is
+ * shrunk away. On a negative return the pointers may already have been partly updated.
+ * Runs with current->mm->mmap_sem held for writing.
+ */
+static int
+ia32_unset_pp(unsigned int *startp, unsigned int *endp)
+{
+ unsigned int start = *startp, end = *endp;
+ int ret = 0;
+
+ down_write(&current->mm->mmap_sem);
+
+ __ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end));
+
+ if (end < PAGE_ALIGN(start)) { /* whole range lies inside one native page */
+ ret = __ia32_unset_pp(start, end);
+ if (ret == 1) {
+ *startp = PAGE_START(start);
+ *endp = PAGE_ALIGN(end);
+ }
+ if (ret == 0) {
+ /* to shortcut sys_munmap() in sys32_munmap() */
+ *startp = PAGE_START(start);
+ *endp = PAGE_START(end); /* both ends in the same page, so start == end afterwards */
+ }
+ } else {
+ if (offset_in_page(start)) { /* unaligned head */
+ ret = __ia32_unset_pp(start, PAGE_ALIGN(start));
+ if (ret == 1)
+ *startp = PAGE_START(start);
+ if (ret == 0)
+ *startp = PAGE_ALIGN(start);
+ if (ret < 0)
+ goto out;
+ }
+ if (offset_in_page(end)) { /* unaligned tail */
+ ret = __ia32_unset_pp(PAGE_START(end), end);
+ if (ret == 1)
+ *endp = PAGE_ALIGN(end);
+ if (ret == 0)
+ *endp = PAGE_START(end);
+ }
+ }
+
+ out:
+ up_write(&current->mm->mmap_sem);
+ return ret;
+}
+
+/*
+ * Compare the range between @start and @end with bitmap in partial page.
+ * @start and @end should be IA32 page aligned and in the same IA64 page.
+ *
+ * Only the first run of set bits in the bitmap is considered.
+ *
+ * Returns:
+ *   1        no partial page is recorded for this IA64 page, or the range
+ *            covers only part of what is mapped in it (a strict sub-range
+ *            of the first run, or the first run exactly while further bits
+ *            are set behind it);
+ *   0        the range is exactly the first run and no other bit is set;
+ *   -ENOMEM  the range starts before or ends after the first run.
+ */
+static int
+__ia32_compare_pp(unsigned int start, unsigned int end)
+{
+	struct partial_page *pp, *prev;
+	struct rb_node ** rb_link, *rb_parent;
+	unsigned int pstart, start_bit, end_bit, size;
+	unsigned int first_bit, next_zero_bit;	/* the first range in bitmap */
+
+	pstart = PAGE_START(start);
+
+	pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
+					&rb_link, &rb_parent);
+	if (!pp)
+		return 1;
+
+	start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
+	end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
+	/*
+	 * An @end on an IA64 page boundary means "through the last 4KB slot",
+	 * exactly as in __ia32_set_pp() and __ia32_unset_pp().  Without this
+	 * end_bit would be 0 and neither the "exceeds" nor the "exact fit"
+	 * test below could ever trigger for such a range.
+	 */
+	if (end_bit == 0)
+		end_bit = PAGE_SIZE / IA32_PAGE_SIZE;
+	size = sizeof(pp->bitmap) * 8;
+	first_bit = find_first_bit(&pp->bitmap, size);
+	next_zero_bit = find_next_zero_bit(&pp->bitmap, size, first_bit);
+	if ((start_bit < first_bit) || (end_bit > next_zero_bit)) {
+		/* exceeds the first range in bitmap */
+		return -ENOMEM;
+	} else if ((start_bit == first_bit) && (end_bit == next_zero_bit)) {
+		first_bit = find_next_bit(&pp->bitmap, size, next_zero_bit);
+		if ((next_zero_bit < first_bit) && (first_bit < size))
+			return 1;	/* has next range */
+		else
+			return 0;	/* no next range */
+	} else
+		return 1;
+}
+
+/*
+ * @start and @end should be IA32 page aligned, but don't need to be in the
+ * same IA64 page. Split @start and @end to make sure they're in the same IA64
+ * page, then call __ia32_compare_pp().
+ *
+ * Take this as example: the range is the 1st and 2nd 4K page.
+ * Return 0 if they fit bitmap exactly, i.e. bitmap = 00000011;
+ * Return 1 if the range doesn't cover whole bitmap, e.g. bitmap = 00001111;
+ * Return -ENOMEM if the range exceeds the bitmap, e.g. bitmap = 00000001 or
+ * bitmap = 00000101.
+ *
+ * For every piece that fits exactly (0), *startp / *endp is widened to the enclosing
+ * native page boundary. When both a head and a tail piece are checked, the value
+ * returned is that of the tail piece unless the head piece failed.
+ * Runs with current->mm->mmap_sem held for writing.
+ */
+static int
+ia32_compare_pp(unsigned int *startp, unsigned int *endp)
+{
+ unsigned int start = *startp, end = *endp;
+ int retval = 0;
+
+ down_write(&current->mm->mmap_sem);
+
+ if (end < PAGE_ALIGN(start)) { /* whole range lies inside one native page */
+ retval = __ia32_compare_pp(start, end);
+ if (retval == 0) {
+ *startp = PAGE_START(start);
+ *endp = PAGE_ALIGN(end);
+ }
+ } else {
+ if (offset_in_page(start)) { /* unaligned head */
+ retval = __ia32_compare_pp(start,
+ PAGE_ALIGN(start));
+ if (retval == 0)
+ *startp = PAGE_START(start);
+ if (retval < 0)
+ goto out;
+ }
+ if (offset_in_page(end)) { /* unaligned tail */
+ retval = __ia32_compare_pp(PAGE_START(end), end);
+ if (retval == 0)
+ *endp = PAGE_ALIGN(end);
+ }
+ }
+
+ out:
+ up_write(&current->mm->mmap_sem);
+ return retval;
+}
+
+/*
+ * Free every partial page linked from @ppl, then @ppl itself.
+ */
+static void
+__ia32_drop_pp_list(struct partial_page_list *ppl)
+{
+	struct partial_page *pp, *next;
+
+	for (pp = ppl->pp_head; pp; pp = next) {
+		/* fetch the successor before the node is released */
+		next = pp->next;
+		kmem_cache_free(partial_page_cachep, pp);
+	}
+
+	kfree(ppl);
+}
+
+/*
+ * Drop @task's reference on its partial page list; the list is freed when
+ * the reference count reaches zero.  A task without a list is ignored.
+ */
+void
+ia32_drop_partial_page_list(struct task_struct *task)
+{
+	struct partial_page_list *ppl = task->thread.ppl;
+
+	if (!ppl)
+		return;
+
+	if (atomic_dec_and_test(&ppl->pp_count))
+		__ia32_drop_pp_list(ppl);
+}
+
+/*
+ * Duplicate the partial pages of current->thread.ppl into @ppl, which has
+ * already been allocated and is reset to empty here.
+ *
+ * The source list is walked in list order and each copy is linked as the
+ * right child of the previous copy before rebalancing.
+ *
+ * Returns 0 on success or -ENOMEM; on failure the entries copied so far
+ * stay on @ppl.
+ */
+static int
+__ia32_copy_pp_list(struct partial_page_list *ppl)
+{
+	struct partial_page *src, *copy;
+	struct partial_page *last = NULL;
+	struct rb_node **link;
+	struct rb_node *parent = NULL;
+
+	ppl->pp_head = NULL;
+	ppl->pp_hint = NULL;
+	ppl->ppl_rb = RB_ROOT;
+	link = &ppl->ppl_rb.rb_node;
+
+	src = current->thread.ppl->pp_head;
+	while (src) {
+		copy = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
+		if (!copy)
+			return -ENOMEM;
+
+		*copy = *src;
+		__ia32_insert_pp(ppl, copy, last, link, parent);
+
+		/* the next entry goes to the right of the one just added */
+		last = copy;
+		parent = &copy->pp_rb;
+		link = &copy->pp_rb.rb_right;
+
+		src = src->next;
+	}
+	return 0;
+}
+
+/*
+ * Give the new task @p a partial page list.
+ *
+ * With CLONE_VM the list of the current task is shared and its reference
+ * count is raised.  Otherwise a fresh list is allocated and filled with
+ * copies of the current task's partial pages while current->mm->mmap_sem
+ * is held for writing.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+int
+ia32_copy_partial_page_list(struct task_struct *p, unsigned long clone_flags)
+{
+	int retval;
+
+	if (clone_flags & CLONE_VM) {
+		atomic_inc(&current->thread.ppl->pp_count);
+		p->thread.ppl = current->thread.ppl;
+		return 0;
+	}
+
+	p->thread.ppl = ia32_init_pp_list();
+	if (!p->thread.ppl)
+		return -ENOMEM;
+
+	down_write(&current->mm->mmap_sem);
+	retval = __ia32_copy_pp_list(p->thread.ppl);
+	up_write(&current->mm->mmap_sem);
+
+	return retval;
+}
+
+static unsigned long
+emulate_mmap (struct file *file, unsigned long start, unsigned long len, int prot, int flags,
+ loff_t off)
+{ /*
+ * Emulate an mmap of [start, start+len) for the configuration where the native page
+ * is larger than the IA32 page (this function is compiled only then).
+ *
+ * MAP_FIXED: the partial-page bitmaps are updated first; an unaligned head and/or
+ * tail is mapped through mmap_subpage() and only the whole native pages in between
+ * reach do_mmap().
+ * Otherwise: an address is requested from arch_get_unmapped_area() and the
+ * partial-page bitmaps are updated after the mapping succeeded.
+ *
+ * A file mapping whose page-rounded file offset is not page aligned ("not
+ * congruent") is emulated with an anonymous writable mapping that is filled through
+ * the file's ->read method.
+ *
+ * Returns the (possibly relocated) start address, or a negative errno value.
+ */
+ unsigned long tmp, end, pend, pstart, ret, is_congruent, fudge = 0;
+ struct inode *inode;
+ loff_t poff;
+
+ end = start + len;
+ pstart = PAGE_START(start);
+ pend = PAGE_ALIGN(end);
+
+ if (flags & MAP_FIXED) {
+ ia32_set_pp((unsigned int)start, (unsigned int)end, flags); /* bitmaps are updated before any mapping is attempted */
+ if (start > pstart) {
+ if (flags & MAP_SHARED)
+ printk(KERN_INFO
+ "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n",
+ current->comm, current->pid, start);
+ ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags,
+ off);
+ if (IS_ERR((void *) ret))
+ return ret;
+ pstart += PAGE_SIZE; /* the head page has been dealt with */
+ if (pstart >= pend)
+ goto out; /* done */
+ }
+ if (end < pend) {
+ if (flags & MAP_SHARED)
+ printk(KERN_INFO
+ "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n",
+ current->comm, current->pid, end);
+ ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags,
+ (off + len) - offset_in_page(end));
+ if (IS_ERR((void *) ret))
+ return ret;
+ pend -= PAGE_SIZE; /* the tail page has been dealt with */
+ if (pstart >= pend)
+ goto out; /* done */
+ }
+ } else {
+ /*
+ * If a start address was specified, use it if the entire rounded out area
+ * is available.
+ */
+ if (start && !pstart)
+ fudge = 1; /* handle case of mapping to range (0,PAGE_SIZE) */
+ tmp = arch_get_unmapped_area(file, pstart - fudge, pend - pstart, 0, flags); /* NOTE(review): tmp is used as an address without an error check - confirm how failures are reported */
+ if (tmp != pstart) {
+ pstart = tmp;
+ start = pstart + offset_in_page(off); /* make start congruent with off */
+ end = start + len;
+ pend = PAGE_ALIGN(end);
+ }
+ }
+
+ poff = off + (pstart - start); /* note: (pstart - start) may be negative */
+ is_congruent = (flags & MAP_ANONYMOUS) || (offset_in_page(poff) == 0);
+
+ if ((flags & MAP_SHARED) && !is_congruent)
+ printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap "
+ "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off);
+
+ DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend,
+ is_congruent ? "congruent" : "not congruent", poff);
+
+ down_write(&current->mm->mmap_sem);
+ {
+ if (!(flags & MAP_ANONYMOUS) && is_congruent)
+ ret = do_mmap(file, pstart, pend - pstart, prot, flags | MAP_FIXED, poff);
+ else
+ ret = do_mmap(NULL, pstart, pend - pstart,
+ prot | ((flags & MAP_ANONYMOUS) ? 0 : PROT_WRITE),
+ flags | MAP_FIXED | MAP_ANONYMOUS, 0); /* PROT_WRITE is forced for file data that must be read in below */
+ }
+ up_write(&current->mm->mmap_sem);
+
+ if (IS_ERR((void *) ret))
+ return ret;
+
+ if (!is_congruent) {
+ /* read the file contents */
+ inode = file->f_dentry->d_inode;
+ if (!inode->i_fop || !file->f_op->read
+ || ((*file->f_op->read)(file, (char __user *) pstart, pend - pstart, &poff)
+ < 0))
+ {
+ sys_munmap(pstart, pend - pstart); /* undo the anonymous mapping */
+ return -EINVAL;
+ }
+ if (!(prot & PROT_WRITE) && sys_mprotect(pstart, pend - pstart, prot) < 0)
+ return -EINVAL; /* NOTE(review): unlike the read failure above, the mapping is not removed here */
+ }
+
+ if (!(flags & MAP_FIXED))
+ ia32_set_pp((unsigned int)start, (unsigned int)end, flags);
+out:
+ return start;
+}
+
+#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */
+
+/*
+ * Widen an ia32 protection mask to what x86 hardware effectively grants:
+ *  - PROT_WRITE implies PROT_READ, which in turn implies PROT_EXEC;
+ *  - PROT_READ and PROT_EXEC cannot be told apart, so either implies both.
+ * A mask with none of these three bits set is returned unchanged.
+ */
+static inline unsigned int
+get_prot32 (unsigned int prot)
+{
+	if (prot & PROT_WRITE)
+		return prot | PROT_READ | PROT_WRITE | PROT_EXEC;
+
+	if (prot & (PROT_READ | PROT_EXEC))
+		return prot | PROT_READ | PROT_EXEC;
+
+	return prot;
+}
+
+/*
+ * Common back end of the ia32 mmap system calls.
+ *
+ * Validates the request, rounds @len up to IA32 pages, widens @prot with
+ * get_prot32() and then maps either through emulate_mmap() (native page
+ * larger than the IA32 page, serialized by ia32_mmap_sem) or directly
+ * through do_mmap().
+ *
+ * Returns the mapped address, @addr itself for a zero length, or a negative
+ * errno value: -ENODEV if @file cannot be mmap'ed, -EINVAL for an offset
+ * that is not 4KB aligned, and -ENOMEM (MAP_FIXED) or -EINVAL (otherwise)
+ * if the range does not fit below IA32_PAGE_OFFSET.
+ */
+unsigned long
+ia32_do_mmap (struct file *file, unsigned long addr, unsigned long len, int prot, int flags,
+	      loff_t offset)
+{
+	DBG("ia32_do_mmap(file=%p,addr=0x%lx,len=0x%lx,prot=%x,flags=%x,offset=0x%llx)\n",
+	    file, addr, len, prot, flags, offset);
+
+	if (file) {
+		if (!file->f_op || !file->f_op->mmap)
+			return -ENODEV;
+	}
+
+	len = IA32_PAGE_ALIGN(len);
+	if (!len)
+		return addr;
+
+	if (len > IA32_PAGE_OFFSET || addr > IA32_PAGE_OFFSET - len)
+		return (flags & MAP_FIXED) ? -ENOMEM : -EINVAL;
+
+	if (OFFSET4K(offset))
+		return -EINVAL;
+
+	prot = get_prot32(prot);
+
+#if PAGE_SHIFT > IA32_PAGE_SHIFT
+	down(&ia32_mmap_sem);
+	addr = emulate_mmap(file, addr, len, prot, flags, offset);
+	up(&ia32_mmap_sem);
+#else
+	down_write(&current->mm->mmap_sem);
+	addr = do_mmap(file, addr, len, prot, flags, offset);
+	up_write(&current->mm->mmap_sem);
+#endif
+	DBG("ia32_do_mmap: returning 0x%lx\n", addr);
+	return addr;
+}
+
+/*
+ * Linux/i386 used to be unable to handle more than 4 system call parameters, so these
+ * system calls used a memory block for parameter passing.
+ *
+ * This is the block read from user space by sys32_mmap(); every field is a 32-bit
+ * unsigned value.
+ */
+
+struct mmap_arg_struct {
+ unsigned int addr;
+ unsigned int len;
+ unsigned int prot;
+ unsigned int flags;
+ unsigned int fd;
+ unsigned int offset; /* byte offset; sys32_mmap() rejects values that are not 4KB aligned */
+};
+
+/*
+ * Old-style ia32 mmap(): all arguments arrive in a struct mmap_arg_struct
+ * in user memory.
+ *
+ * Returns the mapped address or a negative errno: -EFAULT if @arg cannot
+ * be read, -EINVAL if the offset is not 4KB aligned, -EBADF for a bad
+ * descriptor on a file mapping, or whatever ia32_do_mmap() reports.
+ */
+asmlinkage long
+sys32_mmap (struct mmap_arg_struct __user *arg)
+{
+	struct mmap_arg_struct args;
+	struct file *filp = NULL;
+	unsigned long result;
+	int flags;
+
+	if (copy_from_user(&args, arg, sizeof(args)))
+		return -EFAULT;
+
+	if (OFFSET4K(args.offset))
+		return -EINVAL;
+
+	/* these two flags are never honoured from user space */
+	flags = args.flags & ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+	if (!(flags & MAP_ANONYMOUS)) {
+		filp = fget(args.fd);
+		if (!filp)
+			return -EBADF;
+	}
+
+	result = ia32_do_mmap(filp, args.addr, args.len, args.prot, flags, args.offset);
+
+	if (filp)
+		fput(filp);
+	return result;
+}
+
+/*
+ * ia32 mmap2(): like mmap, but the file offset @pgoff is given in units of
+ * IA32 pages.
+ *
+ * Returns the mapped address or a negative errno (-EBADF for a bad
+ * descriptor on a file mapping, otherwise the result of ia32_do_mmap()).
+ */
+asmlinkage long
+sys32_mmap2 (unsigned int addr, unsigned int len, unsigned int prot, unsigned int flags,
+	     unsigned int fd, unsigned int pgoff)
+{
+	struct file *filp = NULL;
+	unsigned long result;
+
+	/* these two flags are never honoured from user space */
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+	if (!(flags & MAP_ANONYMOUS)) {
+		filp = fget(fd);
+		if (!filp)
+			return -EBADF;
+	}
+
+	result = ia32_do_mmap(filp, addr, len, prot, flags,
+			      (unsigned long) pgoff << IA32_PAGE_SHIFT);
+
+	if (filp)
+		fput(filp);
+	return result;
+}
+
+/*
+ * ia32 munmap().
+ *
+ * When the native page is larger than the IA32 page, the partial-page
+ * bitmaps are updated first by ia32_unset_pp(), which also adjusts the
+ * range to the native pages that may really be unmapped; if nothing is
+ * left, 0 is returned without calling sys_munmap().  Otherwise the request
+ * is handed to sys_munmap() unchanged.
+ */
+asmlinkage long
+sys32_munmap (unsigned int start, unsigned int len)
+{
+	unsigned int end = start + len;
+	long ret;
+
+#if PAGE_SHIFT > IA32_PAGE_SHIFT
+	if (OFFSET4K(start))
+		return -EINVAL;
+
+	end = IA32_PAGE_ALIGN(end);
+	if (end <= start)
+		return -EINVAL;
+
+	ret = ia32_unset_pp(&start, &end);
+	if (ret < 0)
+		return ret;
+
+	/* every native page involved is still partly in use */
+	if (end <= start)
+		return 0;
+
+	down(&ia32_mmap_sem);
+	ret = sys_munmap(start, end - start);
+	up(&ia32_mmap_sem);
+#else
+	ret = sys_munmap(start, end - start);
+#endif
+	return ret;
+}
+
+#if PAGE_SHIFT > IA32_PAGE_SHIFT
+
+/*
+ * mprotect() for a native page that is only partially covered by the
+ * request.  The page gets the union of its current protection and
+ * @new_prot, so access to a partial page can only ever become less
+ * restrictive.  A @new_prot of PROT_NONE therefore changes nothing and is
+ * short-circuited.
+ */
+static long
+mprotect_subpage (unsigned long address, int new_prot)
+{
+	struct vm_area_struct *vma;
+
+	if (new_prot == PROT_NONE)
+		return 0;
+
+	vma = find_vma(current->mm, address);
+	return sys_mprotect(address, PAGE_SIZE, new_prot | get_page_prot(vma, address));
+}
+
+#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */
+
+asmlinkage long
+sys32_mprotect (unsigned int start, unsigned int len, int prot)
+{ /*
+ * ia32 mprotect(). @prot is first widened by get_prot32().
+ *
+ * When the native page is not larger than the IA32 page the request goes straight to
+ * sys_mprotect(). Otherwise ia32_compare_pp() checks the range against the
+ * partial-page bitmaps (and may widen start/end to whole native pages); a remaining
+ * unaligned head or tail page is handled by mprotect_subpage(), and the whole native
+ * pages in between by sys_mprotect(), all under ia32_mmap_sem.
+ */
+ unsigned int end = start + len;
+#if PAGE_SHIFT > IA32_PAGE_SHIFT
+ long retval = 0;
+#endif
+
+ prot = get_prot32(prot);
+
+#if PAGE_SHIFT <= IA32_PAGE_SHIFT
+ return sys_mprotect(start, end - start, prot);
+#else
+ if (OFFSET4K(start))
+ return -EINVAL;
+
+ end = IA32_PAGE_ALIGN(end);
+ if (end < start)
+ return -EINVAL;
+
+ retval = ia32_compare_pp(&start, &end); /* may move start/end outward to native page boundaries */
+
+ if (retval < 0)
+ return retval;
+
+ down(&ia32_mmap_sem);
+ {
+ if (offset_in_page(start)) {
+ /* start address is 4KB aligned but not page aligned. */
+ retval = mprotect_subpage(PAGE_START(start), prot);
+ if (retval < 0)
+ goto out;
+
+ start = PAGE_ALIGN(start);
+ if (start >= end)
+ goto out; /* retval is already zero... */
+ }
+
+ if (offset_in_page(end)) {
+ /* end address is 4KB aligned but not page aligned. */
+ retval = mprotect_subpage(PAGE_START(end), prot);
+ if (retval < 0)
+ goto out;
+
+ end = PAGE_START(end);
+ }
+ retval = sys_mprotect(start, end - start, prot); /* whole native pages only */
+ }
+ out:
+ up(&ia32_mmap_sem);
+ return retval;
+#endif
+}
+
+asmlinkage long
+sys32_mremap (unsigned int addr, unsigned int old_len, unsigned int new_len,
+ unsigned int flags, unsigned int new_addr)
+{ /*
+ * ia32 mremap().
+ *
+ * When the native page is not larger than the IA32 page the call is passed to
+ * sys_mremap() unchanged. Otherwise both lengths are rounded up to IA32 pages; a
+ * shrink is carried out with sys32_munmap() and returns @addr unless MREMAP_FIXED
+ * asks for a different address; in all remaining cases the range is widened to
+ * native page boundaries and handed to sys_mremap() under ia32_mmap_sem. After a
+ * successful grow the added IA32 pages are recorded with ia32_set_pp().
+ */
+ long ret;
+
+#if PAGE_SHIFT <= IA32_PAGE_SHIFT
+ ret = sys_mremap(addr, old_len, new_len, flags, new_addr);
+#else
+ unsigned int old_end, new_end;
+
+ if (OFFSET4K(addr))
+ return -EINVAL;
+
+ old_len = IA32_PAGE_ALIGN(old_len);
+ new_len = IA32_PAGE_ALIGN(new_len);
+ old_end = addr + old_len;
+ new_end = addr + new_len;
+
+ if (!new_len)
+ return -EINVAL;
+
+ if ((flags & MREMAP_FIXED) && (OFFSET4K(new_addr)))
+ return -EINVAL;
+
+ if (old_len >= new_len) {
+ ret = sys32_munmap(addr + new_len, old_len - new_len);
+ if (ret && old_len != new_len) /* a failure only matters if there really was something to unmap */
+ return ret;
+ ret = addr;
+ if (!(flags & MREMAP_FIXED) || (new_addr == addr))
+ return ret;
+ old_len = new_len;
+ }
+
+ addr = PAGE_START(addr);
+ old_len = PAGE_ALIGN(old_end) - addr;
+ new_len = PAGE_ALIGN(new_end) - addr;
+
+ down(&ia32_mmap_sem);
+ {
+ ret = sys_mremap(addr, old_len, new_len, flags, new_addr);
+ }
+ up(&ia32_mmap_sem);
+
+ if ((ret >= 0) && (old_len < new_len)) {
+ /* mremap expanded successfully */
+ ia32_set_pp(old_end, new_end, flags); /* NOTE(review): flags holds MREMAP_* values while ia32_set_pp() tests MAP_FIXED, and old_end/new_end are based on the original addr even if ret differs - confirm this is intended */
+ }
+#endif
+ return ret;
+}
+
+/*
+ * ia32 pipe(): create a pipe and store its two descriptors in the user
+ * array @fd.
+ *
+ * Returns 0 on success, the do_pipe() error if the pipe cannot be created,
+ * or -EFAULT if @fd cannot be written.  In the -EFAULT case both
+ * descriptors are closed again; user space never learned their numbers, so
+ * they would otherwise stay open until the process exits.
+ */
+asmlinkage long
+sys32_pipe (int __user *fd)
+{
+	int fds[2];
+	int retval;
+
+	retval = do_pipe(fds);
+	if (retval)
+		return retval;
+
+	if (copy_to_user(fd, fds, sizeof(fds))) {
+		/* do_pipe() already installed both ends: don't leak them */
+		sys_close(fds[0]);
+		sys_close(fds[1]);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * Fetch a 32-bit timeval from user space @i into the native timeval @o.
+ * Returns 0 on success and 1 if @i is not readable or a field could not
+ * be fetched.
+ */
+static inline long
+get_tv32 (struct timeval *o, struct compat_timeval __user *i)
+{
+	long err;
+
+	if (!access_ok(VERIFY_READ, i, sizeof(*i)))
+		return 1;
+
+	err = __get_user(o->tv_sec, &i->tv_sec);
+	err |= __get_user(o->tv_usec, &i->tv_usec);
+	return err != 0;
+}
+
+/*
+ * Store the native timeval @i into the 32-bit timeval @o in user space.
+ * Returns 0 on success and 1 if @o is not writable or a field could not
+ * be stored.
+ */
+static inline long
+put_tv32 (struct compat_timeval __user *o, struct timeval *i)
+{
+	long err;
+
+	if (!access_ok(VERIFY_WRITE, o, sizeof(*o)))
+		return 1;
+
+	err = __put_user(i->tv_sec, &o->tv_sec);
+	err |= __put_user(i->tv_usec, &o->tv_usec);
+	return err != 0;
+}
+
+asmlinkage unsigned long
+sys32_alarm (unsigned int seconds)
+{
+ struct itimerval it_new, it_old;
+ unsigned int oldalarm;
+
+ it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
+ it_new.it_value.tv_sec = seconds;
+ it_new.it_value.tv_usec = 0;
+ do_setitimer(ITIMER_REAL, &it_new, &it_old);
+ oldalarm = it_old.it_value.tv_sec;
+ /* ehhh.. We can't return