Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c    |   8
-rw-r--r-- | mm/bootmem.c        |  24
-rw-r--r-- | mm/filemap.c        |  51
-rw-r--r-- | mm/kmemleak.c       |   4
-rw-r--r-- | mm/memcontrol.c     |   6
-rw-r--r-- | mm/memory-failure.c |   2
-rw-r--r-- | mm/mmap.c           |  46
-rw-r--r-- | mm/mremap.c         | 241
-rw-r--r-- | mm/page-writeback.c |  12
-rw-r--r-- | mm/slab.c           |   8
-rw-r--r-- | mm/slub.c           |  20
-rw-r--r-- | mm/truncate.c       |   2
-rw-r--r-- | mm/util.c           |  44
13 files changed, 288 insertions, 180 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 11aee09dd2a..0e8ca034770 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -604,10 +604,14 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 
 	/*
 	 * Finally, kill the kernel threads. We don't need to be RCU
-	 * safe anymore, since the bdi is gone from visibility.
+	 * safe anymore, since the bdi is gone from visibility. Force
+	 * unfreeze of the thread before calling kthread_stop(), otherwise
+	 * it would never exit if it is currently stuck in the refrigerator.
 	 */
-	list_for_each_entry(wb, &bdi->wb_list, list)
+	list_for_each_entry(wb, &bdi->wb_list, list) {
+		thaw_process(wb->task);
 		kthread_stop(wb->task);
+	}
 }
 
 /*
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 555d5d2731c..d1dc23cc7f1 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -143,6 +143,30 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 	return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
 }
 
+/*
+ * free_bootmem_late - free bootmem pages directly to page allocator
+ * @addr: starting address of the range
+ * @size: size of the range in bytes
+ *
+ * This is only useful when the bootmem allocator has already been torn
+ * down, but we are still initializing the system.  Pages are given directly
+ * to the page allocator, no bootmem metadata is updated because it is gone.
+ */
+void __init free_bootmem_late(unsigned long addr, unsigned long size)
+{
+	unsigned long cursor, end;
+
+	kmemleak_free_part(__va(addr), size);
+
+	cursor = PFN_UP(addr);
+	end = PFN_DOWN(addr + size);
+
+	for (; cursor < end; cursor++) {
+		__free_pages_bootmem(pfn_to_page(cursor), 0);
+		totalram_pages++;
+	}
+}
+
 static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 {
 	int aligned;
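A note on the rounding in free_bootmem_late() above: PFN_UP() rounds the start address up to the next page frame and PFN_DOWN() rounds the end down, so only pages lying wholly inside [addr, addr + size) reach __free_pages_bootmem(); partial pages at either edge are skipped. A standalone sketch of that arithmetic, with the macros re-derived locally and a 4 KiB page size assumed:

    #include <stdio.h>

    #define PAGE_SHIFT 12                /* assumed 4 KiB pages */
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    /* same rounding the kernel's PFN_UP()/PFN_DOWN() macros perform */
    #define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
    #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

    int main(void)
    {
            unsigned long addr = 0x1080;         /* not page aligned */
            unsigned long size = 0x3000;

            /* pages 2 and 3 lie wholly inside [0x1080, 0x4080) and would be
             * freed; the partial pages at pfn 1 and pfn 4 are skipped */
            printf("first pfn freed: %lu\n", PFN_UP(addr));
            printf("first pfn kept:  %lu\n", PFN_DOWN(addr + size));
            return 0;
    }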
diff --git a/mm/filemap.c b/mm/filemap.c
index ef169f37156..8b4d88f9249 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -260,27 +260,27 @@ int filemap_flush(struct address_space *mapping)
 EXPORT_SYMBOL(filemap_flush);
 
 /**
- * wait_on_page_writeback_range - wait for writeback to complete
- * @mapping:	target address_space
- * @start:	beginning page index
- * @end:	ending page index
+ * filemap_fdatawait_range - wait for writeback to complete
+ * @mapping:	address space structure to wait for
+ * @start_byte:	offset in bytes where the range starts
+ * @end_byte:	offset in bytes where the range ends (inclusive)
  *
- * Wait for writeback to complete against pages indexed by start->end
- * inclusive
+ * Walk the list of under-writeback pages of the given address space
+ * in the given range and wait for all of them.
  */
-int wait_on_page_writeback_range(struct address_space *mapping,
-				pgoff_t start, pgoff_t end)
+int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
+			    loff_t end_byte)
 {
+	pgoff_t index = start_byte >> PAGE_CACHE_SHIFT;
+	pgoff_t end = end_byte >> PAGE_CACHE_SHIFT;
 	struct pagevec pvec;
 	int nr_pages;
 	int ret = 0;
-	pgoff_t index;
 
-	if (end < start)
+	if (end_byte < start_byte)
 		return 0;
 
 	pagevec_init(&pvec, 0);
-	index = start;
 	while ((index <= end) &&
 			(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
 			PAGECACHE_TAG_WRITEBACK,
@@ -310,25 +310,6 @@ int wait_on_page_writeback_range(struct address_space *mapping,
 
 	return ret;
 }
-
-/**
- * filemap_fdatawait_range - wait for all under-writeback pages to complete in a given range
- * @mapping:	address space structure to wait for
- * @start:	offset in bytes where the range starts
- * @end:	offset in bytes where the range ends (inclusive)
- *
- * Walk the list of under-writeback pages of the given address space
- * in the given range and wait for all of them.
- *
- * This is just a simple wrapper so that callers don't have to convert offsets
- * to page indexes themselves
- */
-int filemap_fdatawait_range(struct address_space *mapping, loff_t start,
-			    loff_t end)
-{
-	return wait_on_page_writeback_range(mapping, start >> PAGE_CACHE_SHIFT,
-					    end >> PAGE_CACHE_SHIFT);
-}
 EXPORT_SYMBOL(filemap_fdatawait_range);
 
 /**
@@ -345,8 +326,7 @@ int filemap_fdatawait(struct address_space *mapping)
 	if (i_size == 0)
 		return 0;
 
-	return wait_on_page_writeback_range(mapping, 0,
-					    (i_size - 1) >> PAGE_CACHE_SHIFT);
+	return filemap_fdatawait_range(mapping, 0, i_size - 1);
 }
 EXPORT_SYMBOL(filemap_fdatawait);
 
@@ -393,9 +373,8 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 						 WB_SYNC_ALL);
 		/* See comment of filemap_write_and_wait() */
 		if (err != -EIO) {
-			int err2 = wait_on_page_writeback_range(mapping,
-						lstart >> PAGE_CACHE_SHIFT,
-						lend >> PAGE_CACHE_SHIFT);
+			int err2 = filemap_fdatawait_range(mapping,
+						lstart, lend);
 			if (!err)
 				err = err2;
 		}
@@ -1844,7 +1823,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 
 /*
  * Copy as much as we can into the page and return the number of bytes which
- * were sucessfully copied. If a fault is encountered then return the number of
+ * were successfully copied. If a fault is encountered then return the number of
  * bytes which were copied.
  */
 size_t iov_iter_copy_from_user_atomic(struct page *page,
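With this change filemap_fdatawait_range() is the primary interface: it takes byte offsets directly (end inclusive) and converts them to page indexes itself, instead of being a thin wrapper around the page-indexed wait_on_page_writeback_range(). The nearest userspace-visible analogue of a byte-range writeback wait is sync_file_range(2); the sketch below illustrates only the byte-range interface style, not the in-kernel helper:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("testfile", O_RDWR | O_CREAT, 0644);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (write(fd, "hello", 5) != 5) {
                    perror("write");
                    return 1;
            }
            /* start writeback for bytes [0, 5) and wait for it to finish */
            if (sync_file_range(fd, 0, 5,
                                SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER))
                    perror("sync_file_range");
            close(fd);
            return 0;
    }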
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 8bf765c4f58..13f33b3081e 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1050,8 +1050,8 @@ static void scan_object(struct kmemleak_object *object)
 	unsigned long flags;
 
 	/*
-	 * Once the object->lock is aquired, the corresponding memory block
-	 * cannot be freed (the same lock is aquired in delete_object).
+	 * Once the object->lock is acquired, the corresponding memory block
+	 * cannot be freed (the same lock is acquired in delete_object).
 	 */
 	spin_lock_irqsave(&object->lock, flags);
 	if (object->flags & OBJECT_NO_SCAN)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f99f5991d6b..c31a310aa14 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -209,7 +209,7 @@ struct mem_cgroup {
 	int	prev_priority;	/* for recording reclaim priority */
 
 	/*
-	 * While reclaiming in a hiearchy, we cache the last child we
+	 * While reclaiming in a hierarchy, we cache the last child we
 	 * reclaimed from.
 	 */
 	int last_scanned_child;
@@ -1720,7 +1720,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 /*
  * While swap-in, try_charge -> commit or cancel, the page is locked.
  * And when try_charge() successfully returns, one refcnt to memcg without
- * struct page_cgroup is aquired. This refcnt will be cumsumed by
+ * struct page_cgroup is acquired. This refcnt will be consumed by
  * "commit()" or removed by "cancel()"
  */
 int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
@@ -2466,7 +2466,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
 	cgroup_lock();
 
 	/*
-	 * If parent's use_hiearchy is set, we can't make any modifications
+	 * If parent's use_hierarchy is set, we can't make any modifications
 	 * in the child subtrees. If it is unset, then the change can
 	 * occur, provided the current cgroup has no children.
 	 *
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index dacc6418387..1ac49fef95a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -174,7 +174,7 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
 	list_for_each_entry_safe (tk, next, to_kill, nd) {
 		if (doit) {
 			/*
-			 * In case something went wrong with munmaping
+			 * In case something went wrong with munmapping
 			 * make sure the process doesn't catch the
 			 * signal and then access the memory. Just kill it.
 			 * the signal handlers
diff --git a/mm/mmap.c b/mm/mmap.c
index 73f5e4b6401..ed70a68e882 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -20,7 +20,6 @@
 #include <linux/fs.h>
 #include <linux/personality.h>
 #include <linux/security.h>
-#include <linux/ima.h>
 #include <linux/hugetlb.h>
 #include <linux/profile.h>
 #include <linux/module.h>
@@ -932,13 +931,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	if (!(flags & MAP_FIXED))
 		addr = round_hint_to_min(addr);
 
-	error = arch_mmap_check(addr, len, flags);
-	if (error)
-		return error;
-
 	/* Careful about overflows.. */
 	len = PAGE_ALIGN(len);
-	if (!len || len > TASK_SIZE)
+	if (!len)
 		return -ENOMEM;
 
 	/* offset overflow? */
@@ -949,24 +944,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
-	if (flags & MAP_HUGETLB) {
-		struct user_struct *user = NULL;
-		if (file)
-			return -EINVAL;
-
-		/*
-		 * VM_NORESERVE is used because the reservations will be
-		 * taken when vm_ops->mmap() is called
-		 * A dummy user value is used because we are not locking
-		 * memory so no accounting is necessary
-		 */
-		len = ALIGN(len, huge_page_size(&default_hstate));
-		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
-						&user, HUGETLB_ANONHUGE_INODE);
-		if (IS_ERR(file))
-			return PTR_ERR(file);
-	}
-
 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
 	 */
@@ -1061,9 +1038,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
 	if (error)
 		return error;
-	error = ima_file_mmap(file, prot);
-	if (error)
-		return error;
 
 	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
 }
@@ -1459,6 +1433,14 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 	unsigned long (*get_area)(struct file *, unsigned long,
 				  unsigned long, unsigned long, unsigned long);
 
+	unsigned long error = arch_mmap_check(addr, len, flags);
+	if (error)
+		return error;
+
+	/* Careful about overflows..
+	 */
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
 	get_area = current->mm->get_unmapped_area;
 	if (file && file->f_op && file->f_op->get_unmapped_area)
 		get_area = file->f_op->get_unmapped_area;
@@ -2003,20 +1985,14 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 	if (!len)
 		return addr;
 
-	if ((addr + len) > TASK_SIZE || (addr + len) < addr)
-		return -EINVAL;
-
-	if (is_hugepage_only_range(mm, addr, len))
-		return -EINVAL;
-
 	error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
 	if (error)
 		return error;
 
 	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 
-	error = arch_mmap_check(addr, len, flags);
-	if (error)
+	error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
+	if (error & ~PAGE_MASK)
 		return error;
 
 	/*
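One idiom that appears twice here (in do_brk() above, and again in the mremap code below) is worth spelling out: get_unmapped_area() folds two results into one unsigned long, returning either a page-aligned address or a negated errno. Since errno values are small, the negated value always has non-zero low bits, so (ret & ~PAGE_MASK) distinguishes the two cases. A standalone illustration, with PAGE_MASK assumed for 4 KiB pages and an illustrative errno value:

    #include <stdio.h>

    #define PAGE_MASK (~0xFFFUL)  /* assumed 4 KiB pages */
    #define MY_ENOMEM 12          /* illustrative errno value */

    /* a misaligned value can only be a negated errno in disguise */
    static int is_error(unsigned long ret)
    {
            return (ret & ~PAGE_MASK) != 0;
    }

    int main(void)
    {
            unsigned long ok  = 0x7f0000000000UL;          /* aligned address */
            unsigned long err = (unsigned long)-MY_ENOMEM; /* all low bits set */

            printf("0x%lx   -> %s\n", ok, is_error(ok) ? "error" : "address");
            printf("-ENOMEM -> %s\n", is_error(err) ? "error" : "address");
            return 0;
    }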
diff --git a/mm/mremap.c b/mm/mremap.c
index 97bff254771..845190898d5 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -261,6 +261,137 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	return new_addr;
 }
 
+static struct vm_area_struct *vma_to_resize(unsigned long addr,
+	unsigned long old_len, unsigned long new_len, unsigned long *p)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma = find_vma(mm, addr);
+
+	if (!vma || vma->vm_start > addr)
+		goto Efault;
+
+	if (is_vm_hugetlb_page(vma))
+		goto Einval;
+
+	/* We can't remap across vm area boundaries */
+	if (old_len > vma->vm_end - addr)
+		goto Efault;
+
+	if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) {
+		if (new_len > old_len)
+			goto Efault;
+	}
+
+	if (vma->vm_flags & VM_LOCKED) {
+		unsigned long locked, lock_limit;
+		locked = mm->locked_vm << PAGE_SHIFT;
+		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		locked += new_len - old_len;
+		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+			goto Eagain;
+	}
+
+	if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT))
+		goto Enomem;
+
+	if (vma->vm_flags & VM_ACCOUNT) {
+		unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
+		if (security_vm_enough_memory(charged))
+			goto Efault;
+		*p = charged;
+	}
+
+	return vma;
+
+Efault:	/* very odd choice for most of the cases, but... */
+	return ERR_PTR(-EFAULT);
+Einval:
+	return ERR_PTR(-EINVAL);
+Enomem:
+	return ERR_PTR(-ENOMEM);
+Eagain:
+	return ERR_PTR(-EAGAIN);
+}
+
+static unsigned long mremap_to(unsigned long addr,
+	unsigned long old_len, unsigned long new_addr,
+	unsigned long new_len)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long ret = -EINVAL;
+	unsigned long charged = 0;
+	unsigned long map_flags;
+
+	if (new_addr & ~PAGE_MASK)
+		goto out;
+
+	if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
+		goto out;
+
+	/* Check if the location we're moving into overlaps the
+	 * old location at all, and fail if it does.
+	 */
+	if ((new_addr <= addr) && (new_addr+new_len) > addr)
+		goto out;
+
+	if ((addr <= new_addr) && (addr+old_len) > new_addr)
+		goto out;
+
+	ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
+	if (ret)
+		goto out;
+
+	ret = do_munmap(mm, new_addr, new_len);
+	if (ret)
+		goto out;
+
+	if (old_len >= new_len) {
+		ret = do_munmap(mm, addr+new_len, old_len - new_len);
+		if (ret && old_len != new_len)
+			goto out;
+		old_len = new_len;
+	}
+
+	vma = vma_to_resize(addr, old_len, new_len, &charged);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto out;
+	}
+
+	map_flags = MAP_FIXED;
+	if (vma->vm_flags & VM_MAYSHARE)
+		map_flags |= MAP_SHARED;
+
+	ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff +
+				((addr - vma->vm_start) >> PAGE_SHIFT),
+				map_flags);
+	if (ret & ~PAGE_MASK)
+		goto out1;
+
+	ret = move_vma(vma, addr, old_len, new_len, new_addr);
+	if (!(ret & ~PAGE_MASK))
+		goto out;
+out1:
+	vm_unacct_memory(charged);
+
+out:
+	return ret;
+}
+
+static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
+{
+	unsigned long end = vma->vm_end + delta;
+	if (end < vma->vm_end) /* overflow */
+		return 0;
+	if (vma->vm_next && vma->vm_next->vm_start < end) /* intersection */
+		return 0;
+	if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start,
+			      0, MAP_FIXED) & ~PAGE_MASK)
+		return 0;
+	return 1;
+}
+
 /*
  * Expand (or shrink) an existing mapping, potentially moving it at the
  * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
@@ -294,32 +425,10 @@ unsigned long do_mremap(unsigned long addr,
 	if (!new_len)
 		goto out;
 
-	/* new_addr is only valid if MREMAP_FIXED is specified */
 	if (flags & MREMAP_FIXED) {
-		if (new_addr & ~PAGE_MASK)
-			goto out;
-		if (!(flags & MREMAP_MAYMOVE))
-			goto out;
-
-		if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
-			goto out;
-
-		/* Check if the location we're moving into overlaps the
-		 * old location at all, and fail if it does.
-		 */
-		if ((new_addr <= addr) && (new_addr+new_len) > addr)
-			goto out;
-
-		if ((addr <= new_addr) && (addr+old_len) > new_addr)
-			goto out;
-
-		ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
-		if (ret)
-			goto out;
-
-		ret = do_munmap(mm, new_addr, new_len);
-		if (ret)
-			goto out;
+		if (flags & MREMAP_MAYMOVE)
+			ret = mremap_to(addr, old_len, new_addr, new_len);
+		goto out;
 	}
 
 	/*
@@ -332,60 +441,23 @@ unsigned long do_mremap(unsigned long addr,
 		if (ret && old_len != new_len)
 			goto out;
 		ret = addr;
-		if (!(flags & MREMAP_FIXED) || (new_addr == addr))
-			goto out;
-		old_len = new_len;
+		goto out;
 	}
 
 	/*
-	 * Ok, we need to grow.. or relocate.
+	 * Ok, we need to grow..
 	 */
-	ret = -EFAULT;
-	vma = find_vma(mm, addr);
-	if (!vma || vma->vm_start > addr)
-		goto out;
-	if (is_vm_hugetlb_page(vma)) {
-		ret = -EINVAL;
-		goto out;
-	}
-	/* We can't remap across vm area boundaries */
-	if (old_len > vma->vm_end - addr)
-		goto out;
-	if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) {
-		if (new_len > old_len)
-			goto out;
-	}
-	if (vma->vm_flags & VM_LOCKED) {
-		unsigned long locked, lock_limit;
-		locked = mm->locked_vm << PAGE_SHIFT;
-		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
-		locked += new_len - old_len;
-		ret = -EAGAIN;
-		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-			goto out;
-	}
-	if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT)) {
-		ret = -ENOMEM;
+	vma = vma_to_resize(addr, old_len, new_len, &charged);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
 		goto out;
 	}
 
-	if (vma->vm_flags & VM_ACCOUNT) {
-		charged = (new_len - old_len) >> PAGE_SHIFT;
-		if (security_vm_enough_memory(charged))
-			goto out_nc;
-	}
-
-	/* old_len exactly to the end of the area..
-	 * And we're not relocating the area.
-	 */
-	if (old_len == vma->vm_end - addr &&
-	    !((flags & MREMAP_FIXED) && (addr != new_addr)) &&
-	    (old_len != new_len || !(flags & MREMAP_MAYMOVE))) {
-		unsigned long max_addr = TASK_SIZE;
-		if (vma->vm_next)
-			max_addr = vma->vm_next->vm_start;
+	if (old_len == vma->vm_end - addr) {
 		/* can we just expand the current mapping? */
-		if (max_addr - addr >= new_len) {
+		if (vma_expandable(vma, new_len - old_len)) {
 			int pages = (new_len - old_len) >> PAGE_SHIFT;
 
 			vma_adjust(vma, vma->vm_start,
@@ -409,28 +481,27 @@ unsigned long do_mremap(unsigned long addr,
 	 */
 	ret = -ENOMEM;
 	if (flags & MREMAP_MAYMOVE) {
-		if (!(flags & MREMAP_FIXED)) {
-			unsigned long map_flags = 0;
-			if (vma->vm_flags & VM_MAYSHARE)
-				map_flags |= MAP_SHARED;
-
-			new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
-						vma->vm_pgoff, map_flags);
-			if (new_addr & ~PAGE_MASK) {
-				ret = new_addr;
-				goto out;
-			}
-
-			ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
-			if (ret)
-				goto out;
+		unsigned long map_flags = 0;
+		if (vma->vm_flags & VM_MAYSHARE)
+			map_flags |= MAP_SHARED;
+
+		new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
+					vma->vm_pgoff +
+					((addr - vma->vm_start) >> PAGE_SHIFT),
+					map_flags);
+		if (new_addr & ~PAGE_MASK) {
+			ret = new_addr;
+			goto out;
 		}
+
+		ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
+		if (ret)
+			goto out;
 		ret = move_vma(vma, addr, old_len, new_len, new_addr);
 	}
 out:
 	if (ret & ~PAGE_MASK)
 		vm_unacct_memory(charged);
-out_nc:
 	return ret;
 }
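The overlap tests in mremap_to() are directly observable from userspace: an MREMAP_FIXED request whose destination window intersects the source range fails with EINVAL, while a non-overlapping fixed destination is first munmapped and then used. A runnable sketch (page size taken from sysconf(); the addresses are whatever the kernel hands out):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            long page = sysconf(_SC_PAGESIZE);
            char *p = mmap(NULL, 4 * page, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (p == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }

            /* destination overlaps the source: the kernel must refuse */
            void *q = mremap(p, 4 * page, 4 * page,
                             MREMAP_MAYMOVE | MREMAP_FIXED, p + page);
            if (q == MAP_FAILED)
                    perror("overlapping mremap (expected to fail)");

            /* a non-overlapping fixed destination is munmapped and reused */
            char *dst = mmap(NULL, 4 * page, PROT_NONE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            q = mremap(p, 4 * page, 4 * page,
                       MREMAP_MAYMOVE | MREMAP_FIXED, dst);
            printf("moved to %p\n", q);
            return 0;
    }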
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2c5d79236ea..0b19943ecf8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -821,7 +821,6 @@ int write_cache_pages(struct address_space *mapping,
 		      struct writeback_control *wbc, writepage_t writepage,
 		      void *data)
 {
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	int ret = 0;
 	int done = 0;
 	struct pagevec pvec;
@@ -834,11 +833,6 @@ int write_cache_pages(struct address_space *mapping,
 	int range_whole = 0;
 	long nr_to_write = wbc->nr_to_write;
 
-	if (wbc->nonblocking && bdi_write_congested(bdi)) {
-		wbc->encountered_congestion = 1;
-		return 0;
-	}
-
 	pagevec_init(&pvec, 0);
 	if (wbc->range_cyclic) {
 		writeback_index = mapping->writeback_index; /* prev offset */
@@ -957,12 +951,6 @@ continue_unlock:
 				break;
 			}
 		}
-
-		if (wbc->nonblocking && bdi_write_congested(bdi)) {
-			wbc->encountered_congestion = 1;
-			done = 1;
-			break;
-		}
 	}
 	pagevec_release(&pvec);
 	cond_resched();
diff --git a/mm/slab.c b/mm/slab.c
index a07540e5843..a6c9166996a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3117,13 +3117,19 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 	} else {
 		STATS_INC_ALLOCMISS(cachep);
 		objp = cache_alloc_refill(cachep, flags);
+		/*
+		 * the 'ac' may be updated by cache_alloc_refill(),
+		 * and kmemleak_erase() requires its correct value.
+		 */
+		ac = cpu_cache_get(cachep);
 	}
 	/*
 	 * To avoid a false negative, if an object that is in one of the
 	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
 	 * treat the array pointers as a reference to the object.
 	 */
-	kmemleak_erase(&ac->entry[ac->avail]);
+	if (objp)
+		kmemleak_erase(&ac->entry[ac->avail]);
 	return objp;
 }
diff --git a/mm/slub.c b/mm/slub.c
index 4996fc71955..da0ce55965d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1735,7 +1735,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	}
 	local_irq_restore(flags);
 
-	if (unlikely((gfpflags & __GFP_ZERO) && object))
+	if (unlikely(gfpflags & __GFP_ZERO) && object)
 		memset(object, 0, objsize);
 
 	kmemcheck_slab_alloc(s, gfpflags, object, c->objsize);
@@ -4371,12 +4371,28 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
 	return len + sprintf(buf + len, "\n");
 }
 
+static void clear_stat(struct kmem_cache *s, enum stat_item si)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		get_cpu_slab(s, cpu)->stat[si] = 0;
+}
+
 #define STAT_ATTR(si, text) 					\
 static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
 {								\
 	return show_stat(s, buf, si);				\
 }								\
-SLAB_ATTR_RO(text);						\
+static ssize_t text##_store(struct kmem_cache *s,		\
+				const char *buf, size_t length)	\
+{								\
+	if (buf[0] != '0')					\
+		return -EINVAL;					\
+	clear_stat(s, si);					\
+	return length;						\
+}								\
+SLAB_ATTR(text);						\
 
 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
 STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
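With STAT_ATTR now generating a _store method, each statistics file under /sys/kernel/slab/<cache>/ accepts a write beginning with '0' to reset its per-CPU counters; any other input gets -EINVAL. A hedged userspace sketch — the cache name "kmalloc-64" is only an example, and it assumes a kernel built with CONFIG_SLUB_STATS plus root privileges:

    #include <stdio.h>

    int main(void)
    {
            /* example path; actual cache names vary by system */
            const char *path = "/sys/kernel/slab/kmalloc-64/alloc_fastpath";
            char line[128];

            FILE *f = fopen(path, "r");
            if (!f) {
                    perror("fopen");
                    return 1;
            }
            if (fgets(line, sizeof(line), f))
                    printf("before reset: %s", line);
            fclose(f);

            f = fopen(path, "w");
            if (!f) {
                    perror("fopen for write");
                    return 1;
            }
            /* writing anything not starting with '0' would return EINVAL */
            if (fputs("0", f) == EOF || fclose(f) != 0)
                    perror("reset");
            return 0;
    }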
diff --git a/mm/truncate.c b/mm/truncate.c
index 450cebdabfc..2c147a7e5f2 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -490,7 +490,7 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
- * Returns -EIO if any pages could not be invalidated.
+ * Returns -EBUSY if any pages could not be invalidated.
 */
 int invalidate_inode_pages2(struct address_space *mapping)
 {
diff --git a/mm/util.c b/mm/util.c
index 7c35ad95f92..b377ce43080 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -4,6 +4,10 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/sched.h>
+#include <linux/hugetlb.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
 #include <asm/uaccess.h>
 
 #define CREATE_TRACE_POINTS
@@ -268,6 +272,46 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 }
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
+SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, unsigned long, pgoff)
+{
+	struct file *file = NULL;
+	unsigned long retval = -EBADF;
+
+	if (!(flags & MAP_ANONYMOUS)) {
+		if (unlikely(flags & MAP_HUGETLB))
+			return -EINVAL;
+		file = fget(fd);
+		if (!file)
+			goto out;
+	} else if (flags & MAP_HUGETLB) {
+		struct user_struct *user = NULL;
+		/*
+		 * VM_NORESERVE is used because the reservations will be
+		 * taken when vm_ops->mmap() is called
+		 * A dummy user value is used because we are not locking
+		 * memory so no accounting is necessary
+		 */
+		len = ALIGN(len, huge_page_size(&default_hstate));
+		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
+						&user, HUGETLB_ANONHUGE_INODE);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+	}
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+	down_write(&current->mm->mmap_sem);
+	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+	up_write(&current->mm->mmap_sem);
+
+	if (file)
+		fput(file);
+out:
+	return retval;
+}
+
 /* Tracepoints definitions. */
 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
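The MAP_HUGETLB leg of sys_mmap_pgoff() means userspace can obtain anonymous huge-page-backed memory straight from mmap(), with no hugetlbfs mount. A sketch — it assumes huge pages have been reserved (e.g. via /proc/sys/vm/nr_hugepages), a 2 MiB huge page size, and defines MAP_HUGETLB by hand for older headers (0x40000 is the x86 value):

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    #ifndef MAP_HUGETLB
    #define MAP_HUGETLB 0x40000   /* x86 value; assumption for old headers */
    #endif

    int main(void)
    {
            size_t len = 2 * 1024 * 1024;  /* assumed 2 MiB huge page size */

            char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
            if (p == MAP_FAILED) {
                    /* typically ENOMEM if no huge pages are reserved */
                    perror("mmap(MAP_HUGETLB)");
                    return 1;
            }

            memset(p, 0, len);  /* touch it: backed by a single huge page */
            munmap(p, len);
            return 0;
    }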