diff options
Diffstat (limited to 'mm/mmap.c')
| -rw-r--r-- | mm/mmap.c | 273 |
1 files changed, 164 insertions, 109 deletions
diff --git a/mm/mmap.c b/mm/mmap.c index 9d548512ff8..129b847d30c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -6,10 +6,13 @@ * Address space accounting code <alan@lxorguk.ukuu.org.uk> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/slab.h> #include <linux/backing-dev.h> #include <linux/mm.h> +#include <linux/vmacache.h> #include <linux/shm.h> #include <linux/mman.h> #include <linux/pagemap.h> @@ -36,6 +39,7 @@ #include <linux/sched/sysctl.h> #include <linux/notifier.h> #include <linux/memory.h> +#include <linux/printk.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -86,6 +90,7 @@ EXPORT_SYMBOL(vm_get_page_prot); int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */ +unsigned long sysctl_overcommit_kbytes __read_mostly; int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ @@ -179,14 +184,12 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) goto error; } - allowed = (totalram_pages - hugetlb_total_pages()) - * sysctl_overcommit_ratio / 100; + allowed = vm_commit_limit(); /* * Reserve some for root */ if (!cap_sys_admin) allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); - allowed += total_swap_pages; /* * Don't let a single process grow so big a user can't recover @@ -361,20 +364,20 @@ static int browse_rb(struct rb_root *root) struct vm_area_struct *vma; vma = rb_entry(nd, struct vm_area_struct, vm_rb); if (vma->vm_start < prev) { - printk("vm_start %lx prev %lx\n", vma->vm_start, prev); + pr_info("vm_start %lx prev %lx\n", vma->vm_start, prev); bug = 1; } if (vma->vm_start < pend) { - printk("vm_start %lx pend %lx\n", vma->vm_start, pend); + pr_info("vm_start %lx pend %lx\n", vma->vm_start, pend); bug = 1; } if (vma->vm_start > vma->vm_end) { - printk("vm_end %lx < vm_start %lx\n", + pr_info("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start); bug = 1; } if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) { - printk("free gap %lx, correct %lx\n", + pr_info("free gap %lx, correct %lx\n", vma->rb_subtree_gap, vma_compute_subtree_gap(vma)); bug = 1; @@ -388,7 +391,7 @@ static int browse_rb(struct rb_root *root) for (nd = pn; nd; nd = rb_prev(nd)) j++; if (i != j) { - printk("backwards %d, forwards %d\n", j, i); + pr_info("backwards %d, forwards %d\n", j, i); bug = 1; } return bug ? -1 : i; @@ -406,7 +409,7 @@ static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore) } } -void validate_mm(struct mm_struct *mm) +static void validate_mm(struct mm_struct *mm) { int bug = 0; int i = 0; @@ -423,17 +426,17 @@ void validate_mm(struct mm_struct *mm) i++; } if (i != mm->map_count) { - printk("map_count %d vm_next %d\n", mm->map_count, i); + pr_info("map_count %d vm_next %d\n", mm->map_count, i); bug = 1; } if (highest_address != mm->highest_vm_end) { - printk("mm->highest_vm_end %lx, found %lx\n", + pr_info("mm->highest_vm_end %lx, found %lx\n", mm->highest_vm_end, highest_address); bug = 1; } i = browse_rb(&mm->mm_rb); if (i != mm->map_count) { - printk("map_count %d rb %d\n", mm->map_count, i); + pr_info("map_count %d rb %d\n", mm->map_count, i); bug = 1; } BUG_ON(bug); @@ -640,11 +643,10 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma, { struct address_space *mapping = NULL; - if (vma->vm_file) + if (vma->vm_file) { mapping = vma->vm_file->f_mapping; - - if (mapping) mutex_lock(&mapping->i_mmap_mutex); + } __vma_link(mm, vma, prev, rb_link, rb_parent); __vma_link_file(vma); @@ -682,8 +684,9 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, prev->vm_next = next = vma->vm_next; if (next) next->vm_prev = prev; - if (mm->mmap_cache == vma) - mm->mmap_cache = prev; + + /* Kill the cache */ + vmacache_invalidate(mm); } /* @@ -895,7 +898,15 @@ again: remove_next = 1 + (end > next->vm_end); static inline int is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags) { - if (vma->vm_flags ^ vm_flags) + /* + * VM_SOFTDIRTY should not prevent from VMA merging, if we + * match the flags but dirty bit -- the caller should mark + * merged VMA as dirty. If dirty bit won't be excluded from + * comparison, we increase pressue on the memory system forcing + * the kernel to generate new VMAs when old one could be + * extended instead. + */ + if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY) return 0; if (vma->vm_file != file) return 0; @@ -1084,7 +1095,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct * return a->vm_end == b->vm_start && mpol_equal(vma_policy(a), vma_policy(b)) && a->vm_file == b->vm_file && - !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) && + !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) && b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT); } @@ -1192,6 +1203,24 @@ static inline unsigned long round_hint_to_min(unsigned long hint) return hint; } +static inline int mlock_future_check(struct mm_struct *mm, + unsigned long flags, + unsigned long len) +{ + unsigned long locked, lock_limit; + + /* mlock MCL_FUTURE? */ + if (flags & VM_LOCKED) { + locked = len >> PAGE_SHIFT; + locked += mm->locked_vm; + lock_limit = rlimit(RLIMIT_MEMLOCK); + lock_limit >>= PAGE_SHIFT; + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) + return -EAGAIN; + } + return 0; +} + /* * The caller must hold down_write(¤t->mm->mmap_sem). */ @@ -1253,16 +1282,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, if (!can_do_mlock()) return -EPERM; - /* mlock MCL_FUTURE? */ - if (vm_flags & VM_LOCKED) { - unsigned long locked, lock_limit; - locked = len >> PAGE_SHIFT; - locked += mm->locked_vm; - lock_limit = rlimit(RLIMIT_MEMLOCK); - lock_limit >>= PAGE_SHIFT; - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) - return -EAGAIN; - } + if (mlock_future_check(mm, vm_flags, len)) + return -EAGAIN; if (file) { struct inode *inode = file_inode(file); @@ -1282,7 +1303,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, /* * Make sure there are no mandatory locks on the file. */ - if (locks_verify_locked(inode)) + if (locks_verify_locked(file)) return -EAGAIN; vm_flags |= VM_SHARED | VM_MAYSHARE; @@ -1299,7 +1320,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, vm_flags &= ~VM_MAYEXEC; } - if (!file->f_op || !file->f_op->mmap) + if (!file->f_op->mmap) return -ENODEV; if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) return -EINVAL; @@ -1856,7 +1877,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_area_struct *vma; struct vm_unmapped_area_info info; - if (len > TASK_SIZE) + if (len > TASK_SIZE - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) @@ -1865,14 +1886,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && + if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vma->vm_start)) return addr; } info.flags = 0; info.length = len; - info.low_limit = TASK_UNMAPPED_BASE; + info.low_limit = mm->mmap_base; info.high_limit = TASK_SIZE; info.align_mask = 0; return vm_unmapped_area(&info); @@ -1895,7 +1916,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, struct vm_unmapped_area_info info; /* requested length too big for entire address space */ - if (len > TASK_SIZE) + if (len > TASK_SIZE - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) @@ -1905,14 +1926,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && + if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vma->vm_start)) return addr; } info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; - info.low_limit = PAGE_SIZE; + info.low_limit = max(PAGE_SIZE, mmap_min_addr); info.high_limit = mm->mmap_base; info.align_mask = 0; addr = vm_unmapped_area(&info); @@ -1951,7 +1972,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, return -ENOMEM; get_area = current->mm->get_unmapped_area; - if (file && file->f_op && file->f_op->get_unmapped_area) + if (file && file->f_op->get_unmapped_area) get_area = file->f_op->get_unmapped_area; addr = get_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) @@ -1972,34 +1993,33 @@ EXPORT_SYMBOL(get_unmapped_area); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { - struct vm_area_struct *vma = NULL; + struct rb_node *rb_node; + struct vm_area_struct *vma; /* Check the cache first. */ - /* (Cache hit rate is typically around 35%.) */ - vma = ACCESS_ONCE(mm->mmap_cache); - if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) { - struct rb_node *rb_node; + vma = vmacache_find(mm, addr); + if (likely(vma)) + return vma; - rb_node = mm->mm_rb.rb_node; - vma = NULL; + rb_node = mm->mm_rb.rb_node; + vma = NULL; - while (rb_node) { - struct vm_area_struct *vma_tmp; - - vma_tmp = rb_entry(rb_node, - struct vm_area_struct, vm_rb); - - if (vma_tmp->vm_end > addr) { - vma = vma_tmp; - if (vma_tmp->vm_start <= addr) - break; - rb_node = rb_node->rb_left; - } else - rb_node = rb_node->rb_right; - } - if (vma) - mm->mmap_cache = vma; + while (rb_node) { + struct vm_area_struct *tmp; + + tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb); + + if (tmp->vm_end > addr) { + vma = tmp; + if (tmp->vm_start <= addr) + break; + rb_node = rb_node->rb_left; + } else + rb_node = rb_node->rb_right; } + + if (vma) + vmacache_update(addr, vma); return vma; } @@ -2371,7 +2391,9 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, } else mm->highest_vm_end = prev ? prev->vm_end : 0; tail_vma->vm_next = NULL; - mm->mmap_cache = NULL; /* Kill the cache. */ + + /* Kill the cache */ + vmacache_invalidate(mm); } /* @@ -2593,18 +2615,9 @@ static unsigned long do_brk(unsigned long addr, unsigned long len) if (error & ~PAGE_MASK) return error; - /* - * mlock MCL_FUTURE? - */ - if (mm->def_flags & VM_LOCKED) { - unsigned long locked, lock_limit; - locked = len >> PAGE_SHIFT; - locked += mm->locked_vm; - lock_limit = rlimit(RLIMIT_MEMLOCK); - lock_limit >>= PAGE_SHIFT; - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) - return -EAGAIN; - } + error = mlock_future_check(mm, mm->def_flags, len); + if (error) + return error; /* * mm->mmap_sem is required to protect against another thread @@ -2726,7 +2739,8 @@ void exit_mmap(struct mm_struct *mm) } vm_unacct_memory(nr_accounted); - WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); + WARN_ON(atomic_long_read(&mm->nr_ptes) > + (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); } /* Insert vm structure into process list sorted by address @@ -2860,6 +2874,31 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages) return 1; } +static int special_mapping_fault(struct vm_area_struct *vma, + struct vm_fault *vmf); + +/* + * Having a close hook prevents vma merging regardless of flags. + */ +static void special_mapping_close(struct vm_area_struct *vma) +{ +} + +static const char *special_mapping_name(struct vm_area_struct *vma) +{ + return ((struct vm_special_mapping *)vma->vm_private_data)->name; +} + +static const struct vm_operations_struct special_mapping_vmops = { + .close = special_mapping_close, + .fault = special_mapping_fault, + .name = special_mapping_name, +}; + +static const struct vm_operations_struct legacy_special_mapping_vmops = { + .close = special_mapping_close, + .fault = special_mapping_fault, +}; static int special_mapping_fault(struct vm_area_struct *vma, struct vm_fault *vmf) @@ -2875,7 +2914,13 @@ static int special_mapping_fault(struct vm_area_struct *vma, */ pgoff = vmf->pgoff - vma->vm_pgoff; - for (pages = vma->vm_private_data; pgoff && *pages; ++pages) + if (vma->vm_ops == &legacy_special_mapping_vmops) + pages = vma->vm_private_data; + else + pages = ((struct vm_special_mapping *)vma->vm_private_data)-> + pages; + + for (; pgoff && *pages; ++pages) pgoff--; if (*pages) { @@ -2888,37 +2933,18 @@ static int special_mapping_fault(struct vm_area_struct *vma, return VM_FAULT_SIGBUS; } -/* - * Having a close hook prevents vma merging regardless of flags. - */ -static void special_mapping_close(struct vm_area_struct *vma) -{ -} - -static const struct vm_operations_struct special_mapping_vmops = { - .close = special_mapping_close, - .fault = special_mapping_fault, -}; - -/* - * Called with mm->mmap_sem held for writing. - * Insert a new vma covering the given region, with the given flags. - * Its pages are supplied by the given array of struct page *. - * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated. - * The region past the last page supplied will always produce SIGBUS. - * The array pointer and the pages it points to are assumed to stay alive - * for as long as this mapping might exist. - */ -int install_special_mapping(struct mm_struct *mm, - unsigned long addr, unsigned long len, - unsigned long vm_flags, struct page **pages) +static struct vm_area_struct *__install_special_mapping( + struct mm_struct *mm, + unsigned long addr, unsigned long len, + unsigned long vm_flags, const struct vm_operations_struct *ops, + void *priv) { int ret; struct vm_area_struct *vma; vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (unlikely(vma == NULL)) - return -ENOMEM; + return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&vma->anon_vma_chain); vma->vm_mm = mm; @@ -2928,8 +2954,8 @@ int install_special_mapping(struct mm_struct *mm, vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); - vma->vm_ops = &special_mapping_vmops; - vma->vm_private_data = pages; + vma->vm_ops = ops; + vma->vm_private_data = priv; ret = insert_vm_struct(mm, vma); if (ret) @@ -2939,11 +2965,40 @@ int install_special_mapping(struct mm_struct *mm, perf_event_mmap(vma); - return 0; + return vma; out: kmem_cache_free(vm_area_cachep, vma); - return ret; + return ERR_PTR(ret); +} + +/* + * Called with mm->mmap_sem held for writing. + * Insert a new vma covering the given region, with the given flags. + * Its pages are supplied by the given array of struct page *. + * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated. + * The region past the last page supplied will always produce SIGBUS. + * The array pointer and the pages it points to are assumed to stay alive + * for as long as this mapping might exist. + */ +struct vm_area_struct *_install_special_mapping( + struct mm_struct *mm, + unsigned long addr, unsigned long len, + unsigned long vm_flags, const struct vm_special_mapping *spec) +{ + return __install_special_mapping(mm, addr, len, vm_flags, + &special_mapping_vmops, (void *)spec); +} + +int install_special_mapping(struct mm_struct *mm, + unsigned long addr, unsigned long len, + unsigned long vm_flags, struct page **pages) +{ + struct vm_area_struct *vma = __install_special_mapping( + mm, addr, len, vm_flags, &legacy_special_mapping_vmops, + (void *)pages); + + return PTR_ERR_OR_ZERO(vma); } static DEFINE_MUTEX(mm_all_locks_mutex); @@ -3141,7 +3196,7 @@ static int init_user_reserve(void) sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17); return 0; } -module_init(init_user_reserve) +subsys_initcall(init_user_reserve); /* * Initialise sysctl_admin_reserve_kbytes. @@ -3162,7 +3217,7 @@ static int init_admin_reserve(void) sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13); return 0; } -module_init(init_admin_reserve) +subsys_initcall(init_admin_reserve); /* * Reinititalise user and admin reserves if memory is added or removed. @@ -3228,8 +3283,8 @@ static struct notifier_block reserve_mem_nb = { static int __meminit init_reserve_notifier(void) { if (register_hotmemory_notifier(&reserve_mem_nb)) - printk("Failed registering memory add/remove notifier for admin reserve"); + pr_err("Failed registering memory add/remove notifier for admin reserve\n"); return 0; } -module_init(init_reserve_notifier) +subsys_initcall(init_reserve_notifier); |
