path: root/mm/mmap.c
Diffstat (limited to 'mm/mmap.c')
-rw-r--r--	mm/mmap.c	250
1 file changed, 153 insertions(+), 97 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 834b2d785f1..129b847d30c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -6,10 +6,13 @@
* Address space accounting code <alan@lxorguk.ukuu.org.uk>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
+#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
@@ -36,6 +39,7 @@
#include <linux/sched/sysctl.h>
#include <linux/notifier.h>
#include <linux/memory.h>
+#include <linux/printk.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -86,6 +90,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */
+unsigned long sysctl_overcommit_kbytes __read_mostly;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
@@ -359,20 +364,20 @@ static int browse_rb(struct rb_root *root)
struct vm_area_struct *vma;
vma = rb_entry(nd, struct vm_area_struct, vm_rb);
if (vma->vm_start < prev) {
- printk("vm_start %lx prev %lx\n", vma->vm_start, prev);
+ pr_info("vm_start %lx prev %lx\n", vma->vm_start, prev);
bug = 1;
}
if (vma->vm_start < pend) {
- printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
+ pr_info("vm_start %lx pend %lx\n", vma->vm_start, pend);
bug = 1;
}
if (vma->vm_start > vma->vm_end) {
- printk("vm_end %lx < vm_start %lx\n",
+ pr_info("vm_end %lx < vm_start %lx\n",
vma->vm_end, vma->vm_start);
bug = 1;
}
if (vma->rb_subtree_gap != vma_compute_subtree_gap(vma)) {
- printk("free gap %lx, correct %lx\n",
+ pr_info("free gap %lx, correct %lx\n",
vma->rb_subtree_gap,
vma_compute_subtree_gap(vma));
bug = 1;
@@ -386,7 +391,7 @@ static int browse_rb(struct rb_root *root)
for (nd = pn; nd; nd = rb_prev(nd))
j++;
if (i != j) {
- printk("backwards %d, forwards %d\n", j, i);
+ pr_info("backwards %d, forwards %d\n", j, i);
bug = 1;
}
return bug ? -1 : i;
@@ -404,7 +409,7 @@ static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
}
}
-void validate_mm(struct mm_struct *mm)
+static void validate_mm(struct mm_struct *mm)
{
int bug = 0;
int i = 0;
@@ -421,17 +426,17 @@ void validate_mm(struct mm_struct *mm)
i++;
}
if (i != mm->map_count) {
- printk("map_count %d vm_next %d\n", mm->map_count, i);
+ pr_info("map_count %d vm_next %d\n", mm->map_count, i);
bug = 1;
}
if (highest_address != mm->highest_vm_end) {
- printk("mm->highest_vm_end %lx, found %lx\n",
+ pr_info("mm->highest_vm_end %lx, found %lx\n",
mm->highest_vm_end, highest_address);
bug = 1;
}
i = browse_rb(&mm->mm_rb);
if (i != mm->map_count) {
- printk("map_count %d rb %d\n", mm->map_count, i);
+ pr_info("map_count %d rb %d\n", mm->map_count, i);
bug = 1;
}
BUG_ON(bug);
@@ -638,11 +643,10 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
{
struct address_space *mapping = NULL;
- if (vma->vm_file)
+ if (vma->vm_file) {
mapping = vma->vm_file->f_mapping;
-
- if (mapping)
mutex_lock(&mapping->i_mmap_mutex);
+ }
__vma_link(mm, vma, prev, rb_link, rb_parent);
__vma_link_file(vma);
@@ -680,8 +684,9 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
prev->vm_next = next = vma->vm_next;
if (next)
next->vm_prev = prev;
- if (mm->mmap_cache == vma)
- mm->mmap_cache = prev;
+
+ /* Kill the cache */
+ vmacache_invalidate(mm);
}
/*
@@ -893,7 +898,15 @@ again: remove_next = 1 + (end > next->vm_end);
static inline int is_mergeable_vma(struct vm_area_struct *vma,
struct file *file, unsigned long vm_flags)
{
- if (vma->vm_flags ^ vm_flags)
+ /*
+ * VM_SOFTDIRTY should not prevent VMA merging if the flags match in
+ * everything except the dirty bit -- the caller should mark the
+ * merged VMA as dirty instead. If the dirty bit were included in the
+ * comparison, we would increase pressure on the memory system by
+ * forcing the kernel to generate new VMAs where old ones could have
+ * been extended.
+ */
+ if ((vma->vm_flags ^ vm_flags) & ~VM_SOFTDIRTY)
return 0;
if (vma->vm_file != file)
return 0;
@@ -1082,7 +1095,7 @@ static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *
return a->vm_end == b->vm_start &&
mpol_equal(vma_policy(a), vma_policy(b)) &&
a->vm_file == b->vm_file &&
- !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
+ !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC|VM_SOFTDIRTY)) &&
b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
}
@@ -1190,6 +1203,24 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
return hint;
}
+static inline int mlock_future_check(struct mm_struct *mm,
+ unsigned long flags,
+ unsigned long len)
+{
+ unsigned long locked, lock_limit;
+
+ /* mlock MCL_FUTURE? */
+ if (flags & VM_LOCKED) {
+ locked = len >> PAGE_SHIFT;
+ locked += mm->locked_vm;
+ lock_limit = rlimit(RLIMIT_MEMLOCK);
+ lock_limit >>= PAGE_SHIFT;
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ return -EAGAIN;
+ }
+ return 0;
+}
+
/*
* The caller must hold down_write(&current->mm->mmap_sem).
*/
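mlock_future_check() factors out the duplicated MCL_FUTURE logic: convert the request length plus mm->locked_vm to pages and refuse with -EAGAIN if the sum exceeds RLIMIT_MEMLOCK, unless the caller has CAP_IPC_LOCK. A rough userspace analogue of the same arithmetic, for illustration only (would_exceed_memlock() is an invented helper and ignores the CAP_IPC_LOCK override):

#include <stdio.h>
#include <sys/resource.h>
#include <unistd.h>

/* Same page-granular arithmetic as the kernel check, minus CAP_IPC_LOCK. */
static int would_exceed_memlock(size_t len, size_t already_locked)
{
	struct rlimit rl;
	long page = sysconf(_SC_PAGESIZE);
	unsigned long locked, limit;

	if (getrlimit(RLIMIT_MEMLOCK, &rl) != 0)
		return -1;
	if (rl.rlim_cur == RLIM_INFINITY)
		return 0;

	locked = len / page + already_locked / page;
	limit  = rl.rlim_cur / page;
	return locked > limit;		/* 1 would mean -EAGAIN in the kernel */
}

int main(void)
{
	printf("16 MiB exceeds RLIMIT_MEMLOCK: %d\n",
	       would_exceed_memlock(16UL << 20, 0));
	return 0;
}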
@@ -1251,16 +1282,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
if (!can_do_mlock())
return -EPERM;
- /* mlock MCL_FUTURE? */
- if (vm_flags & VM_LOCKED) {
- unsigned long locked, lock_limit;
- locked = len >> PAGE_SHIFT;
- locked += mm->locked_vm;
- lock_limit = rlimit(RLIMIT_MEMLOCK);
- lock_limit >>= PAGE_SHIFT;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK))
- return -EAGAIN;
- }
+ if (mlock_future_check(mm, vm_flags, len))
+ return -EAGAIN;
if (file) {
struct inode *inode = file_inode(file);
@@ -1280,7 +1303,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
/*
* Make sure there are no mandatory locks on the file.
*/
- if (locks_verify_locked(inode))
+ if (locks_verify_locked(file))
return -EAGAIN;
vm_flags |= VM_SHARED | VM_MAYSHARE;
@@ -1970,34 +1993,33 @@ EXPORT_SYMBOL(get_unmapped_area);
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
- struct vm_area_struct *vma = NULL;
+ struct rb_node *rb_node;
+ struct vm_area_struct *vma;
/* Check the cache first. */
- /* (Cache hit rate is typically around 35%.) */
- vma = ACCESS_ONCE(mm->mmap_cache);
- if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
- struct rb_node *rb_node;
+ vma = vmacache_find(mm, addr);
+ if (likely(vma))
+ return vma;
- rb_node = mm->mm_rb.rb_node;
- vma = NULL;
+ rb_node = mm->mm_rb.rb_node;
+ vma = NULL;
- while (rb_node) {
- struct vm_area_struct *vma_tmp;
-
- vma_tmp = rb_entry(rb_node,
- struct vm_area_struct, vm_rb);
-
- if (vma_tmp->vm_end > addr) {
- vma = vma_tmp;
- if (vma_tmp->vm_start <= addr)
- break;
- rb_node = rb_node->rb_left;
- } else
- rb_node = rb_node->rb_right;
- }
- if (vma)
- mm->mmap_cache = vma;
+ while (rb_node) {
+ struct vm_area_struct *tmp;
+
+ tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+
+ if (tmp->vm_end > addr) {
+ vma = tmp;
+ if (tmp->vm_start <= addr)
+ break;
+ rb_node = rb_node->rb_left;
+ } else
+ rb_node = rb_node->rb_right;
}
+
+ if (vma)
+ vmacache_update(addr, vma);
return vma;
}
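find_vma() now consults the per-thread VMA cache first and only walks the rbtree on a miss, recording the result with vmacache_update(); the other hunks in this patch call vmacache_invalidate() whenever the VMA set changes. A self-contained sketch of that cache-in-front-of-a-slower-lookup pattern (not the kernel's vmacache; the names and the single-slot policy here are invented):

#include <stddef.h>
#include <stdio.h>

struct range { unsigned long start, end; };	/* half-open [start, end) */

/* One cached slot kept in front of a slower ordered lookup. */
static const struct range *cache;

/* Stands in for the rbtree walk: first range with end > addr. */
static const struct range *slow_lookup(const struct range *tbl, size_t n,
				       unsigned long addr)
{
	for (size_t i = 0; i < n; i++)
		if (tbl[i].end > addr)
			return &tbl[i];
	return NULL;
}

static const struct range *lookup(const struct range *tbl, size_t n,
				  unsigned long addr)
{
	if (cache && cache->start <= addr && addr < cache->end)
		return cache;			/* fast path: cache hit */

	const struct range *r = slow_lookup(tbl, n, addr);
	if (r)
		cache = r;			/* remember for next time */
	return r;
}

static void invalidate_cache(void)		/* call on any table change */
{
	cache = NULL;
}

int main(void)
{
	struct range tbl[] = { { 0x1000, 0x2000 }, { 0x4000, 0x6000 } };

	printf("%lx\n", lookup(tbl, 2, 0x4800)->start);	/* miss, fills cache */
	printf("%lx\n", lookup(tbl, 2, 0x4ff0)->start);	/* hit */
	invalidate_cache();
	return 0;
}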
@@ -2369,7 +2391,9 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
} else
mm->highest_vm_end = prev ? prev->vm_end : 0;
tail_vma->vm_next = NULL;
- mm->mmap_cache = NULL; /* Kill the cache. */
+
+ /* Kill the cache */
+ vmacache_invalidate(mm);
}
/*
@@ -2591,18 +2615,9 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
if (error & ~PAGE_MASK)
return error;
- /*
- * mlock MCL_FUTURE?
- */
- if (mm->def_flags & VM_LOCKED) {
- unsigned long locked, lock_limit;
- locked = len >> PAGE_SHIFT;
- locked += mm->locked_vm;
- lock_limit = rlimit(RLIMIT_MEMLOCK);
- lock_limit >>= PAGE_SHIFT;
- if (locked > lock_limit && !capable(CAP_IPC_LOCK))
- return -EAGAIN;
- }
+ error = mlock_future_check(mm, mm->def_flags, len);
+ if (error)
+ return error;
/*
* mm->mmap_sem is required to protect against another thread
@@ -2859,6 +2874,31 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
return 1;
}
+static int special_mapping_fault(struct vm_area_struct *vma,
+ struct vm_fault *vmf);
+
+/*
+ * Having a close hook prevents vma merging regardless of flags.
+ */
+static void special_mapping_close(struct vm_area_struct *vma)
+{
+}
+
+static const char *special_mapping_name(struct vm_area_struct *vma)
+{
+ return ((struct vm_special_mapping *)vma->vm_private_data)->name;
+}
+
+static const struct vm_operations_struct special_mapping_vmops = {
+ .close = special_mapping_close,
+ .fault = special_mapping_fault,
+ .name = special_mapping_name,
+};
+
+static const struct vm_operations_struct legacy_special_mapping_vmops = {
+ .close = special_mapping_close,
+ .fault = special_mapping_fault,
+};
static int special_mapping_fault(struct vm_area_struct *vma,
struct vm_fault *vmf)
@@ -2874,7 +2914,13 @@ static int special_mapping_fault(struct vm_area_struct *vma,
*/
pgoff = vmf->pgoff - vma->vm_pgoff;
- for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
+ if (vma->vm_ops == &legacy_special_mapping_vmops)
+ pages = vma->vm_private_data;
+ else
+ pages = ((struct vm_special_mapping *)vma->vm_private_data)->
+ pages;
+
+ for (; pgoff && *pages; ++pages)
pgoff--;
if (*pages) {
@@ -2887,37 +2933,18 @@ static int special_mapping_fault(struct vm_area_struct *vma,
return VM_FAULT_SIGBUS;
}
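With two vm_ops variants in play, the fault handler first picks the page array either straight from vm_private_data (legacy callers) or from the pages field of the vm_special_mapping, then walks the NULL-terminated array pgoff entries forward; falling off the end produces SIGBUS. A tiny userspace sketch of that walk (page_for_offset() is an invented name, not a kernel helper):

#include <stdio.h>

/* Return the pgoff-th entry of a NULL-terminated array, or NULL past the end. */
static const char *page_for_offset(const char **pages, unsigned long pgoff)
{
	for (; pgoff && *pages; ++pages)
		pgoff--;
	return *pages;		/* NULL here corresponds to VM_FAULT_SIGBUS */
}

int main(void)
{
	const char *pages[] = { "page0", "page1", NULL };
	const char *hit  = page_for_offset(pages, 1);
	const char *miss = page_for_offset(pages, 5);

	printf("%s\n", hit  ? hit  : "SIGBUS");		/* page1 */
	printf("%s\n", miss ? miss : "SIGBUS");		/* SIGBUS */
	return 0;
}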
-/*
- * Having a close hook prevents vma merging regardless of flags.
- */
-static void special_mapping_close(struct vm_area_struct *vma)
-{
-}
-
-static const struct vm_operations_struct special_mapping_vmops = {
- .close = special_mapping_close,
- .fault = special_mapping_fault,
-};
-
-/*
- * Called with mm->mmap_sem held for writing.
- * Insert a new vma covering the given region, with the given flags.
- * Its pages are supplied by the given array of struct page *.
- * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
- * The region past the last page supplied will always produce SIGBUS.
- * The array pointer and the pages it points to are assumed to stay alive
- * for as long as this mapping might exist.
- */
-int install_special_mapping(struct mm_struct *mm,
- unsigned long addr, unsigned long len,
- unsigned long vm_flags, struct page **pages)
+static struct vm_area_struct *__install_special_mapping(
+ struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ unsigned long vm_flags, const struct vm_operations_struct *ops,
+ void *priv)
{
int ret;
struct vm_area_struct *vma;
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (unlikely(vma == NULL))
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&vma->anon_vma_chain);
vma->vm_mm = mm;
@@ -2927,8 +2954,8 @@ int install_special_mapping(struct mm_struct *mm,
vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
- vma->vm_ops = &special_mapping_vmops;
- vma->vm_private_data = pages;
+ vma->vm_ops = ops;
+ vma->vm_private_data = priv;
ret = insert_vm_struct(mm, vma);
if (ret)
@@ -2938,11 +2965,40 @@ int install_special_mapping(struct mm_struct *mm,
perf_event_mmap(vma);
- return 0;
+ return vma;
out:
kmem_cache_free(vm_area_cachep, vma);
- return ret;
+ return ERR_PTR(ret);
+}
+
+/*
+ * Called with mm->mmap_sem held for writing.
+ * Insert a new vma covering the given region, with the given flags.
+ * Its pages are supplied by the given array of struct page *.
+ * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
+ * The region past the last page supplied will always produce SIGBUS.
+ * The array pointer and the pages it points to are assumed to stay alive
+ * for as long as this mapping might exist.
+ */
+struct vm_area_struct *_install_special_mapping(
+ struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ unsigned long vm_flags, const struct vm_special_mapping *spec)
+{
+ return __install_special_mapping(mm, addr, len, vm_flags,
+ &special_mapping_vmops, (void *)spec);
+}
+
+int install_special_mapping(struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ unsigned long vm_flags, struct page **pages)
+{
+ struct vm_area_struct *vma = __install_special_mapping(
+ mm, addr, len, vm_flags, &legacy_special_mapping_vmops,
+ (void *)pages);
+
+ return PTR_ERR_OR_ZERO(vma);
}
static DEFINE_MUTEX(mm_all_locks_mutex);
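Both entry points now funnel through __install_special_mapping(): the new _install_special_mapping() takes a struct vm_special_mapping (name plus page array) and returns the VMA or an ERR_PTR, while the old install_special_mapping() keeps its int return and bare page-array interface via the legacy vm_ops. A hedged sketch of how arch code might call the new variant (my_vdso_pages, my_mapping and map_my_vdso() are invented for illustration, not part of this patch):

/* Illustrative caller only, e.g. from an arch's vDSO setup code. */
static struct page *my_vdso_pages[2];	/* [0] filled at init; trailing NULL ends the fault walk */

static const struct vm_special_mapping my_mapping = {
	.name  = "[my_vdso]",		/* reported via the ->name() hook */
	.pages = my_vdso_pages,
};

/* Must be called with mm->mmap_sem held for writing, per the comment above. */
static int map_my_vdso(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma;

	vma = _install_special_mapping(mm, addr, PAGE_SIZE,
				       VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC,
				       &my_mapping);
	return PTR_ERR_OR_ZERO(vma);
}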
@@ -3140,7 +3196,7 @@ static int init_user_reserve(void)
sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
return 0;
}
-module_init(init_user_reserve)
+subsys_initcall(init_user_reserve);
/*
* Initialise sysctl_admin_reserve_kbytes.
@@ -3161,7 +3217,7 @@ static int init_admin_reserve(void)
sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
return 0;
}
-module_init(init_admin_reserve)
+subsys_initcall(init_admin_reserve);
/*
* Reinitialise user and admin reserves if memory is added or removed.
@@ -3227,8 +3283,8 @@ static struct notifier_block reserve_mem_nb = {
static int __meminit init_reserve_notifier(void)
{
if (register_hotmemory_notifier(&reserve_mem_nb))
- printk("Failed registering memory add/remove notifier for admin reserve");
+ pr_err("Failed registering memory add/remove notifier for admin reserve\n");
return 0;
}
-module_init(init_reserve_notifier)
+subsys_initcall(init_reserve_notifier);
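All three setup routines are built-in only, so switching them from module_init() to subsys_initcall() (and adding the previously missing semicolons) runs them at an earlier initcall level; for built-in code, module_init() would otherwise expand to a plain device-level initcall. The general shape, as a generic sketch (my_reserve_init() is an invented name):

#include <linux/init.h>
#include <linux/printk.h>

/* Runs once at boot, at subsys initcall time, before device initcalls. */
static int __init my_reserve_init(void)
{
	pr_info("reserve defaults initialised\n");
	return 0;
}
subsys_initcall(my_reserve_init);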