aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig22
-rw-r--r--mm/Makefile5
-rw-r--r--mm/allocpercpu.c177
-rw-r--r--mm/bootmem.c8
-rw-r--r--mm/filemap.c15
-rw-r--r--mm/hugetlb.c551
-rw-r--r--mm/hwpoison-inject.c113
-rw-r--r--mm/internal.h35
-rw-r--r--mm/kmemleak.c188
-rw-r--r--mm/ksm.c953
-rw-r--r--mm/madvise.c21
-rw-r--r--mm/memcontrol.c442
-rw-r--r--mm/memory-failure.c571
-rw-r--r--mm/memory.c35
-rw-r--r--mm/memory_hotplug.c16
-rw-r--r--mm/mempolicy.c69
-rw-r--r--mm/migrate.c133
-rw-r--r--mm/mincore.c37
-rw-r--r--mm/mlock.c45
-rw-r--r--mm/mmap.c50
-rw-r--r--mm/nommu.c8
-rw-r--r--mm/oom_kill.c103
-rw-r--r--mm/page_alloc.c119
-rw-r--r--mm/page_io.c17
-rw-r--r--mm/pagewalk.c32
-rw-r--r--mm/percpu.c24
-rw-r--r--mm/readahead.c12
-rw-r--r--mm/rmap.c354
-rw-r--r--mm/shmem.c84
-rw-r--r--mm/shmem_acl.c171
-rw-r--r--mm/slab.c42
-rw-r--r--mm/slub.c4
-rw-r--r--mm/swapfile.c847
-rw-r--r--mm/truncate.c6
-rw-r--r--mm/vmalloc.c11
-rw-r--r--mm/vmscan.c321
-rw-r--r--mm/vmstat.c10
37 files changed, 3622 insertions, 2029 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 44cf6f0a3a6..17b8947aa7d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -158,11 +158,13 @@ config PAGEFLAGS_EXTENDED
# Default to 4 for wider testing, though 8 might be more appropriate.
# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
# PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
+# DEBUG_SPINLOCK and DEBUG_LOCK_ALLOC spinlock_t also enlarge struct page.
#
config SPLIT_PTLOCK_CPUS
int
- default "4096" if ARM && !CPU_CACHE_VIPT
- default "4096" if PARISC && !PA20
+ default "999999" if ARM && !CPU_CACHE_VIPT
+ default "999999" if PARISC && !PA20
+ default "999999" if DEBUG_SPINLOCK || DEBUG_LOCK_ALLOC
default "4"
#
@@ -200,14 +202,6 @@ config VIRT_TO_BUS
def_bool y
depends on !ARCH_NO_VIRT_TO_BUS
-config HAVE_MLOCK
- bool
- default y if MMU=y
-
-config HAVE_MLOCKED_PAGE_BIT
- bool
- default y if HAVE_MLOCK=y
-
config MMU_NOTIFIER
bool
@@ -218,7 +212,7 @@ config KSM
Enable Kernel Samepage Merging: KSM periodically scans those areas
of an application's address space that an app has advised may be
mergeable. When it finds pages of identical content, it replaces
- the many instances by a single resident page with that content, so
+ the many instances by a single page with that content, so
saving memory until one or another app needs to modify the content.
Recommended for use with KVM, or with other duplicative applications.
See Documentation/vm/ksm.txt for more information: KSM is inactive
@@ -227,6 +221,7 @@ config KSM
config DEFAULT_MMAP_MIN_ADDR
int "Low address space to protect from user allocation"
+ depends on MMU
default 4096
help
This is the portion of low virtual memory which should be protected
@@ -257,8 +252,9 @@ config MEMORY_FAILURE
special hardware support and typically ECC memory.
config HWPOISON_INJECT
- tristate "Poison pages injector"
- depends on MEMORY_FAILURE && DEBUG_KERNEL
+ tristate "HWPoison pages injector"
+ depends on MEMORY_FAILURE && DEBUG_KERNEL && PROC_FS
+ select PROC_PAGE_MONITOR
config NOMMU_INITIAL_TRIM_EXCESS
int "Turn on mmap() excess space trimming before booting"
diff --git a/mm/Makefile b/mm/Makefile
index ebf849042ed..7a68d2ab556 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -22,7 +22,6 @@ obj-$(CONFIG_HUGETLBFS) += hugetlb.o
obj-$(CONFIG_NUMA) += mempolicy.o
obj-$(CONFIG_SPARSEMEM) += sparse.o
obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
-obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
obj-$(CONFIG_SLOB) += slob.o
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
obj-$(CONFIG_KSM) += ksm.o
@@ -34,11 +33,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
-ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA
obj-$(CONFIG_SMP) += percpu.o
-else
-obj-$(CONFIG_SMP) += allocpercpu.o
-endif
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
deleted file mode 100644
index df34ceae0c6..00000000000
--- a/mm/allocpercpu.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * linux/mm/allocpercpu.c
- *
- * Separated from slab.c August 11, 2006 Christoph Lameter
- */
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/bootmem.h>
-#include <asm/sections.h>
-
-#ifndef cache_line_size
-#define cache_line_size() L1_CACHE_BYTES
-#endif
-
-/**
- * percpu_depopulate - depopulate per-cpu data for given cpu
- * @__pdata: per-cpu data to depopulate
- * @cpu: depopulate per-cpu data for this cpu
- *
- * Depopulating per-cpu data for a cpu going offline would be a typical
- * use case. You need to register a cpu hotplug handler for that purpose.
- */
-static void percpu_depopulate(void *__pdata, int cpu)
-{
- struct percpu_data *pdata = __percpu_disguise(__pdata);
-
- kfree(pdata->ptrs[cpu]);
- pdata->ptrs[cpu] = NULL;
-}
-
-/**
- * percpu_depopulate_mask - depopulate per-cpu data for some cpu's
- * @__pdata: per-cpu data to depopulate
- * @mask: depopulate per-cpu data for cpu's selected through mask bits
- */
-static void __percpu_depopulate_mask(void *__pdata, const cpumask_t *mask)
-{
- int cpu;
- for_each_cpu_mask_nr(cpu, *mask)
- percpu_depopulate(__pdata, cpu);
-}
-
-#define percpu_depopulate_mask(__pdata, mask) \
- __percpu_depopulate_mask((__pdata), &(mask))
-
-/**
- * percpu_populate - populate per-cpu data for given cpu
- * @__pdata: per-cpu data to populate further
- * @size: size of per-cpu object
- * @gfp: may sleep or not etc.
- * @cpu: populate per-data for this cpu
- *
- * Populating per-cpu data for a cpu coming online would be a typical
- * use case. You need to register a cpu hotplug handler for that purpose.
- * Per-cpu object is populated with zeroed buffer.
- */
-static void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
-{
- struct percpu_data *pdata = __percpu_disguise(__pdata);
- int node = cpu_to_node(cpu);
-
- /*
- * We should make sure each CPU gets private memory.
- */
- size = roundup(size, cache_line_size());
-
- BUG_ON(pdata->ptrs[cpu]);
- if (node_online(node))
- pdata->ptrs[cpu] = kmalloc_node(size, gfp|__GFP_ZERO, node);
- else
- pdata->ptrs[cpu] = kzalloc(size, gfp);
- return pdata->ptrs[cpu];
-}
-
-/**
- * percpu_populate_mask - populate per-cpu data for more cpu's
- * @__pdata: per-cpu data to populate further
- * @size: size of per-cpu object
- * @gfp: may sleep or not etc.
- * @mask: populate per-cpu data for cpu's selected through mask bits
- *
- * Per-cpu objects are populated with zeroed buffers.
- */
-static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
- cpumask_t *mask)
-{
- cpumask_t populated;
- int cpu;
-
- cpus_clear(populated);
- for_each_cpu_mask_nr(cpu, *mask)
- if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) {
- __percpu_depopulate_mask(__pdata, &populated);
- return -ENOMEM;
- } else
- cpu_set(cpu, populated);
- return 0;
-}
-
-#define percpu_populate_mask(__pdata, size, gfp, mask) \
- __percpu_populate_mask((__pdata), (size), (gfp), &(mask))
-
-/**
- * alloc_percpu - initial setup of per-cpu data
- * @size: size of per-cpu object
- * @align: alignment
- *
- * Allocate dynamic percpu area. Percpu objects are populated with
- * zeroed buffers.
- */
-void *__alloc_percpu(size_t size, size_t align)
-{
- /*
- * We allocate whole cache lines to avoid false sharing
- */
- size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size());
- void *pdata = kzalloc(sz, GFP_KERNEL);
- void *__pdata = __percpu_disguise(pdata);
-
- /*
- * Can't easily make larger alignment work with kmalloc. WARN
- * on it. Larger alignment should only be used for module
- * percpu sections on SMP for which this path isn't used.
- */
- WARN_ON_ONCE(align > SMP_CACHE_BYTES);
-
- if (unlikely(!pdata))
- return NULL;
- if (likely(!__percpu_populate_mask(__pdata, size, GFP_KERNEL,
- &cpu_possible_map)))
- return __pdata;
- kfree(pdata);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(__alloc_percpu);
-
-/**
- * free_percpu - final cleanup of per-cpu data
- * @__pdata: object to clean up
- *
- * We simply clean up any per-cpu object left. No need for the client to
- * track and specify through a bis mask which per-cpu objects are to free.
- */
-void free_percpu(void *__pdata)
-{
- if (unlikely(!__pdata))
- return;
- __percpu_depopulate_mask(__pdata, cpu_possible_mask);
- kfree(__percpu_disguise(__pdata));
-}
-EXPORT_SYMBOL_GPL(free_percpu);
-
-/*
- * Generic percpu area setup.
- */
-#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-
-EXPORT_SYMBOL(__per_cpu_offset);
-
-void __init setup_per_cpu_areas(void)
-{
- unsigned long size, i;
- char *ptr;
- unsigned long nr_possible_cpus = num_possible_cpus();
-
- /* Copy section for each CPU (we discard the original) */
- size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
- ptr = alloc_bootmem_pages(size * nr_possible_cpus);
-
- for_each_possible_cpu(i) {
- __per_cpu_offset[i] = ptr - __per_cpu_start;
- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
- ptr += size;
- }
-}
-#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
diff --git a/mm/bootmem.c b/mm/bootmem.c
index d1dc23cc7f1..7d1486875e1 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -432,8 +432,8 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
return mark_bootmem(start, end, 1, flags);
}
-static unsigned long align_idx(struct bootmem_data *bdata, unsigned long idx,
- unsigned long step)
+static unsigned long __init align_idx(struct bootmem_data *bdata,
+ unsigned long idx, unsigned long step)
{
unsigned long base = bdata->node_min_pfn;
@@ -445,8 +445,8 @@ static unsigned long align_idx(struct bootmem_data *bdata, unsigned long idx,
return ALIGN(base + idx, step) - base;
}
-static unsigned long align_off(struct bootmem_data *bdata, unsigned long off,
- unsigned long align)
+static unsigned long __init align_off(struct bootmem_data *bdata,
+ unsigned long off, unsigned long align)
{
unsigned long base = PFN_PHYS(bdata->node_min_pfn);
diff --git a/mm/filemap.c b/mm/filemap.c
index 8b4d88f9249..96ac6b0eb6c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2240,7 +2240,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
size_t count, ssize_t written)
{
struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
ssize_t status;
struct iov_iter i;
@@ -2252,15 +2251,6 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
*ppos = pos + status;
}
- /*
- * If we get here for O_DIRECT writes then we must have fallen through
- * to buffered writes (block instantiation inside i_size). So we sync
- * the file data here, to try to honour O_DIRECT expectations.
- */
- if (unlikely(file->f_flags & O_DIRECT) && written)
- status = filemap_write_and_wait_range(mapping,
- pos, pos + written - 1);
-
return written ? written : status;
}
EXPORT_SYMBOL(generic_file_buffered_write);
@@ -2359,10 +2349,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
* semantics.
*/
endbyte = pos + written_buffered - written - 1;
- err = do_sync_mapping_range(file->f_mapping, pos, endbyte,
- SYNC_FILE_RANGE_WAIT_BEFORE|
- SYNC_FILE_RANGE_WRITE|
- SYNC_FILE_RANGE_WAIT_AFTER);
+ err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
if (err == 0) {
written = written_buffered;
invalidate_mapping_pages(mapping,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5d7601b0287..65f38c21820 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -24,6 +24,7 @@
#include <asm/io.h>
#include <linux/hugetlb.h>
+#include <linux/node.h>
#include "internal.h"
const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
@@ -622,42 +623,66 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
}
/*
- * Use a helper variable to find the next node and then
- * copy it back to next_nid_to_alloc afterwards:
- * otherwise there's a window in which a racer might
- * pass invalid nid MAX_NUMNODES to alloc_pages_exact_node.
- * But we don't need to use a spin_lock here: it really
- * doesn't matter if occasionally a racer chooses the
- * same nid as we do. Move nid forward in the mask even
- * if we just successfully allocated a hugepage so that
- * the next caller gets hugepages on the next node.
+ * common helper functions for hstate_next_node_to_{alloc|free}.
+ * We may have allocated or freed a huge page based on a different
+ * nodes_allowed previously, so h->next_node_to_{alloc|free} might
+ * be outside of *nodes_allowed. Ensure that we use an allowed
+ * node for alloc or free.
*/
-static int hstate_next_node_to_alloc(struct hstate *h)
+static int next_node_allowed(int nid, nodemask_t *nodes_allowed)
{
- int next_nid;
- next_nid = next_node(h->next_nid_to_alloc, node_online_map);
- if (next_nid == MAX_NUMNODES)
- next_nid = first_node(node_online_map);
- h->next_nid_to_alloc = next_nid;
- return next_nid;
+ nid = next_node(nid, *nodes_allowed);
+ if (nid == MAX_NUMNODES)
+ nid = first_node(*nodes_allowed);
+ VM_BUG_ON(nid >= MAX_NUMNODES);
+
+ return nid;
+}
+
+static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
+{
+ if (!node_isset(nid, *nodes_allowed))
+ nid = next_node_allowed(nid, nodes_allowed);
+ return nid;
+}
+
+/*
+ * returns the previously saved node ["this node"] from which to
+ * allocate a persistent huge page for the pool and advance the
+ * next node from which to allocate, handling wrap at end of node
+ * mask.
+ */
+static int hstate_next_node_to_alloc(struct hstate *h,
+ nodemask_t *nodes_allowed)
+{
+ int nid;
+
+ VM_BUG_ON(!nodes_allowed);
+
+ nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
+ h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
+
+ return nid;
}
-static int alloc_fresh_huge_page(struct hstate *h)
+static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
{
struct page *page;
int start_nid;
int next_nid;
int ret = 0;
- start_nid = h->next_nid_to_alloc;
+ start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
next_nid = start_nid;
do {
page = alloc_fresh_huge_page_node(h, next_nid);
- if (page)
+ if (page) {
ret = 1;
- next_nid = hstate_next_node_to_alloc(h);
- } while (!page && next_nid != start_nid);
+ break;
+ }
+ next_nid = hstate_next_node_to_alloc(h, nodes_allowed);
+ } while (next_nid != start_nid);
if (ret)
count_vm_event(HTLB_BUDDY_PGALLOC);
@@ -668,17 +693,21 @@ static int alloc_fresh_huge_page(struct hstate *h)
}
/*
- * helper for free_pool_huge_page() - find next node
- * from which to free a huge page
+ * helper for free_pool_huge_page() - return the previously saved
+ * node ["this node"] from which to free a huge page. Advance the
+ * next node id whether or not we find a free huge page to free so
+ * that the next attempt to free addresses the next node.
*/
-static int hstate_next_node_to_free(struct hstate *h)
+static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
{
- int next_nid;
- next_nid = next_node(h->next_nid_to_free, node_online_map);
- if (next_nid == MAX_NUMNODES)
- next_nid = first_node(node_online_map);
- h->next_nid_to_free = next_nid;
- return next_nid;
+ int nid;
+
+ VM_BUG_ON(!nodes_allowed);
+
+ nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed);
+ h->next_nid_to_free = next_node_allowed(nid, nodes_allowed);
+
+ return nid;
}
/*
@@ -687,13 +716,14 @@ static int hstate_next_node_to_free(struct hstate *h)
* balanced over allowed nodes.
* Called with hugetlb_lock locked.
*/
-static int free_pool_huge_page(struct hstate *h, bool acct_surplus)
+static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
+ bool acct_surplus)
{
int start_nid;
int next_nid;
int ret = 0;
- start_nid = h->next_nid_to_free;
+ start_nid = hstate_next_node_to_free(h, nodes_allowed);
next_nid = start_nid;
do {
@@ -715,9 +745,10 @@ static int free_pool_huge_page(struct hstate *h, bool acct_surplus)
}
update_and_free_page(h, page);
ret = 1;
+ break;
}
- next_nid = hstate_next_node_to_free(h);
- } while (!ret && next_nid != start_nid);
+ next_nid = hstate_next_node_to_free(h, nodes_allowed);
+ } while (next_nid != start_nid);
return ret;
}
@@ -911,14 +942,14 @@ static void return_unused_surplus_pages(struct hstate *h,
/*
* We want to release as many surplus pages as possible, spread
- * evenly across all nodes. Iterate across all nodes until we
- * can no longer free unreserved surplus pages. This occurs when
- * the nodes with surplus pages have no free pages.
- * free_pool_huge_page() will balance the the frees across the
- * on-line nodes for us and will handle the hstate accounting.
+ * evenly across all nodes with memory. Iterate across these nodes
+ * until we can no longer free unreserved surplus pages. This occurs
+ * when the nodes with surplus pages have no free pages.
+ * free_pool_huge_page() will balance the the freed pages across the
+ * on-line nodes with memory and will handle the hstate accounting.
*/
while (nr_pages--) {
- if (!free_pool_huge_page(h, 1))
+ if (!free_pool_huge_page(h, &node_states[N_HIGH_MEMORY], 1))
break;
}
}
@@ -1022,16 +1053,16 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
int __weak alloc_bootmem_huge_page(struct hstate *h)
{
struct huge_bootmem_page *m;
- int nr_nodes = nodes_weight(node_online_map);
+ int nr_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
while (nr_nodes) {
void *addr;
addr = __alloc_bootmem_node_nopanic(
- NODE_DATA(h->next_nid_to_alloc),
+ NODE_DATA(hstate_next_node_to_alloc(h,
+ &node_states[N_HIGH_MEMORY])),
huge_page_size(h), huge_page_size(h), 0);
- hstate_next_node_to_alloc(h);
if (addr) {
/*
* Use the beginning of the huge page to store the
@@ -1084,7 +1115,8 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
if (h->order >= MAX_ORDER) {
if (!alloc_bootmem_huge_page(h))
break;
- } else if (!alloc_fresh_huge_page(h))
+ } else if (!alloc_fresh_huge_page(h,
+ &node_states[N_HIGH_MEMORY]))
break;
}
h->max_huge_pages = i;
@@ -1126,14 +1158,15 @@ static void __init report_hugepages(void)
}
#ifdef CONFIG_HIGHMEM
-static void try_to_free_low(struct hstate *h, unsigned long count)
+static void try_to_free_low(struct hstate *h, unsigned long count,
+ nodemask_t *nodes_allowed)
{
int i;
if (h->order >= MAX_ORDER)
return;
- for (i = 0; i < MAX_NUMNODES; ++i) {
+ for_each_node_mask(i, *nodes_allowed) {
struct page *page, *next;
struct list_head *freel = &h->hugepage_freelists[i];
list_for_each_entry_safe(page, next, freel, lru) {
@@ -1149,7 +1182,8 @@ static void try_to_free_low(struct hstate *h, unsigned long count)
}
}
#else
-static inline void try_to_free_low(struct hstate *h, unsigned long count)
+static inline void try_to_free_low(struct hstate *h, unsigned long count,
+ nodemask_t *nodes_allowed)
{
}
#endif
@@ -1159,7 +1193,8 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
* balanced by operating on them in a round-robin fashion.
* Returns 1 if an adjustment was made.
*/
-static int adjust_pool_surplus(struct hstate *h, int delta)
+static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
+ int delta)
{
int start_nid, next_nid;
int ret = 0;
@@ -1167,29 +1202,33 @@ static int adjust_pool_surplus(struct hstate *h, int delta)
VM_BUG_ON(delta != -1 && delta != 1);
if (delta < 0)
- start_nid = h->next_nid_to_alloc;
+ start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
else
- start_nid = h->next_nid_to_free;
+ start_nid = hstate_next_node_to_free(h, nodes_allowed);
next_nid = start_nid;
do {
int nid = next_nid;
if (delta < 0) {
- next_nid = hstate_next_node_to_alloc(h);
/*
* To shrink on this node, there must be a surplus page
*/
- if (!h->surplus_huge_pages_node[nid])
+ if (!h->surplus_huge_pages_node[nid]) {
+ next_nid = hstate_next_node_to_alloc(h,
+ nodes_allowed);
continue;
+ }
}
if (delta > 0) {
- next_nid = hstate_next_node_to_free(h);
/*
* Surplus cannot exceed the total number of pages
*/
if (h->surplus_huge_pages_node[nid] >=
- h->nr_huge_pages_node[nid])
+ h->nr_huge_pages_node[nid]) {
+ next_nid = hstate_next_node_to_free(h,
+ nodes_allowed);
continue;
+ }
}
h->surplus_huge_pages += delta;
@@ -1202,7 +1241,8 @@ static int adjust_pool_surplus(struct hstate *h, int delta)
}
#define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
-static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
+static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
+ nodemask_t *nodes_allowed)
{
unsigned long min_count, ret;
@@ -1222,7 +1262,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
*/
spin_lock(&hugetlb_lock);
while (h->surplus_huge_pages && count > persistent_huge_pages(h)) {
- if (!adjust_pool_surplus(h, -1))
+ if (!adjust_pool_surplus(h, nodes_allowed, -1))
break;
}
@@ -1233,11 +1273,14 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
* and reducing the surplus.
*/
spin_unlock(&hugetlb_lock);
- ret = alloc_fresh_huge_page(h);
+ ret = alloc_fresh_huge_page(h, nodes_allowed);
spin_lock(&hugetlb_lock);
if (!ret)
goto out;
+ /* Bail for signals. Probably ctrl-c from user */
+ if (signal_pending(current))
+ goto out;
}
/*
@@ -1257,13 +1300,13 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
*/
min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
min_count = max(count, min_count);
- try_to_free_low(h, min_count);
+ try_to_free_low(h, min_count, nodes_allowed);
while (min_count < persistent_huge_pages(h)) {
- if (!free_pool_huge_page(h, 0))
+ if (!free_pool_huge_page(h, nodes_allowed, 0))
break;
}
while (count < persistent_huge_pages(h)) {
- if (!adjust_pool_surplus(h, 1))
+ if (!adjust_pool_surplus(h, nodes_allowed, 1))
break;
}
out:
@@ -1282,43 +1325,117 @@ out:
static struct kobject *hugepages_kobj;
static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
-static struct hstate *kobj_to_hstate(struct kobject *kobj)
+static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp);
+
+static struct hstate *kobj_to_hstate(struct kobject *kobj, int *nidp)
{
int i;
+
for (i = 0; i < HUGE_MAX_HSTATE; i++)
- if (hstate_kobjs[i] == kobj)
+ if (hstate_kobjs[i] == kobj) {
+ if (nidp)
+ *nidp = NUMA_NO_NODE;
return &hstates[i];
- BUG();
- return NULL;
+ }
+
+ return kobj_to_node_hstate(kobj, nidp);
}
-static ssize_t nr_hugepages_show(struct kobject *kobj,
+static ssize_t nr_hugepages_show_common(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- struct hstate *h = kobj_to_hstate(kobj);
- return sprintf(buf, "%lu\n", h->nr_huge_pages);
+ struct hstate *h;
+ unsigned long nr_huge_pages;
+ int nid;
+
+ h = kobj_to_hstate(kobj, &nid);
+ if (nid == NUMA_NO_NODE)
+ nr_huge_pages = h->nr_huge_pages;
+ else
+ nr_huge_pages = h->nr_huge_pages_node[nid];
+
+ return sprintf(buf, "%lu\n", nr_huge_pages);
}
-static ssize_t nr_hugepages_store(struct kobject *kobj,
- struct kobj_attribute *attr, const char *buf, size_t count)
+static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
+ struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t len)
{
int err;
- unsigned long input;
- struct hstate *h = kobj_to_hstate(kobj);
+ int nid;
+ unsigned long count;
+ struct hstate *h;
+ NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
- err = strict_strtoul(buf, 10, &input);
+ err = strict_strtoul(buf, 10, &count);
if (err)
return 0;
- h->max_huge_pages = set_max_huge_pages(h, input);
+ h = kobj_to_hstate(kobj, &nid);
+ if (nid == NUMA_NO_NODE) {
+ /*
+ * global hstate attribute
+ */
+ if (!(obey_mempolicy &&
+ init_nodemask_of_mempolicy(nodes_allowed))) {
+ NODEMASK_FREE(nodes_allowed);
+ nodes_allowed = &node_states[N_HIGH_MEMORY];
+ }
+ } else if (nodes_allowed) {
+ /*
+ * per node hstate attribute: adjust count to global,
+ * but restrict alloc/free to the specified node.
+ */
+ count += h->nr_huge_pages - h->nr_huge_pages_node[nid];
+ init_nodemask_of_node(nodes_allowed, nid);
+ } else
+ nodes_allowed = &node_states[N_HIGH_MEMORY];
+
+ h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
- return count;
+ if (nodes_allowed != &node_states[N_HIGH_MEMORY])
+ NODEMASK_FREE(nodes_allowed);
+
+ return len;
+}
+
+static ssize_t nr_hugepages_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return nr_hugepages_show_common(kobj, attr, buf);
+}
+
+static ssize_t nr_hugepages_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t len)
+{
+ return nr_hugepages_store_common(false, kobj, attr, buf, len);
}
HSTATE_ATTR(nr_hugepages);
+#ifdef CONFIG_NUMA
+
+/*
+ * hstate attribute for optionally mempolicy-based constraint on persistent
+ * huge page alloc/free.
+ */
+static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return nr_hugepages_show_common(kobj, attr, buf);
+}
+
+static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t len)
+{
+ return nr_hugepages_store_common(true, kobj, attr, buf, len);
+}
+HSTATE_ATTR(nr_hugepages_mempolicy);
+#endif
+
+
static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- struct hstate *h = kobj_to_hstate(kobj);
+ struct hstate *h = kobj_to_hstate(kobj, NULL);
return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
}
static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
@@ -1326,7 +1443,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
{
int err;
unsigned long input;
- struct hstate *h = kobj_to_hstate(kobj);
+ struct hstate *h = kobj_to_hstate(kobj, NULL);
err = strict_strtoul(buf, 10, &input);
if (err)
@@ -1343,15 +1460,24 @@ HSTATE_ATTR(nr_overcommit_hugepages);
static ssize_t free_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- struct hstate *h = kobj_to_hstate(kobj);
- return sprintf(buf, "%lu\n", h->free_huge_pages);
+ struct hstate *h;
+ unsigned long free_huge_pages;
+ int nid;
+
+ h = kobj_to_hstate(kobj, &nid);
+ if (nid == NUMA_NO_NODE)
+ free_huge_pages = h->free_huge_pages;
+ else
+ free_huge_pages = h->free_huge_pages_node[nid];
+
+ return sprintf(buf, "%lu\n", free_huge_pages);
}
HSTATE_ATTR_RO(free_hugepages);
static ssize_t resv_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- struct hstate *h = kobj_to_hstate(kobj);
+ struct hstate *h = kobj_to_hstate(kobj, NULL);
return sprintf(buf, "%lu\n", h->resv_huge_pages);