path: root/mm
author     Steven Whitehouse <swhiteho@redhat.com>  2006-09-28 08:29:59 -0400
committer  Steven Whitehouse <swhiteho@redhat.com>  2006-09-28 08:29:59 -0400
commit     185a257f2f73bcd89050ad02da5bedbc28fc43fa (patch)
tree       5e32586114534ed3f2165614cba3d578f5d87307 /mm
parent     3f1a9aaeffd8d1cbc5ab9776c45cbd66af1c9699 (diff)
parent     a77c64c1a641950626181b4857abb701d8f38ccc (diff)
Merge branch 'master' into gfs2
Diffstat (limited to 'mm')
-rw-r--r--  mm/Makefile             2
-rw-r--r--  mm/allocpercpu.c      129
-rw-r--r--  mm/bootmem.c          202
-rw-r--r--  mm/filemap.c           25
-rw-r--r--  mm/fremap.c             4
-rw-r--r--  mm/highmem.c           13
-rw-r--r--  mm/hugetlb.c           10
-rw-r--r--  mm/internal.h           4
-rw-r--r--  mm/memory.c           194
-rw-r--r--  mm/mempolicy.c         23
-rw-r--r--  mm/migrate.c            2
-rw-r--r--  mm/mmap.c              12
-rw-r--r--  mm/mprotect.c          51
-rw-r--r--  mm/msync.c            196
-rw-r--r--  mm/nommu.c            247
-rw-r--r--  mm/oom_kill.c          97
-rw-r--r--  mm/page-writeback.c    29
-rw-r--r--  mm/page_alloc.c       974
-rw-r--r--  mm/page_io.c           48
-rw-r--r--  mm/rmap.c              65
-rw-r--r--  mm/shmem.c              5
-rw-r--r--  mm/slab.c             434
-rw-r--r--  mm/slob.c              52
-rw-r--r--  mm/swap.c              49
-rw-r--r--  mm/truncate.c          25
-rw-r--r--  mm/vmalloc.c           38
-rw-r--r--  mm/vmscan.c           140
-rw-r--r--  mm/vmstat.c            52
28 files changed, 2180 insertions, 942 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 9dd824c11ee..60c56c0b5e1 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -23,4 +23,4 @@ obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
-
+obj-$(CONFIG_SMP) += allocpercpu.o
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
new file mode 100644
index 00000000000..eaa9abeea53
--- /dev/null
+++ b/mm/allocpercpu.c
@@ -0,0 +1,129 @@
+/*
+ * linux/mm/allocpercpu.c
+ *
+ * Separated from slab.c August 11, 2006 Christoph Lameter <clameter@sgi.com>
+ */
+#include <linux/mm.h>
+#include <linux/module.h>
+
+/**
+ * percpu_depopulate - depopulate per-cpu data for given cpu
+ * @__pdata: per-cpu data to depopulate
+ * @cpu: depopulate per-cpu data for this cpu
+ *
+ * Depopulating per-cpu data for a cpu going offline would be a typical
+ * use case. You need to register a cpu hotplug handler for that purpose.
+ */
+void percpu_depopulate(void *__pdata, int cpu)
+{
+ struct percpu_data *pdata = __percpu_disguise(__pdata);
+ if (pdata->ptrs[cpu]) {
+ kfree(pdata->ptrs[cpu]);
+ pdata->ptrs[cpu] = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(percpu_depopulate);
+
+/**
+ * percpu_depopulate_mask - depopulate per-cpu data for some cpu's
+ * @__pdata: per-cpu data to depopulate
+ * @mask: depopulate per-cpu data for cpu's selected through mask bits
+ */
+void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+{
+ int cpu;
+ for_each_cpu_mask(cpu, *mask)
+ percpu_depopulate(__pdata, cpu);
+}
+EXPORT_SYMBOL_GPL(__percpu_depopulate_mask);
+
+/**
+ * percpu_populate - populate per-cpu data for given cpu
+ * @__pdata: per-cpu data to populate further
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @cpu: populate per-cpu data for this cpu
+ *
+ * Populating per-cpu data for a cpu coming online would be a typical
+ * use case. You need to register a cpu hotplug handler for that purpose.
+ * Per-cpu object is populated with zeroed buffer.
+ */
+void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
+{
+ struct percpu_data *pdata = __percpu_disguise(__pdata);
+ int node = cpu_to_node(cpu);
+
+ BUG_ON(pdata->ptrs[cpu]);
+ if (node_online(node)) {
+ /* FIXME: kzalloc_node(size, gfp, node) */
+ pdata->ptrs[cpu] = kmalloc_node(size, gfp, node);
+ if (pdata->ptrs[cpu])
+ memset(pdata->ptrs[cpu], 0, size);
+ } else
+ pdata->ptrs[cpu] = kzalloc(size, gfp);
+ return pdata->ptrs[cpu];
+}
+EXPORT_SYMBOL_GPL(percpu_populate);
+
+/**
+ * percpu_populate_mask - populate per-cpu data for more cpu's
+ * @__pdata: per-cpu data to populate further
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @mask: populate per-cpu data for cpu's selected through mask bits
+ *
+ * Per-cpu objects are populated with zeroed buffers.
+ */
+int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+ cpumask_t *mask)
+{
+ cpumask_t populated = CPU_MASK_NONE;
+ int cpu;
+
+ for_each_cpu_mask(cpu, *mask)
+ if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) {
+ __percpu_depopulate_mask(__pdata, &populated);
+ return -ENOMEM;
+ } else
+ cpu_set(cpu, populated);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__percpu_populate_mask);
+
+/**
+ * percpu_alloc_mask - initial setup of per-cpu data
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @mask: populate per-cpu data for cpu's selected through mask bits
+ *
+ * Populating per-cpu data for all online cpu's would be a typical use case,
+ * which is simplified by the percpu_alloc() wrapper.
+ * Per-cpu objects are populated with zeroed buffers.
+ */
+void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+{
+ void *pdata = kzalloc(sizeof(struct percpu_data), gfp);
+ void *__pdata = __percpu_disguise(pdata);
+
+ if (unlikely(!pdata))
+ return NULL;
+ if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
+ return __pdata;
+ kfree(pdata);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(__percpu_alloc_mask);
+
+/**
+ * percpu_free - final cleanup of per-cpu data
+ * @__pdata: object to clean up
+ *
+ * We simply clean up any per-cpu object left. No need for the client to
+ * track and specify through a bit mask which per-cpu objects are to be freed.
+ */
+void percpu_free(void *__pdata)
+{
+ __percpu_depopulate_mask(__pdata, &cpu_possible_map);
+ kfree(__percpu_disguise(__pdata));
+}
+EXPORT_SYMBOL_GPL(percpu_free);
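
A usage sketch may help put the new API in context. The percpu_alloc() wrapper (referred to in the __percpu_alloc_mask() comment above) and the long-standing per_cpu_ptr() accessor are assumed to come from <linux/percpu.h>; the foo_stats structure and functions below are purely hypothetical and not part of this patch.

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>
#include <linux/errno.h>

struct foo_stats {			/* hypothetical per-cpu payload */
	unsigned long events;
};

static struct foo_stats *foo_pcpu;	/* disguised pointer from percpu_alloc() */

static int foo_stats_init(void)
{
	/* Allocates and zero-populates one object per online cpu. */
	foo_pcpu = percpu_alloc(sizeof(struct foo_stats), GFP_KERNEL);
	return foo_pcpu ? 0 : -ENOMEM;
}

static unsigned long foo_stats_sum(void)
{
	unsigned long sum = 0;
	int cpu;

	/* Only online cpus were populated by percpu_alloc(). */
	for_each_online_cpu(cpu)
		sum += per_cpu_ptr(foo_pcpu, cpu)->events;
	return sum;
}

static void foo_stats_exit(void)
{
	/* Depopulates every possible cpu and frees the descriptor. */
	percpu_free(foo_pcpu);
}

A CPU hotplug notifier would pair percpu_populate() with the online event and percpu_depopulate() with the offline event, as the kernel-doc comments above suggest.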
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 50353e0dac1..d53112fcb40 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -8,17 +8,15 @@
* free memory collector. It's used to deal with reserved
* system memory and memory holes as well.
*/
-
-#include <linux/mm.h>
-#include <linux/kernel_stat.h>
-#include <linux/swap.h>
-#include <linux/interrupt.h>
#include <linux/init.h>
+#include <linux/pfn.h>
#include <linux/bootmem.h>
-#include <linux/mmzone.h>
#include <linux/module.h>
-#include <asm/dma.h>
+
+#include <asm/bug.h>
#include <asm/io.h>
+#include <asm/processor.h>
+
#include "internal.h"
/*
@@ -41,7 +39,7 @@ unsigned long saved_max_pfn;
#endif
/* return the number of _pages_ that will be allocated for the boot bitmap */
-unsigned long __init bootmem_bootmap_pages (unsigned long pages)
+unsigned long __init bootmem_bootmap_pages(unsigned long pages)
{
unsigned long mapsize;
@@ -51,12 +49,14 @@ unsigned long __init bootmem_bootmap_pages (unsigned long pages)
return mapsize;
}
+
/*
* link bdata in order
*/
-static void link_bootmem(bootmem_data_t *bdata)
+static void __init link_bootmem(bootmem_data_t *bdata)
{
bootmem_data_t *ent;
+
if (list_empty(&bdata_list)) {
list_add(&bdata->list, &bdata_list);
return;
@@ -69,22 +69,32 @@ static void link_bootmem(bootmem_data_t *bdata)
}
}
list_add_tail(&bdata->list, &bdata_list);
- return;
}
+/*
+ * Given an initialised bdata, it returns the size of the boot bitmap
+ */
+static unsigned long __init get_mapsize(bootmem_data_t *bdata)
+{
+ unsigned long mapsize;
+ unsigned long start = PFN_DOWN(bdata->node_boot_start);
+ unsigned long end = bdata->node_low_pfn;
+
+ mapsize = ((end - start) + 7) / 8;
+ return ALIGN(mapsize, sizeof(long));
+}
/*
* Called once to set up the allocator itself.
*/
-static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
+static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
unsigned long mapstart, unsigned long start, unsigned long end)
{
bootmem_data_t *bdata = pgdat->bdata;
- unsigned long mapsize = ((end - start)+7)/8;
+ unsigned long mapsize;
- mapsize = ALIGN(mapsize, sizeof(long));
- bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
- bdata->node_boot_start = (start << PAGE_SHIFT);
+ bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
+ bdata->node_boot_start = PFN_PHYS(start);
bdata->node_low_pfn = end;
link_bootmem(bdata);
@@ -92,6 +102,7 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
* Initially all pages are reserved - setup_arch() has to
* register free RAM areas explicitly.
*/
+ mapsize = get_mapsize(bdata);
memset(bdata->node_bootmem_map, 0xff, mapsize);
return mapsize;
@@ -102,22 +113,22 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
* might be used for boot-time allocations - or it might get added
* to the free page pool later on.
*/
-static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size)
+static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
+ unsigned long size)
{
+ unsigned long sidx, eidx;
unsigned long i;
+
/*
* round up, partially reserved pages are considered
* fully reserved.
*/
- unsigned long sidx = (addr - bdata->node_boot_start)/PAGE_SIZE;
- unsigned long eidx = (addr + size - bdata->node_boot_start +
- PAGE_SIZE-1)/PAGE_SIZE;
- unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE;
-
BUG_ON(!size);
- BUG_ON(sidx >= eidx);
- BUG_ON((addr >> PAGE_SHIFT) >= bdata->node_low_pfn);
- BUG_ON(end > bdata->node_low_pfn);
+ BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
+ BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
+
+ sidx = PFN_DOWN(addr - bdata->node_boot_start);
+ eidx = PFN_UP(addr + size - bdata->node_boot_start);
for (i = sidx; i < eidx; i++)
if (test_and_set_bit(i, bdata->node_bootmem_map)) {
@@ -127,20 +138,18 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long add
}
}
-static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size)
+static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
+ unsigned long size)
{
+ unsigned long sidx, eidx;
unsigned long i;
- unsigned long start;
+
/*
* round down end of usable mem, partially free pages are
* considered reserved.
*/
- unsigned long sidx;
- unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE;
- unsigned long end = (addr + size)/PAGE_SIZE;
-
BUG_ON(!size);
- BUG_ON(end > bdata->node_low_pfn);
+ BUG_ON(PFN_DOWN(addr + size) > bdata->node_low_pfn);
if (addr < bdata->last_success)
bdata->last_success = addr;
@@ -148,8 +157,8 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
/*
* Round up the beginning of the address.
*/
- start = (addr + PAGE_SIZE-1) / PAGE_SIZE;
- sidx = start - (bdata->node_boot_start/PAGE_SIZE);
+ sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start);
+ eidx = PFN_DOWN(addr + size - bdata->node_boot_start);
for (i = sidx; i < eidx; i++) {
if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
@@ -175,10 +184,10 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
unsigned long align, unsigned long goal, unsigned long limit)
{
unsigned long offset, remaining_size, areasize, preferred;
- unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn;
+ unsigned long i, start = 0, incr, eidx, end_pfn;
void *ret;
- if(!size) {
+ if (!size) {
printk("__alloc_bootmem_core(): zero-sized request\n");
BUG();
}
@@ -187,23 +196,22 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
if (limit && bdata->node_boot_start >= limit)
return NULL;
- limit >>=PAGE_SHIFT;
+ end_pfn = bdata->node_low_pfn;
+ limit = PFN_DOWN(limit);
if (limit && end_pfn > limit)
end_pfn = limit;
- eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+ eidx = end_pfn - PFN_DOWN(bdata->node_boot_start);
offset = 0;
- if (align &&
- (bdata->node_boot_start & (align - 1UL)) != 0)
- offset = (align - (bdata->node_boot_start & (align - 1UL)));
- offset >>= PAGE_SHIFT;
+ if (align && (bdata->node_boot_start & (align - 1UL)) != 0)
+ offset = align - (bdata->node_boot_start & (align - 1UL));
+ offset = PFN_DOWN(offset);
/*
* We try to allocate bootmem pages above 'goal'
* first, then we try to allocate lower pages.
*/
- if (goal && (goal >= bdata->node_boot_start) &&
- ((goal >> PAGE_SHIFT) < end_pfn)) {
+ if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) {
preferred = goal - bdata->node_boot_start;
if (bdata->last_success >= preferred)
@@ -212,9 +220,8 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
} else
preferred = 0;
- preferred = ALIGN(preferred, align) >> PAGE_SHIFT;
- preferred += offset;
- areasize = (size+PAGE_SIZE-1)/PAGE_SIZE;
+ preferred = PFN_DOWN(ALIGN(preferred, align)) + offset;
+ areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
incr = align >> PAGE_SHIFT ? : 1;
restart_scan:
@@ -229,7 +236,7 @@ restart_scan:
for (j = i + 1; j < i + areasize; ++j) {
if (j >= eidx)
goto fail_block;
- if (test_bit (j, bdata->node_bootmem_map))
+ if (test_bit(j, bdata->node_bootmem_map))
goto fail_block;
}
start = i;
@@ -245,7 +252,7 @@ restart_scan:
return NULL;
found:
- bdata->last_success = start << PAGE_SHIFT;
+ bdata->last_success = PFN_PHYS(start);
BUG_ON(start >= eidx);
/*
@@ -257,19 +264,21 @@ found:
bdata->last_offset && bdata->last_pos+1 == start) {
offset = ALIGN(bdata->last_offset, align);
BUG_ON(offset > PAGE_SIZE);
- remaining_size = PAGE_SIZE-offset;
+ remaining_size = PAGE_SIZE - offset;
if (size < remaining_size) {
areasize = 0;
/* last_pos unchanged */
- bdata->last_offset = offset+size;
- ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
- bdata->node_boot_start);
+ bdata->last_offset = offset + size;
+ ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
+ offset +
+ bdata->node_boot_start);
} else {
remaining_size = size - remaining_size;
- areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE;
- ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
- bdata->node_boot_start);
- bdata->last_pos = start+areasize-1;
+ areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
+ ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
+ offset +
+ bdata->node_boot_start);
+ bdata->last_pos = start + areasize - 1;
bdata->last_offset = remaining_size;
}
bdata->last_offset &= ~PAGE_MASK;
@@ -282,7 +291,7 @@ found:
/*
* Reserve the area now:
*/
- for (i = start; i < start+areasize; i++)
+ for (i = start; i < start + areasize; i++)
if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
BUG();
memset(ret, 0, size);
@@ -303,8 +312,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
count = 0;
/* first extant page of the node */
- pfn = bdata->node_boot_start >> PAGE_SHIFT;
- idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+ pfn = PFN_DOWN(bdata->node_boot_start);
+ idx = bdata->node_low_pfn - pfn;
map = bdata->node_bootmem_map;
/* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */
if (bdata->node_boot_start == 0 ||
@@ -333,7 +342,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
}
}
} else {
- i+=BITS_PER_LONG;
+ i += BITS_PER_LONG;
}
pfn += BITS_PER_LONG;
}
@@ -345,9 +354,10 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
*/
page = virt_to_page(bdata->node_bootmem_map);
count = 0;
- for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {
- count++;
+ idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT;
+ for (i = 0; i < idx; i++, page++) {
__free_pages_bootmem(page, 0);
+ count++;
}
total += count;
bdata->node_bootmem_map = NULL;
@@ -355,64 +365,72 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
return total;
}
-unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn)
+unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
+ unsigned long startpfn, unsigned long endpfn)
{
- return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn));
+ return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
}
-void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size)
+void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+ unsigned long size)
{
reserve_bootmem_core(pgdat->bdata, physaddr, size);
}
-void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size)
+void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+ unsigned long size)
{
free_bootmem_core(pgdat->bdata, physaddr, size);
}
-unsigned long __init free_all_bootmem_node (pg_data_t *pgdat)
+unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
- return(free_all_bootmem_core(pgdat));
+ return free_all_bootmem_core(pgdat);
}
-unsigned long __init init_bootmem (unsigned long start, unsigned long pages)
+unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
{
max_low_pfn = pages;
min_low_pfn = start;
- return(init_bootmem_core(NODE_DATA(0), start, 0, pages));
+ return init_bootmem_core(NODE_DATA(0), start, 0, pages);
}
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-void __init reserve_bootmem (unsigned long addr, unsigned long size)
+void __init reserve_bootmem(unsigned long addr, unsigned long size)
{
reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size);
}
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-void __init free_bootmem (unsigned long addr, unsigned long size)
+void __init free_bootmem(unsigned long addr, unsigned long size)
{
free_bootmem_core(NODE_DATA(0)->bdata, addr, size);
}
-unsigned long __init free_all_bootmem (void)
+unsigned long __init free_all_bootmem(void)
{
- return(free_all_bootmem_core(NODE_DATA(0)));
+ return free_all_bootmem_core(NODE_DATA(0));
}
-void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
+ unsigned long goal)
{
bootmem_data_t *bdata;
void *ptr;
- list_for_each_entry(bdata, &bdata_list, list)
- if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0)))
- return(ptr);
+ list_for_each_entry(bdata, &bdata_list, list) {
+ ptr = __alloc_bootmem_core(bdata, size, align, goal, 0);
+ if (ptr)
+ return ptr;
+ }
return NULL;
}
-void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem(unsigned long size, unsigned long align,
+ unsigned long goal)
{
void *mem = __alloc_bootmem_nopanic(size,align,goal);
+
if (mem)
return mem;
/*
@@ -424,29 +442,34 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned
}
-void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align,
- unsigned long goal)
+void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
+ unsigned long align, unsigned long goal)
{
void *ptr;
ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
if (ptr)
- return (ptr);
+ return ptr;
return __alloc_bootmem(size, align, goal);
}
-#define LOW32LIMIT 0xffffffff
+#ifndef ARCH_LOW_ADDRESS_LIMIT
+#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
+#endif
-void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
+ unsigned long goal)
{
bootmem_data_t *bdata;
void *ptr;
- list_for_each_entry(bdata, &bdata_list, list)
- if ((ptr = __alloc_bootmem_core(bdata, size,
- align, goal, LOW32LIMIT)))
- return(ptr);
+ list_for_each_entry(bdata, &bdata_list, list) {
+ ptr = __alloc_bootmem_core(bdata, size, align, goal,
+ ARCH_LOW_ADDRESS_LIMIT);
+ if (ptr)
+ return ptr;
+ }
/*
* Whoops, we cannot satisfy the allocation request.
@@ -459,5 +482,6 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsig
void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
- return __alloc_bootmem_core(pgdat->bdata, size, align, goal, LOW32LIMIT);
+ return __alloc_bootmem_core(pgdat->bdata, size, align, goal,
+ ARCH_LOW_ADDRESS_LIMIT);
}
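
The bulk of the bootmem conversion replaces open-coded PAGE_SHIFT arithmetic with the helpers from <linux/pfn.h>, which is newly included at the top of the file. Paraphrased for reference (my reading of that header, not part of the diff):

/* Paraphrase of <linux/pfn.h>; not part of this diff. */
#define PFN_ALIGN(x)	(((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
#define PFN_PHYS(x)	((x) << PAGE_SHIFT)

So, for example, PFN_DOWN(bdata->node_boot_start) is exactly the old bdata->node_boot_start >> PAGE_SHIFT, and PFN_UP() rounds a byte address up to the next page frame before shifting.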
diff --git a/mm/filemap.c b/mm/filemap.c
index 3195806d78e..87d4a398cd1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -488,6 +488,12 @@ struct page *page_cache_alloc_cold(struct address_space *x)
EXPORT_SYMBOL(page_cache_alloc_cold);
#endif
+static int __sleep_on_page_lock(void *word)
+{
+ io_schedule();
+ return 0;
+}
+
/*
* In order to wait for pages to become available there must be
* waitqueues associated with pages. By using a hash table of
@@ -577,13 +583,24 @@ void fastcall __lock_page(struct page *page)
}
EXPORT_SYMBOL(__lock_page);
+/*
+ * Variant of lock_page that does not require the caller to hold a reference
+ * on the page's mapping.
+ */
+void fastcall __lock_page_nosync(struct page *page)
+{
+ DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+ __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
+ TASK_UNINTERRUPTIBLE);
+}
+
/**
* find_get_page - find and get a page reference
* @mapping: the address_space to search
* @offset: the page index
*
- * A rather lightweight function, finding and getting a reference to a
- * hashed page atomically.
+ * Is there a pagecache struct page at the given (mapping, offset) tuple?
+ * If yes, increment its refcount and return it; if no, return NULL.
*/
struct page * find_get_page(struct address_space *mapping, unsigned long offset)
{
@@ -970,7 +987,7 @@ page_not_up_to_date:
/* Get exclusive access to the page ... */
lock_page(page);
- /* Did it get unhashed before we got the lock? */
+ /* Did it get truncated before we got the lock? */
if (!page->mapping) {
unlock_page(page);
page_cache_release(page);
@@ -1612,7 +1629,7 @@ no_cached_page:
page_not_uptodate:
lock_page(page);
- /* Did it get unhashed while we waited for it? */
+ /* Did it get truncated while we waited for it? */
if (!page->mapping) {
unlock_page(page);
goto err;
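
__lock_page_nosync() above is only the contended slow path; the inline fast path presumably mirrors lock_page() in <linux/pagemap.h>. A sketch under that assumption (not quoted from this merge):

/* Assumed wrapper, modelled on the existing lock_page() fast path. */
static inline void lock_page_nosync(struct page *page)
{
	might_sleep();
	if (TestSetPageLocked(page))
		__lock_page_nosync(page);
}

The difference from __lock_page() is the wait callback: __sleep_on_page_lock() just calls io_schedule() and never touches page->mapping, so the caller does not need to hold a reference on the mapping.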
diff --git a/mm/fremap.c b/mm/fremap.c
index 21b7d0cbc98..aa30618ec6b 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -79,9 +79,9 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
inc_mm_counter(mm, file_rss);
flush_icache_page(vma, page);
- set_pte_at(mm, addr, pte, mk_pte(page, prot));
+ pte_val = mk_pte(page, prot);
+ set_pte_at(mm, addr, pte, pte_val);
page_add_file_rmap(page);
- pte_val = *pte;
update_mmu_cache(vma, addr, pte_val);
lazy_mmu_prot_update(pte_val);
err = 0;
diff --git a/mm/highmem.c b/mm/highmem.c
index 9b2a5403c44..ee5519b176e 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -46,6 +46,19 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
*/
#ifdef CONFIG_HIGHMEM
+unsigned long totalhigh_pages __read_mostly;
+
+unsigned int nr_free_highpages (void)
+{
+ pg_data_t *pgdat;
+ unsigned int pages = 0;
+
+ for_each_online_pgdat(pgdat)
+ pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
+
+ return pages;
+}
+
static int pkmap_count[LAST_PKMAP];
static unsigned int last_pkmap_nr;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
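
The new nr_free_highpages() walks every online node via for_each_online_pgdat(); roughly (paraphrased from <linux/mmzone.h>, not part of this diff) the iterator expands to:

/* Paraphrase of the node iterator; not part of this diff. */
#define for_each_online_pgdat(pgdat)				\
	for (pgdat = first_online_pgdat();			\
	     pgdat != NULL;					\
	     pgdat = next_online_pgdat(pgdat))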
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index df499973255..7c7d03dbf73 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -72,7 +72,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
struct zone **z;
for (z = zonelist->zones; *z; z++) {
- nid = (*z)->zone_pgdat->node_id;
+ nid = zone_to_nid(*z);
if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
!list_empty(&hugepage_freelists[nid]))
break;
@@ -177,7 +177,7 @@ static void update_and_free_page(struct page *page)
{
int i;
nr_huge_pages--;
- nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--;
+ nr_huge_pages_node[page_to_nid(page)]--;
for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
@@ -191,7 +191,8 @@ static void update_and_free_page(struct page *page)
#ifdef CONFIG_HIGHMEM
static void try_to_free_low(unsigned long count)
{
- int i, nid;
+ int i;
+
for (i = 0; i < MAX_NUMNODES; ++i) {
struct page *page, *next;
list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
@@ -199,9 +200,8 @@ static void try_to_free_low(unsigned long count)
continue;
list_del(&page->lru);
update_and_free_page(page);
- nid = page_zone(page)->zone_pgdat->node_id;
free_huge_pages--;
- free_huge_pages_node[nid]--;
+ free_huge_pages_node[page_to_nid(page)]--;
if (count >= nr_huge_pages)
return;
}
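
The hugetlb hunks are pure refactoring: zone_to_nid() and page_to_nid() return the same NUMA node id that the removed zone_pgdat->node_id expressions computed. A simplified sketch of the helpers follows; the real page_to_nid() may decode the node from page->flags depending on the memory model, so treat this as illustrative rather than the exact header definitions.

/* Simplified, illustrative equivalents; not the exact header definitions. */
static inline int zone_to_nid(struct zone *zone)
{
	return zone->zone_pgdat->node_id;
}

static inline int page_to_nid(struct page *page)
{
	return zone_to_nid(page_zone(page));
}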
diff --git a/mm/internal.h b/mm/internal.h
index d20e3cc4aef..d527b80b292 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -24,8 +24,8 @@ static inline void set_page_count(struct page *page, int v)
*/
static inline void set_page_refcounted(struct page *page)
{
- BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page);
- BUG_ON(atomic_read(&page->_count));
+ VM_BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page);
+ VM_BUG_ON(atomic_read(&page->_count));
set_page_count(page, 1);
}
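
VM_BUG_ON() is the CONFIG_DEBUG_VM-gated form of BUG_ON(), so these hot-path sanity checks compile away on production builds. Paraphrased (not part of this diff):

/* Paraphrase of the VM_BUG_ON() definition; not part of this diff. */
#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON(cond)		BUG_ON(cond)
#else
#define VM_BUG_ON(cond)		do { } while (0)
#endif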
diff --git a/mm/memory.c b/mm/memory.c
index 109e9866237..601159a46ab 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -49,6 +49,7 @@
#include <linux/module.h>
#include <linux/delayacct.h>
#include <linux/init.h>
+#include <linux/writeback.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
@@ -1226,7 +1227,12 @@ out:
return retval;
}
-/*
+/**
+ * vm_insert_page - insert single page into user vma
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @page: source kernel page
+ *
* This allows drivers to insert individual pages they've allocated
* into a user vma.
*
@@ -1318,7 +1324,16 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
return 0;
}
-/* Note: this is only safe if the mm semaphore is held when called. */
+/**
+ * remap_pfn_range - remap kernel memory to userspace
+ * @vma: user vma to map to
+ * @addr: target user address to start at
+ * @pfn: physical address of kernel memory
+ * @size: size of map area
+ * @prot: page protection flags for this mapping
+ *
+ * Note: this is only safe if the mm semaphore is held when called.
+ */
int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
@@ -1458,14 +1473,29 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
{
struct page *old_page, *new_page;
pte_t entry;
- int reuse, ret = VM_FAULT_MINOR;
+ int reuse = 0, ret = VM_FAULT_MINOR;
+ struct page *dirty_page = NULL;
old_page = vm_normal_page(vma, address, orig_pte);
if (!old_page)
goto gotten;
- if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) ==
- (VM_SHARED|VM_WRITE))) {
+ /*
+ * Take out anonymous pages first, anonymous shared vmas are
+ * not dirty accountable.
+ */
+ if (PageAnon(old_page)) {
+ if (!TestSetPageLocked(old_page)) {
+ reuse = can_share_swap_page(old_page);
+ unlock_page(old_page);
+ }
+ } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+ (VM_WRITE|VM_SHARED))) {
+ /*
+ * Only catch write-faults on shared writable pages,
+ * read-only shared pages can get COWed by
+ * get_user_pages(.write=1, .force=1).
+ */
if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
/*
* Notify the address space that the page is about to
@@ -1494,13 +1524,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (!pte_same(*page_table, orig_pte))
goto unlock;
}
-
+ dirty_page = old_page;
+ get_page(dirty_page);
reuse = 1;
- } else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
- reuse = can_share_swap_page(old_page);
- unlock_page(old_page);
- } else {
- reuse = 0;
}
if (reuse) {
@@ -1566,6 +1592,10 @@ gotten:
page_cache_release(old_page);
unlock:
pte_unmap_unlock(page_table, ptl);
+ if (dirty_page) {
+ set_page_dirty_balance(dirty_page);
+ put_page(dirty_page);
+ }
return ret;
oom: