| author    | Steven Whitehouse <swhiteho@redhat.com> | 2006-09-28 08:29:59 -0400 |
| committer | Steven Whitehouse <swhiteho@redhat.com> | 2006-09-28 08:29:59 -0400 |
| commit    | 185a257f2f73bcd89050ad02da5bedbc28fc43fa (patch) |
| tree      | 5e32586114534ed3f2165614cba3d578f5d87307 /mm |
| parent    | 3f1a9aaeffd8d1cbc5ab9776c45cbd66af1c9699 (diff) |
| parent    | a77c64c1a641950626181b4857abb701d8f38ccc (diff) |
Merge branch 'master' into gfs2
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/Makefile | 2 |
| -rw-r--r-- | mm/allocpercpu.c | 129 |
| -rw-r--r-- | mm/bootmem.c | 202 |
| -rw-r--r-- | mm/filemap.c | 25 |
| -rw-r--r-- | mm/fremap.c | 4 |
| -rw-r--r-- | mm/highmem.c | 13 |
| -rw-r--r-- | mm/hugetlb.c | 10 |
| -rw-r--r-- | mm/internal.h | 4 |
| -rw-r--r-- | mm/memory.c | 194 |
| -rw-r--r-- | mm/mempolicy.c | 23 |
| -rw-r--r-- | mm/migrate.c | 2 |
| -rw-r--r-- | mm/mmap.c | 12 |
| -rw-r--r-- | mm/mprotect.c | 51 |
| -rw-r--r-- | mm/msync.c | 196 |
| -rw-r--r-- | mm/nommu.c | 247 |
| -rw-r--r-- | mm/oom_kill.c | 97 |
| -rw-r--r-- | mm/page-writeback.c | 29 |
| -rw-r--r-- | mm/page_alloc.c | 974 |
| -rw-r--r-- | mm/page_io.c | 48 |
| -rw-r--r-- | mm/rmap.c | 65 |
| -rw-r--r-- | mm/shmem.c | 5 |
| -rw-r--r-- | mm/slab.c | 434 |
| -rw-r--r-- | mm/slob.c | 52 |
| -rw-r--r-- | mm/swap.c | 49 |
| -rw-r--r-- | mm/truncate.c | 25 |
| -rw-r--r-- | mm/vmalloc.c | 38 |
| -rw-r--r-- | mm/vmscan.c | 140 |
| -rw-r--r-- | mm/vmstat.c | 52 |
28 files changed, 2180 insertions, 942 deletions
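The largest self-contained piece of the diff below is the new mm/allocpercpu.c, which carries the per-cpu allocator split out of slab.c. For orientation, here is a minimal, hypothetical caller-side sketch of how that interface is meant to be used. percpu_alloc(), percpu_free() and the zero-filled per-cpu buffers are named in the new file's own kernel-doc; per_cpu_ptr(), the exact percpu_alloc() signature, and the pkt_stats/stats_* names are assumptions drawn from the 2.6.18-era <linux/percpu.h>, not code from this merge.

```c
/* Hypothetical module sketch, not part of this commit. */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/smp.h>

struct pkt_stats {
	unsigned long rx;
};

static struct pkt_stats *stats;		/* "disguised" per-cpu pointer */

static int __init stats_init(void)
{
	/* allocates the descriptor plus a zeroed buffer per online CPU */
	stats = percpu_alloc(sizeof(struct pkt_stats), GFP_KERNEL);
	return stats ? 0 : -ENOMEM;
}

static void stats_count_rx(void)
{
	/* touch only this CPU's instance; get_cpu() disables preemption */
	per_cpu_ptr(stats, get_cpu())->rx++;
	put_cpu();
}

static unsigned long stats_fold(void)
{
	unsigned long sum = 0;
	int cpu;

	/* CPUs that were never populated still have a NULL slot */
	for_each_possible_cpu(cpu)
		if (per_cpu_ptr(stats, cpu))
			sum += per_cpu_ptr(stats, cpu)->rx;
	return sum;
}

static void __exit stats_exit(void)
{
	percpu_free(stats);	/* frees every populated buffer, then the descriptor */
}

module_init(stats_init);
module_exit(stats_exit);
MODULE_LICENSE("GPL");
```

Per the kernel-doc in the new file, buffers for CPUs that come online later are meant to be added from a CPU-hotplug handler via percpu_populate(), and dropped again with percpu_depopulate().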
diff --git a/mm/Makefile b/mm/Makefile
index 9dd824c11ee..60c56c0b5e1 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -23,4 +23,4 @@ obj-$(CONFIG_SLAB) += slab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
-
+obj-$(CONFIG_SMP) += allocpercpu.o
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
new file mode 100644
index 00000000000..eaa9abeea53
--- /dev/null
+++ b/mm/allocpercpu.c
@@ -0,0 +1,129 @@
+/*
+ * linux/mm/allocpercpu.c
+ *
+ * Separated from slab.c August 11, 2006 Christoph Lameter <clameter@sgi.com>
+ */
+#include <linux/mm.h>
+#include <linux/module.h>
+
+/**
+ * percpu_depopulate - depopulate per-cpu data for given cpu
+ * @__pdata: per-cpu data to depopulate
+ * @cpu: depopulate per-cpu data for this cpu
+ *
+ * Depopulating per-cpu data for a cpu going offline would be a typical
+ * use case. You need to register a cpu hotplug handler for that purpose.
+ */
+void percpu_depopulate(void *__pdata, int cpu)
+{
+	struct percpu_data *pdata = __percpu_disguise(__pdata);
+	if (pdata->ptrs[cpu]) {
+		kfree(pdata->ptrs[cpu]);
+		pdata->ptrs[cpu] = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(percpu_depopulate);
+
+/**
+ * percpu_depopulate_mask - depopulate per-cpu data for some cpu's
+ * @__pdata: per-cpu data to depopulate
+ * @mask: depopulate per-cpu data for cpu's selected through mask bits
+ */
+void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+{
+	int cpu;
+	for_each_cpu_mask(cpu, *mask)
+		percpu_depopulate(__pdata, cpu);
+}
+EXPORT_SYMBOL_GPL(__percpu_depopulate_mask);
+
+/**
+ * percpu_populate - populate per-cpu data for given cpu
+ * @__pdata: per-cpu data to populate further
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @cpu: populate per-data for this cpu
+ *
+ * Populating per-cpu data for a cpu coming online would be a typical
+ * use case. You need to register a cpu hotplug handler for that purpose.
+ * Per-cpu object is populated with zeroed buffer.
+ */
+void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
+{
+	struct percpu_data *pdata = __percpu_disguise(__pdata);
+	int node = cpu_to_node(cpu);
+
+	BUG_ON(pdata->ptrs[cpu]);
+	if (node_online(node)) {
+		/* FIXME: kzalloc_node(size, gfp, node) */
+		pdata->ptrs[cpu] = kmalloc_node(size, gfp, node);
+		if (pdata->ptrs[cpu])
+			memset(pdata->ptrs[cpu], 0, size);
+	} else
+		pdata->ptrs[cpu] = kzalloc(size, gfp);
+	return pdata->ptrs[cpu];
+}
+EXPORT_SYMBOL_GPL(percpu_populate);
+
+/**
+ * percpu_populate_mask - populate per-cpu data for more cpu's
+ * @__pdata: per-cpu data to populate further
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @mask: populate per-cpu data for cpu's selected through mask bits
+ *
+ * Per-cpu objects are populated with zeroed buffers.
+ */
+int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+			   cpumask_t *mask)
+{
+	cpumask_t populated = CPU_MASK_NONE;
+	int cpu;
+
+	for_each_cpu_mask(cpu, *mask)
+		if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) {
+			__percpu_depopulate_mask(__pdata, &populated);
+			return -ENOMEM;
+		} else
+			cpu_set(cpu, populated);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__percpu_populate_mask);
+
+/**
+ * percpu_alloc_mask - initial setup of per-cpu data
+ * @size: size of per-cpu object
+ * @gfp: may sleep or not etc.
+ * @mask: populate per-data for cpu's selected through mask bits
+ *
+ * Populating per-cpu data for all online cpu's would be a typical use case,
+ * which is simplified by the percpu_alloc() wrapper.
+ * Per-cpu objects are populated with zeroed buffers.
+ */
+void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+{
+	void *pdata = kzalloc(sizeof(struct percpu_data), gfp);
+	void *__pdata = __percpu_disguise(pdata);
+
+	if (unlikely(!pdata))
+		return NULL;
+	if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
+		return __pdata;
+	kfree(pdata);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(__percpu_alloc_mask);
+
+/**
+ * percpu_free - final cleanup of per-cpu data
+ * @__pdata: object to clean up
+ *
+ * We simply clean up any per-cpu object left. No need for the client to
+ * track and specify through a bis mask which per-cpu objects are to free.
+ */
+void percpu_free(void *__pdata)
+{
+	__percpu_depopulate_mask(__pdata, &cpu_possible_map);
+	kfree(__percpu_disguise(__pdata));
+}
+EXPORT_SYMBOL_GPL(percpu_free);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 50353e0dac1..d53112fcb40 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -8,17 +8,15 @@
  * free memory collector. It's used to deal with reserved
  * system memory and memory holes as well.
  */
-
-#include <linux/mm.h>
-#include <linux/kernel_stat.h>
-#include <linux/swap.h>
-#include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/pfn.h>
 #include <linux/bootmem.h>
-#include <linux/mmzone.h>
 #include <linux/module.h>
-#include <asm/dma.h>
+
+#include <asm/bug.h>
 #include <asm/io.h>
+#include <asm/processor.h>
+
 #include "internal.h"
 
 /*
@@ -41,7 +39,7 @@ unsigned long saved_max_pfn;
 #endif
 
 /* return the number of _pages_ that will be allocated for the boot bitmap */
-unsigned long __init bootmem_bootmap_pages (unsigned long pages)
+unsigned long __init bootmem_bootmap_pages(unsigned long pages)
 {
 	unsigned long mapsize;
 
@@ -51,12 +49,14 @@ unsigned long __init bootmem_bootmap_pages (unsigned long pages)
 
 	return mapsize;
 }
+
 /*
  * link bdata in order
  */
-static void link_bootmem(bootmem_data_t *bdata)
+static void __init link_bootmem(bootmem_data_t *bdata)
 {
 	bootmem_data_t *ent;
+
 	if (list_empty(&bdata_list)) {
 		list_add(&bdata->list, &bdata_list);
 		return;
@@ -69,22 +69,32 @@ static void link_bootmem(bootmem_data_t *bdata)
 		}
 	}
 	list_add_tail(&bdata->list, &bdata_list);
-	return;
 }
 
+/*
+ * Given an initialised bdata, it returns the size of the boot bitmap
+ */
+static unsigned long __init get_mapsize(bootmem_data_t *bdata)
+{
+	unsigned long mapsize;
+	unsigned long start = PFN_DOWN(bdata->node_boot_start);
+	unsigned long end = bdata->node_low_pfn;
+
+	mapsize = ((end - start) + 7) / 8;
+	return ALIGN(mapsize, sizeof(long));
+}
+
 /*
  * Called once to set up the allocator itself.
  */
-static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
+static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
 	unsigned long mapstart, unsigned long start, unsigned long end)
 {
 	bootmem_data_t *bdata = pgdat->bdata;
-	unsigned long mapsize = ((end - start)+7)/8;
+	unsigned long mapsize;
 
-	mapsize = ALIGN(mapsize, sizeof(long));
-	bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
-	bdata->node_boot_start = (start << PAGE_SHIFT);
+	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
+	bdata->node_boot_start = PFN_PHYS(start);
 	bdata->node_low_pfn = end;
 	link_bootmem(bdata);
 
@@ -92,6 +102,7 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
 	 * Initially all pages are reserved - setup_arch() has to
 	 * register free RAM areas explicitly.
*/ + mapsize = get_mapsize(bdata); memset(bdata->node_bootmem_map, 0xff, mapsize); return mapsize; @@ -102,22 +113,22 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat, * might be used for boot-time allocations - or it might get added * to the free page pool later on. */ -static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) +static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, + unsigned long size) { + unsigned long sidx, eidx; unsigned long i; + /* * round up, partially reserved pages are considered * fully reserved. */ - unsigned long sidx = (addr - bdata->node_boot_start)/PAGE_SIZE; - unsigned long eidx = (addr + size - bdata->node_boot_start + - PAGE_SIZE-1)/PAGE_SIZE; - unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE; - BUG_ON(!size); - BUG_ON(sidx >= eidx); - BUG_ON((addr >> PAGE_SHIFT) >= bdata->node_low_pfn); - BUG_ON(end > bdata->node_low_pfn); + BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn); + BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn); + + sidx = PFN_DOWN(addr - bdata->node_boot_start); + eidx = PFN_UP(addr + size - bdata->node_boot_start); for (i = sidx; i < eidx; i++) if (test_and_set_bit(i, bdata->node_bootmem_map)) { @@ -127,20 +138,18 @@ static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long add } } -static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) +static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, + unsigned long size) { + unsigned long sidx, eidx; unsigned long i; - unsigned long start; + /* * round down end of usable mem, partially free pages are * considered reserved. */ - unsigned long sidx; - unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE; - unsigned long end = (addr + size)/PAGE_SIZE; - BUG_ON(!size); - BUG_ON(end > bdata->node_low_pfn); + BUG_ON(PFN_DOWN(addr + size) > bdata->node_low_pfn); if (addr < bdata->last_success) bdata->last_success = addr; @@ -148,8 +157,8 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, /* * Round up the beginning of the address. 
*/ - start = (addr + PAGE_SIZE-1) / PAGE_SIZE; - sidx = start - (bdata->node_boot_start/PAGE_SIZE); + sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start); + eidx = PFN_DOWN(addr + size - bdata->node_boot_start); for (i = sidx; i < eidx; i++) { if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map))) @@ -175,10 +184,10 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { unsigned long offset, remaining_size, areasize, preferred; - unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn; + unsigned long i, start = 0, incr, eidx, end_pfn; void *ret; - if(!size) { + if (!size) { printk("__alloc_bootmem_core(): zero-sized request\n"); BUG(); } @@ -187,23 +196,22 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, if (limit && bdata->node_boot_start >= limit) return NULL; - limit >>=PAGE_SHIFT; + end_pfn = bdata->node_low_pfn; + limit = PFN_DOWN(limit); if (limit && end_pfn > limit) end_pfn = limit; - eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT); + eidx = end_pfn - PFN_DOWN(bdata->node_boot_start); offset = 0; - if (align && - (bdata->node_boot_start & (align - 1UL)) != 0) - offset = (align - (bdata->node_boot_start & (align - 1UL))); - offset >>= PAGE_SHIFT; + if (align && (bdata->node_boot_start & (align - 1UL)) != 0) + offset = align - (bdata->node_boot_start & (align - 1UL)); + offset = PFN_DOWN(offset); /* * We try to allocate bootmem pages above 'goal' * first, then we try to allocate lower pages. */ - if (goal && (goal >= bdata->node_boot_start) && - ((goal >> PAGE_SHIFT) < end_pfn)) { + if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) { preferred = goal - bdata->node_boot_start; if (bdata->last_success >= preferred) @@ -212,9 +220,8 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, } else preferred = 0; - preferred = ALIGN(preferred, align) >> PAGE_SHIFT; - preferred += offset; - areasize = (size+PAGE_SIZE-1)/PAGE_SIZE; + preferred = PFN_DOWN(ALIGN(preferred, align)) + offset; + areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; incr = align >> PAGE_SHIFT ? 
: 1; restart_scan: @@ -229,7 +236,7 @@ restart_scan: for (j = i + 1; j < i + areasize; ++j) { if (j >= eidx) goto fail_block; - if (test_bit (j, bdata->node_bootmem_map)) + if (test_bit(j, bdata->node_bootmem_map)) goto fail_block; } start = i; @@ -245,7 +252,7 @@ restart_scan: return NULL; found: - bdata->last_success = start << PAGE_SHIFT; + bdata->last_success = PFN_PHYS(start); BUG_ON(start >= eidx); /* @@ -257,19 +264,21 @@ found: bdata->last_offset && bdata->last_pos+1 == start) { offset = ALIGN(bdata->last_offset, align); BUG_ON(offset > PAGE_SIZE); - remaining_size = PAGE_SIZE-offset; + remaining_size = PAGE_SIZE - offset; if (size < remaining_size) { areasize = 0; /* last_pos unchanged */ - bdata->last_offset = offset+size; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + - bdata->node_boot_start); + bdata->last_offset = offset + size; + ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + + offset + + bdata->node_boot_start); } else { remaining_size = size - remaining_size; - areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + - bdata->node_boot_start); - bdata->last_pos = start+areasize-1; + areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE; + ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + + offset + + bdata->node_boot_start); + bdata->last_pos = start + areasize - 1; bdata->last_offset = remaining_size; } bdata->last_offset &= ~PAGE_MASK; @@ -282,7 +291,7 @@ found: /* * Reserve the area now: */ - for (i = start; i < start+areasize; i++) + for (i = start; i < start + areasize; i++) if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map))) BUG(); memset(ret, 0, size); @@ -303,8 +312,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) count = 0; /* first extant page of the node */ - pfn = bdata->node_boot_start >> PAGE_SHIFT; - idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); + pfn = PFN_DOWN(bdata->node_boot_start); + idx = bdata->node_low_pfn - pfn; map = bdata->node_bootmem_map; /* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */ if (bdata->node_boot_start == 0 || @@ -333,7 +342,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) } } } else { - i+=BITS_PER_LONG; + i += BITS_PER_LONG; } pfn += BITS_PER_LONG; } @@ -345,9 +354,10 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) */ page = virt_to_page(bdata->node_bootmem_map); count = 0; - for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) { - count++; + idx = (get_mapsize(bdata) + PAGE_SIZE-1) >> PAGE_SHIFT; + for (i = 0; i < idx; i++, page++) { __free_pages_bootmem(page, 0); + count++; } total += count; bdata->node_bootmem_map = NULL; @@ -355,64 +365,72 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) return total; } -unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn) +unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, + unsigned long startpfn, unsigned long endpfn) { - return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn)); + return init_bootmem_core(pgdat, freepfn, startpfn, endpfn); } -void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size) +void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, + unsigned long size) { reserve_bootmem_core(pgdat->bdata, physaddr, size); } -void __init free_bootmem_node (pg_data_t 
*pgdat, unsigned long physaddr, unsigned long size) +void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, + unsigned long size) { free_bootmem_core(pgdat->bdata, physaddr, size); } -unsigned long __init free_all_bootmem_node (pg_data_t *pgdat) +unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) { - return(free_all_bootmem_core(pgdat)); + return free_all_bootmem_core(pgdat); } -unsigned long __init init_bootmem (unsigned long start, unsigned long pages) +unsigned long __init init_bootmem(unsigned long start, unsigned long pages) { max_low_pfn = pages; min_low_pfn = start; - return(init_bootmem_core(NODE_DATA(0), start, 0, pages)); + return init_bootmem_core(NODE_DATA(0), start, 0, pages); } #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -void __init reserve_bootmem (unsigned long addr, unsigned long size) +void __init reserve_bootmem(unsigned long addr, unsigned long size) { reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size); } #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ -void __init free_bootmem (unsigned long addr, unsigned long size) +void __init free_bootmem(unsigned long addr, unsigned long size) { free_bootmem_core(NODE_DATA(0)->bdata, addr, size); } -unsigned long __init free_all_bootmem (void) +unsigned long __init free_all_bootmem(void) { - return(free_all_bootmem_core(NODE_DATA(0))); + return free_all_bootmem_core(NODE_DATA(0)); } -void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, + unsigned long goal) { bootmem_data_t *bdata; void *ptr; - list_for_each_entry(bdata, &bdata_list, list) - if ((ptr = __alloc_bootmem_core(bdata, size, align, goal, 0))) - return(ptr); + list_for_each_entry(bdata, &bdata_list, list) { + ptr = __alloc_bootmem_core(bdata, size, align, goal, 0); + if (ptr) + return ptr; + } return NULL; } -void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem(unsigned long size, unsigned long align, + unsigned long goal) { void *mem = __alloc_bootmem_nopanic(size,align,goal); + if (mem) return mem; /* @@ -424,29 +442,34 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned } -void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, - unsigned long goal) +void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, + unsigned long align, unsigned long goal) { void *ptr; ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); if (ptr) - return (ptr); + return ptr; return __alloc_bootmem(size, align, goal); } -#define LOW32LIMIT 0xffffffff +#ifndef ARCH_LOW_ADDRESS_LIMIT +#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL +#endif -void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, + unsigned long goal) { bootmem_data_t *bdata; void *ptr; - list_for_each_entry(bdata, &bdata_list, list) - if ((ptr = __alloc_bootmem_core(bdata, size, - align, goal, LOW32LIMIT))) - return(ptr); + list_for_each_entry(bdata, &bdata_list, list) { + ptr = __alloc_bootmem_core(bdata, size, align, goal, + ARCH_LOW_ADDRESS_LIMIT); + if (ptr) + return ptr; + } /* * Whoops, we cannot satisfy the allocation request. 
@@ -459,5 +482,6 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsig void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { - return __alloc_bootmem_core(pgdat->bdata, size, align, goal, LOW32LIMIT); + return __alloc_bootmem_core(pgdat->bdata, size, align, goal, + ARCH_LOW_ADDRESS_LIMIT); } diff --git a/mm/filemap.c b/mm/filemap.c index 3195806d78e..87d4a398cd1 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -488,6 +488,12 @@ struct page *page_cache_alloc_cold(struct address_space *x) EXPORT_SYMBOL(page_cache_alloc_cold); #endif +static int __sleep_on_page_lock(void *word) +{ + io_schedule(); + return 0; +} + /* * In order to wait for pages to become available there must be * waitqueues associated with pages. By using a hash table of @@ -577,13 +583,24 @@ void fastcall __lock_page(struct page *page) } EXPORT_SYMBOL(__lock_page); +/* + * Variant of lock_page that does not require the caller to hold a reference + * on the page's mapping. + */ +void fastcall __lock_page_nosync(struct page *page) +{ + DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); + __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock, + TASK_UNINTERRUPTIBLE); +} + /** * find_get_page - find and get a page reference * @mapping: the address_space to search * @offset: the page index * - * A rather lightweight function, finding and getting a reference to a - * hashed page atomically. + * Is there a pagecache struct page at the given (mapping, offset) tuple? + * If yes, increment its refcount and return it; if no, return NULL. */ struct page * find_get_page(struct address_space *mapping, unsigned long offset) { @@ -970,7 +987,7 @@ page_not_up_to_date: /* Get exclusive access to the page ... */ lock_page(page); - /* Did it get unhashed before we got the lock? */ + /* Did it get truncated before we got the lock? */ if (!page->mapping) { unlock_page(page); page_cache_release(page); @@ -1612,7 +1629,7 @@ no_cached_page: page_not_uptodate: lock_page(page); - /* Did it get unhashed while we waited for it? */ + /* Did it get truncated while we waited for it? 
*/ if (!page->mapping) { unlock_page(page); goto err; diff --git a/mm/fremap.c b/mm/fremap.c index 21b7d0cbc98..aa30618ec6b 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -79,9 +79,9 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, inc_mm_counter(mm, file_rss); flush_icache_page(vma, page); - set_pte_at(mm, addr, pte, mk_pte(page, prot)); + pte_val = mk_pte(page, prot); + set_pte_at(mm, addr, pte, pte_val); page_add_file_rmap(page); - pte_val = *pte; update_mmu_cache(vma, addr, pte_val); lazy_mmu_prot_update(pte_val); err = 0; diff --git a/mm/highmem.c b/mm/highmem.c index 9b2a5403c44..ee5519b176e 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -46,6 +46,19 @@ static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) */ #ifdef CONFIG_HIGHMEM +unsigned long totalhigh_pages __read_mostly; + +unsigned int nr_free_highpages (void) +{ + pg_data_t *pgdat; + unsigned int pages = 0; + + for_each_online_pgdat(pgdat) + pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages; + + return pages; +} + static int pkmap_count[LAST_PKMAP]; static unsigned int last_pkmap_nr; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index df499973255..7c7d03dbf73 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -72,7 +72,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma, struct zone **z; for (z = zonelist->zones; *z; z++) { - nid = (*z)->zone_pgdat->node_id; + nid = zone_to_nid(*z); if (cpuset_zone_allowed(*z, GFP_HIGHUSER) && !list_empty(&hugepage_freelists[nid])) break; @@ -177,7 +177,7 @@ static void update_and_free_page(struct page *page) { int i; nr_huge_pages--; - nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--; + nr_huge_pages_node[page_to_nid(page)]--; for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) { page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | 1 << PG_dirty | 1 << PG_active | 1 << PG_reserved | @@ -191,7 +191,8 @@ static void update_and_free_page(struct page *page) #ifdef CONFIG_HIGHMEM static void try_to_free_low(unsigned long count) { - int i, nid; + int i; + for (i = 0; i < MAX_NUMNODES; ++i) { struct page *page, *next; list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) { @@ -199,9 +200,8 @@ static void try_to_free_low(unsigned long count) continue; list_del(&page->lru); update_and_free_page(page); - nid = page_zone(page)->zone_pgdat->node_id; free_huge_pages--; - free_huge_pages_node[nid]--; + free_huge_pages_node[page_to_nid(page)]--; if (count >= nr_huge_pages) return; } diff --git a/mm/internal.h b/mm/internal.h index d20e3cc4aef..d527b80b292 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -24,8 +24,8 @@ static inline void set_page_count(struct page *page, int v) */ static inline void set_page_refcounted(struct page *page) { - BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page); - BUG_ON(atomic_read(&page->_count)); + VM_BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page); + VM_BUG_ON(atomic_read(&page->_count)); set_page_count(page, 1); } diff --git a/mm/memory.c b/mm/memory.c index 109e9866237..601159a46ab 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -49,6 +49,7 @@ #include <linux/module.h> #include <linux/delayacct.h> #include <linux/init.h> +#include <linux/writeback.h> #include <asm/pgalloc.h> #include <asm/uaccess.h> @@ -1226,7 +1227,12 @@ out: return retval; } -/* +/** + * vm_insert_page - insert single page into user vma + * @vma: user vma to map to + * @addr: target user address of this page + * @page: 
source kernel page + * * This allows drivers to insert individual pages they've allocated * into a user vma. * @@ -1318,7 +1324,16 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd, return 0; } -/* Note: this is only safe if the mm semaphore is held when called. */ +/** + * remap_pfn_range - remap kernel memory to userspace + * @vma: user vma to map to + * @addr: target user address to start at + * @pfn: physical address of kernel memory + * @size: size of map area + * @prot: page protection flags for this mapping + * + * Note: this is only safe if the mm semaphore is held when called. + */ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot) { @@ -1458,14 +1473,29 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *old_page, *new_page; pte_t entry; - int reuse, ret = VM_FAULT_MINOR; + int reuse = 0, ret = VM_FAULT_MINOR; + struct page *dirty_page = NULL; old_page = vm_normal_page(vma, address, orig_pte); if (!old_page) goto gotten; - if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) == - (VM_SHARED|VM_WRITE))) { + /* + * Take out anonymous pages first, anonymous shared vmas are + * not dirty accountable. + */ + if (PageAnon(old_page)) { + if (!TestSetPageLocked(old_page)) { + reuse = can_share_swap_page(old_page); + unlock_page(old_page); + } + } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == + (VM_WRITE|VM_SHARED))) { + /* + * Only catch write-faults on shared writable pages, + * read-only shared pages can get COWed by + * get_user_pages(.write=1, .force=1). + */ if (vma->vm_ops && vma->vm_ops->page_mkwrite) { /* * Notify the address space that the page is about to @@ -1494,13 +1524,9 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, if (!pte_same(*page_table, orig_pte)) goto unlock; } - + dirty_page = old_page; + get_page(dirty_page); reuse = 1; - } else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) { - reuse = can_share_swap_page(old_page); - unlock_page(old_page); - } else { - reuse = 0; } if (reuse) { @@ -1566,6 +1592,10 @@ gotten: page_cache_release(old_page); unlock: pte_unmap_unlock(page_table, ptl); + if (dirty_page) { + set_page_dirty_balance(dirty_page); + put_page(dirty_page); + } return ret; oom: |
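Much of the mm/bootmem.c churn above is a mechanical conversion from open-coded page-shift arithmetic to the PFN_UP()/PFN_DOWN()/PFN_PHYS() helpers from the newly included <linux/pfn.h>. The following stand-alone, userspace-style sketch only illustrates what those helpers compute; the 4 KiB page size and the sample addresses are assumptions made for the example, not values from the patch.

```c
#include <stdio.h>

/* Same arithmetic as the <linux/pfn.h> helpers, assuming 4 KiB pages. */
#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)	/* round up to the next frame   */
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)				/* round down to this frame     */
#define PFN_PHYS(x) ((unsigned long)(x) << PAGE_SHIFT)		/* frame number back to address */

int main(void)
{
	unsigned long node_boot_start = 0x100000;	/* hypothetical node base   */
	unsigned long addr = 0x101234, size = 0x3000;	/* hypothetical byte range  */

	/* reserve_bootmem_core(): partially covered pages count as fully reserved */
	printf("reserve sidx=%lu eidx=%lu\n",
	       PFN_DOWN(addr - node_boot_start),	/* start rounded down -> 1 */
	       PFN_UP(addr + size - node_boot_start));	/* end rounded up     -> 5 */

	/* free_bootmem_core() rounds the other way, so partial pages stay reserved */
	printf("free    sidx=%lu eidx=%lu\n",
	       PFN_UP(addr) - PFN_DOWN(node_boot_start),	/* -> 2 */
	       PFN_DOWN(addr + size - node_boot_start));	/* -> 4 */
	return 0;
}
```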