Diffstat (limited to 'mm')
-rw-r--r--   mm/Kconfig            |   12
-rw-r--r--   mm/bootmem.c          |  196
-rw-r--r--   mm/dmapool.c          |   12
-rw-r--r--   mm/fadvise.c          |    2
-rw-r--r--   mm/filemap.c          |   10
-rw-r--r--   mm/filemap_xip.c      |  200
-rw-r--r--   mm/hugetlb.c          |   78
-rw-r--r--   mm/internal.h         |    3
-rw-r--r--   mm/madvise.c          |    2
-rw-r--r--   mm/memory.c           |  228
-rw-r--r--   mm/memory_hotplug.c   |  186
-rw-r--r--   mm/mempolicy.c        | 1051
-rw-r--r--   mm/mincore.c          |    2
-rw-r--r--   mm/mmap.c             |   33
-rw-r--r--   mm/mmzone.c           |   30
-rw-r--r--   mm/nommu.c            |    6
-rw-r--r--   mm/oom_kill.c         |   58
-rw-r--r--   mm/page_alloc.c       |  274
-rw-r--r--   mm/pagewalk.c         |    8
-rw-r--r--   mm/rmap.c             |    8
-rw-r--r--   mm/shmem.c            |  144
-rw-r--r--   mm/slab.c             |   17
-rw-r--r--   mm/slub.c             |   18
-rw-r--r--   mm/sparse.c           |  145
-rw-r--r--   mm/swap.c             |   37
-rw-r--r--   mm/swapfile.c         |    8
-rw-r--r--   mm/truncate.c         |   11
-rw-r--r--   mm/vmalloc.c          |  141
-rw-r--r--   mm/vmscan.c           |   46
-rw-r--r--   mm/vmstat.c           |   11
30 files changed, 1919 insertions, 1058 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 0016ebd4dcb..3aa819d628c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -143,6 +143,18 @@ config MEMORY_HOTREMOVE
depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
depends on MIGRATION
+#
+# If we have space for more page flags then we can enable additional
+# optimizations and functionality.
+#
+# Regular Sparsemem takes page flag bits for the sectionid if it does not
+# use a virtual memmap. Disable extended page flags for 32 bit platforms
+# that require the use of a sectionid in the page flags.
+#
+config PAGEFLAGS_EXTENDED
+ def_bool y
+ depends on 64BIT || SPARSEMEM_VMEMMAP || !NUMA || !SPARSEMEM
+
# Heavily threaded applications may benefit from splitting the mm-wide
# page_table_lock, so that faults on different parts of the user address
# space can be handled with less contention: split it at this NR_CPUS.
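
[Editor's note: the PAGEFLAGS_EXTENDED option above is only selectable when page->flags does not also have to carry a sparsemem section number, i.e. on 64-bit, with SPARSEMEM_VMEMMAP, or when NUMA/SPARSEMEM are disabled. The following standalone C sketch only illustrates the bit-budget reasoning behind that dependency line; every field width below is an invented example, not the kernel's real layout.]

/* Illustrative page->flags bit budget; widths are made-up examples. */
#include <stdio.h>

int main(void)
{
	const int bits_per_long = 32;	/* a 32-bit platform */
	const int zone_bits     = 2;	/* example width */
	const int node_bits     = 6;	/* example width */
	const int section_bits  = 14;	/* example width, classic SPARSEMEM only */
	const int core_flags    = 20;	/* flags every configuration needs (example) */

	int spare_vmemmap = bits_per_long - zone_bits - node_bits - core_flags;
	int spare_classic = spare_vmemmap - section_bits;

	printf("spare bits with vmemmap/64-bit:     %d\n", spare_vmemmap);
	printf("spare bits with classic sparsemem:  %d\n", spare_classic);
	return 0;
}

With the section field out of page->flags (vmemmap or 64-bit), the leftover bits can be spent on the additional flags the new comment refers to; a negative result in the classic case is why the option is disabled there.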
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 2ccea700968..e8fb927392b 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -111,44 +111,74 @@ static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
* might be used for boot-time allocations - or it might get added
* to the free page pool later on.
*/
-static int __init reserve_bootmem_core(bootmem_data_t *bdata,
+static int __init can_reserve_bootmem_core(bootmem_data_t *bdata,
unsigned long addr, unsigned long size, int flags)
{
unsigned long sidx, eidx;
unsigned long i;
- int ret;
+
+ BUG_ON(!size);
+
+ /* out of range, don't hold other */
+ if (addr + size < bdata->node_boot_start ||
+ PFN_DOWN(addr) > bdata->node_low_pfn)
+ return 0;
/*
- * round up, partially reserved pages are considered
- * fully reserved.
+ * Round up to index to the range.
*/
+ if (addr > bdata->node_boot_start)
+ sidx= PFN_DOWN(addr - bdata->node_boot_start);
+ else
+ sidx = 0;
+
+ eidx = PFN_UP(addr + size - bdata->node_boot_start);
+ if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+ eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+
+ for (i = sidx; i < eidx; i++) {
+ if (test_bit(i, bdata->node_bootmem_map)) {
+ if (flags & BOOTMEM_EXCLUSIVE)
+ return -EBUSY;
+ }
+ }
+
+ return 0;
+
+}
+
+static void __init reserve_bootmem_core(bootmem_data_t *bdata,
+ unsigned long addr, unsigned long size, int flags)
+{
+ unsigned long sidx, eidx;
+ unsigned long i;
+
BUG_ON(!size);
- BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
- BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
- BUG_ON(addr < bdata->node_boot_start);
- sidx = PFN_DOWN(addr - bdata->node_boot_start);
+ /* out of range */
+ if (addr + size < bdata->node_boot_start ||
+ PFN_DOWN(addr) > bdata->node_low_pfn)
+ return;
+
+ /*
+ * Round up to index to the range.
+ */
+ if (addr > bdata->node_boot_start)
+ sidx= PFN_DOWN(addr - bdata->node_boot_start);
+ else
+ sidx = 0;
+
eidx = PFN_UP(addr + size - bdata->node_boot_start);
+ if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+ eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
- for (i = sidx; i < eidx; i++)
+ for (i = sidx; i < eidx; i++) {
if (test_and_set_bit(i, bdata->node_bootmem_map)) {
#ifdef CONFIG_DEBUG_BOOTMEM
printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
#endif
- if (flags & BOOTMEM_EXCLUSIVE) {
- ret = -EBUSY;
- goto err;
- }
}
-
- return 0;
-
-err:
- /* unreserve memory we accidentally reserved */
- for (i--; i >= sidx; i--)
- clear_bit(i, bdata->node_bootmem_map);
-
- return ret;
+ }
}
static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
@@ -206,9 +236,11 @@ void * __init
__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
unsigned long align, unsigned long goal, unsigned long limit)
{
- unsigned long offset, remaining_size, areasize, preferred;
+ unsigned long areasize, preferred;
unsigned long i, start = 0, incr, eidx, end_pfn;
void *ret;
+ unsigned long node_boot_start;
+ void *node_bootmem_map;
if (!size) {
printk("__alloc_bootmem_core(): zero-sized request\n");
@@ -216,70 +248,83 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
}
BUG_ON(align & (align-1));
- if (limit && bdata->node_boot_start >= limit)
- return NULL;
-
/* on nodes without memory - bootmem_map is NULL */
if (!bdata->node_bootmem_map)
return NULL;
+ /* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */
+ node_boot_start = bdata->node_boot_start;
+ node_bootmem_map = bdata->node_bootmem_map;
+ if (align) {
+ node_boot_start = ALIGN(bdata->node_boot_start, align);
+ if (node_boot_start > bdata->node_boot_start)
+ node_bootmem_map = (unsigned long *)bdata->node_bootmem_map +
+ PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG;
+ }
+
+ if (limit && node_boot_start >= limit)
+ return NULL;
+
end_pfn = bdata->node_low_pfn;
limit = PFN_DOWN(limit);
if (limit && end_pfn > limit)
end_pfn = limit;
- eidx = end_pfn - PFN_DOWN(bdata->node_boot_start);
- offset = 0;
- if (align && (bdata->node_boot_start & (align - 1UL)) != 0)
- offset = align - (bdata->node_boot_start & (align - 1UL));
- offset = PFN_DOWN(offset);
+ eidx = end_pfn - PFN_DOWN(node_boot_start);
/*
* We try to allocate bootmem pages above 'goal'
* first, then we try to allocate lower pages.
*/
- if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) {
- preferred = goal - bdata->node_boot_start;
+ preferred = 0;
+ if (goal && PFN_DOWN(goal) < end_pfn) {
+ if (goal > node_boot_start)
+ preferred = goal - node_boot_start;
- if (bdata->last_success >= preferred)
+ if (bdata->last_success > node_boot_start &&
+ bdata->last_success - node_boot_start >= preferred)
if (!limit || (limit && limit > bdata->last_success))
- preferred = bdata->last_success;
- } else
- preferred = 0;
+ preferred = bdata->last_success - node_boot_start;
+ }
- preferred = PFN_DOWN(ALIGN(preferred, align)) + offset;
+ preferred = PFN_DOWN(ALIGN(preferred, align));
areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
incr = align >> PAGE_SHIFT ? : 1;
restart_scan:
- for (i = preferred; i < eidx; i += incr) {
+ for (i = preferred; i < eidx;) {
unsigned long j;
- i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+
+ i = find_next_zero_bit(node_bootmem_map, eidx, i);
i = ALIGN(i, incr);
if (i >= eidx)
break;
- if (test_bit(i, bdata->node_bootmem_map))
+ if (test_bit(i, node_bootmem_map)) {
+ i += incr;
continue;
+ }
for (j = i + 1; j < i + areasize; ++j) {
if (j >= eidx)
goto fail_block;
- if (test_bit(j, bdata->node_bootmem_map))
+ if (test_bit(j, node_bootmem_map))
goto fail_block;
}
start = i;
goto found;
fail_block:
i = ALIGN(j, incr);
+ if (i == j)
+ i += incr;
}
- if (preferred > offset) {
- preferred = offset;
+ if (preferred > 0) {
+ preferred = 0;
goto restart_scan;
}
return NULL;
found:
- bdata->last_success = PFN_PHYS(start);
+ bdata->last_success = PFN_PHYS(start) + node_boot_start;
BUG_ON(start >= eidx);
/*
@@ -289,6 +334,7 @@ found:
*/
if (align < PAGE_SIZE &&
bdata->last_offset && bdata->last_pos+1 == start) {
+ unsigned long offset, remaining_size;
offset = ALIGN(bdata->last_offset, align);
BUG_ON(offset > PAGE_SIZE);
remaining_size = PAGE_SIZE - offset;
@@ -297,14 +343,12 @@ found:
/* last_pos unchanged */
bdata->last_offset = offset + size;
ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
- offset +
- bdata->node_boot_start);
+ offset + node_boot_start);
} else {
remaining_size = size - remaining_size;
areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
- offset +
- bdata->node_boot_start);
+ offset + node_boot_start);
bdata->last_pos = start + areasize - 1;
bdata->last_offset = remaining_size;
}
@@ -312,14 +356,14 @@ found:
} else {
bdata->last_pos = start + areasize - 1;
bdata->last_offset = size & ~PAGE_MASK;
- ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
+ ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
}
/*
* Reserve the area now:
*/
for (i = start; i < start + areasize; i++)
- if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
+ if (unlikely(test_and_set_bit(i, node_bootmem_map)))
BUG();
memset(ret, 0, size);
return ret;
@@ -401,6 +445,11 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size, int flags)
{
+ int ret;
+
+ ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+ if (ret < 0)
+ return;
reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
}
@@ -412,6 +461,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
+ register_page_bootmem_info_node(pgdat);
return free_all_bootmem_core(pgdat);
}
@@ -426,7 +476,18 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
int __init reserve_bootmem(unsigned long addr, unsigned long size,
int flags)
{
- return reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size, flags);
+ bootmem_data_t *bdata;
+ int ret;
+
+ list_for_each_entry(bdata, &bdata_list, list) {
+ ret = can_reserve_bootmem_core(bdata, addr, size, flags);
+ if (ret < 0)
+ return ret;
+ }
+ list_for_each_entry(bdata, &bdata_list, list)
+ reserve_bootmem_core(bdata, addr, size, flags);
+
+ return 0;
}
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
@@ -484,6 +545,37 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
return __alloc_bootmem(size, align, goal);
}
+#ifdef CONFIG_SPARSEMEM
+void * __init alloc_bootmem_section(unsigned long size,
+ unsigned long section_nr)
+{
+ void *ptr;
+ unsigned long limit, goal, start_nr, end_nr, pfn;
+ struct pglist_data *pgdat;
+
+ pfn = section_nr_to_pfn(section_nr);
+ goal = PFN_PHYS(pfn);
+ limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1;
+ pgdat = NODE_DATA(early_pfn_to_nid(pfn));
+ ptr = __alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal,
+ limit);
+
+ if (!ptr)
+ return NULL;
+
+ start_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr)));
+ end_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr) + size));
+ if (start_nr != section_nr || end_nr != section_nr) {
+ printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n",
+ section_nr);
+ free_bootmem_core(pgdat->bdata, __pa(ptr), size);
+ ptr = NULL;
+ }
+
+ return ptr;
+}
+#endif
+
#ifndef ARCH_LOW_ADDRESS_LIMIT
#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
#endif
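
[Editor's note: the reserve_bootmem() rework above runs two passes over bdata_list — can_reserve_bootmem_core() validates every node's bitmap first, and only then does reserve_bootmem_core() set bits — so a failed BOOTMEM_EXCLUSIVE request no longer needs the old "unreserve memory we accidentally reserved" rollback. A minimal userspace model of that validate-then-commit pattern follows; the names and the byte-per-page bitmap are illustrative, not kernel API.]

#include <errno.h>
#include <stdio.h>

#define NPAGES 64
static unsigned char bootmem_map[NPAGES];	/* one byte per page, purely for illustration */

/* pass 1: report -EBUSY before any state has been touched */
static int can_reserve(unsigned long sidx, unsigned long eidx, int exclusive)
{
	for (unsigned long i = sidx; i < eidx; i++)
		if (exclusive && bootmem_map[i])
			return -EBUSY;
	return 0;
}

/* pass 2: only reached once every range has passed the check above */
static void do_reserve(unsigned long sidx, unsigned long eidx)
{
	for (unsigned long i = sidx; i < eidx; i++)
		bootmem_map[i] = 1;
}

int main(void)
{
	if (can_reserve(3, 8, 1) == 0)
		do_reserve(3, 8);

	/* an overlapping exclusive request now fails cleanly, nothing to roll back */
	printf("second request: %d\n", can_reserve(5, 6, 1));	/* -EBUSY */
	return 0;
}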
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 34aaac451a9..b1f0885dda2 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -37,6 +37,10 @@
#include <linux/types.h>
#include <linux/wait.h>
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
+#define DMAPOOL_DEBUG 1
+#endif
+
struct dma_pool { /* the pool */
struct list_head page_list;
spinlock_t lock;
@@ -216,7 +220,7 @@ static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags)
page->vaddr = dma_alloc_coherent(pool->dev, pool->allocation,
&page->dma, mem_flags);
if (page->vaddr) {
-#ifdef CONFIG_DEBUG_SLAB
+#ifdef DMAPOOL_DEBUG
memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
#endif
pool_initialise_page(pool, page);
@@ -239,7 +243,7 @@ static void pool_free_page(struct dma_pool *pool, struct dma_page *page)
{
dma_addr_t dma = page->dma;
-#ifdef CONFIG_DEBUG_SLAB
+#ifdef DMAPOOL_DEBUG
memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
#endif
dma_free_coherent(pool->dev, pool->allocation, page->vaddr, dma);
@@ -336,7 +340,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
page->offset = *(int *)(page->vaddr + offset);
retval = offset + page->vaddr;
*handle = offset + page->dma;
-#ifdef CONFIG_DEBUG_SLAB
+#ifdef DMAPOOL_DEBUG
memset(retval, POOL_POISON_ALLOCATED, pool->size);
#endif
done:
@@ -391,7 +395,7 @@ void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
}
offset = vaddr - page->vaddr;
-#ifdef CONFIG_DEBUG_SLAB
+#ifdef DMAPOOL_DEBUG
if ((dma - page->dma) != offset) {
if (pool->dev)
dev_err(pool->dev,
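
[Editor's note: the new DMAPOOL_DEBUG define above simply lets the existing poison checks compile under SLUB debugging as well as CONFIG_DEBUG_SLAB. The toy allocator below only sketches the poison-on-free / poison-on-alloc idea; the constants and helpers are illustrative stand-ins, not the dmapool implementation.]

#include <stdio.h>
#include <string.h>

/* illustrative poison values, in the spirit of POOL_POISON_FREED/ALLOCATED */
#define POISON_FREED     0xa7
#define POISON_ALLOCATED 0xa9
#define BLOCK_SIZE       32

static unsigned char block[BLOCK_SIZE];

static void *toy_alloc(void)
{
	/* anything still showing the "freed" pattern afterwards is a bug */
	memset(block, POISON_ALLOCATED, BLOCK_SIZE);
	return block;
}

static void toy_free(void *p)
{
	/* stomp the contents so use-after-free reads stand out immediately */
	memset(p, POISON_FREED, BLOCK_SIZE);
}

int main(void)
{
	unsigned char *p = toy_alloc();
	toy_free(p);
	printf("byte after free: 0x%02x\n", p[0]);	/* 0xa7 */
	return 0;
}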
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 3c0f1e99f5e..343cfdfebd9 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -49,7 +49,7 @@ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
goto out;
}
- if (mapping->a_ops->get_xip_page) {
+ if (mapping->a_ops->get_xip_mem) {
switch (advice) {
case POSIX_FADV_NORMAL:
case POSIX_FADV_RANDOM:
diff --git a/mm/filemap.c b/mm/filemap.c
index 07e9d9258b4..239d36163bb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -576,10 +576,12 @@ EXPORT_SYMBOL(unlock_page);
*/
void end_page_writeback(struct page *page)
{
- if (!TestClearPageReclaim(page) || rotate_reclaimable_page(page)) {
- if (!test_clear_page_writeback(page))
- BUG();
- }
+ if (TestClearPageReclaim(page))
+ rotate_reclaimable_page(page);
+
+ if (!test_clear_page_writeback(page))
+ BUG();
+
smp_mb__after_clear_bit();
wake_up_page(page, PG_writeback);
}
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 5e598c42afd..3e744abcce9 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -15,6 +15,7 @@
#include <linux/rmap.h>
#include <linux/sched.h>
#include <asm/tlbflush.h>
+#include <asm/io.h>
/*
* We do use our own empty page to avoid interference with other users
@@ -42,37 +43,41 @@ static struct page *xip_sparse_page(void)
/*
* This is a file read routine for execute in place files, and uses
- * the mapping->a_ops->get_xip_page() function for the actual low-level
+ * the mapping->a_ops->get_xip_mem() function for the actual low-level
* stuff.
*
* Note the struct file* is not used at all. It may be NULL.
*/
-static void
+static ssize_t
do_xip_mapping_read(struct address_space *mapping,
struct file_ra_state *_ra,
struct file *filp,
- loff_t *ppos,
- read_descriptor_t *desc,
- read_actor_t actor)
+ char __user *buf,
+ size_t len,
+ loff_t *ppos)
{
struct inode *inode = mapping->host;
pgoff_t index, end_index;
unsigned long offset;
- loff_t isize;
+ loff_t isize, pos;
+ size_t copied = 0, error = 0;
- BUG_ON(!mapping->a_ops->get_xip_page);
+ BUG_ON(!mapping->a_ops->get_xip_mem);
- index = *ppos >> PAGE_CACHE_SHIFT;
- offset = *ppos & ~PAGE_CACHE_MASK;
+ pos = *ppos;
+ index = pos >> PAGE_CACHE_SHIFT;
+ offset = pos & ~PAGE_CACHE_MASK;
isize = i_size_read(inode);
if (!isize)
goto out;
end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
- for (;;) {
- struct page *page;
- unsigned long nr, ret;
+ do {
+ unsigned long nr, left;
+ void *xip_mem;
+ unsigned long xip_pfn;
+ int zero = 0;
/* nr is the maximum number of bytes to copy from this page */
nr = PAGE_CACHE_SIZE;
@@ -85,19 +90,17 @@ do_xip_mapping_read(struct address_space *mapping,
}
}
nr = nr - offset;
+ if (nr > len)
+ nr = len;
- page = mapping->a_ops->get_xip_page(mapping,
- index*(PAGE_SIZE/512), 0);
- if (!page)
- goto no_xip_page;
- if (unlikely(IS_ERR(page))) {
- if (PTR_ERR(page) == -ENODATA) {
+ error = mapping->a_ops->get_xip_mem(mapping, index, 0,
+ &xip_mem, &xip_pfn);
+ if (unlikely(error)) {
+ if (error == -ENODATA) {
/* sparse */
- page = ZERO_PAGE(0);
- } else {
- desc->error = PTR_ERR(page);
+ zero = 1;
+ } else
goto out;
- }
}
/* If users can be writing to this page using arbitrary
@@ -105,10 +108,10 @@ do_xip_mapping_read(struct address_space *mapping,
* before reading the page on the kernel side.
*/
if (mapping_writably_mapped(mapping))
- flush_dcache_page(page);
+ /* address based flush */ ;
/*
- * Ok, we have the page, so now we can copy it to user space...
+ * Ok, we have the mem, so now we can copy it to user space...
*
* The actor routine returns how many bytes were actually used..
* NOTE! This may not be the same as how much of a user buffer
@@ -116,47 +119,38 @@ do_xip_mapping_read(struct address_space *mapping,
* "pos" here (the actor routine has to update the user buffer
* pointers and the remaining count).
*/
- ret = actor(desc, page, offset, nr);
- offset += ret;
- index += offset >> PAGE_CACHE_SHIFT;
- offset &= ~PAGE_CACHE_MASK;
+ if (!zero)
+ left = __copy_to_user(buf+copied, xip_mem+offset, nr);
+ else
+ left = __clear_user(buf + copied, nr);
- if (ret == nr && desc->count)
- continue;
- goto out;
+ if (left) {
+ error = -EFAULT;
+ goto out;
+ }
-no_xip_page:
- /* Did not get the page. Report it */
- desc->error = -EIO;
- goto out;
- }
+ copied += (nr - left);
+ offset += (nr - left);
+ index += offset >> PAGE_CACHE_SHIFT;
+ offset &= ~PAGE_CACHE_MASK;
+ } while (copied < len);
out:
- *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
+ *ppos = pos + copied;
if (filp)
file_accessed(filp);
+
+ return (copied ? copied : error);
}
ssize_t
xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
- read_descriptor_t desc;
-
if (!access_ok(VERIFY_WRITE, buf, len))
return -EFAULT;
- desc.written = 0;
- desc.arg.buf = buf;
- desc.count = len;
- desc.error = 0;
-
- do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp,
- ppos, &desc, file_read_actor);
-
- if (desc.written)
- return desc.written;
- else
- return desc.error;
+ return do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp,
+ buf, len, ppos);
}
EXPORT_SYMBOL_GPL(xip_file_read);
@@ -211,13 +205,16 @@ __xip_unmap (struct address_space * mapping,
*
* This function is derived from filemap_fault, but used for execute in place
*/
-static int xip_file_fault(struct vm_area_struct *area, struct vm_fault *vmf)
+static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- struct file *file = area->vm_file;
+ struct file *file = vma->vm_file;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
- struct page *page;
pgoff_t size;
+ void *xip_mem;
+ unsigned long xip_pfn;
+ struct page *page;
+ int error;
/* XXX: are VM_FAULT_ codes OK? */
@@ -225,35 +222,44 @@ static int xip_file_fault(struct vm_area_struct *area, struct vm_fault *vmf)
if (vmf->pgoff >= size)
return VM_FAULT_SIGBUS;
- page = mapping->a_ops->get_xip_page(mapping,
- vmf->pgoff*(PAGE_SIZE/512), 0);
- if (!IS_ERR(page))
- goto out;
- if (PTR_ERR(page) != -ENODATA)
+ error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
+ &xip_mem, &xip_pfn);
+ if (likely(!error))
+ goto found;
+ if (error != -ENODATA)
return VM_FAULT_OOM;
/* sparse block */
- if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
- (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) &&
+ if ((vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
+ (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) &&
(!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
+ int err;
+
/* maybe shared writable, allocate new block */
- page = mapping->a_ops->get_xip_page(mapping,
- vmf->pgoff*(PAGE_SIZE/512), 1);
- if (IS_ERR(page))
+ error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1,
+ &xip_mem, &xip_pfn);
+ if (error)
return VM_FAULT_SIGBUS;
- /* unmap page at pgoff from all other vmas */
+ /* unmap sparse mappings at pgoff from all other vmas */
__xip_unmap(mapping, vmf->pgoff);
+
+found:
+ err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address,
+ xip_pfn);
+ if (err == -ENOMEM)
+ return VM_FAULT_OOM;
+ BUG_ON(err);
+ return VM_FAULT_NOPAGE;
} else {
/* not shared and writable, use xip_sparse_page() */
page = xip_sparse_page();
if (!page)
return VM_FAULT_OOM;
- }
-out:
- page_cache_get(page);
- vmf->page = page;
- return 0;
+ page_cache_get(page);
+ vmf->page = page;
+ return 0;
+ }
}
static struct vm_operations_struct xip_file_vm_ops = {
@@ -262,11 +268,11 @@ static struct vm_operations_struct xip_file_vm_ops = {
int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
{
- BUG_ON(!file->f_mapping->a_ops->get_xip_page);
+ BUG_ON(!file->f_mapping->a_ops->get_xip_mem);
file_accessed(file);
vma->vm_ops = &xip_file_vm_ops;
- vma->vm_flags |= VM_CAN_NONLINEAR;
+ vma->vm_flags |= VM_CAN_NONLINEAR | VM_MIXEDMAP;
return 0;
}
EXPORT_SYMBOL_GPL(xip_file_mmap);
@@ -279,17 +285,17 @@ __xip_file_write(struct file *filp, const char __user *buf,
const struct address_space_operations *a_ops = mapping->a_ops;
struct inode *inode = mapping->host;
long status = 0;
- struct page *page;
size_t bytes;
ssize_t written = 0;
- BUG_ON(!mapping->a_ops->get_xip_page);
+ BUG_ON(!mapping->a_ops->get_xip_mem);
do {
unsigned long index;
unsigned long offset;
size_t copied;
- char *kaddr;
+ void *xip_mem;
+ unsigned long xip_pfn;
offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
index = pos >> PAGE_CACHE_SHIFT;
@@ -297,28 +303,22 @@ __xip_file_write(struct file *filp, const char __user *buf,
if (bytes > count)
bytes = count;
- page = a_ops->get_xip_page(mapping,
- index*(PAGE_SIZE/512), 0);
- if (IS_ERR(page) && (PTR_ERR(page) == -ENODATA)) {
+ status = a_ops->get_xip_mem(mapping, index, 0,
+ &xip_mem, &xip_pfn);
+ if (status == -ENODATA) {
/* we allocate a new page unmap it */
- page = a_ops->get_xip_page(mapping,
- index*(PAGE_SIZE/512), 1);
- if (!IS_ERR(page))
+ status = a_ops->get_xip_mem(mapping, index, 1,
+ &xip_mem, &xip_pfn);
+ if (!status)
/* unmap page at pgoff from all other vmas */
__xip_unmap(mapping, index);
}
- if (IS_ERR(page)) {
- status = PTR_ERR(page);
+ if (status)
break;
- }
- fault_in_pages_readable(buf, bytes);
- kaddr = kmap_atomic(page, KM_USER0);
copied = bytes -
- __copy_from_user_inatomic_nocache(kaddr + offset, buf, bytes);
- kunmap_atomic(kaddr, KM_USER0);
- flush_dcache_page(page);
+ __copy_from_user_nocache(xip_mem + offset, buf, bytes);
if (likely(copied > 0)) {
status = copied;
@@ -398,7 +398,7 @@ EXPORT_SYMBOL_GPL(xip_file_write);
/*
* truncate a page used for execute in place
- * functionality is analog to block_truncate_page but does use get_xip_page
+ * functionality is analog to block_truncate_page but does use get_xip_mem
* to get the page instead of page cache
*/
int
@@ -408,9 +408,11 @@ xip_truncate_page(struct address_space *mapping, loff_t from)
unsigned offset = from & (PAGE_CACHE_SIZE-1);
unsigned blocksize;
unsigned length;
- struct page *page;
+ void *xip_mem;
+ unsigned long xip_pfn;
+ int err;
- BUG_ON(!mapping->a_ops->get_xip_page);
+ BUG_ON(!mapping->a_ops->get_xip_mem);
blocksize = 1 << mapping->host->i_blkbits;
length = offset & (blocksize - 1);
@@ -421,18 +423,16 @@ xip_truncate_page(struct address_space *mapping, loff_t from)
length = blocksize - length;
- page = mapping->a_ops->get_xip_page(mapping,
- index*(PAGE_SIZE/512), 0);
- if (!page)
- return -ENOMEM;
- if (unlikely(IS_ERR(page))) {
- if (PTR_ERR(page) == -ENODATA)
+ err = mapping->a_ops->get_xip_mem(mapping, index, 0,
+ &xip_mem, &xip_pfn);
+ if (unlikely(err)) {
+ if (err == -ENODATA)
/* Hole? No need to truncate */
return 0;
else
- return PTR_ERR(page);
+ return err;
}
- zero_user(page, offset, length);
+ memset(xip_mem + offset, 0, length);
return 0;
}
EXPORT_SYMBOL_GPL(xip_truncate_page);
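
[Editor's note: the conversion above replaces get_xip_page() with get_xip_mem(), which hands back a directly addressable kernel pointer and a pfn instead of a struct page, so read/write copy straight from the backing memory and the fault path inserts the pfn with vm_insert_mixed(). The userspace model below only mirrors the shape of the do_xip_mapping_read() loop — per-chunk lookup, -ENODATA holes read as zeroes, copied/offset accounting; all names are illustrative, and file-size checks are omitted.]

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define CHUNK 16			/* stands in for PAGE_CACHE_SIZE */
static char backing[3][CHUNK];		/* chunk 1 is treated as a hole */

static int toy_get_xip_mem(unsigned long index, void **mem)
{
	if (index == 1)
		return -ENODATA;	/* sparse hole */
	*mem = backing[index];
	return 0;
}

static size_t toy_read(char *buf, size_t len, unsigned long pos)
{
	size_t copied = 0;

	while (copied < len) {
		unsigned long index = pos / CHUNK, offset = pos % CHUNK;
		size_t nr = CHUNK - offset;
		void *mem;
		int err;

		if (nr > len - copied)
			nr = len - copied;
		err = toy_get_xip_mem(index, &mem);
		if (err == -ENODATA)
			memset(buf + copied, 0, nr);	/* holes read as zeroes */
		else
			memcpy(buf + copied, (char *)mem + offset, nr);
		copied += nr;
		pos += nr;
	}
	return copied;
}

int main(void)
{
	char buf[48];
	size_t n;

	memset(backing, 'x', sizeof(backing));
	n = toy_read(buf, sizeof(buf), 0);
	printf("read %zu bytes, byte 20 = %d\n", n, buf[20]);	/* byte 20 falls in the hole */
	return 0;
}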
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 51c9e2c0164..df28c1773fb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -95,13 +95,16 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
int nid;
struct page *page = NULL;
struct mempolicy *mpol;
+ nodemask_t *nodemask;
struct zonelist *zonelist = huge_zonelist(vma, address,
- htlb_alloc_mask, &mpol);
- struct zone **z;
-
- for (z = zonelist->zones; *z; z++) {
- nid = zone_to_nid(*z);
- if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) &&
+ htlb_alloc_mask, &mpol, &nodemask);
+ struct zone *zone;
+ struct zoneref *z;
+
+ for_each_zone_zonelist_nodemask(zone, z, zonelist,
+ MAX_NR_ZONES - 1, nodemask) {
+ nid = zone_to_nid(zone);
+ if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
!list_empty(&hugepage_freelists[nid])) {
page = list_entry(hugepage_freelists[nid].next,
struct page, lru);
@@ -113,7 +116,7 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
break;
}
}
- mpol_free(mpol); /* unref if mpol !NULL */
+ mpol_cond_put(mpol);
return page;
}
@@ -129,6 +132,7 @@ static void update_and_free_page(struct page *page)
}
set_compound_page_dtor(page, NULL);
set_page_refcounted(page);
+ arch_release_hugepage(page);
__free_pages(page, HUGETLB_PAGE_ORDER);
}
@@ -198,6 +202,10 @@ static struct page *alloc_fresh_huge_page_node(int nid)
htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|__GFP_NOWARN,
HUGETLB_PAGE_ORDER);
if (page) {
+ if (arch_prepare_hugepage(page)) {
+ __free_pages(page, HUGETLB_PAGE_ORDER);
+ return 0;
+ }
set_compound_page_dtor(page, free_huge_page);
spin_lock(&hugetlb_lock);
nr_huge_pages++;
@@ -239,6 +247,11 @@ static int alloc_fresh_huge_page(void)
hugetlb_next_nid = next_nid;
} while (!page && hugetlb_next_nid != start_nid);
+ if (ret)
+ count_vm_event(HTLB_BUDDY_PGALLOC);
+ else
+ count_vm_event(HTLB_BUDDY_PGAL