about summary refs log tree commit diff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/Kconfig 4
-rw-r--r-- mm/bootmem.c 45
-rw-r--r-- mm/fremap.c 3
-rw-r--r-- mm/highmem.c 2
-rw-r--r-- mm/hugetlb.c 57
-rw-r--r-- mm/madvise.c 11
-rw-r--r-- mm/memory.c 4
-rw-r--r-- mm/mempolicy.c 8
-rw-r--r-- mm/mempool.c 6
-rw-r--r-- mm/mmap.c 2
-rw-r--r-- mm/mprotect.c 3
-rw-r--r-- mm/mremap.c 6
-rw-r--r-- mm/nommu.c 3
-rw-r--r-- mm/oom_kill.c 2
-rw-r--r-- mm/page_alloc.c 12
-rw-r--r-- mm/page_io.c 2
-rw-r--r-- mm/shmem.c 3
-rw-r--r-- mm/slab.c 100
-rw-r--r-- mm/swap_state.c 2
-rw-r--r-- mm/swapfile.c 1
-rw-r--r-- mm/vmalloc.c 4
-rw-r--r-- mm/vmscan.c 13
22 files changed, 170 insertions, 123 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 4e9937ac352..391ffc54d13 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -29,7 +29,7 @@ config FLATMEM_MANUAL
If unsure, choose this option (Flat Memory) over any other.
config DISCONTIGMEM_MANUAL
- bool "Discontigious Memory"
+ bool "Discontiguous Memory"
depends on ARCH_DISCONTIGMEM_ENABLE
help
This option provides enhanced support for discontiguous
@@ -52,7 +52,7 @@ config SPARSEMEM_MANUAL
memory hotplug systems. This is normal.
For many other systems, this will be an alternative to
- "Discontigious Memory". This option provides some potential
+ "Discontiguous Memory". This option provides some potential
performance benefits, along with decreased code complexity,
but it is newer, and more experimental.
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 8ec4e4c2a17..a58699b6579 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -61,17 +61,9 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat,
{
bootmem_data_t *bdata = pgdat->bdata;
unsigned long mapsize = ((end - start)+7)/8;
- static struct pglist_data *pgdat_last;
-
- pgdat->pgdat_next = NULL;
- /* Add new nodes last so that bootmem always starts
- searching in the first nodes, not the last ones */
- if (pgdat_last)
- pgdat_last->pgdat_next = pgdat;
- else {
- pgdat_list = pgdat;
- pgdat_last = pgdat;
- }
+
+ pgdat->pgdat_next = pgdat_list;
+ pgdat_list = pgdat;
mapsize = ALIGN(mapsize, sizeof(long));
bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT);
@@ -162,10 +154,10 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
*/
static void * __init
__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
- unsigned long align, unsigned long goal)
+ unsigned long align, unsigned long goal, unsigned long limit)
{
unsigned long offset, remaining_size, areasize, preferred;
- unsigned long i, start = 0, incr, eidx;
+ unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn;
void *ret;
if(!size) {
@@ -174,7 +166,14 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
}
BUG_ON(align & (align-1));
- eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+ if (limit && bdata->node_boot_start >= limit)
+ return NULL;
+
+ limit >>=PAGE_SHIFT;
+ if (limit && end_pfn > limit)
+ end_pfn = limit;
+
+ eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
offset = 0;
if (align &&
(bdata->node_boot_start & (align - 1UL)) != 0)
@@ -186,11 +185,12 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
* first, then we try to allocate lower pages.
*/
if (goal && (goal >= bdata->node_boot_start) &&
- ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
+ ((goal >> PAGE_SHIFT) < end_pfn)) {
preferred = goal - bdata->node_boot_start;
if (bdata->last_success >= preferred)
- preferred = bdata->last_success;
+ if (!limit || (limit && limit > bdata->last_success))
+ preferred = bdata->last_success;
} else
preferred = 0;
@@ -390,14 +390,15 @@ unsigned long __init free_all_bootmem (void)
return(free_all_bootmem_core(NODE_DATA(0)));
}
-void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal,
+ unsigned long limit)
{
pg_data_t *pgdat = pgdat_list;
void *ptr;
for_each_pgdat(pgdat)
if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
- align, goal)))
+ align, goal, limit)))
return(ptr);
/*
@@ -408,14 +409,16 @@ void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned
return NULL;
}
-void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal)
+
+void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align,
+ unsigned long goal, unsigned long limit)
{
void *ptr;
- ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal);
+ ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, limit);
if (ptr)
return (ptr);
- return __alloc_bootmem(size, align, goal);
+ return __alloc_bootmem_limit(size, align, goal, limit);
}
diff --git a/mm/fremap.c b/mm/fremap.c
index 3235fb77c13..ab23a0673c3 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -89,6 +89,9 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (!page->mapping || page->index >= size)
goto err_unlock;
+ err = -ENOMEM;
+ if (page_mapcount(page) > INT_MAX/2)
+ goto err_unlock;
zap_pte(mm, vma, addr, pte);
diff --git a/mm/highmem.c b/mm/highmem.c
index 40091159946..90e1861e2da 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -30,7 +30,7 @@
static mempool_t *page_pool, *isa_page_pool;
-static void *page_pool_alloc(unsigned int __nocast gfp_mask, void *data)
+static void *page_pool_alloc(gfp_t gfp_mask, void *data)
{
unsigned int gfp = gfp_mask | (unsigned int) (long) data;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 901ac523a1c..61d38067803 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -274,21 +274,22 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
{
pte_t *src_pte, *dst_pte, entry;
struct page *ptepage;
- unsigned long addr = vma->vm_start;
- unsigned long end = vma->vm_end;
+ unsigned long addr;
- while (addr < end) {
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
dst_pte = huge_pte_alloc(dst, addr);
if (!dst_pte)
goto nomem;
+ spin_lock(&src->page_table_lock);
src_pte = huge_pte_offset(src, addr);
- BUG_ON(!src_pte || pte_none(*src_pte)); /* prefaulted */
- entry = *src_pte;
- ptepage = pte_page(entry);
- get_page(ptepage);
- add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
- set_huge_pte_at(dst, addr, dst_pte, entry);
- addr += HPAGE_SIZE;
+ if (src_pte && !pte_none(*src_pte)) {
+ entry = *src_pte;
+ ptepage = pte_page(entry);
+ get_page(ptepage);
+ add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
+ set_huge_pte_at(dst, addr, dst_pte, entry);
+ }
+ spin_unlock(&src->page_table_lock);
}
return 0;
@@ -323,8 +324,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
page = pte_page(pte);
put_page(page);
+ add_mm_counter(mm, rss, - (HPAGE_SIZE / PAGE_SIZE));
}
- add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
flush_tlb_range(vma, start, end);
}
@@ -393,6 +394,28 @@ out:
return ret;
}
+/*
+ * On ia64 at least, it is possible to receive a hugetlb fault from a
+ * stale zero entry left in the TLB from earlier hardware prefetching.
+ * Low-level arch code should already have flushed the stale entry as
+ * part of its fault handling, but we do need to accept this minor fault
+ * and return successfully. Whereas the "normal" case is that this is
+ * an access to a hugetlb page which has been truncated off since mmap.
+ */
+int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, int write_access)
+{
+ int ret = VM_FAULT_SIGBUS;
+ pte_t *pte;
+
+ spin_lock(&mm->page_table_lock);
+ pte = huge_pte_offset(mm, address);
+ if (pte && !pte_none(*pte))
+ ret = VM_FAULT_MINOR;
+ spin_unlock(&mm->page_table_lock);
+ return ret;
+}
+
int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, int *length, int i)
@@ -403,6 +426,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
BUG_ON(!is_vm_hugetlb_page(vma));
vpfn = vaddr/PAGE_SIZE;
+ spin_lock(&mm->page_table_lock);
while (vaddr < vma->vm_end && remainder) {
if (pages) {
@@ -415,8 +439,13 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
* indexing below to work. */
pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
- /* hugetlb should be locked, and hence, prefaulted */
- WARN_ON(!pte || pte_none(*pte));
+ /* the hugetlb file might have been truncated */
+ if (!pte || pte_none(*pte)) {
+ remainder = 0;
+ if (!i)
+ i = -EFAULT;
+ break;
+ }
page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
@@ -434,7 +463,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
--remainder;
++i;
}
-
+ spin_unlock(&mm->page_table_lock);
*length = remainder;
*position = vaddr;
diff --git a/mm/madvise.c b/mm/madvise.c
index 4454936f87d..20e075d1c64 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -83,6 +83,9 @@ static long madvise_willneed(struct vm_area_struct * vma,
{
struct file *file = vma->vm_file;
+ if (!file)
+ return -EBADF;
+
if (file->f_mapping->a_ops->get_xip_page) {
/* no bad return value, but ignore advice */
return 0;
@@ -141,11 +144,7 @@ static long
madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
unsigned long start, unsigned long end, int behavior)
{
- struct file *filp = vma->vm_file;
- long error = -EBADF;
-
- if (!filp)
- goto out;
+ long error;
switch (behavior) {
case MADV_NORMAL:
@@ -166,8 +165,6 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
error = -EINVAL;
break;
}
-
-out:
return error;
}
diff --git a/mm/memory.c b/mm/memory.c
index ae8161f1f45..1db40e935e5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2045,8 +2045,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
inc_page_state(pgfault);
- if (is_vm_hugetlb_page(vma))
- return VM_FAULT_SIGBUS; /* mapping truncation does this. */
+ if (unlikely(is_vm_hugetlb_page(vma)))
+ return hugetlb_fault(mm, vma, address, write_access);
/*
* We need the page table lock to synchronize with kswapd
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9033f0859aa..37af443eb09 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -687,7 +687,7 @@ get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned lo
}
/* Return a zonelist representing a mempolicy */
-static struct zonelist *zonelist_policy(unsigned int __nocast gfp, struct mempolicy *policy)
+static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
{
int nd;
@@ -751,7 +751,7 @@ static unsigned offset_il_node(struct mempolicy *pol,
/* Allocate a page in interleaved policy.
Own path because it needs to do special accounting. */
-static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned order, unsigned nid)
+static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, unsigned nid)
{
struct zonelist *zl;
struct page *page;
@@ -789,7 +789,7 @@ static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned or
* Should be called with the mm_sem of the vma hold.
*/
struct page *
-alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr)
+alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
{
struct mempolicy *pol = get_vma_policy(current, vma, addr);
@@ -832,7 +832,7 @@ alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned l
* 1) it's ok to take cpuset_sem (can WAIT), and
* 2) allocating for current task (not interrupt).
*/
-struct page *alloc_pages_current(unsigned int __nocast gfp, unsigned order)
+struct page *alloc_pages_current(gfp_t gfp, unsigned order)
{
struct mempolicy *pol = current->mempolicy;
diff --git a/mm/mempool.c b/mm/mempool.c
index 65f2957b8d5..9e377ea700b 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -112,7 +112,7 @@ EXPORT_SYMBOL(mempool_create_node);
* while this function is running. mempool_alloc() & mempool_free()
* might be called (eg. from IRQ contexts) while this function executes.
*/
-int mempool_resize(mempool_t *pool, int new_min_nr, unsigned int __nocast gfp_mask)
+int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask)
{
void *element;
void **new_elements;
@@ -200,7 +200,7 @@ EXPORT_SYMBOL(mempool_destroy);
* *never* fails when called from process contexts. (it might
* fail if called from an IRQ context.)
*/
-void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask)
+void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask)
{
void *element;
unsigned long flags;
@@ -276,7 +276,7 @@ EXPORT_SYMBOL(mempool_free);
/*
* A commonly used alloc and free fn.
*/
-void *mempool_alloc_slab(unsigned int __nocast gfp_mask, void *pool_data)
+void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
{
kmem_cache_t *mem = (kmem_cache_t *) pool_data;
return kmem_cache_alloc(mem, gfp_mask);
diff --git a/mm/mmap.c b/mm/mmap.c
index 8b8e05f07cd..fa11d91242e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1640,7 +1640,7 @@ static void unmap_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
/*
* Get rid of page table information in the indicated region.
*
- * Called with the page table lock held.
+ * Called with the mm semaphore held.
*/
static void unmap_region(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *prev,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index e9fbd013ad9..57577f63b30 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -248,7 +248,8 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot)
newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
- if ((newflags & ~(newflags >> 4)) & 0xf) {
+ /* newflags >> 4 shift VM_MAY% in place of VM_% */
+ if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
error = -EACCES;
goto out;
}
diff --git a/mm/mremap.c b/mm/mremap.c
index a32fed454bd..f343fc73a8b 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -141,10 +141,10 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
if (dst) {
pte_t pte;
pte = ptep_clear_flush(vma, old_addr, src);
+
/* ZERO_PAGE can be dependant on virtual addr */
- if (pfn_valid(pte_pfn(pte)) &&
- pte_page(pte) == ZERO_PAGE(old_addr))
- pte = pte_wrprotect(mk_pte(ZERO_PAGE(new_addr), new_vma->vm_page_prot));
+ pte = move_pte(pte, new_vma->vm_page_prot,
+ old_addr, new_addr);
set_pte_at(mm, new_addr, dst, pte);
} else
error = -ENOMEM;
diff --git a/mm/nommu.c b/mm/nommu.c
index 064d7044289..0ef241ae376 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -157,8 +157,7 @@ void vfree(void *addr)
kfree(addr);
}
-void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask,
- pgprot_t prot)
+void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
/*
* kmalloc doesn't like __GFP_HIGHMEM for some reason
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ac3bf33e537..d348b903595 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -263,7 +263,7 @@ static struct mm_struct *oom_kill_process(struct task_struct *p)
* OR try to be smart about which process to kill. Note that we
* don't have to be perfect here, we just have to be good.
*/
-void out_of_memory(unsigned int __nocast gfp_mask, int order)
+void out_of_memory(gfp_t gfp_mask, int order)
{
struct mm_struct *mm = NULL;
task_t * p;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ae2903339e7..cc1fe2672a3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -671,7 +671,7 @@ void fastcall free_cold_page(struct page *page)
free_hot_cold_page(page, 1);
}
-static inline void prep_zero_page(struct page *page, int order, unsigned int __nocast gfp_flags)
+static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
{
int i;
@@ -686,7 +686,7 @@ static inline void prep_zero_page(struct page *page, int order, unsigned int __n
* or two.
*/
static struct page *
-buffered_rmqueue(struct zone *zone, int order, unsigned int __nocast gfp_flags)
+buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
{
unsigned long flags;
struct page *page = NULL;
@@ -761,7 +761,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
}
static inline int
-should_reclaim_zone(struct zone *z, unsigned int gfp_mask)
+should_reclaim_zone(struct zone *z, gfp_t gfp_mask)
{
if (!z->reclaim_pages)
return 0;
@@ -774,7 +774,7 @@ should_reclaim_zone(struct zone *z, unsigned int gfp_mask)
* This is the 'heart' of the zoned buddy allocator.
*/
struct page * fastcall
-__alloc_pages(unsigned int __nocast gfp_mask, unsigned int order,
+__alloc_pages(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist)
{
const int wait = gfp_mask & __GFP_WAIT;
@@ -977,7 +977,7 @@ EXPORT_SYMBOL(__alloc_pages);
/*
* Common helper functions.
*/
-fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned int order)
+fastcall unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
{
struct page * page;
page = alloc_pages(gfp_mask, order);
@@ -988,7 +988,7 @@ fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned
EXPORT_SYMBOL(__get_free_pages);
-fastcall unsigned long get_zeroed_page(unsigned int __nocast gfp_mask)
+fastcall unsigned long get_zeroed_page(gfp_t gfp_mask)
{
struct page * page;
diff --git a/mm/page_io.c b/mm/page_io.c
index 2e605a19ce5..330e00d6db0 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -19,7 +19,7 @@
#include <linux/writeback.h>
#include <asm/pgtable.h>
-static struct bio *get_swap_bio(unsigned int __nocast gfp_flags, pgoff_t index,
+static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
struct page *page, bio_end_io_t end_io)
{
struct bio *bio;
diff --git a/mm/shmem.c b/mm/shmem.c
index 1f7aeb210c7..ea064d89cda 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -921,8 +921,7 @@ shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx)
}
static inline struct page *
-shmem_alloc_page(unsigned int __nocast gfp,struct shmem_inode_info *info,
- unsigned long idx)
+shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx)
{
return alloc_page(gfp | __GFP_ZERO);
}
diff --git a/mm/slab.c b/mm/slab.c
index 437d3388054..d05c678bceb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -308,12 +308,12 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
#define SIZE_L3 (1 + MAX_NUMNODES)
/*
- * This function may be completely optimized away if
+ * This function must be completely optimized away if
* a constant is passed to it. Mostly the same as
* what is in linux/slab.h except it returns an
* index.
*/
-static inline int index_of(const size_t size)
+static __always_inline int index_of(const size_t size)
{
if (__builtin_constant_p(size)) {
int i = 0;
@@ -329,7 +329,8 @@ static inline int index_of(const size_t size)
extern void __bad_size(void);
__bad_size();
}
- }
+ } else
+ BUG();
return 0;
}
@@ -639,7 +640,7 @@ static enum {
static DEFINE_PER_CPU(struct work_struct, reap_work);
-static void free_block(kmem_cache_t* cachep, void** objpp, int len);
+static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node);
static void enable_cpucache (kmem_cache_t *cachep);
static void cache_reap (void *unused);
static int __node_shrink(kmem_cache_t *cachep, int node);
@@ -649,8 +650,7 @@ static inline struct array_cache *ac_data(kmem_cache_t *cachep)
return cachep->array[smp_processor_id()];
}
-static inline kmem_cache_t *__find_general_cachep(size_t size,
- unsigned int __nocast gfpflags)
+static inline kmem_cache_t *__find_general_cachep(size_t size, gfp_t gfpflags)
{
struct cache_sizes *csizep = malloc_sizes;
@@ -674,8 +674,7 @@ static inline kmem_cache_t *__find_general_cachep(size_t size,
return csizep->cs_cachep;
}
-kmem_cache_t *kmem_find_general_cachep(size_t size,
- unsigned int __nocast gfpflags)
+kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
{
return __find_general_cachep(size, gfpflags);
}
@@ -804,7 +803,7 @@ static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache
if (ac->avail) {
spin_lock(&rl3->list_lock);
- free_block(cachep, ac->entry, ac->avail);
+ free_block(cachep, ac->entry, ac->avail, node);
ac->avail = 0;
spin_unlock(&rl3->list_lock);
}
@@ -925,7 +924,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
/* Free limit for this kmem_list3 */
l3->free_limit -= cachep->batchcount;
if (nc)
- free_block(cachep, nc->entry, nc->avail);
+ free_block(cachep, nc->entry, nc->avail, node);
if (!cpus_empty(mask)) {
spin_unlock(&l3->list_lock);
@@ -934,7 +933,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
if (l3->shared) {
free_block(cachep, l3->shared->entry,
- l3->shared->avail);
+ l3->shared->avail, node);
kfree(l3->shared);
l3->shared = NULL;
}
@@ -1184,7 +1183,7 @@ __initcall(cpucache_init);
* did not request dmaable memory, we might get it, but that
* would be relatively rare and ignorable.
*/
-static void *kmem_getpages(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid)
+static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid)
{
struct page *page;
void *addr;
@@ -1882,12 +1881,13 @@ static void do_drain(void *arg)
{
kmem_cache_t *cachep = (kmem_cache_t*)arg;
struct array_cache *ac;
+ int node = numa_node_id();
check_irq_off();
ac = ac_data(cachep);
- spin_lock(&cachep->nodelists[numa_node_id()]->list_lock);
- free_block(cachep, ac->entry, ac->avail);
- spin_unlock(&cachep->nodelists[numa_node_id()]->list_lock);
+ spin_lock(&cachep->nodelists[node]->list_lock);
+ free_block(cachep, ac->entry, ac->avail, node);
+ spin_unlock(&cachep->nodelists[node]->list_lock);
ac->avail = 0;
}
@@ -2046,7 +2046,7 @@ EXPORT_SYMBOL(kmem_cache_destroy);
/* Get the memory for a slab management obj. */
static struct slab* alloc_slabmgmt(kmem_cache_t *cachep, void *objp,
- int colour_off, unsigned int __nocast local_flags)
+ int colour_off, gfp_t local_flags)
{
struct slab *slabp;
@@ -2147,7 +2147,7 @@ static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp)
* Grow (by 1) the number of slabs within a cache. This is called by
* kmem_cache_alloc() when there are no active objs left in a cache.
*/
-static int cache_grow(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid)
+static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid)
{
struct slab *slabp;
void *objp;
@@ -2354,7 +2354,7 @@ bad:
#define check_slabp(x,y) do { } while(0)
#endif
-static void *cache_alloc_refill(kmem_cache_t *cachep, unsigned int __nocast flags)
+static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags)
{
int batchcount;
struct kmem_list3 *l3;
@@ -2454,7 +2454,7 @@ alloc_done:
}
static inline void
-cache_alloc_debugcheck_before(kmem_cache_t *cachep, unsigned int __nocast flags)
+cache_alloc_debugcheck_before(kmem_cache_t *cachep, gfp_t flags)
{
might_sleep_if(flags & __GFP_WAIT);
#if DEBUG
@@ -2465,7 +2465,7 @@ cache_alloc_debugcheck_before(kmem_cache_t *cachep, unsigned int __nocast flags)
#if DEBUG
static void *
cache_alloc_debugcheck_after(kmem_cache_t *cachep,
- unsigned int __nocast flags, void *objp, void *caller)
+ gfp_t flags, void *objp, void *caller)
{
if (!objp)
return objp;
@@ -2508,16 +2508,12 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep,
#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
#endif
-
-static inline void *__cache_alloc(kmem_cache_t *cachep, unsigned int __nocast flags)
+static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags)
{
- unsigned long save_flags;
void* objp;
struct array_cache *ac;
- cache_alloc_debugcheck_before(cachep, flags);
-
- local_irq_save(save_flags);
+ check_irq_off();
ac = ac_data(cachep);
if (likely(ac->avail)) {
STATS_INC_ALLOCHIT(cachep);
@@ -2527,6 +2523,18 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, unsigned int __nocast fl
STATS_INC_ALLOCMISS(cachep);
objp = cache_alloc_refill(cachep, flags);
}
+ return objp;
+}
+
+static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags)
+{
+ unsigned long save_flags;
+ void* objp;
+
+ cache_alloc_debugcheck_before(cachep, flags);
+
+ local_irq_save(save_flags);
+ objp = ____cache_alloc(cachep, flags);
local_irq_restore(save_flags);
objp = cache_alloc_debugcheck_after(cachep, flags, objp,
__builtin_return_address(0));
@@ -2608,7 +2616,7 @@ done:
/*
* Caller needs to acquire correct kmem_list's list_lock
*/
-static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
+static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node)
{
int i;
struct kmem_list3 *l3;
@@ -2617,14 +2625,12 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
void *objp = objpp[i];
struct slab *slabp;
unsigned int objnr;
- int nodeid = 0;
slabp = GET_PAGE_SLAB(virt_to_page(objp));
- nodeid = slabp->nodeid;
- l3 = cachep->nodelists[nodeid];
+ l3 = cachep->nodelists[node];
list_del(&slabp->list);
objnr = (objp - slabp->s_mem) / cachep->objsize;
- check_spinlock_acquired_node(cachep, nodeid);
+ check_spinlock_acquired_node(cachep, node);
check_slabp(cachep, slabp);
@@ -2664,13 +2670,14 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac)
{
int batchcount;
struct kmem_list3 *l3;
+ int node = numa_node_id();
batchcount = ac->batchcount;
#if DEBUG
BUG_ON(!batchcount || batchcount > ac->avail);
#endif
check_irq_off();
- l3 = cachep->nodelists[numa_node_id()];
+ l3 = cachep->nodelists[node];
spin_lock(&l3->list_lock);
if (l3->shared) {
struct array_cache *shared_array = l3->shared;
@@ -2686,7 +2693,7 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac)
}
}
- free_block(cachep, ac->entry, batchcount);
+ free_block(cachep, ac->entry, batchcount, node);
free_done:
#if STATS
{
@@ -2751,7 +2758,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
} else {
spin_lock(&(cachep->nodelists[nodeid])->
list_lock);
- free_block(cachep, &objp, 1);
+ free_block(cachep, &objp, 1, nodeid);
spin_unlock(&(cachep->nodelists[nodeid])->
list_lock);
}
@@ -2778,7 +2785,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
* Allocate an object from this cache. The flags are only relevant
* if the cache has no available objects.
*/
-void *kmem_cache_alloc(kmem_cache_t *cachep, unsigned int __nocast flags)
+void *kmem_cache_alloc(kmem_cache_t *cachep, gfp_t flags)
{
return __cache_alloc(cachep, flags);
}
@@ -2839,12 +2846,12 @@ out:
* New and improved: it will now make sure that the object gets
* put on the correct node list so that there is no false sharing.
*/
-void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid)
+void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
{
unsigned long save_flags;
void *ptr;
- if (nodeid == numa_node_id() || nodeid == -1)
+ if (nodeid == -1)
return __cache_alloc(cachep, flags);
if (unlikely(!cachep->nodelists[nodeid])) {
@@ -2855,7 +2862,10 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
- ptr = __cache_alloc_node(cachep, flags, nodeid);
+ if (nodeid == numa_node_id())
+ ptr = ____cache_alloc(cachep, flags);
+ else
+ ptr = __cache_alloc_node(cachep, flags, nodeid);
local_irq_restore(save_flags);
ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, __builtin_return_address(0));
@@ -2863,7 +2873,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
-void *kmalloc_node(size_t size, unsigned int __nocast flags, int node)
+void *kmalloc_node(size_t size, gfp_t flags, int node)
{
kmem_cache_t *cachep;
@@ -2896,7 +2906,7 @@ EXPORT_SYMBOL(kmalloc_node);
* platforms. For example, on i386, it means that the memory must come
* from the first 16MB.
*/
-void *__kmalloc(size_t size, unsigned int __nocast flags)
+void *__kmalloc(size_t size, gfp_t flags)
{
kmem_cache_t *cachep;
@@ -2985,7 +2995,7 @@ EXPORT_SYMBOL(kmem_cache_free);
* @size: how many bytes of memory are required.
* @flags: the type of memory to allocate.
*/
-void *kzalloc(size_t size, unsigned int __nocast flags)
+void *kzalloc(size_t size, gfp_t flags)
{
void *ret = kmalloc(size, flags);
if (ret)
@@ -3079,7 +3089,7 @@ static int alloc_kmemlist(kmem_cache_t *cachep)
if ((nc = cachep->nodelists[node]->shared))
free_block(cachep, nc->entry,
- nc->avail);
+ nc->avail, node);
l3->shared = new;
if (!cachep->nodelists[node]->alien) {
@@ -3160,7 +3170,7 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount,
if (!ccold)
continue;
spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
- free_block(cachep, ccold->entry, ccold->avail);
+ free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
kfree(ccold);
}
@@ -3240,7 +3250,7 @@ static void drain_array_locked(kmem_cache_t *cachep,
if (tofree > ac->avail) {
tofree = (ac->avail+1)/2;
}
- free_block(cachep, ac->entry, tofree);
+ free_block(cachep, ac->entry, tofree, node);
ac->avail -= tofree;
memmove(ac->entry, &(ac->entry[tofree]),
sizeof(void*)*ac->avail);
@@ -3591,7 +3601,7 @@ unsigned int ksize(const void *objp)
* @s: the string to duplicate
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
*/
-char *kstrdup(const char *s, unsigned int __nocast gfp)
+char *kstrdup(const char *s, gfp_t gfp)
{
size_t len;
char *buf;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index adbc2b426c2..132164f7d0a 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -68,7 +68,7 @@ void show_swap_cache_info(void)
* but sets SwapCache flag and private instead of mapping and index.
*/
static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
- unsigned int __nocast gfp_mask)
+ gfp_t gfp_mask)
{
int error;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 0184f510aac..1dcaeda039f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1381,6 +1381,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
error = bd_claim(bdev, sys_swapon);
if (error < 0) {
bdev = NULL;
+ error = -EINVAL;
goto bad_swap;
}
p->old_block_size = block_size(bdev);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 13c3d82968a..1150229b636 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -395,7 +395,7 @@ void *vmap(struct page **pages, unsigned int count,
EXPORT_SYMBOL(vmap);
-void *__vmalloc_area(struct vm_struct *area, unsigned int __nocast gfp_mask, pgprot_t prot)
+void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
{
struct page **pages;
unsigned int nr_pages, array_size, i;
@@ -446,7 +446,7 @@ fail:
* allocator with @gfp_mask flags. Map them into contiguous
* kernel virtual space, using a pagetable protection of @prot.
*/
-void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask, pgprot_t prot)
+void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
struct vm_struct *area;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0ea71e887bb..64f9570cff5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -511,10 +511,11 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
* PageDirty _after_ making sure that the page is freeable and
* not in use by anybody. (pagecache + us == 2)
*/
- if (page_count(page) != 2 || PageDirty(page)) {
- write_unlock_irq(&mapping->tree_lock);
- goto keep_locked;
- }
+ if (unlikely(page_count(page) != 2))
+ goto cannot_free;
+ smp_rmb();
+ if (unlikely(PageDirty(page)))
+ goto cannot_free;
#ifdef CONFIG_SWAP
if (PageSwapCache(page)) {
@@ -538,6 +539,10 @@ free_it:
__pagevec_release_nonlru(&freed_pvec);
continue;
+cannot_free:
+ write_unlock_irq(&mapping->tree_lock);
+ goto keep_locked;
+
activate_locked:
SetPageActive(page);
pgactivate++;