diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 4 | ||||
-rw-r--r-- | mm/bootmem.c | 45 | ||||
-rw-r--r-- | mm/fremap.c | 3 | ||||
-rw-r--r-- | mm/highmem.c | 2 | ||||
-rw-r--r-- | mm/hugetlb.c | 57 | ||||
-rw-r--r-- | mm/madvise.c | 11 | ||||
-rw-r--r-- | mm/memory.c | 4 | ||||
-rw-r--r-- | mm/mempolicy.c | 8 | ||||
-rw-r--r-- | mm/mempool.c | 6 | ||||
-rw-r--r-- | mm/mmap.c | 2 | ||||
-rw-r--r-- | mm/mprotect.c | 3 | ||||
-rw-r--r-- | mm/mremap.c | 6 | ||||
-rw-r--r-- | mm/nommu.c | 3 | ||||
-rw-r--r-- | mm/oom_kill.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 12 | ||||
-rw-r--r-- | mm/page_io.c | 2 | ||||
-rw-r--r-- | mm/shmem.c | 3 | ||||
-rw-r--r-- | mm/slab.c | 100 | ||||
-rw-r--r-- | mm/swap_state.c | 2 | ||||
-rw-r--r-- | mm/swapfile.c | 1 | ||||
-rw-r--r-- | mm/vmalloc.c | 4 | ||||
-rw-r--r-- | mm/vmscan.c | 13 |
22 files changed, 170 insertions, 123 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 4e9937ac352..391ffc54d13 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -29,7 +29,7 @@ config FLATMEM_MANUAL If unsure, choose this option (Flat Memory) over any other. config DISCONTIGMEM_MANUAL - bool "Discontigious Memory" + bool "Discontiguous Memory" depends on ARCH_DISCONTIGMEM_ENABLE help This option provides enhanced support for discontiguous @@ -52,7 +52,7 @@ config SPARSEMEM_MANUAL memory hotplug systems. This is normal. For many other systems, this will be an alternative to - "Discontigious Memory". This option provides some potential + "Discontiguous Memory". This option provides some potential performance benefits, along with decreased code complexity, but it is newer, and more experimental. diff --git a/mm/bootmem.c b/mm/bootmem.c index 8ec4e4c2a17..a58699b6579 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -61,17 +61,9 @@ static unsigned long __init init_bootmem_core (pg_data_t *pgdat, { bootmem_data_t *bdata = pgdat->bdata; unsigned long mapsize = ((end - start)+7)/8; - static struct pglist_data *pgdat_last; - - pgdat->pgdat_next = NULL; - /* Add new nodes last so that bootmem always starts - searching in the first nodes, not the last ones */ - if (pgdat_last) - pgdat_last->pgdat_next = pgdat; - else { - pgdat_list = pgdat; - pgdat_last = pgdat; - } + + pgdat->pgdat_next = pgdat_list; + pgdat_list = pgdat; mapsize = ALIGN(mapsize, sizeof(long)); bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT); @@ -162,10 +154,10 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, */ static void * __init __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, - unsigned long align, unsigned long goal) + unsigned long align, unsigned long goal, unsigned long limit) { unsigned long offset, remaining_size, areasize, preferred; - unsigned long i, start = 0, incr, eidx; + unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn; void *ret; if(!size) { @@ -174,7 +166,14 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, } BUG_ON(align & (align-1)); - eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); + if (limit && bdata->node_boot_start >= limit) + return NULL; + + limit >>=PAGE_SHIFT; + if (limit && end_pfn > limit) + end_pfn = limit; + + eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT); offset = 0; if (align && (bdata->node_boot_start & (align - 1UL)) != 0) @@ -186,11 +185,12 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, * first, then we try to allocate lower pages. */ if (goal && (goal >= bdata->node_boot_start) && - ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) { + ((goal >> PAGE_SHIFT) < end_pfn)) { preferred = goal - bdata->node_boot_start; if (bdata->last_success >= preferred) - preferred = bdata->last_success; + if (!limit || (limit && limit > bdata->last_success)) + preferred = bdata->last_success; } else preferred = 0; @@ -390,14 +390,15 @@ unsigned long __init free_all_bootmem (void) return(free_all_bootmem_core(NODE_DATA(0))); } -void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal) +void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal, + unsigned long limit) { pg_data_t *pgdat = pgdat_list; void *ptr; for_each_pgdat(pgdat) if ((ptr = __alloc_bootmem_core(pgdat->bdata, size, - align, goal))) + align, goal, limit))) return(ptr); /* @@ -408,14 +409,16 @@ void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned return NULL; } -void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) + +void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align, + unsigned long goal, unsigned long limit) { void *ptr; - ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal); + ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, limit); if (ptr) return (ptr); - return __alloc_bootmem(size, align, goal); + return __alloc_bootmem_limit(size, align, goal, limit); } diff --git a/mm/fremap.c b/mm/fremap.c index 3235fb77c13..ab23a0673c3 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -89,6 +89,9 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; if (!page->mapping || page->index >= size) goto err_unlock; + err = -ENOMEM; + if (page_mapcount(page) > INT_MAX/2) + goto err_unlock; zap_pte(mm, vma, addr, pte); diff --git a/mm/highmem.c b/mm/highmem.c index 40091159946..90e1861e2da 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -30,7 +30,7 @@ static mempool_t *page_pool, *isa_page_pool; -static void *page_pool_alloc(unsigned int __nocast gfp_mask, void *data) +static void *page_pool_alloc(gfp_t gfp_mask, void *data) { unsigned int gfp = gfp_mask | (unsigned int) (long) data; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 901ac523a1c..61d38067803 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -274,21 +274,22 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, { pte_t *src_pte, *dst_pte, entry; struct page *ptepage; - unsigned long addr = vma->vm_start; - unsigned long end = vma->vm_end; + unsigned long addr; - while (addr < end) { + for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { dst_pte = huge_pte_alloc(dst, addr); if (!dst_pte) goto nomem; + spin_lock(&src->page_table_lock); src_pte = huge_pte_offset(src, addr); - BUG_ON(!src_pte || pte_none(*src_pte)); /* prefaulted */ - entry = *src_pte; - ptepage = pte_page(entry); - get_page(ptepage); - add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); - set_huge_pte_at(dst, addr, dst_pte, entry); - addr += HPAGE_SIZE; + if (src_pte && !pte_none(*src_pte)) { + entry = *src_pte; + ptepage = pte_page(entry); + get_page(ptepage); + add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); + set_huge_pte_at(dst, addr, dst_pte, entry); + } + spin_unlock(&src->page_table_lock); } return 0; @@ -323,8 +324,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, page = pte_page(pte); put_page(page); + add_mm_counter(mm, rss, - (HPAGE_SIZE / PAGE_SIZE)); } - add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT)); flush_tlb_range(vma, start, end); } @@ -393,6 +394,28 @@ out: return ret; } +/* + * On ia64 at least, it is possible to receive a hugetlb fault from a + * stale zero entry left in the TLB from earlier hardware prefetching. + * Low-level arch code should already have flushed the stale entry as + * part of its fault handling, but we do need to accept this minor fault + * and return successfully. Whereas the "normal" case is that this is + * an access to a hugetlb page which has been truncated off since mmap. + */ +int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, int write_access) +{ + int ret = VM_FAULT_SIGBUS; + pte_t *pte; + + spin_lock(&mm->page_table_lock); + pte = huge_pte_offset(mm, address); + if (pte && !pte_none(*pte)) + ret = VM_FAULT_MINOR; + spin_unlock(&mm->page_table_lock); + return ret; +} + int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i) @@ -403,6 +426,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, BUG_ON(!is_vm_hugetlb_page(vma)); vpfn = vaddr/PAGE_SIZE; + spin_lock(&mm->page_table_lock); while (vaddr < vma->vm_end && remainder) { if (pages) { @@ -415,8 +439,13 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * indexing below to work. */ pte = huge_pte_offset(mm, vaddr & HPAGE_MASK); - /* hugetlb should be locked, and hence, prefaulted */ - WARN_ON(!pte || pte_none(*pte)); + /* the hugetlb file might have been truncated */ + if (!pte || pte_none(*pte)) { + remainder = 0; + if (!i) + i = -EFAULT; + break; + } page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; @@ -434,7 +463,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, --remainder; ++i; } - + spin_unlock(&mm->page_table_lock); *length = remainder; *position = vaddr; diff --git a/mm/madvise.c b/mm/madvise.c index 4454936f87d..20e075d1c64 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -83,6 +83,9 @@ static long madvise_willneed(struct vm_area_struct * vma, { struct file *file = vma->vm_file; + if (!file) + return -EBADF; + if (file->f_mapping->a_ops->get_xip_page) { /* no bad return value, but ignore advice */ return 0; @@ -141,11 +144,7 @@ static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, int behavior) { - struct file *filp = vma->vm_file; - long error = -EBADF; - - if (!filp) - goto out; + long error; switch (behavior) { case MADV_NORMAL: @@ -166,8 +165,6 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, error = -EINVAL; break; } - -out: return error; } diff --git a/mm/memory.c b/mm/memory.c index ae8161f1f45..1db40e935e5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2045,8 +2045,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, inc_page_state(pgfault); - if (is_vm_hugetlb_page(vma)) - return VM_FAULT_SIGBUS; /* mapping truncation does this. */ + if (unlikely(is_vm_hugetlb_page(vma))) + return hugetlb_fault(mm, vma, address, write_access); /* * We need the page table lock to synchronize with kswapd diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9033f0859aa..37af443eb09 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -687,7 +687,7 @@ get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned lo } /* Return a zonelist representing a mempolicy */ -static struct zonelist *zonelist_policy(unsigned int __nocast gfp, struct mempolicy *policy) +static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy) { int nd; @@ -751,7 +751,7 @@ static unsigned offset_il_node(struct mempolicy *pol, /* Allocate a page in interleaved policy. Own path because it needs to do special accounting. */ -static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned order, unsigned nid) +static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, unsigned nid) { struct zonelist *zl; struct page *page; @@ -789,7 +789,7 @@ static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned or * Should be called with the mm_sem of the vma hold. */ struct page * -alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr) +alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = get_vma_policy(current, vma, addr); @@ -832,7 +832,7 @@ alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned l * 1) it's ok to take cpuset_sem (can WAIT), and * 2) allocating for current task (not interrupt). */ -struct page *alloc_pages_current(unsigned int __nocast gfp, unsigned order) +struct page *alloc_pages_current(gfp_t gfp, unsigned order) { struct mempolicy *pol = current->mempolicy; diff --git a/mm/mempool.c b/mm/mempool.c index 65f2957b8d5..9e377ea700b 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -112,7 +112,7 @@ EXPORT_SYMBOL(mempool_create_node); * while this function is running. mempool_alloc() & mempool_free() * might be called (eg. from IRQ contexts) while this function executes. */ -int mempool_resize(mempool_t *pool, int new_min_nr, unsigned int __nocast gfp_mask) +int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask) { void *element; void **new_elements; @@ -200,7 +200,7 @@ EXPORT_SYMBOL(mempool_destroy); * *never* fails when called from process contexts. (it might * fail if called from an IRQ context.) */ -void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask) +void * mempool_alloc(mempool_t *pool, gfp_t gfp_mask) { void *element; unsigned long flags; @@ -276,7 +276,7 @@ EXPORT_SYMBOL(mempool_free); /* * A commonly used alloc and free fn. */ -void *mempool_alloc_slab(unsigned int __nocast gfp_mask, void *pool_data) +void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) { kmem_cache_t *mem = (kmem_cache_t *) pool_data; return kmem_cache_alloc(mem, gfp_mask); diff --git a/mm/mmap.c b/mm/mmap.c index 8b8e05f07cd..fa11d91242e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1640,7 +1640,7 @@ static void unmap_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) /* * Get rid of page table information in the indicated region. * - * Called with the page table lock held. + * Called with the mm semaphore held. */ static void unmap_region(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, diff --git a/mm/mprotect.c b/mm/mprotect.c index e9fbd013ad9..57577f63b30 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -248,7 +248,8 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot) newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC)); - if ((newflags & ~(newflags >> 4)) & 0xf) { + /* newflags >> 4 shift VM_MAY% in place of VM_% */ + if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) { error = -EACCES; goto out; } diff --git a/mm/mremap.c b/mm/mremap.c index a32fed454bd..f343fc73a8b 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -141,10 +141,10 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr, if (dst) { pte_t pte; pte = ptep_clear_flush(vma, old_addr, src); + /* ZERO_PAGE can be dependant on virtual addr */ - if (pfn_valid(pte_pfn(pte)) && - pte_page(pte) == ZERO_PAGE(old_addr)) - pte = pte_wrprotect(mk_pte(ZERO_PAGE(new_addr), new_vma->vm_page_prot)); + pte = move_pte(pte, new_vma->vm_page_prot, + old_addr, new_addr); set_pte_at(mm, new_addr, dst, pte); } else error = -ENOMEM; diff --git a/mm/nommu.c b/mm/nommu.c index 064d7044289..0ef241ae376 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -157,8 +157,7 @@ void vfree(void *addr) kfree(addr); } -void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask, - pgprot_t prot) +void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) { /* * kmalloc doesn't like __GFP_HIGHMEM for some reason diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ac3bf33e537..d348b903595 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -263,7 +263,7 @@ static struct mm_struct *oom_kill_process(struct task_struct *p) * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. */ -void out_of_memory(unsigned int __nocast gfp_mask, int order) +void out_of_memory(gfp_t gfp_mask, int order) { struct mm_struct *mm = NULL; task_t * p; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ae2903339e7..cc1fe2672a3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -671,7 +671,7 @@ void fastcall free_cold_page(struct page *page) free_hot_cold_page(page, 1); } -static inline void prep_zero_page(struct page *page, int order, unsigned int __nocast gfp_flags) +static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags) { int i; @@ -686,7 +686,7 @@ static inline void prep_zero_page(struct page *page, int order, unsigned int __n * or two. */ static struct page * -buffered_rmqueue(struct zone *zone, int order, unsigned int __nocast gfp_flags) +buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) { unsigned long flags; struct page *page = NULL; @@ -761,7 +761,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, } static inline int -should_reclaim_zone(struct zone *z, unsigned int gfp_mask) +should_reclaim_zone(struct zone *z, gfp_t gfp_mask) { if (!z->reclaim_pages) return 0; @@ -774,7 +774,7 @@ should_reclaim_zone(struct zone *z, unsigned int gfp_mask) * This is the 'heart' of the zoned buddy allocator. */ struct page * fastcall -__alloc_pages(unsigned int __nocast gfp_mask, unsigned int order, +__alloc_pages(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist) { const int wait = gfp_mask & __GFP_WAIT; @@ -977,7 +977,7 @@ EXPORT_SYMBOL(__alloc_pages); /* * Common helper functions. */ -fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned int order) +fastcall unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) { struct page * page; page = alloc_pages(gfp_mask, order); @@ -988,7 +988,7 @@ fastcall unsigned long __get_free_pages(unsigned int __nocast gfp_mask, unsigned EXPORT_SYMBOL(__get_free_pages); -fastcall unsigned long get_zeroed_page(unsigned int __nocast gfp_mask) +fastcall unsigned long get_zeroed_page(gfp_t gfp_mask) { struct page * page; diff --git a/mm/page_io.c b/mm/page_io.c index 2e605a19ce5..330e00d6db0 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -19,7 +19,7 @@ #include <linux/writeback.h> #include <asm/pgtable.h> -static struct bio *get_swap_bio(unsigned int __nocast gfp_flags, pgoff_t index, +static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index, struct page *page, bio_end_io_t end_io) { struct bio *bio; diff --git a/mm/shmem.c b/mm/shmem.c index 1f7aeb210c7..ea064d89cda 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -921,8 +921,7 @@ shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx) } static inline struct page * -shmem_alloc_page(unsigned int __nocast gfp,struct shmem_inode_info *info, - unsigned long idx) +shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx) { return alloc_page(gfp | __GFP_ZERO); } diff --git a/mm/slab.c b/mm/slab.c index 437d3388054..d05c678bceb 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -308,12 +308,12 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; #define SIZE_L3 (1 + MAX_NUMNODES) /* - * This function may be completely optimized away if + * This function must be completely optimized away if * a constant is passed to it. Mostly the same as * what is in linux/slab.h except it returns an * index. */ -static inline int index_of(const size_t size) +static __always_inline int index_of(const size_t size) { if (__builtin_constant_p(size)) { int i = 0; @@ -329,7 +329,8 @@ static inline int index_of(const size_t size) extern void __bad_size(void); __bad_size(); } - } + } else + BUG(); return 0; } @@ -639,7 +640,7 @@ static enum { static DEFINE_PER_CPU(struct work_struct, reap_work); -static void free_block(kmem_cache_t* cachep, void** objpp, int len); +static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node); static void enable_cpucache (kmem_cache_t *cachep); static void cache_reap (void *unused); static int __node_shrink(kmem_cache_t *cachep, int node); @@ -649,8 +650,7 @@ static inline struct array_cache *ac_data(kmem_cache_t *cachep) return cachep->array[smp_processor_id()]; } -static inline kmem_cache_t *__find_general_cachep(size_t size, - unsigned int __nocast gfpflags) +static inline kmem_cache_t *__find_general_cachep(size_t size, gfp_t gfpflags) { struct cache_sizes *csizep = malloc_sizes; @@ -674,8 +674,7 @@ static inline kmem_cache_t *__find_general_cachep(size_t size, return csizep->cs_cachep; } -kmem_cache_t *kmem_find_general_cachep(size_t size, - unsigned int __nocast gfpflags) +kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags) { return __find_general_cachep(size, gfpflags); } @@ -804,7 +803,7 @@ static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache if (ac->avail) { spin_lock(&rl3->list_lock); - free_block(cachep, ac->entry, ac->avail); + free_block(cachep, ac->entry, ac->avail, node); ac->avail = 0; spin_unlock(&rl3->list_lock); } @@ -925,7 +924,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, /* Free limit for this kmem_list3 */ l3->free_limit -= cachep->batchcount; if (nc) - free_block(cachep, nc->entry, nc->avail); + free_block(cachep, nc->entry, nc->avail, node); if (!cpus_empty(mask)) { spin_unlock(&l3->list_lock); @@ -934,7 +933,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, if (l3->shared) { free_block(cachep, l3->shared->entry, - l3->shared->avail); + l3->shared->avail, node); kfree(l3->shared); l3->shared = NULL; } @@ -1184,7 +1183,7 @@ __initcall(cpucache_init); * did not request dmaable memory, we might get it, but that * would be relatively rare and ignorable. */ -static void *kmem_getpages(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid) +static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid) { struct page *page; void *addr; @@ -1882,12 +1881,13 @@ static void do_drain(void *arg) { kmem_cache_t *cachep = (kmem_cache_t*)arg; struct array_cache *ac; + int node = numa_node_id(); check_irq_off(); ac = ac_data(cachep); - spin_lock(&cachep->nodelists[numa_node_id()]->list_lock); - free_block(cachep, ac->entry, ac->avail); - spin_unlock(&cachep->nodelists[numa_node_id()]->list_lock); + spin_lock(&cachep->nodelists[node]->list_lock); + free_block(cachep, ac->entry, ac->avail, node); + spin_unlock(&cachep->nodelists[node]->list_lock); ac->avail = 0; } @@ -2046,7 +2046,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); /* Get the memory for a slab management obj. */ static struct slab* alloc_slabmgmt(kmem_cache_t *cachep, void *objp, - int colour_off, unsigned int __nocast local_flags) + int colour_off, gfp_t local_flags) { struct slab *slabp; @@ -2147,7 +2147,7 @@ static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp) * Grow (by 1) the number of slabs within a cache. This is called by * kmem_cache_alloc() when there are no active objs left in a cache. */ -static int cache_grow(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid) +static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid) { struct slab *slabp; void *objp; @@ -2354,7 +2354,7 @@ bad: #define check_slabp(x,y) do { } while(0) #endif -static void *cache_alloc_refill(kmem_cache_t *cachep, unsigned int __nocast flags) +static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags) { int batchcount; struct kmem_list3 *l3; @@ -2454,7 +2454,7 @@ alloc_done: } static inline void -cache_alloc_debugcheck_before(kmem_cache_t *cachep, unsigned int __nocast flags) +cache_alloc_debugcheck_before(kmem_cache_t *cachep, gfp_t flags) { might_sleep_if(flags & __GFP_WAIT); #if DEBUG @@ -2465,7 +2465,7 @@ cache_alloc_debugcheck_before(kmem_cache_t *cachep, unsigned int __nocast flags) #if DEBUG static void * cache_alloc_debugcheck_after(kmem_cache_t *cachep, - unsigned int __nocast flags, void *objp, void *caller) + gfp_t flags, void *objp, void *caller) { if (!objp) return objp; @@ -2508,16 +2508,12 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep, #define cache_alloc_debugcheck_after(a,b,objp,d) (objp) #endif - -static inline void *__cache_alloc(kmem_cache_t *cachep, unsigned int __nocast flags) +static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags) { - unsigned long save_flags; void* objp; struct array_cache *ac; - cache_alloc_debugcheck_before(cachep, flags); - - local_irq_save(save_flags); + check_irq_off(); ac = ac_data(cachep); if (likely(ac->avail)) { STATS_INC_ALLOCHIT(cachep); @@ -2527,6 +2523,18 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, unsigned int __nocast fl STATS_INC_ALLOCMISS(cachep); objp = cache_alloc_refill(cachep, flags); } + return objp; +} + +static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags) +{ + unsigned long save_flags; + void* objp; + + cache_alloc_debugcheck_before(cachep, flags); + + local_irq_save(save_flags); + objp = ____cache_alloc(cachep, flags); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, __builtin_return_address(0)); @@ -2608,7 +2616,7 @@ done: /* * Caller needs to acquire correct kmem_list's list_lock */ -static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects) +static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node) { int i; struct kmem_list3 *l3; @@ -2617,14 +2625,12 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects) void *objp = objpp[i]; struct slab *slabp; unsigned int objnr; - int nodeid = 0; slabp = GET_PAGE_SLAB(virt_to_page(objp)); - nodeid = slabp->nodeid; - l3 = cachep->nodelists[nodeid]; + l3 = cachep->nodelists[node]; list_del(&slabp->list); objnr = (objp - slabp->s_mem) / cachep->objsize; - check_spinlock_acquired_node(cachep, nodeid); + check_spinlock_acquired_node(cachep, node); check_slabp(cachep, slabp); @@ -2664,13 +2670,14 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac) { int batchcount; struct kmem_list3 *l3; + int node = numa_node_id(); batchcount = ac->batchcount; #if DEBUG BUG_ON(!batchcount || batchcount > ac->avail); #endif check_irq_off(); - l3 = cachep->nodelists[numa_node_id()]; + l3 = cachep->nodelists[node]; spin_lock(&l3->list_lock); if (l3->shared) { struct array_cache *shared_array = l3->shared; @@ -2686,7 +2693,7 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac) } } - free_block(cachep, ac->entry, batchcount); + free_block(cachep, ac->entry, batchcount, node); free_done: #if STATS { @@ -2751,7 +2758,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) } else { spin_lock(&(cachep->nodelists[nodeid])-> list_lock); - free_block(cachep, &objp, 1); + free_block(cachep, &objp, 1, nodeid); spin_unlock(&(cachep->nodelists[nodeid])-> list_lock); } @@ -2778,7 +2785,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) * Allocate an object from this cache. The flags are only relevant * if the cache has no available objects. */ -void *kmem_cache_alloc(kmem_cache_t *cachep, unsigned int __nocast flags) +void *kmem_cache_alloc(kmem_cache_t *cachep, gfp_t flags) { return __cache_alloc(cachep, flags); } @@ -2839,12 +2846,12 @@ out: * New and improved: it will now make sure that the object gets * put on the correct node list so that there is no false sharing. */ -void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, int nodeid) +void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid) { unsigned long save_flags; void *ptr; - if (nodeid == numa_node_id() || nodeid == -1) + if (nodeid == -1) return __cache_alloc(cachep, flags); if (unlikely(!cachep->nodelists[nodeid])) { @@ -2855,7 +2862,10 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i cache_alloc_debugcheck_before(cachep, flags); local_irq_save(save_flags); - ptr = __cache_alloc_node(cachep, flags, nodeid); + if (nodeid == numa_node_id()) + ptr = ____cache_alloc(cachep, flags); + else + ptr = __cache_alloc_node(cachep, flags, nodeid); local_irq_restore(save_flags); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, __builtin_return_address(0)); @@ -2863,7 +2873,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i } EXPORT_SYMBOL(kmem_cache_alloc_node); -void *kmalloc_node(size_t size, unsigned int __nocast flags, int node) +void *kmalloc_node(size_t size, gfp_t flags, int node) { kmem_cache_t *cachep; @@ -2896,7 +2906,7 @@ EXPORT_SYMBOL(kmalloc_node); * platforms. For example, on i386, it means that the memory must come * from the first 16MB. */ -void *__kmalloc(size_t size, unsigned int __nocast flags) +void *__kmalloc(size_t size, gfp_t flags) { kmem_cache_t *cachep; @@ -2985,7 +2995,7 @@ EXPORT_SYMBOL(kmem_cache_free); * @size: how many bytes of memory are required. * @flags: the type of memory to allocate. */ -void *kzalloc(size_t size, unsigned int __nocast flags) +void *kzalloc(size_t size, gfp_t flags) { void *ret = kmalloc(size, flags); if (ret) @@ -3079,7 +3089,7 @@ static int alloc_kmemlist(kmem_cache_t *cachep) if ((nc = cachep->nodelists[node]->shared)) free_block(cachep, nc->entry, - nc->avail); + nc->avail, node); l3->shared = new; if (!cachep->nodelists[node]->alien) { @@ -3160,7 +3170,7 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount, if (!ccold) continue; spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); - free_block(cachep, ccold->entry, ccold->avail); + free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i)); spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); kfree(ccold); } @@ -3240,7 +3250,7 @@ static void drain_array_locked(kmem_cache_t *cachep, if (tofree > ac->avail) { tofree = (ac->avail+1)/2; |