Diffstat (limited to 'mm')
-rw-r--r-- | mm/backing-dev.c | 15
-rw-r--r-- | mm/memcontrol.c | 2
-rw-r--r-- | mm/mlock.c | 41
-rw-r--r-- | mm/msync.c | 2
-rw-r--r-- | mm/page-writeback.c | 44
-rw-r--r-- | mm/page_alloc.c | 2
-rw-r--r-- | mm/percpu-km.c | 104
-rw-r--r-- | mm/percpu-vm.c | 451
-rw-r--r-- | mm/percpu.c | 585
-rw-r--r-- | mm/shmem.c | 29
-rw-r--r-- | mm/slab.c | 198
-rw-r--r-- | mm/slob.c | 8
-rw-r--r-- | mm/slub.c | 8
-rw-r--r-- | mm/swapfile.c | 14 |
14 files changed, 859 insertions, 644 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 707d0dc6da0..660a87a2251 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -48,7 +48,6 @@ static struct timer_list sync_supers_timer; static int bdi_sync_supers(void *); static void sync_supers_timer_fn(unsigned long); -static void arm_supers_timer(void); static void bdi_add_default_flusher_task(struct backing_dev_info *bdi); @@ -252,7 +251,7 @@ static int __init default_bdi_init(void) init_timer(&sync_supers_timer); setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0); - arm_supers_timer(); + bdi_arm_supers_timer(); err = bdi_init(&default_backing_dev_info); if (!err) @@ -374,10 +373,13 @@ static int bdi_sync_supers(void *unused) return 0; } -static void arm_supers_timer(void) +void bdi_arm_supers_timer(void) { unsigned long next; + if (!dirty_writeback_interval) + return; + next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies; mod_timer(&sync_supers_timer, round_jiffies_up(next)); } @@ -385,7 +387,7 @@ static void arm_supers_timer(void) static void sync_supers_timer_fn(unsigned long unused) { wake_up_process(sync_supers_tsk); - arm_supers_timer(); + bdi_arm_supers_timer(); } static int bdi_forker_task(void *ptr) @@ -428,7 +430,10 @@ static int bdi_forker_task(void *ptr) spin_unlock_bh(&bdi_lock); wait = msecs_to_jiffies(dirty_writeback_interval * 10); - schedule_timeout(wait); + if (wait) + schedule_timeout(wait); + else + schedule(); try_to_freeze(); continue; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8a79a6f0f02..c8569bc298f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1438,7 +1438,7 @@ static void drain_local_stock(struct work_struct *dummy) /* * Cache charges(val) which is from res_counter, to local per_cpu area. - * This will be consumed by consumt_stock() function, later. + * This will be consumed by consume_stock() function, later. 
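The bdi_forker_task() hunk above treats a dirty_writeback_interval of 0 as "periodic writeback disabled". The else branch matters because schedule_timeout(0) returns immediately and would turn the loop into a busy wait. A minimal sketch of the resulting sleep pattern (the set_current_state() placement is assumed from surrounding code that is not shown here):

	/* sleep for one writeback interval, or indefinitely when disabled */
	set_current_state(TASK_INTERRUPTIBLE);
	if (dirty_writeback_interval) {
		/* sysctl value is in centiseconds -> milliseconds -> jiffies */
		unsigned long wait = msecs_to_jiffies(dirty_writeback_interval * 10);
		schedule_timeout(wait);
	} else {
		/* 0 means disabled: schedule_timeout(0) would not block at all */
		schedule();
	}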
*/ static void refill_stock(struct mem_cgroup *mem, int val) { diff --git a/mm/mlock.c b/mm/mlock.c index 8f4e2dfceec..3f82720e051 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -607,44 +607,3 @@ void user_shm_unlock(size_t size, struct user_struct *user) spin_unlock(&shmlock_user_lock); free_uid(user); } - -int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, - size_t size) -{ - unsigned long lim, vm, pgsz; - int error = -ENOMEM; - - pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; - - down_write(&mm->mmap_sem); - - lim = ACCESS_ONCE(rlim[RLIMIT_AS].rlim_cur) >> PAGE_SHIFT; - vm = mm->total_vm + pgsz; - if (lim < vm) - goto out; - - lim = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur) >> PAGE_SHIFT; - vm = mm->locked_vm + pgsz; - if (lim < vm) - goto out; - - mm->total_vm += pgsz; - mm->locked_vm += pgsz; - - error = 0; - out: - up_write(&mm->mmap_sem); - return error; -} - -void refund_locked_memory(struct mm_struct *mm, size_t size) -{ - unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; - - down_write(&mm->mmap_sem); - - mm->total_vm -= pgsz; - mm->locked_vm -= pgsz; - - up_write(&mm->mmap_sem); -} diff --git a/mm/msync.c b/mm/msync.c index 4083209b7f0..632df4527c0 100644 --- a/mm/msync.c +++ b/mm/msync.c @@ -82,7 +82,7 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags) (vma->vm_flags & VM_SHARED)) { get_file(file); up_read(&mm->mmap_sem); - error = vfs_fsync(file, file->f_path.dentry, 0); + error = vfs_fsync(file, 0); fput(file); if (error || start >= end) goto out; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0b19943ecf8..b289310e2c8 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping, (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS)) > background_thresh))) - bdi_start_writeback(bdi, NULL, 0); + bdi_start_writeback(bdi, NULL, 0, 0); } void set_page_dirty_balance(struct page *page, int page_mkwrite) @@ -683,10 +683,6 @@ void throttle_vm_writeout(gfp_t gfp_mask) } } -static void laptop_timer_fn(unsigned long unused); - -static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0); - /* * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs */ @@ -694,24 +690,24 @@ int dirty_writeback_centisecs_handler(ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { proc_dointvec(table, write, buffer, length, ppos); + bdi_arm_supers_timer(); return 0; } -static void do_laptop_sync(struct work_struct *work) +#ifdef CONFIG_BLOCK +void laptop_mode_timer_fn(unsigned long data) { - wakeup_flusher_threads(0); - kfree(work); -} + struct request_queue *q = (struct request_queue *)data; + int nr_pages = global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS); -static void laptop_timer_fn(unsigned long unused) -{ - struct work_struct *work; + /* + * We want to write everything out, not just down to the dirty + * threshold + */ - work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (work) { - INIT_WORK(work, do_laptop_sync); - schedule_work(work); - } + if (bdi_has_dirty_io(&q->backing_dev_info)) + bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages, 0); } /* @@ -719,9 +715,9 @@ static void laptop_timer_fn(unsigned long unused) * of all dirty data a few seconds from now. If the flush is already scheduled * then push it back - the user is still using the disk. 
*/ -void laptop_io_completion(void) +void laptop_io_completion(struct backing_dev_info *info) { - mod_timer(&laptop_mode_wb_timer, jiffies + laptop_mode); + mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode); } /* @@ -731,8 +727,16 @@ void laptop_io_completion(void) */ void laptop_sync_completion(void) { - del_timer(&laptop_mode_wb_timer); + struct backing_dev_info *bdi; + + rcu_read_lock(); + + list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) + del_timer(&bdi->laptop_mode_wb_timer); + + rcu_read_unlock(); } +#endif /* * If ratelimit_pages is too high then we can get into dirty-data overload diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d03c946d556..a6326c71b66 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2579,7 +2579,7 @@ static int default_zonelist_order(void) struct zone *z; int average_size; /* - * ZONE_DMA and ZONE_DMA32 can be very small area in the sytem. + * ZONE_DMA and ZONE_DMA32 can be very small area in the system. * If they are really small and used heavily, the system can fall * into OOM very easily. * This function detect ZONE_DMA/DMA32 size and confgigures zone order. diff --git a/mm/percpu-km.c b/mm/percpu-km.c new file mode 100644 index 00000000000..df680855540 --- /dev/null +++ b/mm/percpu-km.c @@ -0,0 +1,104 @@ +/* + * mm/percpu-km.c - kernel memory based chunk allocation + * + * Copyright (C) 2010 SUSE Linux Products GmbH + * Copyright (C) 2010 Tejun Heo <tj@kernel.org> + * + * This file is released under the GPLv2. + * + * Chunks are allocated as a contiguous kernel memory using gfp + * allocation. This is to be used on nommu architectures. + * + * To use percpu-km, + * + * - define CONFIG_NEED_PER_CPU_KM from the arch Kconfig. + * + * - CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK must not be defined. It's + * not compatible with PER_CPU_KM. EMBED_FIRST_CHUNK should work + * fine. + * + * - NUMA is not supported. When setting up the first chunk, + * @cpu_distance_fn should be NULL or report all CPUs to be nearer + * than or at LOCAL_DISTANCE. + * + * - It's best if the chunk size is power of two multiple of + * PAGE_SIZE. Because each chunk is allocated as a contiguous + * kernel memory block using alloc_pages(), memory will be wasted if + * chunk size is not aligned. percpu-km code will whine about it. 
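With laptop_io_completion() now taking a backing_dev_info and each bdi carrying its own laptop_mode_wb_timer, the laptop-mode flush timer becomes per device rather than global. The block-layer side of the wiring is outside this mm/ diff; a hedged sketch of what a request-queue owner would be expected to do with the exported laptop_mode_timer_fn() (the request_queue variable q and both call sites are assumptions, not shown in this patch):

	/* at queue allocation: point the per-bdi timer at this queue */
	setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
		    laptop_mode_timer_fn, (unsigned long)q);

	/* on I/O completion, push the flush back while the disk is in use */
	if (laptop_mode)
		laptop_io_completion(&q->backing_dev_info);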
+ */ + +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK +#error "contiguous percpu allocation is incompatible with paged first chunk" +#endif + +#include <linux/log2.h> + +static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) +{ + /* noop */ + return 0; +} + +static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) +{ + /* nada */ +} + +static struct pcpu_chunk *pcpu_create_chunk(void) +{ + const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; + struct pcpu_chunk *chunk; + struct page *pages; + int i; + + chunk = pcpu_alloc_chunk(); + if (!chunk) + return NULL; + + pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages)); + if (!pages) { + pcpu_free_chunk(chunk); + return NULL; + } + + for (i = 0; i < nr_pages; i++) + pcpu_set_page_chunk(nth_page(pages, i), chunk); + + chunk->data = pages; + chunk->base_addr = page_address(pages) - pcpu_group_offsets[0]; + return chunk; +} + +static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) +{ + const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; + + if (chunk && chunk->data) + __free_pages(chunk->data, order_base_2(nr_pages)); + pcpu_free_chunk(chunk); +} + +static struct page *pcpu_addr_to_page(void *addr) +{ + return virt_to_page(addr); +} + +static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai) +{ + size_t nr_pages, alloc_pages; + + /* all units must be in a single group */ + if (ai->nr_groups != 1) { + printk(KERN_CRIT "percpu: can't handle more than one groups\n"); + return -EINVAL; + } + + nr_pages = (ai->groups[0].nr_units * ai->unit_size) >> PAGE_SHIFT; + alloc_pages = roundup_pow_of_two(nr_pages); + + if (alloc_pages > nr_pages) + printk(KERN_WARNING "percpu: wasting %zu pages per chunk\n", + alloc_pages - nr_pages); + + return 0; +} diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c new file mode 100644 index 00000000000..7d9c1d0ebd3 --- /dev/null +++ b/mm/percpu-vm.c @@ -0,0 +1,451 @@ +/* + * mm/percpu-vm.c - vmalloc area based chunk allocation + * + * Copyright (C) 2010 SUSE Linux Products GmbH + * Copyright (C) 2010 Tejun Heo <tj@kernel.org> + * + * This file is released under the GPLv2. + * + * Chunks are mapped into vmalloc areas and populated page by page. + * This is the default chunk allocator. + */ + +static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, + unsigned int cpu, int page_idx) +{ + /* must not be used on pre-mapped chunk */ + WARN_ON(chunk->immutable); + + return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); +} + +/** + * pcpu_get_pages_and_bitmap - get temp pages array and bitmap + * @chunk: chunk of interest + * @bitmapp: output parameter for bitmap + * @may_alloc: may allocate the array + * + * Returns pointer to array of pointers to struct page and bitmap, + * both of which can be indexed with pcpu_page_idx(). The returned + * array is cleared to zero and *@bitmapp is copied from + * @chunk->populated. Note that there is only one array and bitmap + * and access exclusion is the caller's responsibility. + * + * CONTEXT: + * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc. + * Otherwise, don't care. + * + * RETURNS: + * Pointer to temp pages array on success, NULL on failure. 
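The percpu-km header above advises a chunk size that is a power-of-two multiple of PAGE_SIZE, and pcpu_verify_alloc_info() warns when it is not: alloc_pages() only hands out power-of-two orders, so the request is rounded up and the excess pages in every chunk go unused. Illustrative arithmetic only (the numbers are made up):

	size_t nr_pages    = 24;				/* e.g. 6 units of 4 pages each */
	size_t alloc_pages = roundup_pow_of_two(nr_pages);	/* rounded up to 32 */
	size_t wasted      = alloc_pages - nr_pages;		/* 8 pages wasted per chunk */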
+ */ +static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, + unsigned long **bitmapp, + bool may_alloc) +{ + static struct page **pages; + static unsigned long *bitmap; + size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); + size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * + sizeof(unsigned long); + + if (!pages || !bitmap) { + if (may_alloc && !pages) + pages = pcpu_mem_alloc(pages_size); + if (may_alloc && !bitmap) + bitmap = pcpu_mem_alloc(bitmap_size); + if (!pages || !bitmap) + return NULL; + } + + memset(pages, 0, pages_size); + bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); + + *bitmapp = bitmap; + return pages; +} + +/** + * pcpu_free_pages - free pages which were allocated for @chunk + * @chunk: chunk pages were allocated for + * @pages: array of pages to be freed, indexed by pcpu_page_idx() + * @populated: populated bitmap + * @page_start: page index of the first page to be freed + * @page_end: page index of the last page to be freed + 1 + * + * Free pages [@page_start and @page_end) in @pages for all units. + * The pages were allocated for @chunk. + */ +static void pcpu_free_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) +{ + unsigned int cpu; + int i; + + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page *page = pages[pcpu_page_idx(cpu, i)]; + + if (page) + __free_page(page); + } + } +} + +/** + * pcpu_alloc_pages - allocates pages for @chunk + * @chunk: target chunk + * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() + * @populated: populated bitmap + * @page_start: page index of the first page to be allocated + * @page_end: page index of the last page to be allocated + 1 + * + * Allocate pages [@page_start,@page_end) into @pages for all units. + * The allocation is for @chunk. Percpu core doesn't care about the + * content of @pages and will pass it verbatim to pcpu_map_pages(). + */ +static int pcpu_alloc_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) +{ + const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; + unsigned int cpu; + int i; + + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; + + *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); + if (!*pagep) { + pcpu_free_pages(chunk, pages, populated, + page_start, page_end); + return -ENOMEM; + } + } + } + return 0; +} + +/** + * pcpu_pre_unmap_flush - flush cache prior to unmapping + * @chunk: chunk the regions to be flushed belongs to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages in [@page_start,@page_end) of @chunk are about to be + * unmapped. Flush cache. As each flushing trial can be very + * expensive, issue flush on the whole region at once rather than + * doing it for each cpu. This could be an overkill but is more + * scalable. 
+ */ +static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + flush_cache_vunmap( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); +} + +static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) +{ + unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT); +} + +/** + * pcpu_unmap_pages - unmap pages out of a pcpu_chunk + * @chunk: chunk of interest + * @pages: pages array which can be used to pass information to free + * @populated: populated bitmap + * @page_start: page index of the first page to unmap + * @page_end: page index of the last page to unmap + 1 + * + * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. + * Corresponding elements in @pages were cleared by the caller and can + * be used to carry information to pcpu_free_pages() which will be + * called after all unmaps are finished. The caller should call + * proper pre/post flush functions. + */ +static void pcpu_unmap_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) +{ + unsigned int cpu; + int i; + + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page *page; + + page = pcpu_chunk_page(chunk, cpu, i); + WARN_ON(!page); + pages[pcpu_page_idx(cpu, i)] = page; + } + __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), + page_end - page_start); + } + + for (i = page_start; i < page_end; i++) + __clear_bit(i, populated); +} + +/** + * pcpu_post_unmap_tlb_flush - flush TLB after unmapping + * @chunk: pcpu_chunk the regions to be flushed belong to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush + * TLB for the regions. This can be skipped if the area is to be + * returned to vmalloc as vmalloc will handle TLB flushing lazily. + * + * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once + * for the whole region. + */ +static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + flush_tlb_kernel_range( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); +} + +static int __pcpu_map_pages(unsigned long addr, struct page **pages, + int nr_pages) +{ + return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT, + PAGE_KERNEL, pages); +} + +/** + * pcpu_map_pages - map pages into a pcpu_chunk + * @chunk: chunk of interest + * @pages: pages array containing pages to be mapped + * @populated: populated bitmap + * @page_start: page index of the first page to map + * @page_end: page index of the last page to map + 1 + * + * For each cpu, map pages [@page_start,@page_end) into @chunk. The + * caller is responsible for calling pcpu_post_map_flush() after all + * mappings are complete. + * + * This function is responsible for setting corresponding bits in + * @chunk->populated bitmap and whatever is necessary for reverse + * lookup (addr -> chunk). 
+ */ +static int pcpu_map_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) +{ + unsigned int cpu, tcpu; + int i, err; + + for_each_possible_cpu(cpu) { + err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), + &pages[pcpu_page_idx(cpu, page_start)], + page_end - page_start); + if (err < 0) + goto err; + } + + /* mapping successful, link chunk and mark populated */ + for (i = page_start; i < page_end; i++) { + for_each_possible_cpu(cpu) + pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], + chunk); + __set_bit(i, populated); + } + + return 0; + +err: + for_each_possible_cpu(tcpu) { + if (tcpu == cpu) + break; + __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), + page_end - page_start); + } + return err; +} + +/** + * pcpu_post_map_flush - flush cache after mapping + * @chunk: pcpu_chunk the regions to be flushed belong to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages [@page_start,@page_end) of @chunk have been mapped. Flush + * cache. + * + * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once + * for the whole region. + */ +static void pcpu_post_map_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + flush_cache_vmap( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); +} + +/** + * pcpu_populate_chunk - populate and map an area of a pcpu_chunk + * @chunk: chunk of interest + * @off: offset to the area to populate + * @size: size of the area to populate in bytes + * + * For each cpu, populate and map pages [@page_start,@page_end) into + * @chunk. The area is cleared on return. + * + * CONTEXT: + * pcpu_alloc_mutex, does GFP_KERNEL allocation. 
+ */ +static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) +{ + int page_start = PFN_DOWN(off); + int page_end = PFN_UP(off + size); + int free_end = page_start, unmap_end = page_start; + struct page **pages; + unsigned long *populated; + unsigned int cpu; + int rs, re, rc; + + /* quick path, check whether all pages are already there */ + rs = page_start; + pcpu_next_pop(chunk, &rs, &re, page_end); + if (rs == page_start && re == page_end) + goto clear; + + /* need to allocate and map pages, this chunk can't be immutable */ + WARN_ON(chunk->immutable); + + pages = pcpu_get_pages_and_bitmap(chunk, &populated, true); + if (!pages) + return -ENOMEM; + + /* alloc and map */ + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + rc = pcpu_alloc_pages(chunk, pages, populated, rs, re); + if (rc) + goto err_free; + free_end = re; + } + + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + rc = pcpu_map_pages(chunk, pages, populated, rs, re); + if (rc) + goto err_unmap; + unmap_end = re; + } + pcpu_post_map_flush(chunk, page_start, page_end); + + /* commit new bitmap */ + bitmap_copy(chunk->populated, populated, pcpu_unit_pages); +clear: + for_each_possible_cpu(cpu) + memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); + return 0; + +err_unmap: + pcpu_pre_unmap_flush(chunk, page_start, unmap_end); + pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end) + pcpu_unmap_pages(chunk, pages, populated, rs, re); + pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end); +err_free: + pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end) + pcpu_free_pages(chunk, pages, populated, rs, re); + return rc; +} + +/** + * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk + * @chunk: chunk to depopulate + * @off: offset to the area to depopulate + * @size: size of the area to depopulate in bytes + * @flush: whether to flush cache and tlb or not + * + * For each cpu, depopulate and unmap pages [@page_start,@page_end) + * from @chunk. If @flush is true, vcache is flushed before unmapping + * and tlb after. + * + * CONTEXT: + * pcpu_alloc_mutex. + */ +static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) +{ + int page_start = PFN_DOWN(off); + int page_end = PFN_UP(off + size); + struct page **pages; + unsigned long *populated; + int rs, re; + + /* quick path, check whether it's empty already */ + rs = page_start; + pcpu_next_unpop(chunk, &rs, &re, page_end); + if (rs == page_start && re == page_end) + return; + + /* immutable chunks can't be depopulated */ + WARN_ON(chunk->immutable); + + /* + * If control reaches here, there must have been at least one + * successful population attempt so the temp pages array must + * be available now. 
+ */ + pages = pcpu_get_pages_and_bitmap(chunk, &populated, false); + BUG_ON(!pages); + + /* unmap and free */ + pcpu_pre_unmap_flush(chunk, page_start, page_end); + + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) + pcpu_unmap_pages(chunk, pages, populated, rs, re); + + /* no need to flush tlb, vmalloc will handle it lazily */ + + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) + pcpu_free_pages(chunk, pages, populated, rs, re); + + /* commit new bitmap */ + bitmap_copy(chunk->populated, populated, pcpu_unit_pages); +} + +static struct pcpu_chunk *pcpu_create_chunk(void) +{ + struct pcpu_chunk *chunk; + struct vm_struct **vms; + + chunk = pcpu_alloc_chunk(); + if (!chunk) + return NULL; + + vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes, + pcpu_nr_groups, pcpu_atom_size, GFP_KERNEL); + if (!vms) { + pcpu_free_chunk(chunk); + return NULL; + } + + chunk->data = vms; + chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0]; + return chunk; +} + +static void pcpu_destroy_chunk(struct pcpu_chunk *chunk) +{ + if (chunk && chunk->data) + pcpu_free_vm_areas(chunk->data, pcpu_nr_groups); + pcpu_free_chunk(chunk); +} + +static struct page *pcpu_addr_to_page(void *addr) +{ + return vmalloc_to_page(addr); +} + +static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai) +{ + /* no extra restriction */ + return 0; +} diff --git a/mm/percpu.c b/mm/percpu.c index 6e09741ddc6..39f7dfd5958 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1,5 +1,5 @@ /* - * linux/mm/percpu.c - percpu memory allocator + * mm/percpu.c - percpu memory allocator * * Copyright (C) 2009 SUSE Linux Products GmbH * Copyright (C) 2009 Tejun Heo <tj@kernel.org> @@ -7,14 +7,13 @@ * This file is released under the GPLv2. * * This is percpu allocator which can handle both static and dynamic - * areas. Percpu areas are allocated in chunks in vmalloc area. Each - * chunk is consisted of boot-time determined number of units and the - * first chunk is used for static percpu variables in the kernel image + * areas. Percpu areas are allocated in chunks. Each chunk is + * consisted of boot-time determined number of units and the first + * chunk is used for static percpu variables in the kernel image * (special boot time alloc/init handling necessary as these areas * need to be brought up before allocation services are running). * Unit grows as necessary and all units grow or shrink in unison. - * When a chunk is filled up, another chunk is allocated. ie. in - * vmalloc area + * When a chunk is filled up, another chunk is allocated. 
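percpu-km.c and percpu-vm.c above implement the same set of static hooks, and mm/percpu.c consumes them without caring which backend provides them; exactly one of the two files is presumably pulled into the percpu.c build, though the selection mechanism itself is not visible in this diff. The common interface, as defined in both new files:

	static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
	static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
	static struct pcpu_chunk *pcpu_create_chunk(void);
	static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
	static struct page *pcpu_addr_to_page(void *addr);
	static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);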
* * c0 c1 c2 * ------------------- ------------------- ------------ @@ -99,7 +98,7 @@ struct pcpu_chunk { int map_used; /* # of map entries used */ int map_alloc; /* # of map entries allocated */ int *map; /* allocation map */ - struct vm_struct **vms; /* mapped vmalloc regions */ + void *data; /* chunk data */ bool immutable; /* no [de]population allowed */ unsigned long populated[]; /* populated bitmap */ }; @@ -177,6 +176,21 @@ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ static void pcpu_reclaim(struct work_struct *work); static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim); +static bool pcpu_addr_in_first_chunk(void *addr) +{ + void *first_start = pcpu_first_chunk->base_addr; + + return addr >= first_start && addr < first_start + pcpu_unit_size; +} + +static bool pcpu_addr_in_reserved_chunk(void *addr) +{ + void *first_start = pcpu_first_chunk->base_addr; + + return addr >= first_start && + addr < first_start + pcpu_reserved_chunk_limit; +} + static int __pcpu_size_to_slot(int size) { int highbit = fls(size); /* size is in bytes */ @@ -198,27 +212,6 @@ static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) return pcpu_size_to_slot(chunk->free_size); } -static int pcpu_page_idx(unsigned int cpu, int page_idx) -{ - return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx; -} - -static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, - unsigned int cpu, int page_idx) -{ - return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] + - (page_idx << PAGE_SHIFT); -} - -static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, - unsigned int cpu, int page_idx) -{ - /* must not be used on pre-mapped chunk */ - WARN_ON(chunk->immutable); - - return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); -} - /* set the pointer to a chunk in a page struct */ static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu) { @@ -231,13 +224,27 @@ static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page) return (struct pcpu_chunk *)page->index; } -static void pcpu_next_unpop(struct pcpu_chunk *chunk, int *rs, int *re, int end) +static int __maybe_unused pcpu_page_idx(unsigned int cpu, int page_idx) +{ + return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx; +} + +static unsigned long __maybe_unused pcpu_chunk_addr(struct pcpu_chunk *chunk, + unsigned int cpu, int page_idx) +{ + return (unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] + + (page_idx << PAGE_SHIFT); +} + +static void __maybe_unused pcpu_next_unpop(struct pcpu_chunk *chunk, + int *rs, int *re, int end) { *rs = find_next_zero_bit(chunk->populated, end, *rs); *re = find_next_bit(chunk->populated, end, *rs + 1); } -static void pcpu_next_pop(struct pcpu_chunk *chunk, int *rs, int *re, int end) +static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk, + int *rs, int *re, int end) { *rs = find_next_bit(chunk->populated, end, *rs); *re = find_next_zero_bit(chunk->populated, end, *rs + 1); @@ -326,36 +333,6 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) } /** - * pcpu_chunk_addr_search - determine chunk containing specified address - * @addr: address for which the chunk needs to be determined. - * - * RETURNS: - * The address of the found chunk. - */ -static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) -{ - void *first_start = pcpu_first_chunk->base_addr; - - /* is it in the first chunk? */ - if (addr >= first_start && addr < first_start + pcpu_unit_size) { - /* is it in the reserved area? 
*/ - if (addr < first_start + pcpu_reserved_chunk_limit) - return pcpu_reserved_chunk; - return pcpu_first_chunk; - } - - /* - * The address is relative to unit0 which might be unused and - * thus unmapped. Offset the address to the unit space of the - * current processor before looking it up in the vmalloc - * space. Note that any possible cpu id can be used here, so - * there's no need to worry about preemption or cpu hotplug. - */ - addr += pcpu_unit_offsets[raw_smp_processor_id()]; - return pcpu_get_page_chunk(vmalloc_to_page(addr)); -} - -/** * pcpu_need_to_extend - determine whether chunk area map needs to be extended * @chunk: chunk of interest * @@ -623,434 +600,92 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) pcpu_chunk_relocate(chunk, oslot); } -/** - * pcpu_get_pages_and_bitmap - get temp pages array and bitmap - * @chunk: chunk of interest - * @bitmapp: output parameter for bitmap - * @may_alloc: may allocate the array - * - * Returns pointer to array of pointers to struct page and bitmap, - * both of which can be indexed with pcpu_page_idx(). The returned - * array is cleared to zero and *@bitmapp is copied from - * @chunk->populated. Note that there is only one array and bitmap - * and access exclusion is the caller's responsibility. - * - * CONTEXT: - * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc. - * Otherwise, don't care. - * - * RETURNS: - * Pointer to temp pages array on success, NULL on failure. - */ -static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, - unsigned long **bitmapp, - bool may_alloc) -{ - static struct page **pages; - static unsigned long *bitmap; - size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); - size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * - sizeof(unsigned long); - - if (!pages || !bitmap) { - if (may_alloc && !pages) - pages = pcpu_mem_alloc(pages_size); - if (may_alloc && !bitmap) - bitmap = pcpu_mem_alloc(bitmap_size); - if (!pages || !bitmap) - return NULL; - } - - memset(pages, 0, pages_size); - bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); - - *bitmapp = bitmap; - return pages; -} - -/** |
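The removal above of the open-coded pcpu_chunk_addr_search(), together with the new pcpu_addr_in_first_chunk()/pcpu_addr_in_reserved_chunk() helpers and the per-backend pcpu_addr_to_page(), implies a reworked lookup that is not visible in this truncated diff. A sketch of what it would be expected to look like, assuming only the helpers shown above:

	static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
	{
		/* the static first/reserved chunks are special-cased */
		if (pcpu_addr_in_first_chunk(addr)) {
			if (pcpu_addr_in_reserved_chunk(addr))
				return pcpu_reserved_chunk;
			return pcpu_first_chunk;
		}

		/*
		 * Unit 0 may be unused and unmapped; offset the address to
		 * the current CPU's unit before the page-based reverse
		 * lookup, which now goes through pcpu_addr_to_page().
		 */
		addr += pcpu_unit_offsets[raw_smp_processor_id()];
		return pcpu_get_page_chunk(pcpu_addr_to_page(addr));
	}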