diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 9 | ||||
-rw-r--r-- | mm/Kconfig.debug | 26 | ||||
-rw-r--r-- | mm/Makefile | 5 | ||||
-rw-r--r-- | mm/allocpercpu.c | 36 | ||||
-rw-r--r-- | mm/backing-dev.c | 36 | ||||
-rw-r--r-- | mm/bootmem.c | 35 | ||||
-rw-r--r-- | mm/debug-pagealloc.c | 129 | ||||
-rw-r--r-- | mm/failslab.c | 1 | ||||
-rw-r--r-- | mm/filemap.c | 30 | ||||
-rw-r--r-- | mm/filemap_xip.c | 4 | ||||
-rw-r--r-- | mm/fremap.c | 2 | ||||
-rw-r--r-- | mm/highmem.c | 110 | ||||
-rw-r--r-- | mm/hugetlb.c | 34 | ||||
-rw-r--r-- | mm/internal.h | 8 | ||||
-rw-r--r-- | mm/memcontrol.c | 687 | ||||
-rw-r--r-- | mm/memory.c | 39 | ||||
-rw-r--r-- | mm/migrate.c | 12 | ||||
-rw-r--r-- | mm/mlock.c | 12 | ||||
-rw-r--r-- | mm/mmap.c | 55 | ||||
-rw-r--r-- | mm/mprotect.c | 5 | ||||
-rw-r--r-- | mm/nommu.c | 52 | ||||
-rw-r--r-- | mm/oom_kill.c | 13 | ||||
-rw-r--r-- | mm/page-writeback.c | 90 | ||||
-rw-r--r-- | mm/page_alloc.c | 64 | ||||
-rw-r--r-- | mm/page_cgroup.c | 40 | ||||
-rw-r--r-- | mm/page_io.c | 2 | ||||
-rw-r--r-- | mm/pdflush.c | 49 | ||||
-rw-r--r-- | mm/percpu.c | 1326 | ||||
-rw-r--r-- | mm/quicklist.c | 2 | ||||
-rw-r--r-- | mm/readahead.c | 65 | ||||
-rw-r--r-- | mm/rmap.c | 3 | ||||
-rw-r--r-- | mm/shmem.c | 48 | ||||
-rw-r--r-- | mm/slab.c | 75 | ||||
-rw-r--r-- | mm/slob.c | 75 | ||||
-rw-r--r-- | mm/slub.c | 158 | ||||
-rw-r--r-- | mm/sparse.c | 4 | ||||
-rw-r--r-- | mm/swap.c | 27 | ||||
-rw-r--r-- | mm/swapfile.c | 4 | ||||
-rw-r--r-- | mm/truncate.c | 10 | ||||
-rw-r--r-- | mm/util.c | 66 | ||||
-rw-r--r-- | mm/vmalloc.c | 136 | ||||
-rw-r--r-- | mm/vmscan.c | 143 | ||||
-rw-r--r-- | mm/vmstat.c | 18 |
43 files changed, 3007 insertions, 738 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index a5b77811fdf..b53427ad30a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -206,7 +206,6 @@ config VIRT_TO_BUS config UNEVICTABLE_LRU bool "Add LRU list to track non-evictable pages" default y - depends on MMU help Keeps unevictable pages off of the active and inactive pageout lists, so kswapd will not waste CPU time or have its balancing @@ -214,5 +213,13 @@ config UNEVICTABLE_LRU will use one page flag and increase the code size a little, say Y unless you know what you are doing. +config HAVE_MLOCK + bool + default y if MMU=y + +config HAVE_MLOCKED_PAGE_BIT + bool + default y if HAVE_MLOCK=y && UNEVICTABLE_LRU=y + config MMU_NOTIFIER bool diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug new file mode 100644 index 00000000000..bb01e298f26 --- /dev/null +++ b/mm/Kconfig.debug @@ -0,0 +1,26 @@ +config DEBUG_PAGEALLOC + bool "Debug page memory allocations" + depends on DEBUG_KERNEL && ARCH_SUPPORTS_DEBUG_PAGEALLOC + depends on !HIBERNATION || !PPC && !SPARC + ---help--- + Unmap pages from the kernel linear mapping after free_pages(). + This results in a large slowdown, but helps to find certain types + of memory corruptions. + +config WANT_PAGE_DEBUG_FLAGS + bool + +config PAGE_POISONING + bool "Debug page memory allocations" + depends on DEBUG_KERNEL && !ARCH_SUPPORTS_DEBUG_PAGEALLOC + depends on !HIBERNATION + select DEBUG_PAGEALLOC + select WANT_PAGE_DEBUG_FLAGS + help + Fill the pages with poison patterns after free_pages() and verify + the patterns before alloc_pages(). This results in a large slowdown, + but helps to find certain types of memory corruptions. + + This option cannot enalbe with hibernation. Otherwise, it will get + wrong messages for memory corruption because the free pages are not + saved to the suspend image. diff --git a/mm/Makefile b/mm/Makefile index 72255be57f8..ec73c68b601 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -24,12 +24,17 @@ obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o +obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o +ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +obj-$(CONFIG_SMP) += percpu.o +else obj-$(CONFIG_SMP) += allocpercpu.o +endif obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index 4297bc41bfd..dfdee6a4735 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -31,7 +31,7 @@ static void percpu_depopulate(void *__pdata, int cpu) * @__pdata: per-cpu data to depopulate * @mask: depopulate per-cpu data for cpu's selected through mask bits */ -static void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) +static void __percpu_depopulate_mask(void *__pdata, const cpumask_t *mask) { int cpu; for_each_cpu_mask_nr(cpu, *mask) @@ -99,45 +99,51 @@ static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, __percpu_populate_mask((__pdata), (size), (gfp), &(mask)) /** - * percpu_alloc_mask - initial setup of per-cpu data + * alloc_percpu - initial setup of per-cpu data * @size: size of per-cpu object - * @gfp: may sleep or not etc. - * @mask: populate per-data for cpu's selected through mask bits + * @align: alignment * - * Populating per-cpu data for all online cpu's would be a typical use case, - * which is simplified by the percpu_alloc() wrapper. - * Per-cpu objects are populated with zeroed buffers. + * Allocate dynamic percpu area. Percpu objects are populated with + * zeroed buffers. */ -void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) +void *__alloc_percpu(size_t size, size_t align) { /* * We allocate whole cache lines to avoid false sharing */ size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size()); - void *pdata = kzalloc(sz, gfp); + void *pdata = kzalloc(sz, GFP_KERNEL); void *__pdata = __percpu_disguise(pdata); + /* + * Can't easily make larger alignment work with kmalloc. WARN + * on it. Larger alignment should only be used for module + * percpu sections on SMP for which this path isn't used. + */ + WARN_ON_ONCE(align > SMP_CACHE_BYTES); + if (unlikely(!pdata)) return NULL; - if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask))) + if (likely(!__percpu_populate_mask(__pdata, size, GFP_KERNEL, + &cpu_possible_map))) return __pdata; kfree(pdata); return NULL; } -EXPORT_SYMBOL_GPL(__percpu_alloc_mask); +EXPORT_SYMBOL_GPL(__alloc_percpu); /** - * percpu_free - final cleanup of per-cpu data + * free_percpu - final cleanup of per-cpu data * @__pdata: object to clean up * * We simply clean up any per-cpu object left. No need for the client to * track and specify through a bis mask which per-cpu objects are to free. */ -void percpu_free(void *__pdata) +void free_percpu(void *__pdata) { if (unlikely(!__pdata)) return; - __percpu_depopulate_mask(__pdata, &cpu_possible_map); + __percpu_depopulate_mask(__pdata, cpu_possible_mask); kfree(__percpu_disguise(__pdata)); } -EXPORT_SYMBOL_GPL(percpu_free); +EXPORT_SYMBOL_GPL(free_percpu); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 8e858744413..493b468a503 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -2,11 +2,24 @@ #include <linux/wait.h> #include <linux/backing-dev.h> #include <linux/fs.h> +#include <linux/pagemap.h> #include <linux/sched.h> #include <linux/module.h> #include <linux/writeback.h> #include <linux/device.h> +void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) +{ +} +EXPORT_SYMBOL(default_unplug_io_fn); + +struct backing_dev_info default_backing_dev_info = { + .ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE, + .state = 0, + .capabilities = BDI_CAP_MAP_COPY, + .unplug_io_fn = default_unplug_io_fn, +}; +EXPORT_SYMBOL_GPL(default_backing_dev_info); static struct class *bdi_class; @@ -166,9 +179,20 @@ static __init int bdi_class_init(void) bdi_debug_init(); return 0; } - postcore_initcall(bdi_class_init); +static int __init default_bdi_init(void) +{ + int err; + + err = bdi_init(&default_backing_dev_info); + if (!err) + bdi_register(&default_backing_dev_info, NULL, "default"); + + return err; +} +subsys_initcall(default_bdi_init); + int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...) { @@ -260,12 +284,12 @@ static wait_queue_head_t congestion_wqh[2] = { }; -void clear_bdi_congested(struct backing_dev_info *bdi, int rw) +void clear_bdi_congested(struct backing_dev_info *bdi, int sync) { enum bdi_state bit; - wait_queue_head_t *wqh = &congestion_wqh[rw]; + wait_queue_head_t *wqh = &congestion_wqh[sync]; - bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; + bit = sync ? BDI_sync_congested : BDI_async_congested; clear_bit(bit, &bdi->state); smp_mb__after_clear_bit(); if (waitqueue_active(wqh)) @@ -273,11 +297,11 @@ void clear_bdi_congested(struct backing_dev_info *bdi, int rw) } EXPORT_SYMBOL(clear_bdi_congested); -void set_bdi_congested(struct backing_dev_info *bdi, int rw) +void set_bdi_congested(struct backing_dev_info *bdi, int sync) { enum bdi_state bit; - bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; + bit = sync ? BDI_sync_congested : BDI_async_congested; set_bit(bit, &bdi->state); } EXPORT_SYMBOL(set_bdi_congested); diff --git a/mm/bootmem.c b/mm/bootmem.c index 51a0ccf61e0..daf92713f7d 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -382,7 +382,6 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); } -#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE /** * reserve_bootmem - mark a page range as usable * @addr: starting address of the range @@ -403,7 +402,6 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size, return mark_bootmem(start, end, 1, flags); } -#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ static unsigned long align_idx(struct bootmem_data *bdata, unsigned long idx, unsigned long step) @@ -429,8 +427,8 @@ static unsigned long align_off(struct bootmem_data *bdata, unsigned long off, } static void * __init alloc_bootmem_core(struct bootmem_data *bdata, - unsigned long size, unsigned long align, - unsigned long goal, unsigned long limit) + unsigned long size, unsigned long align, + unsigned long goal, unsigned long limit) { unsigned long fallback = 0; unsigned long min, max, start, sidx, midx, step; @@ -530,17 +528,34 @@ find_block: return NULL; } +static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, + unsigned long size, unsigned long align, + unsigned long goal, unsigned long limit) +{ +#ifdef CONFIG_HAVE_ARCH_BOOTMEM + bootmem_data_t *p_bdata; + + p_bdata = bootmem_arch_preferred_node(bdata, size, align, goal, limit); + if (p_bdata) + return alloc_bootmem_core(p_bdata, size, align, goal, limit); +#endif + return NULL; +} + static void * __init ___alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { bootmem_data_t *bdata; + void *region; restart: - list_for_each_entry(bdata, &bdata_list, list) { - void *region; + region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit); + if (region) + return region; + list_for_each_entry(bdata, &bdata_list, list) { if (goal && bdata->node_low_pfn <= PFN_DOWN(goal)) continue; if (limit && bdata->node_min_pfn >= PFN_DOWN(limit)) @@ -618,6 +633,10 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, { void *ptr; + ptr = alloc_arch_preferred_bootmem(bdata, size, align, goal, limit); + if (ptr) + return ptr; + ptr = alloc_bootmem_core(bdata, size, align, goal, limit); if (ptr) return ptr; @@ -674,6 +693,10 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, { void *ptr; + ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); + if (ptr) + return ptr; + ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); if (ptr) return ptr; diff --git a/mm/debug-pagealloc.c b/mm/debug-pagealloc.c new file mode 100644 index 00000000000..a1e3324de2b --- /dev/null +++ b/mm/debug-pagealloc.c @@ -0,0 +1,129 @@ +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/page-debug-flags.h> +#include <linux/poison.h> + +static inline void set_page_poison(struct page *page) +{ + __set_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags); +} + +static inline void clear_page_poison(struct page *page) +{ + __clear_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags); +} + +static inline bool page_poison(struct page *page) +{ + return test_bit(PAGE_DEBUG_FLAG_POISON, &page->debug_flags); +} + +static void poison_highpage(struct page *page) +{ + /* + * Page poisoning for highmem pages is not implemented. + * + * This can be called from interrupt contexts. + * So we need to create a new kmap_atomic slot for this + * application and it will need interrupt protection. + */ +} + +static void poison_page(struct page *page) +{ + void *addr; + + if (PageHighMem(page)) { + poison_highpage(page); + return; + } + set_page_poison(page); + addr = page_address(page); + memset(addr, PAGE_POISON, PAGE_SIZE); +} + +static void poison_pages(struct page *page, int n) +{ + int i; + + for (i = 0; i < n; i++) + poison_page(page + i); +} + +static bool single_bit_flip(unsigned char a, unsigned char b) +{ + unsigned char error = a ^ b; + + return error && !(error & (error - 1)); +} + +static void check_poison_mem(unsigned char *mem, size_t bytes) +{ + unsigned char *start; + unsigned char *end; + + for (start = mem; start < mem + bytes; start++) { + if (*start != PAGE_POISON) + break; + } + if (start == mem + bytes) + return; + + for (end = mem + bytes - 1; end > start; end--) { + if (*end != PAGE_POISON) + break; + } + + if (!printk_ratelimit()) + return; + else if (start == end && single_bit_flip(*start, PAGE_POISON)) + printk(KERN_ERR "pagealloc: single bit error\n"); + else + printk(KERN_ERR "pagealloc: memory corruption\n"); + + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, start, + end - start + 1, 1); + dump_stack(); +} + +static void unpoison_highpage(struct page *page) +{ + /* + * See comment in poison_highpage(). + * Highmem pages should not be poisoned for now + */ + BUG_ON(page_poison(page)); +} + +static void unpoison_page(struct page *page) +{ + if (PageHighMem(page)) { + unpoison_highpage(page); + return; + } + if (page_poison(page)) { + void *addr = page_address(page); + + check_poison_mem(addr, PAGE_SIZE); + clear_page_poison(page); + } +} + +static void unpoison_pages(struct page *page, int n) +{ + int i; + + for (i = 0; i < n; i++) + unpoison_page(page + i); +} + +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (!debug_pagealloc_enabled) + return; + + if (enable) + unpoison_pages(page, numpages); + else + poison_pages(page, numpages); +} diff --git a/mm/failslab.c b/mm/failslab.c index 7c6ea6493f8..9339de5f0a9 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -1,4 +1,5 @@ #include <linux/fault-inject.h> +#include <linux/gfp.h> static struct { struct fault_attr attr; diff --git a/mm/filemap.c b/mm/filemap.c index 23acefe5180..2e2d38ebda4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -513,6 +513,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, } return ret; } +EXPORT_SYMBOL_GPL(add_to_page_cache_lru); #ifdef CONFIG_NUMA struct page *__page_cache_alloc(gfp_t gfp) @@ -565,6 +566,24 @@ void wait_on_page_bit(struct page *page, int bit_nr) EXPORT_SYMBOL(wait_on_page_bit); /** + * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue + * @page - Page defining the wait queue of interest + * @waiter - Waiter to add to the queue + * + * Add an arbitrary @waiter to the wait queue for the nominated @page. + */ +void add_page_wait_queue(struct page *page, wait_queue_t *waiter) +{ + wait_queue_head_t *q = page_waitqueue(page); + unsigned long flags; + + spin_lock_irqsave(&q->lock, flags); + __add_wait_queue(q, waiter); + spin_unlock_irqrestore(&q->lock, flags); +} +EXPORT_SYMBOL_GPL(add_page_wait_queue); + +/** * unlock_page - unlock a locked page * @page: the page * @@ -627,6 +646,7 @@ int __lock_page_killable(struct page *page) return __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page_killable, TASK_KILLABLE); } +EXPORT_SYMBOL_GPL(__lock_page_killable); /** * __lock_page_nosync - get a lock on the page, without calling sync_page() @@ -1823,7 +1843,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr, int copy = min(bytes, iov->iov_len - base); base = 0; - left = __copy_from_user_inatomic_nocache(vaddr, buf, copy); + left = __copy_from_user_inatomic(vaddr, buf, copy); copied += copy; bytes -= copy; vaddr += copy; @@ -1851,8 +1871,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, if (likely(i->nr_segs == 1)) { int left; char __user *buf = i->iov->iov_base + i->iov_offset; - left = __copy_from_user_inatomic_nocache(kaddr + offset, - buf, bytes); + left = __copy_from_user_inatomic(kaddr + offset, buf, bytes); copied = bytes - left; } else { copied = __iovec_copy_from_user_inatomic(kaddr + offset, @@ -1880,7 +1899,7 @@ size_t iov_iter_copy_from_user(struct page *page, if (likely(i->nr_segs == 1)) { int left; char __user *buf = i->iov->iov_base + i->iov_offset; - left = __copy_from_user_nocache(kaddr + offset, buf, bytes); + left = __copy_from_user(kaddr + offset, buf, bytes); copied = bytes - left; } else { copied = __iovec_copy_from_user_inatomic(kaddr + offset, @@ -2464,6 +2483,9 @@ EXPORT_SYMBOL(generic_file_aio_write); * (presumably at page->private). If the release was successful, return `1'. * Otherwise return zero. * + * This may also be called if PG_fscache is set on a page, indicating that the + * page is known to the local caching routines. + * * The @gfp_mask argument specifies whether I/O may be performed to release * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS). * diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 0c04615651b..427dfe3ce78 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -89,8 +89,8 @@ do_xip_mapping_read(struct address_space *mapping, } } nr = nr - offset; - if (nr > len) - nr = len; + if (nr > len - copied) + nr = len - copied; error = mapping->a_ops->get_xip_mem(mapping, index, 0, &xip_mem, &xip_pfn); diff --git a/mm/fremap.c b/mm/fremap.c index 736ba7f3306..b6ec85abbb3 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -198,7 +198,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, flags &= MAP_NONBLOCK; get_file(file); addr = mmap_region(file, start, size, - flags, vma->vm_flags, pgoff, 1); + flags, vma->vm_flags, pgoff); fput(file); if (IS_ERR_VALUE(addr)) { err = addr; diff --git a/mm/highmem.c b/mm/highmem.c index b36b83b920f..68eb1d9b63f 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -67,6 +67,25 @@ pte_t * pkmap_page_table; static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); +/* + * Most architectures have no use for kmap_high_get(), so let's abstract + * the disabling of IRQ out of the locking in that case to save on a + * potential useless overhead. + */ +#ifdef ARCH_NEEDS_KMAP_HIGH_GET +#define lock_kmap() spin_lock_irq(&kmap_lock) +#define unlock_kmap() spin_unlock_irq(&kmap_lock) +#define lock_kmap_any(flags) spin_lock_irqsave(&kmap_lock, flags) +#define unlock_kmap_any(flags) spin_unlock_irqrestore(&kmap_lock, flags) +#else +#define lock_kmap() spin_lock(&kmap_lock) +#define unlock_kmap() spin_unlock(&kmap_lock) +#define lock_kmap_any(flags) \ + do { spin_lock(&kmap_lock); (void)(flags); } while (0) +#define unlock_kmap_any(flags) \ + do { spin_unlock(&kmap_lock); (void)(flags); } while (0) +#endif + static void flush_all_zero_pkmaps(void) { int i; @@ -113,9 +132,9 @@ static void flush_all_zero_pkmaps(void) */ void kmap_flush_unused(void) { - spin_lock(&kmap_lock); + lock_kmap(); flush_all_zero_pkmaps(); - spin_unlock(&kmap_lock); + unlock_kmap(); } static inline unsigned long map_new_virtual(struct page *page) @@ -145,10 +164,10 @@ start: __set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&pkmap_map_wait, &wait); - spin_unlock(&kmap_lock); + unlock_kmap(); schedule(); remove_wait_queue(&pkmap_map_wait, &wait); - spin_lock(&kmap_lock); + lock_kmap(); /* Somebody else might have mapped it while we slept */ if (page_address(page)) @@ -184,29 +203,59 @@ void *kmap_high(struct page *page) * For highmem pages, we can't trust "virtual" until * after we have the lock. */ - spin_lock(&kmap_lock); + lock_kmap(); vaddr = (unsigned long)page_address(page); if (!vaddr) vaddr = map_new_virtual(page); pkmap_count[PKMAP_NR(vaddr)]++; BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2); - spin_unlock(&kmap_lock); + unlock_kmap(); return (void*) vaddr; } EXPORT_SYMBOL(kmap_high); +#ifdef ARCH_NEEDS_KMAP_HIGH_GET +/** + * kmap_high_get - pin a highmem page into memory + * @page: &struct page to pin + * + * Returns the page's current virtual memory address, or NULL if no mapping + * exists. When and only when a non null address is returned then a + * matching call to kunmap_high() is necessary. + * + * This can be called from any context. + */ +void *kmap_high_get(struct page *page) +{ + unsigned long vaddr, flags; + + lock_kmap_any(flags); + vaddr = (unsigned long)page_address(page); + if (vaddr) { + BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 1); + pkmap_count[PKMAP_NR(vaddr)]++; + } + unlock_kmap_any(flags); + return (void*) vaddr; +} +#endif + /** * kunmap_high - map a highmem page into memory * @page: &struct page to unmap + * + * If ARCH_NEEDS_KMAP_HIGH_GET is not defined then this may be called + * only from user context. */ void kunmap_high(struct page *page) { unsigned long vaddr; unsigned long nr; + unsigned long flags; int need_wakeup; - spin_lock(&kmap_lock); + lock_kmap_any(flags); vaddr = (unsigned long)page_address(page); BUG_ON(!vaddr); nr = PKMAP_NR(vaddr); @@ -232,7 +281,7 @@ void kunmap_high(struct page *page) */ need_wakeup = waitqueue_active(&pkmap_map_wait); } - spin_unlock(&kmap_lock); + unlock_kmap_any(flags); /* do wake-up, if needed, race-free outside of the spin lock */ if (need_wakeup) @@ -373,3 +422,48 @@ void __init page_address_init(void) } #endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */ + +#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT) + +void debug_kmap_atomic(enum km_type type) +{ + static unsigned warn_count = 10; + + if (unlikely(warn_count == 0)) + return; + + if (unlikely(in_interrupt())) { + if (in_irq()) { + if (type != KM_IRQ0 && type != KM_IRQ1 && + type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ && + type != KM_BOUNCE_READ) { + WARN_ON(1); + warn_count--; + } + } else if (!irqs_disabled()) { /* softirq */ + if (type != KM_IRQ0 && type != KM_IRQ1 && + type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 && + type != KM_SKB_SUNRPC_DATA && + type != KM_SKB_DATA_SOFTIRQ && + type != KM_BOUNCE_READ) { + WARN_ON(1); + warn_count--; + } + } + } + + if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ || + type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) { + if (!irqs_disabled()) { + WARN_ON(1); + warn_count--; + } + } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) { + if (irq_count() == 0 && !irqs_disabled()) { + WARN_ON(1); + warn_count--; + } + } +} + +#endif diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 618e9830408..28c655ba935 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -918,7 +918,7 @@ static void return_unused_surplus_pages(struct hstate *h, * an instantiated the change should be committed via vma_commit_reservation. * No action is required on failure. */ -static int vma_needs_reservation(struct hstate *h, +static long vma_needs_reservation(struct hstate *h, struct vm_area_struct *vma, unsigned long addr) { struct address_space *mapping = vma->vm_file->f_mapping; @@ -933,7 +933,7 @@ static int vma_needs_reservation(struct hstate *h, return 1; } else { - int err; + long err; pgoff_t idx = vma_hugecache_offset(h, vma, addr); struct resv_map *reservations = vma_resv_map(vma); @@ -969,7 +969,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, struct page *page; struct address_space *mapping = vma->vm_file->f_mapping; struct inode *inode = mapping->host; - unsigned int chg; + long chg; /* * Processes that did not create the mapping will have no reserves and @@ -2269,12 +2269,18 @@ void hugetlb_change_protection(struct vm_area_struct *vma, int hugetlb_reserve_pages(struct inode *inode, long from, long to, - struct vm_area_struct *vma) + struct vm_area_struct *vma, + int acctflag) { long ret, chg; struct hstate *h = hstate_inode(inode); - if (vma && vma->vm_flags & VM_NORESERVE) + /* + * Only apply hugepage reservation if asked. At fault time, an + * attempt will be made for VM_NORESERVE to allocate a page + * and filesystem quota without using reserves |