diff options
Diffstat (limited to 'mm')
44 files changed, 2826 insertions, 1512 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 82fed4eb2b6..d5c8019c662 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -140,9 +140,13 @@ config ARCH_DISCARD_MEMBLOCK config NO_BOOTMEM boolean +config MEMORY_ISOLATION + boolean + # eventually, we can have this option just 'select SPARSEMEM' config MEMORY_HOTPLUG bool "Allow for memory hot-add" + select MEMORY_ISOLATION depends on SPARSEMEM || X86_64_ACPI_NUMA depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390) @@ -272,6 +276,7 @@ config MEMORY_FAILURE depends on MMU depends on ARCH_SUPPORTS_MEMORY_FAILURE bool "Enable recovery from hardware memory errors" + select MEMORY_ISOLATION help Enables code to recover from some memory failures on systems with MCA recovery. This allows a system to continue running diff --git a/mm/Makefile b/mm/Makefile index 2e2fbbefb99..92753e2d82d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -15,8 +15,9 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ - page_isolation.o mm_init.o mmu_context.o percpu.o \ + mm_init.o mmu_context.o percpu.o slab_common.o \ compaction.o $(mmu-y) + obj-y += init-mm.o ifdef CONFIG_NO_BOOTMEM @@ -48,9 +49,11 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o -obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o +obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o +obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o obj-$(CONFIG_CLEANCACHE) += cleancache.o +obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o diff --git a/mm/backing-dev.c b/mm/backing-dev.c index dd8e2aafb07..6b4718e2ee3 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -677,7 +677,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi->min_ratio = 0; bdi->max_ratio = 100; - bdi->max_prop_frac = PROP_FRAC_BASE; + bdi->max_prop_frac = FPROP_FRAC_BASE; spin_lock_init(&bdi->wb_lock); INIT_LIST_HEAD(&bdi->bdi_list); INIT_LIST_HEAD(&bdi->work_list); @@ -700,7 +700,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi->write_bandwidth = INIT_BW; bdi->avg_write_bandwidth = INIT_BW; - err = prop_local_init_percpu(&bdi->completions); + err = fprop_local_init_percpu(&bdi->completions); if (err) { err: @@ -744,7 +744,7 @@ void bdi_destroy(struct backing_dev_info *bdi) for (i = 0; i < NR_BDI_STAT_ITEMS; i++) percpu_counter_destroy(&bdi->bdi_stat[i]); - prop_local_destroy_percpu(&bdi->completions); + fprop_local_destroy_percpu(&bdi->completions); } EXPORT_SYMBOL(bdi_destroy); @@ -886,3 +886,23 @@ out: return ret; } EXPORT_SYMBOL(wait_iff_congested); + +int pdflush_proc_obsolete(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + char kbuf[] = "0\n"; + + if (*ppos) { + *lenp = 0; + return 0; + } + + if (copy_to_user(buffer, kbuf, sizeof(kbuf))) + return -EFAULT; + printk_once(KERN_WARNING "%s exported in /proc is scheduled for removal\n", + table->procname); + + *lenp = 2; + *ppos += *lenp; + return 2; +} diff --git a/mm/bootmem.c b/mm/bootmem.c index 73096630cb3..bcb63ac48cc 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -710,6 +710,10 @@ again: if (ptr) return ptr; + /* do not panic in alloc_bootmem_bdata() */ + if (limit && goal + size > limit) + limit = 0; + ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit); if (ptr) return ptr; diff --git a/mm/bounce.c b/mm/bounce.c index d1be02ca188..04208677556 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -24,23 +24,25 @@ static mempool_t *page_pool, *isa_page_pool; -#ifdef CONFIG_HIGHMEM +#if defined(CONFIG_HIGHMEM) || defined(CONFIG_NEED_BOUNCE_POOL) static __init int init_emergency_pool(void) { -#ifndef CONFIG_MEMORY_HOTPLUG +#if defined(CONFIG_HIGHMEM) && !defined(CONFIG_MEMORY_HOTPLUG) if (max_pfn <= max_low_pfn) return 0; #endif page_pool = mempool_create_page_pool(POOL_SIZE, 0); BUG_ON(!page_pool); - printk("highmem bounce pool size: %d pages\n", POOL_SIZE); + printk("bounce pool size: %d pages\n", POOL_SIZE); return 0; } __initcall(init_emergency_pool); +#endif +#ifdef CONFIG_HIGHMEM /* * highmem version, map in to vec */ diff --git a/mm/compaction.c b/mm/compaction.c index 2f42d952853..e78cb968842 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -422,6 +422,17 @@ static void isolate_freepages(struct zone *zone, pfn -= pageblock_nr_pages) { unsigned long isolated; + /* + * Skip ahead if another thread is compacting in the area + * simultaneously. If we wrapped around, we can only skip + * ahead if zone->compact_cached_free_pfn also wrapped to + * above our starting point. + */ + if (cc->order > 0 && (!cc->wrapped || + zone->compact_cached_free_pfn > + cc->start_free_pfn)) + pfn = min(pfn, zone->compact_cached_free_pfn); + if (!pfn_valid(pfn)) continue; @@ -461,8 +472,11 @@ static void isolate_freepages(struct zone *zone, * looking for free pages, the search will restart here as * page migration may have returned some pages to the allocator */ - if (isolated) + if (isolated) { high_pfn = max(high_pfn, pfn); + if (cc->order > 0) + zone->compact_cached_free_pfn = high_pfn; + } } /* split_free_page does not map the pages */ @@ -556,6 +570,20 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, return ISOLATE_SUCCESS; } +/* + * Returns the start pfn of the last page block in a zone. This is the starting + * point for full compaction of a zone. Compaction searches for free pages from + * the end of each zone, while isolate_freepages_block scans forward inside each + * page block. + */ +static unsigned long start_free_pfn(struct zone *zone) +{ + unsigned long free_pfn; + free_pfn = zone->zone_start_pfn + zone->spanned_pages; + free_pfn &= ~(pageblock_nr_pages-1); + return free_pfn; +} + static int compact_finished(struct zone *zone, struct compact_control *cc) { @@ -565,8 +593,26 @@ static int compact_finished(struct zone *zone, if (fatal_signal_pending(current)) return COMPACT_PARTIAL; - /* Compaction run completes if the migrate and free scanner meet */ - if (cc->free_pfn <= cc->migrate_pfn) + /* + * A full (order == -1) compaction run starts at the beginning and + * end of a zone; it completes when the migrate and free scanner meet. + * A partial (order > 0) compaction can start with the free scanner + * at a random point in the zone, and may have to restart. + */ + if (cc->free_pfn <= cc->migrate_pfn) { + if (cc->order > 0 && !cc->wrapped) { + /* We started partway through; restart at the end. */ + unsigned long free_pfn = start_free_pfn(zone); + zone->compact_cached_free_pfn = free_pfn; + cc->free_pfn = free_pfn; + cc->wrapped = 1; + return COMPACT_CONTINUE; + } + return COMPACT_COMPLETE; + } + + /* We wrapped around and ended up where we started. */ + if (cc->wrapped && cc->free_pfn <= cc->start_free_pfn) return COMPACT_COMPLETE; /* @@ -664,8 +710,15 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) /* Setup to move all movable pages to the end of the zone */ cc->migrate_pfn = zone->zone_start_pfn; - cc->free_pfn = cc->migrate_pfn + zone->spanned_pages; - cc->free_pfn &= ~(pageblock_nr_pages-1); + + if (cc->order > 0) { + /* Incremental compaction. Start where the last one stopped. */ + cc->free_pfn = zone->compact_cached_free_pfn; + cc->start_free_pfn = cc->free_pfn; + } else { + /* Order == -1 starts at the end of the zone. */ + cc->free_pfn = start_free_pfn(zone); + } migrate_prep_local(); diff --git a/mm/fadvise.c b/mm/fadvise.c index 469491e0af7..9b75a045dbf 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -93,11 +93,6 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) spin_unlock(&file->f_lock); break; case POSIX_FADV_WILLNEED: - if (!mapping->a_ops->readpage) { - ret = -EINVAL; - break; - } - /* First and last PARTIAL page! */ start_index = offset >> PAGE_CACHE_SHIFT; end_index = endbyte >> PAGE_CACHE_SHIFT; @@ -106,12 +101,13 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) nrpages = end_index - start_index + 1; if (!nrpages) nrpages = ~0UL; - - ret = force_page_cache_readahead(mapping, file, - start_index, - nrpages); - if (ret > 0) - ret = 0; + + /* + * Ignore return value because fadvise() shall return + * success even if filesystem can't retrieve a hint, + */ + force_page_cache_readahead(mapping, file, start_index, + nrpages); break; case POSIX_FADV_NOREUSE: break; diff --git a/mm/frontswap.c b/mm/frontswap.c index e25025574a0..6b3e71a2cd4 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -11,15 +11,11 @@ * This work is licensed under the terms of the GNU GPL, version 2. */ -#include <linux/mm.h> #include <linux/mman.h> #include <linux/swap.h> #include <linux/swapops.h> -#include <linux/proc_fs.h> #include <linux/security.h> -#include <linux/capability.h> #include <linux/module.h> -#include <linux/uaccess.h> #include <linux/debugfs.h> #include <linux/frontswap.h> #include <linux/swapfile.h> @@ -110,16 +106,21 @@ void __frontswap_init(unsigned type) BUG_ON(sis == NULL); if (sis->frontswap_map == NULL) return; - if (frontswap_enabled) - (*frontswap_ops.init)(type); + frontswap_ops.init(type); } EXPORT_SYMBOL(__frontswap_init); +static inline void __frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +{ + frontswap_clear(sis, offset); + atomic_dec(&sis->frontswap_pages); +} + /* * "Store" data from a page to frontswap and associate it with the page's * swaptype and offset. Page must be locked and in the swap cache. * If frontswap already contains a page with matching swaptype and - * offset, the frontswap implmentation may either overwrite the data and + * offset, the frontswap implementation may either overwrite the data and * return success or invalidate the page from frontswap and return failure. */ int __frontswap_store(struct page *page) @@ -134,22 +135,21 @@ int __frontswap_store(struct page *page) BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) dup = 1; - ret = (*frontswap_ops.store)(type, offset, page); + ret = frontswap_ops.store(type, offset, page); if (ret == 0) { frontswap_set(sis, offset); inc_frontswap_succ_stores(); if (!dup) atomic_inc(&sis->frontswap_pages); - } else if (dup) { + } else { /* failed dup always results in automatic invalidate of the (older) page from frontswap */ - frontswap_clear(sis, offset); - atomic_dec(&sis->frontswap_pages); - inc_frontswap_failed_stores(); - } else inc_frontswap_failed_stores(); + if (dup) + __frontswap_clear(sis, offset); + } if (frontswap_writethrough_enabled) /* report failure so swap also writes to swap device */ ret = -1; @@ -173,7 +173,7 @@ int __frontswap_load(struct page *page) BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) - ret = (*frontswap_ops.load)(type, offset, page); + ret = frontswap_ops.load(type, offset, page); if (ret == 0) inc_frontswap_loads(); return ret; @@ -190,9 +190,8 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) { - (*frontswap_ops.invalidate_page)(type, offset); - atomic_dec(&sis->frontswap_pages); - frontswap_clear(sis, offset); + frontswap_ops.invalidate_page(type, offset); + __frontswap_clear(sis, offset); inc_frontswap_invalidates(); } } @@ -209,67 +208,102 @@ void __frontswap_invalidate_area(unsigned type) BUG_ON(sis == NULL); if (sis->frontswap_map == NULL) return; - (*frontswap_ops.invalidate_area)(type); + frontswap_ops.invalidate_area(type); atomic_set(&sis->frontswap_pages, 0); memset(sis->frontswap_map, 0, sis->max / sizeof(long)); } EXPORT_SYMBOL(__frontswap_invalidate_area); -/* - * Frontswap, like a true swap device, may unnecessarily retain pages - * under certain circumstances; "shrink" frontswap is essentially a - * "partial swapoff" and works by calling try_to_unuse to attempt to - * unuse enough frontswap pages to attempt to -- subject to memory - * constraints -- reduce the number of pages in frontswap to the - * number given in the parameter target_pages. - */ -void frontswap_shrink(unsigned long target_pages) +static unsigned long __frontswap_curr_pages(void) { - struct swap_info_struct *si = NULL; - int si_frontswap_pages; - unsigned long total_pages = 0, total_pages_to_unuse; - unsigned long pages = 0, pages_to_unuse = 0; int type; - bool locked = false; + unsigned long totalpages = 0; + struct swap_info_struct *si = NULL; - /* - * we don't want to hold swap_lock while doing a very - * lengthy try_to_unuse, but swap_list may change - * so restart scan from swap_list.head each time - */ - spin_lock(&swap_lock); - locked = true; - total_pages = 0; + assert_spin_locked(&swap_lock); for (type = swap_list.head; type >= 0; type = si->next) { si = swap_info[type]; - total_pages += atomic_read(&si->frontswap_pages); + totalpages += atomic_read(&si->frontswap_pages); } - if (total_pages <= target_pages) - goto out; - total_pages_to_unuse = total_pages - target_pages; + return totalpages; +} + +static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused, + int *swapid) +{ + int ret = -EINVAL; + struct swap_info_struct *si = NULL; + int si_frontswap_pages; + unsigned long total_pages_to_unuse = total; + unsigned long pages = 0, pages_to_unuse = 0; + int type; + + assert_spin_locked(&swap_lock); for (type = swap_list.head; type >= 0; type = si->next) { si = swap_info[type]; si_frontswap_pages = atomic_read(&si->frontswap_pages); - if (total_pages_to_unuse < si_frontswap_pages) + if (total_pages_to_unuse < si_frontswap_pages) { pages = pages_to_unuse = total_pages_to_unuse; - else { + } else { pages = si_frontswap_pages; pages_to_unuse = 0; /* unuse all */ } /* ensure there is enough RAM to fetch pages from frontswap */ - if (security_vm_enough_memory_mm(current->mm, pages)) + if (security_vm_enough_memory_mm(current->mm, pages)) { + ret = -ENOMEM; continue; + } vm_unacct_memory(pages); + *unused = pages_to_unuse; + *swapid = type; + ret = 0; break; } - if (type < 0) - goto out; - locked = false; + + return ret; +} + +static int __frontswap_shrink(unsigned long target_pages, + unsigned long *pages_to_unuse, + int *type) +{ + unsigned long total_pages = 0, total_pages_to_unuse; + + assert_spin_locked(&swap_lock); + + total_pages = __frontswap_curr_pages(); + if (total_pages <= target_pages) { + /* Nothing to do */ + *pages_to_unuse = 0; + return 0; + } + total_pages_to_unuse = total_pages - target_pages; + return __frontswap_unuse_pages(total_pages_to_unuse, pages_to_unuse, type); +} + +/* + * Frontswap, like a true swap device, may unnecessarily retain pages + * under certain circumstances; "shrink" frontswap is essentially a + * "partial swapoff" and works by calling try_to_unuse to attempt to + * unuse enough frontswap pages to attempt to -- subject to memory + * constraints -- reduce the number of pages in frontswap to the + * number given in the parameter target_pages. + */ +void frontswap_shrink(unsigned long target_pages) +{ + unsigned long pages_to_unuse = 0; + int type, ret; + + /* + * we don't want to hold swap_lock while doing a very + * lengthy try_to_unuse, but swap_list may change + * so restart scan from swap_list.head each time + */ + spin_lock(&swap_lock); + ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type); spin_unlock(&swap_lock); - try_to_unuse(type, true, pages_to_unuse); -out: - if (locked) - spin_unlock(&swap_lock); + if (ret == 0 && pages_to_unuse) + try_to_unuse(type, true, pages_to_unuse); return; } EXPORT_SYMBOL(frontswap_shrink); @@ -281,16 +315,12 @@ EXPORT_SYMBOL(frontswap_shrink); */ unsigned long frontswap_curr_pages(void) { - int type; unsigned long totalpages = 0; - struct swap_info_struct *si = NULL; spin_lock(&swap_lock); - for (type = swap_list.head; type >= 0; type = si->next) { - si = swap_info[type]; - totalpages += atomic_read(&si->frontswap_pages); - } + totalpages = __frontswap_curr_pages(); spin_unlock(&swap_lock); + return totalpages; } EXPORT_SYMBOL(frontswap_curr_pages); diff --git a/mm/highmem.c b/mm/highmem.c index 57d82c6250c..d517cd16a6e 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -94,6 +94,18 @@ static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); do { spin_unlock(&kmap_lock); (void)(flags); } while (0) #endif +struct page *kmap_to_page(void *vaddr) +{ + unsigned long addr = (unsigned long)vaddr; + + if (addr >= PKMAP_ADDR(0) && addr <= PKMAP_ADDR(LAST_PKMAP)) { + int i = (addr - PKMAP_ADDR(0)) >> PAGE_SHIFT; + return pte_page(pkmap_page_table[i]); + } + + return virt_to_page(addr); +} + static void flush_all_zero_pkmaps(void) { int i; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e198831276a..bc727122dd4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -24,17 +24,20 @@ #include <asm/page.h> #include <asm/pgtable.h> -#include <linux/io.h> +#include <asm/tlb.h> +#include <linux/io.h> #include <linux/hugetlb.h> +#include <linux/hugetlb_cgroup.h> #include <linux/node.h> +#include <linux/hugetlb_cgroup.h> #include "internal.h" const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; static gfp_t htlb_alloc_mask = GFP_HIGHUSER; unsigned long hugepages_treat_as_movable; -static int max_hstate; +int hugetlb_max_hstate __read_mostly; unsigned int default_hstate_idx; struct hstate hstates[HUGE_MAX_HSTATE]; @@ -45,13 +48,10 @@ static struct hstate * __initdata parsed_hstate; static unsigned long __initdata default_hstate_max_huge_pages; static unsigned long __initdata default_hstate_size; -#define for_each_hstate(h) \ - for ((h) = hstates; (h) < &hstates[max_hstate]; (h)++) - /* * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages */ -static DEFINE_SPINLOCK(hugetlb_lock); +DEFINE_SPINLOCK(hugetlb_lock); static inline void unlock_or_release_subpool(struct hugepage_subpool *spool) { @@ -509,7 +509,7 @@ void copy_huge_page(struct page *dst, struct page *src) static void enqueue_huge_page(struct hstate *h, struct page *page) { int nid = page_to_nid(page); - list_add(&page->lru, &h->hugepage_freelists[nid]); + list_move(&page->lru, &h->hugepage_freelists[nid]); h->free_huge_pages++; h->free_huge_pages_node[nid]++; } @@ -521,7 +521,7 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid) if (list_empty(&h->hugepage_freelists[nid])) return NULL; page = list_entry(h->hugepage_freelists[nid].next, struct page, lru); - list_del(&page->lru); + list_move(&page->lru, &h->hugepage_activelist); set_page_refcounted(page); h->free_huge_pages--; h->free_huge_pages_node[nid]--; @@ -593,6 +593,7 @@ static void update_and_free_page(struct hstate *h, struct page *page) 1 << PG_active | 1 << PG_reserved | 1 << PG_private | 1 << PG_writeback); } + VM_BUG_ON(hugetlb_cgroup_from_page(page)); set_compound_page_dtor(page, NULL); set_page_refcounted(page); arch_release_hugepage(page); @@ -625,10 +626,13 @@ static void free_huge_page(struct page *page) page->mapping = NULL; BUG_ON(page_count(page)); BUG_ON(page_mapcount(page)); - INIT_LIST_HEAD(&page->lru); spin_lock(&hugetlb_lock); + hugetlb_cgroup_uncharge_page(hstate_index(h), + pages_per_huge_page(h), page); if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) { + /* remove the page from active list */ + list_del(&page->lru); update_and_free_page(h, page); h->surplus_huge_pages--; h->surplus_huge_pages_node[nid]--; @@ -641,8 +645,10 @@ static void free_huge_page(struct page *page) static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) { + INIT_LIST_HEAD(&page->lru); set_compound_page_dtor(page, free_huge_page); spin_lock(&hugetlb_lock); + set_hugetlb_cgroup(page, NULL); h->nr_huge_pages++; h->nr_huge_pages_node[nid]++; spin_unlock(&hugetlb_lock); @@ -889,8 +895,10 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) spin_lock(&hugetlb_lock); if (page) { + INIT_LIST_HEAD(&page->lru); r_nid = page_to_nid(page); set_compound_page_dtor(page, free_huge_page); + set_hugetlb_cgroup(page, NULL); /* * We incremented the global counters already */ @@ -993,7 +1001,6 @@ retry: list_for_each_entry_safe(page, tmp, &surplus_list, lru) { if ((--needed) < 0) break; - list_del(&page->lru); /* * This page is now managed by the hugetlb allocator and has * no users -- drop the buddy allocator's reference. @@ -1008,7 +1015,6 @@ free: /* Free unnecessary surplus pages to the buddy allocator */ if (!list_empty(&surplus_list)) { list_for_each_entry_safe(page, tmp, &surplus_list, lru) { - list_del(&page->lru); put_page(page); } } @@ -1112,7 +1118,10 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, struct hstate *h = hstate_vma(vma); struct page *page; long chg; + int ret, idx; + struct hugetlb_cgroup *h_cg; + idx = hstate_index(h); /* * Processes that did not create the mapping will have no * reserves and will not have accounted against subpool @@ -1123,27 +1132,43 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, */ chg = vma_needs_reservation(h, vma, addr); if (chg < 0) - return ERR_PTR(-VM_FAULT_OOM); + return ERR_PTR(-ENOMEM); if (chg) if (hugepage_subpool_get_pages(spool, chg)) - return ERR_PTR(-VM_FAULT_SIGBUS); + return ERR_PTR(-ENOSPC); + ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg); + if (ret) { + hugepage_subpool_put_pages(spool, chg); + return ERR_PTR(-ENOSPC); + } spin_lock(&hugetlb_lock); page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve); - spin_unlock(&hugetlb_lock); - - if (!page) { + if (page) { + /* update page cgroup details */ + hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), + h_cg, page); |