From b45b5bd65f668a665db40d093e4e1fe563533608 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 22 Mar 2006 00:08:55 -0800 Subject: [PATCH] hugepage: Strict page reservation for hugepage inodes These days, hugepages are demand-allocated at first fault time. There's a somewhat dubious (and racy) heuristic when making a new mmap() to check if there are enough available hugepages to fully satisfy that mapping. A particularly obvious case where the heuristic breaks down is where a process maps its hugepages not as a single chunk, but as a bunch of individually mmap()ed (or shmat()ed) blocks without touching and instantiating the pages in between allocations. In this case the size of each block is compared against the total number of available hugepages. It's thus easy for the process to become overcommitted, because each block mapping will succeed, although the total number of hugepages required by all blocks exceeds the number available. In particular, this defeats such a program which will detect a mapping failure and adjust its hugepage usage downward accordingly. The patch below addresses this problem, by strictly reserving a number of physical hugepages for hugepage inodes which have been mapped, but not instatiated. MAP_SHARED mappings are thus "safe" - they will fail on mmap(), not later with an OOM SIGKILL. MAP_PRIVATE mappings can still trigger an OOM. (Actually SHARED mappings can technically still OOM, but only if the sysadmin explicitly reduces the hugepage pool between mapping and instantiation) This patch appears to address the problem at hand - it allows DB2 to start correctly, for instance, which previously suffered the failure described above. This patch causes no regressions on the libhugetblfs testsuite, and makes a test (designed to catch this problem) pass which previously failed (ppc64, POWER5). Signed-off-by: David Gibson Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index fa83836b63d..cafe73eecb0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -20,7 +20,6 @@ void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long) int hugetlb_prefault(struct address_space *, struct vm_area_struct *); int hugetlb_report_meminfo(char *); int hugetlb_report_node_meminfo(int, char *); -int is_hugepage_mem_enough(size_t); unsigned long hugetlb_total_pages(void); struct page *alloc_huge_page(struct vm_area_struct *, unsigned long); void free_huge_page(struct page *); @@ -89,7 +88,6 @@ static inline unsigned long hugetlb_total_pages(void) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) #define unmap_hugepage_range(vma, start, end) BUG() -#define is_hugepage_mem_enough(size) 0 #define hugetlb_report_meminfo(buf) 0 #define hugetlb_report_node_meminfo(n, buf) 0 #define follow_huge_pmd(mm, addr, pmd, write) NULL @@ -132,6 +130,8 @@ struct hugetlbfs_sb_info { struct hugetlbfs_inode_info { struct shared_policy policy; + /* Protected by the (global) hugetlb_lock */ + unsigned long prereserved_hpages; struct inode vfs_inode; }; @@ -148,6 +148,10 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) extern struct file_operations hugetlbfs_file_operations; extern struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_zero_setup(size_t); +int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info, + unsigned long atleast_hpages); +void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info, + unsigned long atmost_hpages); int hugetlb_get_quota(struct address_space *mapping); void hugetlb_put_quota(struct address_space *mapping); -- cgit v1.2.3-18-g5258