diff options
Diffstat (limited to 'fs/hugetlbfs/inode.c')
| -rw-r--r-- | fs/hugetlbfs/inode.c | 202 |
1 files changed, 119 insertions, 83 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c5bc355d824..1e2872b2534 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1,11 +1,13 @@ /* * hugetlbpage-backed filesystem. Based on ramfs. * - * William Irwin, 2002 + * Nadia Yvette Chambers, 2002 * * Copyright (C) 2002 Linus Torvalds. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/thread_info.h> #include <asm/current.h> @@ -97,7 +99,7 @@ static void huge_pagevec_release(struct pagevec *pvec) static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); loff_t len, vma_len; int ret; struct hstate *h = hstate_file(file); @@ -110,7 +112,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * way when do_mmap_pgoff unwinds (may be important on powerpc * and ia64). */ - vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND | VM_DONTDUMP; + vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; vma->vm_ops = &hugetlb_vm_ops; if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) @@ -151,8 +153,8 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long start_addr; struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; if (len & ~huge_page_mask(h)) return -EINVAL; @@ -173,39 +175,13 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return addr; } - if (len > mm->cached_hole_size) - start_addr = mm->free_area_cache; - else { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } - -full_search: - addr = ALIGN(start_addr, huge_page_size(h)); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { - /* - * Start a new search - just in case we missed - * some holes. - */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - - if (!vma || addr + len <= vma->vm_start) { - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = ALIGN(vma->vm_end, huge_page_size(h)); - } + info.flags = 0; + info.length = len; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); } #endif @@ -392,7 +368,13 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) static void hugetlbfs_evict_inode(struct inode *inode) { + struct resv_map *resv_map; + truncate_hugepages(inode, 0); + resv_map = (struct resv_map *)inode->i_mapping->private_data; + /* root inode doesn't have the resv_map, so we should check it */ + if (resv_map) + resv_map_release(&resv_map->refs); clear_inode(inode); } @@ -489,21 +471,36 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, return inode; } +/* + * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never + * be taken from reclaim -- unlike regular filesystems. This needs an + * annotation because huge_pmd_share() does an allocation under + * i_mmap_mutex. + */ +static struct lock_class_key hugetlbfs_i_mmap_mutex_key; + static struct inode *hugetlbfs_get_inode(struct super_block *sb, struct inode *dir, umode_t mode, dev_t dev) { struct inode *inode; + struct resv_map *resv_map; + + resv_map = resv_map_alloc(); + if (!resv_map) + return NULL; inode = new_inode(sb); if (inode) { struct hugetlbfs_inode_info *info; inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); + lockdep_set_class(&inode->i_mapping->i_mmap_mutex, + &hugetlbfs_i_mmap_mutex_key); inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - INIT_LIST_HEAD(&inode->i_mapping->private_list); + inode->i_mapping->private_data = resv_map; info = HUGETLBFS_I(inode); /* * The policy is initialized here even if we are creating a @@ -533,7 +530,9 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, break; } lockdep_annotate_inode_mutex_key(inode); - } + } else + kref_put(&resv_map->refs, resv_map_release); + return inode; } @@ -608,11 +607,11 @@ static int hugetlbfs_migrate_page(struct address_space *mapping, int rc; rc = migrate_huge_page_move_mapping(mapping, newpage, page); - if (rc) + if (rc != MIGRATEPAGE_SUCCESS) return rc; migrate_page_copy(newpage, page); - return 0; + return MIGRATEPAGE_SUCCESS; } static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -826,8 +825,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) ps = memparse(args[0].from, &rest); pconfig->hstate = size_to_hstate(ps); if (!pconfig->hstate) { - printk(KERN_ERR - "hugetlbfs: Unsupported page size %lu MB\n", + pr_err("Unsupported page size %lu MB\n", ps >> 20); return -EINVAL; } @@ -835,8 +833,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) } default: - printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", - p); + pr_err("Bad mount option: \"%s\"\n", p); return -EINVAL; break; } @@ -856,8 +853,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) return 0; bad_val: - printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", - args[0].from, p); + pr_err("Bad value '%s' for mount option '%s'\n", args[0].from, p); return -EINVAL; } @@ -905,8 +901,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) goto out_free; return 0; out_free: - if (sbinfo->spool) - kfree(sbinfo->spool); + kfree(sbinfo->spool); kfree(sbinfo); return -ENOMEM; } @@ -922,8 +917,9 @@ static struct file_system_type hugetlbfs_fs_type = { .mount = hugetlbfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("hugetlbfs"); -static struct vfsmount *hugetlbfs_vfsmount; +static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; static int can_do_hugetlb_shm(void) { @@ -932,29 +928,47 @@ static int can_do_hugetlb_shm(void) return capable(CAP_IPC_LOCK) || in_group_p(shm_group); } -struct file *hugetlb_file_setup(const char *name, unsigned long addr, - size_t size, vm_flags_t acctflag, - struct user_struct **user, int creat_flags) +static int get_hstate_idx(int page_size_log) { - int error = -ENOMEM; - struct file *file; + struct hstate *h = hstate_sizelog(page_size_log); + + if (!h) + return -1; + return h - hstates; +} + +static const struct dentry_operations anon_ops = { + .d_dname = simple_dname +}; + +/* + * Note that size should be aligned to proper hugepage size in caller side, + * otherwise hugetlb_reserve_pages reserves one less hugepages than intended. + */ +struct file *hugetlb_file_setup(const char *name, size_t size, + vm_flags_t acctflag, struct user_struct **user, + int creat_flags, int page_size_log) +{ + struct file *file = ERR_PTR(-ENOMEM); struct inode *inode; struct path path; - struct dentry *root; + struct super_block *sb; struct qstr quick_string; - struct hstate *hstate; - unsigned long num_pages; + int hstate_idx; + + hstate_idx = get_hstate_idx(page_size_log); + if (hstate_idx < 0) + return ERR_PTR(-ENODEV); *user = NULL; - if (!hugetlbfs_vfsmount) + if (!hugetlbfs_vfsmount[hstate_idx]) return ERR_PTR(-ENOENT); if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { *user = current_user(); if (user_shm_lock(size, *user)) { task_lock(current); - printk_once(KERN_WARNING - "%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n", + pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n", current->comm, current->pid); task_unlock(current); } else { @@ -963,36 +977,34 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, } } - root = hugetlbfs_vfsmount->mnt_root; + sb = hugetlbfs_vfsmount[hstate_idx]->mnt_sb; quick_string.name = name; quick_string.len = strlen(quick_string.name); quick_string.hash = 0; - path.dentry = d_alloc(root, &quick_string); + path.dentry = d_alloc_pseudo(sb, &quick_string); if (!path.dentry) goto out_shm_unlock; - path.mnt = mntget(hugetlbfs_vfsmount); - error = -ENOSPC; - inode = hugetlbfs_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0); + d_set_d_op(path.dentry, &anon_ops); + path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]); + file = ERR_PTR(-ENOSPC); + inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0); if (!inode) goto out_dentry; - hstate = hstate_inode(inode); - size += addr & ~huge_page_mask(hstate); - num_pages = ALIGN(size, huge_page_size(hstate)) >> - huge_page_shift(hstate); - error = -ENOMEM; - if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag)) + file = ERR_PTR(-ENOMEM); + if (hugetlb_reserve_pages(inode, 0, + size >> huge_page_shift(hstate_inode(inode)), NULL, + acctflag)) goto out_inode; d_instantiate(path.dentry, inode); inode->i_size = size; clear_nlink(inode); - error = -ENFILE; file = alloc_file(&path, FMODE_WRITE | FMODE_READ, &hugetlbfs_file_operations); - if (!file) + if (IS_ERR(file)) goto out_dentry; /* inode is already attached */ return file; @@ -1006,13 +1018,19 @@ out_shm_unlock: user_shm_unlock(size, *user); *user = NULL; } - return ERR_PTR(error); + return file; } static int __init init_hugetlbfs_fs(void) { + struct hstate *h; int error; - struct vfsmount *vfsmount; + int i; + + if (!hugepages_supported()) { + pr_info("disabling because there are no supported hugepage sizes\n"); + return -ENOTSUPP; + } error = bdi_init(&hugetlbfs_backing_dev_info); if (error) @@ -1029,14 +1047,26 @@ static int __init init_hugetlbfs_fs(void) if (error) goto out; - vfsmount = kern_mount(&hugetlbfs_fs_type); + i = 0; + for_each_hstate(h) { + char buf[50]; + unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10); - if (!IS_ERR(vfsmount)) { - hugetlbfs_vfsmount = vfsmount; - return 0; - } + snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb); + hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type, + buf); - error = PTR_ERR(vfsmount); + if (IS_ERR(hugetlbfs_vfsmount[i])) { + pr_err("Cannot mount internal hugetlbfs for " + "page size %uK", ps_kb); + error = PTR_ERR(hugetlbfs_vfsmount[i]); + hugetlbfs_vfsmount[i] = NULL; + } + i++; + } + /* Non default hstates are optional */ + if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx])) + return 0; out: kmem_cache_destroy(hugetlbfs_inode_cachep); @@ -1047,13 +1077,19 @@ static int __init init_hugetlbfs_fs(void) static void __exit exit_hugetlbfs_fs(void) { + struct hstate *h; + int i; + + /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(hugetlbfs_inode_cachep); - kern_unmount(hugetlbfs_vfsmount); + i = 0; + for_each_hstate(h) + kern_unmount(hugetlbfs_vfsmount[i++]); unregister_filesystem(&hugetlbfs_fs_type); bdi_destroy(&hugetlbfs_backing_dev_info); } |
