diff options
Diffstat (limited to 'fs/hugetlbfs/inode.c')
| -rw-r--r-- | fs/hugetlbfs/inode.c | 413 |
1 files changed, 240 insertions, 173 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a5fe68189ee..1e2872b2534 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1,11 +1,13 @@ /* * hugetlbpage-backed filesystem. Based on ramfs. * - * William Irwin, 2002 + * Nadia Yvette Chambers, 2002 * * Copyright (C) 2002 Linus Torvalds. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/thread_info.h> #include <asm/current.h> @@ -41,6 +43,25 @@ const struct file_operations hugetlbfs_file_operations; static const struct inode_operations hugetlbfs_dir_inode_operations; static const struct inode_operations hugetlbfs_inode_operations; +struct hugetlbfs_config { + kuid_t uid; + kgid_t gid; + umode_t mode; + long nr_blocks; + long nr_inodes; + struct hstate *hstate; +}; + +struct hugetlbfs_inode_info { + struct shared_policy policy; + struct inode vfs_inode; +}; + +static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) +{ + return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); +} + static struct backing_dev_info hugetlbfs_backing_dev_info = { .name = "hugetlbfs", .ra_pages = 0, /* No readahead */ @@ -78,7 +99,7 @@ static void huge_pagevec_release(struct pagevec *pvec) static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); loff_t len, vma_len; int ret; struct hstate *h = hstate_file(file); @@ -91,10 +112,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * way when do_mmap_pgoff unwinds (may be important on powerpc * and ia64). */ - vma->vm_flags |= VM_HUGETLB | VM_RESERVED; + vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; vma->vm_ops = &hugetlb_vm_ops; - if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT)) + if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; vma_len = (loff_t)(vma->vm_end - vma->vm_start); @@ -132,8 +153,8 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long start_addr; struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; if (len & ~huge_page_mask(h)) return -EINVAL; @@ -154,32 +175,13 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return addr; } - start_addr = mm->free_area_cache; - - if (len <= mm->cached_hole_size) - start_addr = TASK_UNMAPPED_BASE; - -full_search: - addr = ALIGN(start_addr, huge_page_size(h)); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { - /* - * Start a new search - just in case we missed - * some holes. - */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = TASK_UNMAPPED_BASE; - goto full_search; - } - return -ENOMEM; - } - - if (!vma || addr + len <= vma->vm_start) - return addr; - addr = ALIGN(vma->vm_end, huge_page_size(h)); - } + info.flags = 0; + info.length = len; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); } #endif @@ -238,17 +240,10 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, loff_t isize; ssize_t retval = 0; - mutex_lock(&inode->i_mutex); - /* validate length */ if (len == 0) goto out; - isize = i_size_read(inode); - if (!isize) - goto out; - - end_index = (isize - 1) >> huge_page_shift(h); for (;;) { struct page *page; unsigned long nr, ret; @@ -256,18 +251,21 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, /* nr is the maximum number of bytes to copy from this page */ nr = huge_page_size(h); + isize = i_size_read(inode); + if (!isize) + goto out; + end_index = (isize - 1) >> huge_page_shift(h); if (index >= end_index) { if (index > end_index) goto out; nr = ((isize - 1) & ~huge_page_mask(h)) + 1; - if (nr <= offset) { + if (nr <= offset) goto out; - } } nr = nr - offset; /* Find the page */ - page = find_get_page(mapping, index); + page = find_lock_page(mapping, index); if (unlikely(page == NULL)) { /* * We have a HOLE, zero out the user-buffer for the @@ -279,17 +277,18 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, else ra = 0; } else { + unlock_page(page); + /* * We have the page, copy it to user space buffer. */ ra = hugetlbfs_read_actor(page, offset, buf, len, nr); ret = ra; + page_cache_release(page); } if (ra < 0) { if (retval == 0) retval = ra; - if (page) - page_cache_release(page); goto out; } @@ -299,16 +298,12 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, index += offset >> huge_page_shift(h); offset &= ~huge_page_mask(h); - if (page) - page_cache_release(page); - /* short read or no more work */ if ((ret != nr) || (len == 0)) break; } out: *ppos = ((loff_t)index << huge_page_shift(h)) + offset; - mutex_unlock(&inode->i_mutex); return retval; } @@ -332,8 +327,7 @@ static void truncate_huge_page(struct page *page) { cancel_dirty_page(page, /* No IO accounting for huge pages? */0); ClearPageUptodate(page); - remove_from_page_cache(page); - put_page(page); + delete_from_page_cache(page); } static void truncate_hugepages(struct inode *inode, loff_t lstart) @@ -374,22 +368,27 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) static void hugetlbfs_evict_inode(struct inode *inode) { + struct resv_map *resv_map; + truncate_hugepages(inode, 0); - end_writeback(inode); + resv_map = (struct resv_map *)inode->i_mapping->private_data; + /* root inode doesn't have the resv_map, so we should check it */ + if (resv_map) + resv_map_release(&resv_map->refs); + clear_inode(inode); } static inline void -hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) +hugetlb_vmtruncate_list(struct rb_root *root, pgoff_t pgoff) { struct vm_area_struct *vma; - struct prio_tree_iter iter; - vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) { + vma_interval_tree_foreach(vma, root, pgoff, ULONG_MAX) { unsigned long v_offset; /* * Can the expression below overflow on 32-bit arches? - * No, because the prio_tree returns us only those vmas + * No, because the interval tree returns us only those vmas * which overlap the truncated area starting at pgoff, * and no vma on a 32-bit arch can span beyond the 4GB. */ @@ -398,8 +397,8 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) else v_offset = 0; - __unmap_hugepage_range(vma, - vma->vm_start + v_offset, vma->vm_end, NULL); + unmap_hugepage_range(vma, vma->vm_start + v_offset, + vma->vm_end, NULL); } } @@ -413,10 +412,10 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) pgoff = offset >> PAGE_SHIFT; i_size_write(inode, offset); - spin_lock(&mapping->i_mmap_lock); - if (!prio_tree_empty(&mapping->i_mmap)) + mutex_lock(&mapping->i_mmap_mutex); + if (!RB_EMPTY_ROOT(&mapping->i_mmap)) hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); - spin_unlock(&mapping->i_mmap_lock); + mutex_unlock(&mapping->i_mmap_mutex); truncate_hugepages(inode, offset); return 0; } @@ -448,8 +447,8 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) return 0; } -static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, - gid_t gid, int mode, dev_t dev) +static struct inode *hugetlbfs_get_root(struct super_block *sb, + struct hugetlbfs_config *config) { struct inode *inode; @@ -457,13 +456,51 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, if (inode) { struct hugetlbfs_inode_info *info; inode->i_ino = get_next_ino(); - inode->i_mode = mode; - inode->i_uid = uid; - inode->i_gid = gid; + inode->i_mode = S_IFDIR | config->mode; + inode->i_uid = config->uid; + inode->i_gid = config->gid; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + info = HUGETLBFS_I(inode); + mpol_shared_policy_init(&info->policy, NULL); + inode->i_op = &hugetlbfs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inc_nlink(inode); + lockdep_annotate_inode_mutex_key(inode); + } + return inode; +} + +/* + * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never + * be taken from reclaim -- unlike regular filesystems. This needs an + * annotation because huge_pmd_share() does an allocation under + * i_mmap_mutex. + */ +static struct lock_class_key hugetlbfs_i_mmap_mutex_key; + +static struct inode *hugetlbfs_get_inode(struct super_block *sb, + struct inode *dir, + umode_t mode, dev_t dev) +{ + struct inode *inode; + struct resv_map *resv_map; + + resv_map = resv_map_alloc(); + if (!resv_map) + return NULL; + + inode = new_inode(sb); + if (inode) { + struct hugetlbfs_inode_info *info; + inode->i_ino = get_next_ino(); + inode_init_owner(inode, dir, mode); + lockdep_set_class(&inode->i_mapping->i_mmap_mutex, + &hugetlbfs_i_mmap_mutex_key); inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - INIT_LIST_HEAD(&inode->i_mapping->private_list); + inode->i_mapping->private_data = resv_map; info = HUGETLBFS_I(inode); /* * The policy is initialized here even if we are creating a @@ -492,7 +529,10 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode->i_op = &page_symlink_inode_operations; break; } - } + lockdep_annotate_inode_mutex_key(inode); + } else + kref_put(&resv_map->refs, resv_map_release); + return inode; } @@ -500,20 +540,12 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, * File creation. Allocate an inode, and we're done.. */ static int hugetlbfs_mknod(struct inode *dir, - struct dentry *dentry, int mode, dev_t dev) + struct dentry *dentry, umode_t mode, dev_t dev) { struct inode *inode; int error = -ENOSPC; - gid_t gid; - - if (dir->i_mode & S_ISGID) { - gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else { - gid = current_fsgid(); - } - inode = hugetlbfs_get_inode(dir->i_sb, current_fsuid(), gid, mode, dev); + + inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev); if (inode) { dir->i_ctime = dir->i_mtime = CURRENT_TIME; d_instantiate(dentry, inode); @@ -523,7 +555,7 @@ static int hugetlbfs_mknod(struct inode *dir, return error; } -static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0); if (!retval) @@ -531,7 +563,7 @@ static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) return retval; } -static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) +static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); } @@ -541,15 +573,8 @@ static int hugetlbfs_symlink(struct inode *dir, { struct inode *inode; int error = -ENOSPC; - gid_t gid; - - if (dir->i_mode & S_ISGID) - gid = dir->i_gid; - else - gid = current_fsgid(); - inode = hugetlbfs_get_inode(dir->i_sb, current_fsuid(), - gid, S_IFLNK|S_IRWXUGO, 0); + inode = hugetlbfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0); if (inode) { int l = strlen(symname)+1; error = page_symlink(inode, symname, l); @@ -576,16 +601,17 @@ static int hugetlbfs_set_page_dirty(struct page *page) } static int hugetlbfs_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page) + struct page *newpage, struct page *page, + enum migrate_mode mode) { int rc; rc = migrate_huge_page_move_mapping(mapping, newpage, page); - if (rc) + if (rc != MIGRATEPAGE_SUCCESS) return rc; migrate_page_copy(newpage, page); - return 0; + return MIGRATEPAGE_SUCCESS; } static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -599,9 +625,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) spin_lock(&sbinfo->stat_lock); /* If no limits set, just report 0 for max/free/used * blocks, like simple_statfs() */ - if (sbinfo->max_blocks >= 0) { - buf->f_blocks = sbinfo->max_blocks; - buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; + if (sbinfo->spool) { + long free_pages; + + spin_lock(&sbinfo->spool->lock); + buf->f_blocks = sbinfo->spool->max_hpages; + free_pages = sbinfo->spool->max_hpages + - sbinfo->spool->used_hpages; + buf->f_bavail = buf->f_bfree = free_pages; + spin_unlock(&sbinfo->spool->lock); buf->f_files = sbinfo->max_inodes; buf->f_ffree = sbinfo->free_inodes; } @@ -617,6 +649,10 @@ static void hugetlbfs_put_super(struct super_block *sb) if (sbi) { sb->s_fs_info = NULL; + + if (sbi->spool) + hugepage_put_subpool(sbi->spool); + kfree(sbi); } } @@ -663,11 +699,17 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) return &p->vfs_inode; } +static void hugetlbfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); +} + static void hugetlbfs_destroy_inode(struct inode *inode) { hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); - kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); + call_rcu(&inode->i_rcu, hugetlbfs_i_callback); } static const struct address_space_operations hugetlbfs_aops = { @@ -741,13 +783,17 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) case Opt_uid: if (match_int(&args[0], &option)) goto bad_val; - pconfig->uid = option; + pconfig->uid = make_kuid(current_user_ns(), option); + if (!uid_valid(pconfig->uid)) + goto bad_val; break; case Opt_gid: if (match_int(&args[0], &option)) goto bad_val; - pconfig->gid = option; + pconfig->gid = make_kgid(current_user_ns(), option); + if (!gid_valid(pconfig->gid)) + goto bad_val; break; case Opt_mode: @@ -779,8 +825,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) ps = memparse(args[0].from, &rest); pconfig->hstate = size_to_hstate(ps); if (!pconfig->hstate) { - printk(KERN_ERR - "hugetlbfs: Unsupported page size %lu MB\n", + pr_err("Unsupported page size %lu MB\n", ps >> 20); return -EINVAL; } @@ -788,8 +833,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) } default: - printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", - p); + pr_err("Bad mount option: \"%s\"\n", p); return -EINVAL; break; } @@ -809,16 +853,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) return 0; bad_val: - printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", - args[0].from, p); + pr_err("Bad value '%s' for mount option '%s'\n", args[0].from, p); return -EINVAL; } static int hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) { - struct inode * inode; - struct dentry * root; int ret; struct hugetlbfs_config config; struct hugetlbfs_sb_info *sbinfo; @@ -841,61 +882,30 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = sbinfo; sbinfo->hstate = config.hstate; spin_lock_init(&sbinfo->stat_lock); - sbinfo->max_blocks = config.nr_blocks; - sbinfo->free_blocks = config.nr_blocks; sbinfo->max_inodes = config.nr_inodes; sbinfo->free_inodes = config.nr_inodes; + sbinfo->spool = NULL; + if (config.nr_blocks != -1) { + sbinfo->spool = hugepage_new_subpool(config.nr_blocks); + if (!sbinfo->spool) + goto out_free; + } sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = huge_page_size(config.hstate); sb->s_blocksize_bits = huge_page_shift(config.hstate); sb->s_magic = HUGETLBFS_MAGIC; sb->s_op = &hugetlbfs_ops; sb->s_time_gran = 1; - inode = hugetlbfs_get_inode(sb, config.uid, config.gid, - S_IFDIR | config.mode, 0); - if (!inode) - goto out_free; - - root = d_alloc_root(inode); - if (!root) { - iput(inode); + sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config)); + if (!sb->s_root) goto out_free; - } - sb->s_root = root; return 0; out_free: + kfree(sbinfo->spool); kfree(sbinfo); return -ENOMEM; } -int hugetlb_get_quota(struct address_space *mapping, long delta) -{ - int ret = 0; - struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); - - if (sbinfo->free_blocks > -1) { - spin_lock(&sbinfo->stat_lock); - if (sbinfo->free_blocks - delta >= 0) - sbinfo->free_blocks -= delta; - else - ret = -ENOMEM; - spin_unlock(&sbinfo->stat_lock); - } - - return ret; -} - -void hugetlb_put_quota(struct address_space *mapping, long delta) -{ - struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); - - if (sbinfo->free_blocks > -1) { - spin_lock(&sbinfo->stat_lock); - sbinfo->free_blocks += delta; - spin_unlock(&sbinfo->stat_lock); - } -} - static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { @@ -907,54 +917,82 @@ static struct file_system_type hugetlbfs_fs_type = { .mount = hugetlbfs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("hugetlbfs"); -static struct vfsmount *hugetlbfs_vfsmount; +static struct vfsmount *hugetlbfs_vfsmount[HUGE_MAX_HSTATE]; static int can_do_hugetlb_shm(void) { - return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); + kgid_t shm_group; + shm_group = make_kgid(&init_user_ns, sysctl_hugetlb_shm_group); + return capable(CAP_IPC_LOCK) || in_group_p(shm_group); } -struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, - struct user_struct **user, int creat_flags) +static int get_hstate_idx(int page_size_log) { - int error = -ENOMEM; - struct file *file; + struct hstate *h = hstate_sizelog(page_size_log); + + if (!h) + return -1; + return h - hstates; +} + +static const struct dentry_operations anon_ops = { + .d_dname = simple_dname +}; + +/* + * Note that size should be aligned to proper hugepage size in caller side, + * otherwise hugetlb_reserve_pages reserves one less hugepages than intended. + */ +struct file *hugetlb_file_setup(const char *name, size_t size, + vm_flags_t acctflag, struct user_struct **user, + int creat_flags, int page_size_log) +{ + struct file *file = ERR_PTR(-ENOMEM); struct inode *inode; struct path path; - struct dentry *root; + struct super_block *sb; struct qstr quick_string; + int hstate_idx; + + hstate_idx = get_hstate_idx(page_size_log); + if (hstate_idx < 0) + return ERR_PTR(-ENODEV); *user = NULL; - if (!hugetlbfs_vfsmount) + if (!hugetlbfs_vfsmount[hstate_idx]) return ERR_PTR(-ENOENT); if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { *user = current_user(); if (user_shm_lock(size, *user)) { - printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n"); + task_lock(current); + pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n", + current->comm, current->pid); + task_unlock(current); } else { *user = NULL; return ERR_PTR(-EPERM); } } - root = hugetlbfs_vfsmount->mnt_root; + sb = hugetlbfs_vfsmount[hstate_idx]->mnt_sb; quick_string.name = name; quick_string.len = strlen(quick_string.name); quick_string.hash = 0; - path.dentry = d_alloc(root, &quick_string); + path.dentry = d_alloc_pseudo(sb, &quick_string); if (!path.dentry) goto out_shm_unlock; - path.mnt = mntget(hugetlbfs_vfsmount); - error = -ENOSPC; - inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(), - current_fsgid(), S_IFREG | S_IRWXUGO, 0); + d_set_d_op(path.dentry, &anon_ops); + path.mnt = mntget(hugetlbfs_vfsmount[hstate_idx]); + file = ERR_PTR(-ENOSPC); + inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0); if (!inode) goto out_dentry; - error = -ENOMEM; + file = ERR_PTR(-ENOMEM); if (hugetlb_reserve_pages(inode, 0, size >> huge_page_shift(hstate_inode(inode)), NULL, acctflag)) @@ -962,12 +1000,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, d_instantiate(path.dentry, inode); inode->i_size = size; - inode->i_nlink = 0; + clear_nlink(inode); - error = -ENFILE; file = alloc_file(&path, FMODE_WRITE | FMODE_READ, &hugetlbfs_file_operations); - if (!file) + if (IS_ERR(file)) goto out_dentry; /* inode is already attached */ return file; @@ -981,18 +1018,25 @@ out_shm_unlock: user_shm_unlock(size, *user); *user = NULL; } - return ERR_PTR(error); + return file; } static int __init init_hugetlbfs_fs(void) { + struct hstate *h; int error; - struct vfsmount *vfsmount; + int i; + + if (!hugepages_supported()) { + pr_info("disabling because there are no supported hugepage sizes\n"); + return -ENOTSUPP; + } error = bdi_init(&hugetlbfs_backing_dev_info); if (error) return error; + error = -ENOMEM; hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", sizeof(struct hugetlbfs_inode_info), 0, 0, init_once); @@ -1003,18 +1047,29 @@ static int __init init_hugetlbfs_fs(void) if (error) goto out; - vfsmount = kern_mount(&hugetlbfs_fs_type); + i = 0; + for_each_hstate(h) { + char buf[50]; + unsigned ps_kb = 1U << (h->order + PAGE_SHIFT - 10); - if (!IS_ERR(vfsmount)) { - hugetlbfs_vfsmount = vfsmount; - return 0; - } + snprintf(buf, sizeof(buf), "pagesize=%uK", ps_kb); + hugetlbfs_vfsmount[i] = kern_mount_data(&hugetlbfs_fs_type, + buf); - error = PTR_ERR(vfsmount); + if (IS_ERR(hugetlbfs_vfsmount[i])) { + pr_err("Cannot mount internal hugetlbfs for " + "page size %uK", ps_kb); + error = PTR_ERR(hugetlbfs_vfsmount[i]); + hugetlbfs_vfsmount[i] = NULL; + } + i++; + } + /* Non default hstates are optional */ + if (!IS_ERR_OR_NULL(hugetlbfs_vfsmount[default_hstate_idx])) + return 0; out: - if (error) - kmem_cache_destroy(hugetlbfs_inode_cachep); + kmem_cache_destroy(hugetlbfs_inode_cachep); out2: bdi_destroy(&hugetlbfs_backing_dev_info); return error; @@ -1022,7 +1077,19 @@ static int __init init_hugetlbfs_fs(void) static void __exit exit_hugetlbfs_fs(void) { + struct hstate *h; + int i; + + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(hugetlbfs_inode_cachep); + i = 0; + for_each_hstate(h) + kern_unmount(hugetlbfs_vfsmount[i++]); unregister_filesystem(&hugetlbfs_fs_type); bdi_destroy(&hugetlbfs_backing_dev_info); } |
