diff options
Diffstat (limited to 'fs')
47 files changed, 708 insertions, 218 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 22f7ccd58d3..0f628041e3f 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -460,8 +460,10 @@ static int __init init_v9fs(void) ret = v9fs_mux_global_init(); if (!ret) - ret = register_filesystem(&v9fs_fs_type); - + return ret; + ret = register_filesystem(&v9fs_fs_type); + if (!ret) + v9fs_mux_global_exit(); return ret; } diff --git a/fs/Kconfig b/fs/Kconfig index d311198bba4..4fd9efac29a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -881,6 +881,19 @@ config TMPFS See <file:Documentation/filesystems/tmpfs.txt> for details. +config TMPFS_POSIX_ACL + bool "Tmpfs POSIX Access Control Lists" + depends on TMPFS + select GENERIC_ACL + help + POSIX Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website <http://acl.bestbits.at/>. + + If you don't know what Access Control Lists are, say N. + config HUGETLBFS bool "HugeTLB file system support" depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN @@ -1940,6 +1953,10 @@ config 9P_FS If unsure, say N. +config GENERIC_ACL + bool + select FS_POSIX_ACL + endmenu menu "Partition Types" diff --git a/fs/Makefile b/fs/Makefile index 89135428a53..46b8cfe497b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ +obj-$(CONFIG_GENERIC_ACL) += generic_acl.o obj-$(CONFIG_QUOTA) += dquot.o obj-$(CONFIG_QFMT_V1) += quota_v1.o diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 101d21b6c03..86463ec9ccb 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -775,6 +775,7 @@ static int afs_proc_cell_servers_release(struct inode *inode, * first item */ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) + __acquires(m->private->sv_lock) { struct list_head *_p; struct afs_cell *cell = m->private; @@ -823,6 +824,7 @@ static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, * clean up after reading from the cells list */ static void afs_proc_cell_servers_stop(struct seq_file *p, void *v) + __releases(p->private->sv_lock) { struct afs_cell *cell = p->private; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 27e17f96cad..563ef9d7da9 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -281,9 +281,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) DPRINTK("mount done status=%d", status); - if (status && dentry->d_inode) - return status; /* Try to get the kernel to invalidate this dentry */ - /* Turn this into a real negative dentry? */ if (status == -ENOENT) { spin_lock(&dentry->d_lock); @@ -359,7 +356,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) * don't try to mount it again. */ spin_lock(&dcache_lock); - if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { + if (!d_mountpoint(dentry) && __simple_empty(dentry)) { spin_unlock(&dcache_lock); status = try_to_fill_dentry(dentry, 0); @@ -540,6 +537,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s return ERR_PTR(-ERESTARTNOINTR); } } + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; + spin_unlock(&dentry->d_lock); } /* diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index f312103434d..517e111bb7e 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -278,6 +278,13 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) return -ENOEXEC; } + /* + * Requires a mmap handler. This prevents people from using a.out + * as part of an exploit attack against /proc-related vulnerabilities. + */ + if (!bprm->file->f_op || !bprm->file->f_op->mmap) + return -ENOEXEC; + fd_offset = N_TXTOFF(ex); /* Check initial limits. This avoids letting people circumvent @@ -476,6 +483,13 @@ static int load_aout_library(struct file *file) goto out; } + /* + * Requires a mmap handler. This prevents people from using a.out + * as part of an exploit attack against /proc-related vulnerabilities. + */ + if (!file->f_op || !file->f_op->mmap) + goto out; + if (N_FLAGS(ex)) goto out; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index dfd8cfb7fb5..6eb48e1446e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1038,10 +1038,8 @@ out_free_interp: out_free_file: sys_close(elf_exec_fileno); out_free_fh: - if (files) { - put_files_struct(current->files); - current->files = files; - } + if (files) + reset_files_struct(current, files); out_free_ph: kfree(elf_phdata); goto out; @@ -1481,20 +1479,19 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) if (signr) { struct elf_thread_status *tmp; - read_lock(&tasklist_lock); + rcu_read_lock(); do_each_thread(g,p) if (current->mm == p->mm && current != p) { tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); if (!tmp) { - read_unlock(&tasklist_lock); + rcu_read_unlock(); goto cleanup; } - INIT_LIST_HEAD(&tmp->list); tmp->thread = p; list_add(&tmp->list, &thread_list); } while_each_thread(g,p); - read_unlock(&tasklist_lock); + rcu_read_unlock(); list_for_each(t, &thread_list) { struct elf_thread_status *tmp; int sz; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 2f336582922..f86d5c9ce5e 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1597,20 +1597,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, if (signr) { struct elf_thread_status *tmp; - read_lock(&tasklist_lock); + rcu_read_lock(); do_each_thread(g,p) if (current->mm == p->mm && current != p) { tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); if (!tmp) { - read_unlock(&tasklist_lock); + rcu_read_unlock(); goto cleanup; } - INIT_LIST_HEAD(&tmp->list); tmp->thread = p; list_add(&tmp->list, &thread_list); } while_each_thread(g,p); - read_unlock(&tasklist_lock); + rcu_read_unlock(); list_for_each(t, &thread_list) { struct elf_thread_status *tmp; int sz; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 66ba137f866..1713c48fef5 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -215,10 +215,8 @@ _error: bprm->interp_flags = 0; bprm->interp_data = 0; _unshare: - if (files) { - put_files_struct(current->files); - current->files = files; - } + if (files) + reset_files_struct(current, files); goto _ret; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 045f98854f1..4346468139e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -543,11 +543,11 @@ static struct kobject *bdev_get_holder(struct block_device *bdev) return kobject_get(bdev->bd_disk->holder_dir); } -static void add_symlink(struct kobject *from, struct kobject *to) +static int add_symlink(struct kobject *from, struct kobject *to) { if (!from || !to) - return; - sysfs_create_link(from, to, kobject_name(to)); + return 0; + return sysfs_create_link(from, to, kobject_name(to)); } static void del_symlink(struct kobject *from, struct kobject *to) @@ -648,30 +648,38 @@ static void free_bd_holder(struct bd_holder *bo) * If there is no matching entry with @bo in @bdev->bd_holder_list, * add @bo to the list, create symlinks. * - * Returns 1 if @bo was added to the list. - * Returns 0 if @bo wasn't used by any reason and should be freed. + * Returns 0 if symlinks are created or already there. + * Returns -ve if something fails and @bo can be freed. */ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) { struct bd_holder *tmp; + int ret; if (!bo) - return 0; + return -EINVAL; list_for_each_entry(tmp, &bdev->bd_holder_list, list) { if (tmp->sdir == bo->sdir) { tmp->count++; + /* We've already done what we need to do here. */ + free_bd_holder(bo); return 0; } } if (!bd_holder_grab_dirs(bdev, bo)) - return 0; + return -EBUSY; - add_symlink(bo->sdir, bo->sdev); - add_symlink(bo->hdir, bo->hdev); - list_add_tail(&bo->list, &bdev->bd_holder_list); - return 1; + ret = add_symlink(bo->sdir, bo->sdev); + if (ret == 0) { + ret = add_symlink(bo->hdir, bo->hdev); + if (ret) + del_symlink(bo->sdir, bo->sdev); + } + if (ret == 0) + list_add_tail(&bo->list, &bdev->bd_holder_list); + return ret; } /** @@ -741,7 +749,9 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); res = bd_claim(bdev, holder); - if (res || !add_bd_holder(bdev, bo)) + if (res == 0) + res = add_bd_holder(bdev, bo); + if (res) free_bd_holder(bo); mutex_unlock(&bdev->bd_mutex); @@ -1021,7 +1031,7 @@ do_open(struct block_device *bdev, struct file *file, unsigned int subclass) rescan_partitions(bdev->bd_disk, bdev); } else { mutex_lock_nested(&bdev->bd_contains->bd_mutex, - BD_MUTEX_PARTITION); + BD_MUTEX_WHOLE); bdev->bd_contains->bd_part_count++; mutex_unlock(&bdev->bd_contains->bd_mutex); } diff --git a/fs/char_dev.c b/fs/char_dev.c index 0009346d827..1f3285affa3 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -128,13 +128,31 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next) if ((*cp)->major > major || - ((*cp)->major == major && (*cp)->baseminor >= baseminor)) + ((*cp)->major == major && + (((*cp)->baseminor >= baseminor) || + ((*cp)->baseminor + (*cp)->minorct > baseminor)))) break; - if (*cp && (*cp)->major == major && - (*cp)->baseminor < baseminor + minorct) { - ret = -EBUSY; - goto out; + + /* Check for overlapping minor ranges. */ + if (*cp && (*cp)->major == major) { + int old_min = (*cp)->baseminor; + int old_max = (*cp)->baseminor + (*cp)->minorct - 1; + int new_min = baseminor; + int new_max = baseminor + minorct - 1; + + /* New driver overlaps from the left. */ + if (new_max >= old_min && new_max <= old_max) { + ret = -EBUSY; + goto out; + } + + /* New driver overlaps from the right. */ + if (new_min <= old_max && new_min >= old_min) { + ret = -EBUSY; + goto out; + } } + cd->next = *cp; *cp = cd; mutex_unlock(&chrdevs_lock); @@ -165,6 +183,15 @@ __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct) return cd; } +/** + * register_chrdev_region() - register a range of device numbers + * @from: the first in the desired range of device numbers; must include + * the major number. + * @count: the number of consecutive device numbers required + * @name: the name of the device or driver. + * + * Return value is zero on success, a negative error code on failure. + */ int register_chrdev_region(dev_t from, unsigned count, const char *name) { struct char_device_struct *cd; @@ -190,6 +217,17 @@ fail: return PTR_ERR(cd); } +/** + * alloc_chrdev_region() - register a range of char device numbers + * @dev: output parameter for first assigned number + * @baseminor: first of the requested range of minor numbers + * @count: the number of minor numbers required + * @name: the name of the associated device or driver + * + * Allocates a range of char device numbers. The major number will be + * chosen dynamically, and returned (along with the first minor number) + * in @dev. Returns zero or a negative error code. + */ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, const char *name) { @@ -259,6 +297,15 @@ out2: return err; } +/** + * unregister_chrdev_region() - return a range of device numbers + * @from: the first in the range of numbers to unregister + * @count: the number of device numbers to unregister + * + * This function will unregister a range of @count device numbers, + * starting with @from. The caller should normally be the one who + * allocated those numbers in the first place... + */ void unregister_chrdev_region(dev_t from, unsigned count) { dev_t to = from + count; @@ -396,6 +443,16 @@ static int exact_lock(dev_t dev, void *data) return cdev_get(p) ? 0 : -1; } +/** + * cdev_add() - add a char device to the system + * @p: the cdev structure for the device + * @dev: the first device number for which this device is responsible + * @count: the number of consecutive minor numbers corresponding to this + * device + * + * cdev_add() adds the device represented by @p to the system, making it + * live immediately. A negative error code is returned on failure. + */ int cdev_add(struct cdev *p, dev_t dev, unsigned count) { p->dev = dev; @@ -408,6 +465,13 @@ static void cdev_unmap(dev_t dev, unsigned count) kobj_unmap(cdev_map, dev, count); } +/** + * cdev_del() - remove a cdev from the system + * @p: the cdev structure to be removed + * + * cdev_del() removes @p from the system, possibly freeing the structure + * itself. + */ void cdev_del(struct cdev *p) { cdev_unmap(p->dev, p->count); @@ -436,6 +500,11 @@ static struct kobj_type ktype_cdev_dynamic = { .release = cdev_dynamic_release, }; +/** + * cdev_alloc() - allocate a cdev structure + * + * Allocates and returns a cdev structure, or NULL on failure. + */ struct cdev *cdev_alloc(void) { struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL); @@ -447,6 +516,14 @@ struct cdev *cdev_alloc(void) return p; } +/** + * cdev_init() - initialize a cdev structure + * @cdev: the structure to initialize + * @fops: the file_operations for this device + * + * Initializes @cdev, remembering @fops, making it ready to add to the + * system with cdev_add(). + */ void cdev_init(struct cdev *cdev, const struct file_operations *fops) { memset(cdev, 0, sizeof *cdev); diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index ad96b699071..a624c3ec818 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -543,8 +543,15 @@ static struct file_system_type cramfs_fs_type = { static int __init init_cramfs_fs(void) { - cramfs_uncompress_init(); - return register_filesystem(&cramfs_fs_type); + int rv; + + rv = cramfs_uncompress_init(); + if (rv < 0) + return rv; + rv = register_filesystem(&cramfs_fs_type); + if (rv < 0) + cramfs_uncompress_exit(); + return rv; } static void __exit exit_cramfs_fs(void) diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c index 8def89f2c43..fc3ccb74626 100644 --- a/fs/cramfs/uncompress.c +++ b/fs/cramfs/uncompress.c @@ -68,11 +68,10 @@ int cramfs_uncompress_init(void) return 0; } -int cramfs_uncompress_exit(void) +void cramfs_uncompress_exit(void) { if (!--initialized) { zlib_inflateEnd(&stream); vfree(stream.workspace); } - return 0; } diff --git a/fs/dquot.c b/fs/dquot.c index 0122a279106..9af789567e5 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -834,6 +834,9 @@ static void print_warning(struct dquot *dquot, const char warntype) if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags))) return; + mutex_lock(&tty_mutex); + if (!current->signal->tty) + goto out_lock; tty_write_message(current->signal->tty, dquot->dq_sb->s_id); if (warntype == ISOFTWARN || warntype == BSOFTWARN) tty_write_message(current->signal->tty, ": warning, "); @@ -861,6 +864,8 @@ static void print_warning(struct dquot *dquot, const char warntype) break; } tty_write_message(current->signal->tty, msg); +out_lock: + mutex_unlock(&tty_mutex); } static inline void flush_warnings(struct dquot **dquots, char *warntype) diff --git a/fs/exec.c b/fs/exec.c index 97df6e0aeae..a8efe35176b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -898,8 +898,7 @@ int flush_old_exec(struct linux_binprm * bprm) return 0; mmap_failed: - put_files_struct(current->files); - current->files = files; + reset_files_struct(current, files); out: return retval; } diff --git a/fs/fat/file.c b/fs/fat/file.c index 1ee25232e6a..d50fc47169c 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -13,6 +13,7 @@ #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/writeback.h> +#include <linux/blkdev.h> int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) @@ -112,6 +113,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, } } +static int fat_file_release(struct inode *inode, struct file *filp) +{ + if ((filp->f_mode & FMODE_WRITE) && + MSDOS_SB(inode->i_sb)->options.flush) { + fat_flush_inodes(inode->i_sb, inode, NULL); + blk_congestion_wait(WRITE, HZ/10); + } + return 0; +} + const struct file_operations fat_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -121,6 +132,7 @@ const struct file_operations fat_file_operations = { .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, + .release = fat_file_release, .ioctl = fat_generic_ioctl, .fsync = file_fsync, .sendfile = generic_file_sendfile, @@ -289,6 +301,7 @@ void fat_truncate(struct inode *inode) lock_kernel(); fat_free(inode, nr_clusters); unlock_kernel(); + fat_flush_inodes(inode->i_sb, inode, NULL); } struct inode_operations fat_file_inode_operations = { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index ab96ae82375..045738032a8 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -24,6 +24,7 @@ #include <linux/vfs.h> #include <linux/parser.h> #include <linux/uio.h> +#include <linux/writeback.h> #include <asm/unaligned.h> #ifndef CONFIG_FAT_DEFAULT_IOCHARSET @@ -853,7 +854,7 @@ enum { Opt_charset, Opt_shortname_lower, Opt_shortname_win95, Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolate, Opt_err, + Opt_obsolate, Opt_flush, Opt_err, }; static match_table_t fat_tokens = { @@ -885,7 +886,8 @@ static match_table_t fat_tokens = { {Opt_obsolate, "cvf_format=%20s"}, {Opt_obsolate, "cvf_options=%100s"}, {Opt_obsolate, "posix"}, - {Opt_err, NULL} + {Opt_flush, "flush"}, + {Opt_err, NULL}, }; static match_table_t msdos_tokens = { {Opt_nodots, "nodots"}, @@ -1026,6 +1028,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, return 0; opts->codepage = option; break; + case Opt_flush: + opts->flush = 1; + break; /* msdos specific */ case Opt_dots: @@ -1425,6 +1430,56 @@ out_fail: EXPORT_SYMBOL_GPL(fat_fill_super); +/* + * helper function for fat_flush_inodes. This writes both the inode + * and the file data blocks, waiting for in flight data blocks before + * the start of the call. It does not wait for any io started + * during the call + */ +static int writeback_inode(struct inode *inode) +{ + + int ret; + struct address_space *mapping = inode->i_mapping; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .nr_to_write = 0, + }; + /* if we used WB_SYNC_ALL, sync_inode waits for the io for the + * inode to finish. So WB_SYNC_NONE is sent down to sync_inode + * and filemap_fdatawrite is used for the data blocks + */ + ret = sync_inode(inode, &wbc); + if (!ret) + ret = filemap_fdatawrite(mapping); + return ret; +} + +/* + * write data and metadata corresponding to i1 and i2. The io is + * started but we do not wait for any of it to finish. + * + * filemap_flush is used for the block device, so if there is a dirty + * page for a block already in flight, we will not wait and start the + * io over again + */ +int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2) +{ + int ret = 0; + if (!MSDOS_SB(sb)->options.flush) + return 0; + if (i1) + ret = writeback_inode(i1); + if (!ret && i2) + ret = writeback_inode(i2); + if (!ret && sb) { + struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; + ret = filemap_flush(mapping); + } + return ret; +} +EXPORT_SYMBOL_GPL(fat_flush_inodes); + static int __init init_fat_fs(void) { int err; diff --git a/fs/file.c b/fs/file.c index 8d3bfca7714..8e81775c5dc 100644 --- a/fs/file.c +++ b/fs/file.c @@ -288,71 +288,63 @@ out: } /* - * Expands the file descriptor table - it will allocate a new fdtable and - * both fd array and fdset. It is expected to be called with the - * files_lock held. + * Expand the file descriptor table. + * This function will allocate a new fdtable and both fd array and fdset, of + * the given size. + * Return <0 error code on error; 1 on successful completion. + * The files->file_lock should be held on entry, and will be held on exit. |