diff options
Diffstat (limited to 'fs')
47 files changed, 708 insertions, 218 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 22f7ccd58d3..0f628041e3f 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -460,8 +460,10 @@ static int __init init_v9fs(void) ret = v9fs_mux_global_init(); if (!ret) - ret = register_filesystem(&v9fs_fs_type); - + return ret; + ret = register_filesystem(&v9fs_fs_type); + if (!ret) + v9fs_mux_global_exit(); return ret; } diff --git a/fs/Kconfig b/fs/Kconfig index d311198bba4..4fd9efac29a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -881,6 +881,19 @@ config TMPFS See <file:Documentation/filesystems/tmpfs.txt> for details. +config TMPFS_POSIX_ACL + bool "Tmpfs POSIX Access Control Lists" + depends on TMPFS + select GENERIC_ACL + help + POSIX Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website <http://acl.bestbits.at/>. + + If you don't know what Access Control Lists are, say N. + config HUGETLBFS bool "HugeTLB file system support" depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN @@ -1940,6 +1953,10 @@ config 9P_FS If unsure, say N. +config GENERIC_ACL + bool + select FS_POSIX_ACL + endmenu menu "Partition Types" diff --git a/fs/Makefile b/fs/Makefile index 89135428a53..46b8cfe497b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ +obj-$(CONFIG_GENERIC_ACL) += generic_acl.o obj-$(CONFIG_QUOTA) += dquot.o obj-$(CONFIG_QFMT_V1) += quota_v1.o diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 101d21b6c03..86463ec9ccb 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -775,6 +775,7 @@ static int afs_proc_cell_servers_release(struct inode *inode, * first item */ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos) + __acquires(m->private->sv_lock) { struct list_head *_p; struct afs_cell *cell = m->private; @@ -823,6 +824,7 @@ static void *afs_proc_cell_servers_next(struct seq_file *p, void *v, * clean up after reading from the cells list */ static void afs_proc_cell_servers_stop(struct seq_file *p, void *v) + __releases(p->private->sv_lock) { struct afs_cell *cell = p->private; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 27e17f96cad..563ef9d7da9 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -281,9 +281,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags) DPRINTK("mount done status=%d", status); - if (status && dentry->d_inode) - return status; /* Try to get the kernel to invalidate this dentry */ - /* Turn this into a real negative dentry? */ if (status == -ENOENT) { spin_lock(&dentry->d_lock); @@ -359,7 +356,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) * don't try to mount it again. */ spin_lock(&dcache_lock); - if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { + if (!d_mountpoint(dentry) && __simple_empty(dentry)) { spin_unlock(&dcache_lock); status = try_to_fill_dentry(dentry, 0); @@ -540,6 +537,9 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s return ERR_PTR(-ERESTARTNOINTR); } } + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; + spin_unlock(&dentry->d_lock); } /* diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index f312103434d..517e111bb7e 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -278,6 +278,13 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) return -ENOEXEC; } + /* + * Requires a mmap handler. This prevents people from using a.out + * as part of an exploit attack against /proc-related vulnerabilities. + */ + if (!bprm->file->f_op || !bprm->file->f_op->mmap) + return -ENOEXEC; + fd_offset = N_TXTOFF(ex); /* Check initial limits. This avoids letting people circumvent @@ -476,6 +483,13 @@ static int load_aout_library(struct file *file) goto out; } + /* + * Requires a mmap handler. This prevents people from using a.out + * as part of an exploit attack against /proc-related vulnerabilities. + */ + if (!file->f_op || !file->f_op->mmap) + goto out; + if (N_FLAGS(ex)) goto out; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index dfd8cfb7fb5..6eb48e1446e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1038,10 +1038,8 @@ out_free_interp: out_free_file: sys_close(elf_exec_fileno); out_free_fh: - if (files) { - put_files_struct(current->files); - current->files = files; - } + if (files) + reset_files_struct(current, files); out_free_ph: kfree(elf_phdata); goto out; @@ -1481,20 +1479,19 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file) if (signr) { struct elf_thread_status *tmp; - read_lock(&tasklist_lock); + rcu_read_lock(); do_each_thread(g,p) if (current->mm == p->mm && current != p) { tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); if (!tmp) { - read_unlock(&tasklist_lock); + rcu_read_unlock(); goto cleanup; } - INIT_LIST_HEAD(&tmp->list); tmp->thread = p; list_add(&tmp->list, &thread_list); } while_each_thread(g,p); - read_unlock(&tasklist_lock); + rcu_read_unlock(); list_for_each(t, &thread_list) { struct elf_thread_status *tmp; int sz; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 2f336582922..f86d5c9ce5e 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1597,20 +1597,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, if (signr) { struct elf_thread_status *tmp; - read_lock(&tasklist_lock); + rcu_read_lock(); do_each_thread(g,p) if (current->mm == p->mm && current != p) { tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); if (!tmp) { - read_unlock(&tasklist_lock); + rcu_read_unlock(); goto cleanup; } - INIT_LIST_HEAD(&tmp->list); tmp->thread = p; list_add(&tmp->list, &thread_list); } while_each_thread(g,p); - read_unlock(&tasklist_lock); + rcu_read_unlock(); list_for_each(t, &thread_list) { struct elf_thread_status *tmp; int sz; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 66ba137f866..1713c48fef5 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -215,10 +215,8 @@ _error: bprm->interp_flags = 0; bprm->interp_data = 0; _unshare: - if (files) { - put_files_struct(current->files); - current->files = files; - } + if (files) + reset_files_struct(current, files); goto _ret; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 045f98854f1..4346468139e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -543,11 +543,11 @@ static struct kobject *bdev_get_holder(struct block_device *bdev) return kobject_get(bdev->bd_disk->holder_dir); } -static void add_symlink(struct kobject *from, struct kobject *to) +static int add_symlink(struct kobject *from, struct kobject *to) { if (!from || !to) - return; - sysfs_create_link(from, to, kobject_name(to)); + return 0; + return sysfs_create_link(from, to, kobject_name(to)); } static void del_symlink(struct kobject *from, struct kobject *to) @@ -648,30 +648,38 @@ static void free_bd_holder(struct bd_holder *bo) * If there is no matching entry with @bo in @bdev->bd_holder_list, * add @bo to the list, create symlinks. * - * Returns 1 if @bo was added to the list. - * Returns 0 if @bo wasn't used by any reason and should be freed. + * Returns 0 if symlinks are created or already there. + * Returns -ve if something fails and @bo can be freed. */ static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo) { struct bd_holder *tmp; + int ret; if (!bo) - return 0; + return -EINVAL; list_for_each_entry(tmp, &bdev->bd_holder_list, list) { if (tmp->sdir == bo->sdir) { tmp->count++; + /* We've already done what we need to do here. */ + free_bd_holder(bo); return 0; } } if (!bd_holder_grab_dirs(bdev, bo)) - return 0; + return -EBUSY; - add_symlink(bo->sdir, bo->sdev); - add_symlink(bo->hdir, bo->hdev); - list_add_tail(&bo->list, &bdev->bd_holder_list); - return 1; + ret = add_symlink(bo->sdir, bo->sdev); + if (ret == 0) { + ret = add_symlink(bo->hdir, bo->hdev); + if (ret) + del_symlink(bo->sdir, bo->sdev); + } + if (ret == 0) + list_add_tail(&bo->list, &bdev->bd_holder_list); + return ret; } /** @@ -741,7 +749,9 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); res = bd_claim(bdev, holder); - if (res || !add_bd_holder(bdev, bo)) + if (res == 0) + res = add_bd_holder(bdev, bo); + if (res) free_bd_holder(bo); mutex_unlock(&bdev->bd_mutex); @@ -1021,7 +1031,7 @@ do_open(struct block_device *bdev, struct file *file, unsigned int subclass) rescan_partitions(bdev->bd_disk, bdev); } else { mutex_lock_nested(&bdev->bd_contains->bd_mutex, - BD_MUTEX_PARTITION); + BD_MUTEX_WHOLE); bdev->bd_contains->bd_part_count++; mutex_unlock(&bdev->bd_contains->bd_mutex); } diff --git a/fs/char_dev.c b/fs/char_dev.c index 0009346d827..1f3285affa3 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -128,13 +128,31 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor, for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next) if ((*cp)->major > major || - ((*cp)->major == major && (*cp)->baseminor >= baseminor)) + ((*cp)->major == major && + (((*cp)->baseminor >= baseminor) || + ((*cp)->baseminor + (*cp)->minorct > baseminor)))) break; - if (*cp && (*cp)->major == major && - (*cp)->baseminor < baseminor + minorct) { - ret = -EBUSY; - goto out; + + /* Check for overlapping minor ranges. */ + if (*cp && (*cp)->major == major) { + int old_min = (*cp)->baseminor; + int old_max = (*cp)->baseminor + (*cp)->minorct - 1; + int new_min = baseminor; + int new_max = baseminor + minorct - 1; + + /* New driver overlaps from the left. */ + if (new_max >= old_min && new_max <= old_max) { + ret = -EBUSY; + goto out; + } + + /* New driver overlaps from the right. */ + if (new_min <= old_max && new_min >= old_min) { + ret = -EBUSY; + goto out; + } } + cd->next = *cp; *cp = cd; mutex_unlock(&chrdevs_lock); @@ -165,6 +183,15 @@ __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct) return cd; } +/** + * register_chrdev_region() - register a range of device numbers + * @from: the first in the desired range of device numbers; must include + * the major number. + * @count: the number of consecutive device numbers required + * @name: the name of the device or driver. + * + * Return value is zero on success, a negative error code on failure. + */ int register_chrdev_region(dev_t from, unsigned count, const char *name) { struct char_device_struct *cd; @@ -190,6 +217,17 @@ fail: return PTR_ERR(cd); } +/** + * alloc_chrdev_region() - register a range of char device numbers + * @dev: output parameter for first assigned number + * @baseminor: first of the requested range of minor numbers + * @count: the number of minor numbers required + * @name: the name of the associated device or driver + * + * Allocates a range of char device numbers. The major number will be + * chosen dynamically, and returned (along with the first minor number) + * in @dev. Returns zero or a negative error code. + */ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, const char *name) { @@ -259,6 +297,15 @@ out2: return err; } +/** + * unregister_chrdev_region() - return a range of device numbers + * @from: the first in the range of numbers to unregister + * @count: the number of device numbers to unregister + * + * This function will unregister a range of @count device numbers, + * starting with @from. The caller should normally be the one who + * allocated those numbers in the first place... + */ void unregister_chrdev_region(dev_t from, unsigned count) { dev_t to = from + count; @@ -396,6 +443,16 @@ static int exact_lock(dev_t dev, void *data) return cdev_get(p) ? 0 : -1; } +/** + * cdev_add() - add a char device to the system + * @p: the cdev structure for the device + * @dev: the first device number for which this device is responsible + * @count: the number of consecutive minor numbers corresponding to this + * device + * + * cdev_add() adds the device represented by @p to the system, making it + * live immediately. A negative error code is returned on failure. + */ int cdev_add(struct cdev *p, dev_t dev, unsigned count) { p->dev = dev; @@ -408,6 +465,13 @@ static void cdev_unmap(dev_t dev, unsigned count) kobj_unmap(cdev_map, dev, count); } +/** + * cdev_del() - remove a cdev from the system + * @p: the cdev structure to be removed + * + * cdev_del() removes @p from the system, possibly freeing the structure + * itself. + */ void cdev_del(struct cdev *p) { cdev_unmap(p->dev, p->count); @@ -436,6 +500,11 @@ static struct kobj_type ktype_cdev_dynamic = { .release = cdev_dynamic_release, }; +/** + * cdev_alloc() - allocate a cdev structure + * + * Allocates and returns a cdev structure, or NULL on failure. + */ struct cdev *cdev_alloc(void) { struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL); @@ -447,6 +516,14 @@ struct cdev *cdev_alloc(void) return p; } +/** + * cdev_init() - initialize a cdev structure + * @cdev: the structure to initialize + * @fops: the file_operations for this device + * + * Initializes @cdev, remembering @fops, making it ready to add to the + * system with cdev_add(). + */ void cdev_init(struct cdev *cdev, const struct file_operations *fops) { memset(cdev, 0, sizeof *cdev); diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index ad96b699071..a624c3ec818 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -543,8 +543,15 @@ static struct file_system_type cramfs_fs_type = { static int __init init_cramfs_fs(void) { - cramfs_uncompress_init(); - return register_filesystem(&cramfs_fs_type); + int rv; + + rv = cramfs_uncompress_init(); + if (rv < 0) + return rv; + rv = register_filesystem(&cramfs_fs_type); + if (rv < 0) + cramfs_uncompress_exit(); + return rv; } static void __exit exit_cramfs_fs(void) diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c index 8def89f2c43..fc3ccb74626 100644 --- a/fs/cramfs/uncompress.c +++ b/fs/cramfs/uncompress.c @@ -68,11 +68,10 @@ int cramfs_uncompress_init(void) return 0; } -int cramfs_uncompress_exit(void) +void cramfs_uncompress_exit(void) { if (!--initialized) { zlib_inflateEnd(&stream); vfree(stream.workspace); } - return 0; } diff --git a/fs/dquot.c b/fs/dquot.c index 0122a279106..9af789567e5 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -834,6 +834,9 @@ static void print_warning(struct dquot *dquot, const char warntype) if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags))) return; + mutex_lock(&tty_mutex); + if (!current->signal->tty) + goto out_lock; tty_write_message(current->signal->tty, dquot->dq_sb->s_id); if (warntype == ISOFTWARN || warntype == BSOFTWARN) tty_write_message(current->signal->tty, ": warning, "); @@ -861,6 +864,8 @@ static void print_warning(struct dquot *dquot, const char warntype) break; } tty_write_message(current->signal->tty, msg); +out_lock: + mutex_unlock(&tty_mutex); } static inline void flush_warnings(struct dquot **dquots, char *warntype) diff --git a/fs/exec.c b/fs/exec.c index 97df6e0aeae..a8efe35176b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -898,8 +898,7 @@ int flush_old_exec(struct linux_binprm * bprm) return 0; mmap_failed: - put_files_struct(current->files); - current->files = files; + reset_files_struct(current, files); out: return retval; } diff --git a/fs/fat/file.c b/fs/fat/file.c index 1ee25232e6a..d50fc47169c 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -13,6 +13,7 @@ #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/writeback.h> +#include <linux/blkdev.h> int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) @@ -112,6 +113,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, } } +static int fat_file_release(struct inode *inode, struct file *filp) +{ + if ((filp->f_mode & FMODE_WRITE) && + MSDOS_SB(inode->i_sb)->options.flush) { + fat_flush_inodes(inode->i_sb, inode, NULL); + blk_congestion_wait(WRITE, HZ/10); + } + return 0; +} + const struct file_operations fat_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -121,6 +132,7 @@ const struct file_operations fat_file_operations = { .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, + .release = fat_file_release, .ioctl = fat_generic_ioctl, .fsync = file_fsync, .sendfile = generic_file_sendfile, @@ -289,6 +301,7 @@ void fat_truncate(struct inode *inode) lock_kernel(); fat_free(inode, nr_clusters); unlock_kernel(); + fat_flush_inodes(inode->i_sb, inode, NULL); } struct inode_operations fat_file_inode_operations = { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index ab96ae82375..045738032a8 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -24,6 +24,7 @@ #include <linux/vfs.h> #include <linux/parser.h> #include <linux/uio.h> +#include <linux/writeback.h> #include <asm/unaligned.h> #ifndef CONFIG_FAT_DEFAULT_IOCHARSET @@ -853,7 +854,7 @@ enum { Opt_charset, Opt_shortname_lower, Opt_shortname_win95, Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolate, Opt_err, + Opt_obsolate, Opt_flush, Opt_err, }; static match_table_t fat_tokens = { @@ -885,7 +886,8 @@ static match_table_t fat_tokens = { {Opt_obsolate, "cvf_format=%20s"}, {Opt_obsolate, "cvf_options=%100s"}, {Opt_obsolate, "posix"}, - {Opt_err, NULL} + {Opt_flush, "flush"}, + {Opt_err, NULL}, }; static match_table_t msdos_tokens = { {Opt_nodots, "nodots"}, @@ -1026,6 +1028,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, return 0; opts->codepage = option; break; + case Opt_flush: + opts->flush = 1; + break; /* msdos specific */ case Opt_dots: @@ -1425,6 +1430,56 @@ out_fail: EXPORT_SYMBOL_GPL(fat_fill_super); +/* + * helper function for fat_flush_inodes. This writes both the inode + * and the file data blocks, waiting for in flight data blocks before + * the start of the call. It does not wait for any io started + * during the call + */ +static int writeback_inode(struct inode *inode) +{ + + int ret; + struct address_space *mapping = inode->i_mapping; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .nr_to_write = 0, + }; + /* if we used WB_SYNC_ALL, sync_inode waits for the io for the + * inode to finish. So WB_SYNC_NONE is sent down to sync_inode + * and filemap_fdatawrite is used for the data blocks + */ + ret = sync_inode(inode, &wbc); + if (!ret) + ret = filemap_fdatawrite(mapping); + return ret; +} + +/* + * write data and metadata corresponding to i1 and i2. The io is + * started but we do not wait for any of it to finish. + * + * filemap_flush is used for the block device, so if there is a dirty + * page for a block already in flight, we will not wait and start the + * io over again + */ +int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2) +{ + int ret = 0; + if (!MSDOS_SB(sb)->options.flush) + return 0; + if (i1) + ret = writeback_inode(i1); + if (!ret && i2) + ret = writeback_inode(i2); + if (!ret && sb) { + struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; + ret = filemap_flush(mapping); + } + return ret; +} +EXPORT_SYMBOL_GPL(fat_flush_inodes); + static int __init init_fat_fs(void) { int err; diff --git a/fs/file.c b/fs/file.c index 8d3bfca7714..8e81775c5dc 100644 --- a/fs/file.c +++ b/fs/file.c @@ -288,71 +288,63 @@ out: } /* - * Expands the file descriptor table - it will allocate a new fdtable and - * both fd array and fdset. It is expected to be called with the - * files_lock held. + * Expand the file descriptor table. + * This function will allocate a new fdtable and both fd array and fdset, of + * the given size. + * Return <0 error code on error; 1 on successful completion. + * The files->file_lock should be held on entry, and will be held on exit. */ static int expand_fdtable(struct files_struct *files, int nr) __releases(files->file_lock) __acquires(files->file_lock) { - int error = 0; - struct fdtable *fdt; - struct fdtable *nfdt = NULL; + struct fdtable *new_fdt, *cur_fdt; spin_unlock(&files->file_lock); - nfdt = alloc_fdtable(nr); - if (!nfdt) { - error = -ENOMEM; - spin_lock(&files->file_lock); - goto out; - } - + new_fdt = alloc_fdtable(nr); spin_lock(&files->file_lock); - fdt = files_fdtable(files); + if (!new_fdt) + return -ENOMEM; /* - * Check again since another task may have expanded the - * fd table while we dropped the lock + * Check again since another task may have expanded the fd table while + * we dropped the lock */ - if (nr >= fdt->max_fds || nr >= fdt->max_fdset) { - copy_fdtable(nfdt, fdt); + cur_fdt = files_fdtable(files); + if (nr >= cur_fdt->max_fds || nr >= cur_fdt->max_fdset) { + /* Continue as planned */ + copy_fdtable(new_fdt, cur_fdt); + rcu_assign_pointer(files->fdt, new_fdt); + free_fdtable(cur_fdt); } else { - /* Somebody expanded while we dropped file_lock */ - spin_unlock(&files->file_lock); - __free_fdtable(nfdt); - spin_lock(&files->file_lock); - goto out; + /* Somebody else expanded, so undo our attempt */ + __free_fdtable(new_fdt); } - rcu_assign_pointer(files->fdt, nfdt); - free_fdtable(fdt); -out: - return error; + return 1; } /* * Expand files. - * Return <0 on error; 0 nothing done; 1 files expanded, we may have blocked. - * Should be called with the files->file_lock spinlock held for write. + * This function will expand the file structures, if the requested size exceeds + * the current capacity and there is room for expansion. + * Return <0 error code on error; 0 when nothing done; 1 when files were + * expanded and execution may have blocked. + * The files->file_lock should be held on entry, and will be held on exit. */ int expand_files(struct files_struct *files, int nr) { - int err, expand = 0; struct fdtable *fdt; fdt = files_fdtable(files); - if (nr >= fdt->max_fdset || nr >= fdt->max_fds) { - if (fdt->max_fdset >= NR_OPEN || - fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) { - err = -EMFILE; - goto out; - } - expand = 1; - if ((err = expand_fdtable(files, nr))) - goto out; - } - err = expand; -out: - return err; + /* Do we need to expand? */ + if (nr < fdt->max_fdset && nr < fdt->max_fds) + return 0; + /* Can we expand? */ + if (fdt->max_fdset >= NR_OPEN || fdt->max_fds >= NR_OPEN || + nr >= NR_OPEN) + return -EMFILE; + + /* All good, so we try */ + return expand_fdtable(files, nr); } static void __devinit fdtable_defer_list_init(int cpu) diff --git a/fs/filesystems.c b/fs/filesystems.c index 9f1072836c8..e3fa77c6ed5 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -69,8 +69,6 @@ int register_filesystem(struct file_system_type * fs) int res = 0; struct file_system_type ** p; - if (!fs) - return -EINVAL; if (fs->next) return -EBUSY; INIT_LIST_HEAD(&fs->fs_supers); diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index b74b791fc23..ac28b0835ff 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -260,12 +260,17 @@ static struct file_system_type vxfs_fs_type = { static int __init vxfs_init(void) { + int rv; + vxfs_inode_cachep = kmem_cache_create("vxfs_inode", sizeof(struct vxfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL, NULL); - if (vxfs_inode_cachep) - return register_filesystem(&vxfs_fs_type); - return -ENOMEM; + if (!vxfs_inode_cachep) + return -ENOMEM; + rv = register_filesystem(&vxfs_fs_type); + if (rv < 0) + kmem_cache_destroy(vxfs_inode_cachep); + return rv; } static void __exit diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 1e2006caf15..4fc557c40cc 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -212,6 +212,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) * Called with fc->lock, unlocks it */ static void request_end(struct fuse_conn *fc, struct fuse_req *req) + __releases(fc->lock) { void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; req->end = NULL; @@ -640,6 +641,7 @@ static void request_wait(struct fuse_conn *fc) */ static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req, const struct iovec *iov, unsigned long nr_segs) + __releases(fc->lock) { struct fuse_copy_state cs; struct fuse_in_header ih; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 409ce6a7cca..f85b2a282f1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -776,7 +776,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO)) return -EACCES; - if (nd && (nd->flags & LOOKUP_ACCESS)) + if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR))) return fuse_access(inode, mask); return 0; } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index cb7cadb0b79..7d0a9aee01f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -251,6 +251,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) memset(&outarg, 0, sizeof(outarg)); req->in.numargs = 0; req->in.h.opcode = FUSE_STATFS; + req->in.h.nodeid = get_node_id(dentry->d_inode); req->out.numargs = 1; req->out.args[0].size = fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg); diff --git a/fs/generic_acl.c b/fs/generic_acl.c new file mode 100644 index 00000000000..9ccb7894717 --- /dev/null +++ b/fs/generic_acl.c @@ -0,0 +1,197 @@ +/* + * fs/generic_acl.c + * + * (C) 2005 Andreas Gruenbacher <agruen@suse.de> + * + * This file is released under the GPL. + */ + +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/generic_acl.h> + +/** + * generic_acl_list - Generic xattr_handler->list() operation + * @ops: Filesystem specific getacl and setacl callbacks + */ +size_t +generic_acl_list(struct inode *inode, struct generic_acl_operations *ops, + int type, char *list, size_t list_size) +{ + struct posix_acl *acl; + const char *name; + size_t size; + + acl = ops->getacl(inode, type); + if (!acl) + return 0; + posix_acl_release(acl); + + switch(type) { + case ACL_TYPE_ACCESS: + name = POSIX_ACL_XATTR_ACCESS; + break; + + case ACL_TYPE_DEFAULT: + name = POSIX_ACL_XATTR_DEFAULT; + break; + + default: + return 0; + } + size = strlen(name) + 1; + if (list && size <= list_size) + memcpy(list, name, size); + return size; +} + +/** + * generic_acl_get - Generic xattr_handler->get() operation + * @ops: Filesystem specific getacl and setacl callbacks + */ +int +generic_acl_get(struct inode *inode, struct generic_acl_operations *ops, + int type, void *buffer, size_t size) +{ + struct posix_acl *acl; + int error; + + acl = ops->getacl(inode, type); + if (!acl) + return -ENODATA; + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +/** + * generic_acl_set - Generic xattr_handler->set() operation + * @ops: Filesystem specific getacl and setacl callbacks + */ +int +generic_acl_set(struct inode *inode, struct generic_acl_operations *ops, + int type, const void *value, size_t size) +{ + struct posix_acl *acl = NULL; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) + return -EPERM; + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (acl) { + mode_t mode; + + error = posix_acl_valid(acl); + if (error) + goto failed; + switch(type) { + case ACL_TYPE_ACCESS: + mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + goto failed; + inode->i_mode = mode; + if (error == 0) { + posix_acl_release(acl); + acl = NULL; + } + break; + + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) { + error = -EINVAL; + goto failed; + } + break; + } + } + ops->setacl(inode, type, acl); + error = 0; +failed: + posix_acl_release(acl); + return error; +} + +/** + * generic_acl_init - Take care of acl inheritance at @inode create time + * @ops: Filesystem specific getacl and setacl callbacks + * + * Files created inside a directory with a default ACL inherit the + * directory's default ACL. + */ +int +generic_acl_init(struct inode *inode, struct inode *dir, + struct generic_acl_operations *ops) +{ + struct posix_acl *acl = NULL; + mode_t mode = inode->i_mode; + int error; + + inode->i_mode = mode & ~current->fs->umask; + if (!S_ISLNK(inode->i_mode)) + acl = ops->getacl(dir, ACL_TYPE_DEFAULT); + if (acl) { + struct posix_acl *clone; + + if (S_ISDIR(inode->i_mode)) { + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto cleanup; + ops->setacl(inode, ACL_TYPE_DEFAULT, clone); + posix_acl_release(clone); + } + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto cleanup; + error = posix_acl_create_masq(clone, &mode); + if (error >= 0) { + inode->i_mode = mode; + if (error > 0) + ops->setacl(inode, ACL_TYPE_ACCESS, clone); + } + posix_acl_release(clone); + } + error = 0; + +cleanup: + posix_acl_release(acl); + return error; +} + +/** + * generic_acl_chmod - change the access acl of @inode upon chmod() + * @ops: FIlesystem specific getacl and setacl callbacks + * + * A chmod also changes the permissions of the owner, group/mask, and + * other ACL entries. + */ +int +generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops) +{ + struct posix_acl *acl, *clone; + int error = 0; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + acl = ops->getacl(inode, ACL_TYPE_ACCESS); + if (acl) { + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + error = posix_acl_chmod_masq(clone, inode->i_mode); + if (!error) + ops->setacl(inode, ACL_TYPE_ACCESS, clone); + posix_acl_release(clone); + } + return error; +} diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index e025a31b4c6..f5b8f329aca 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -229,7 +229,7 @@ static void hugetlbfs_delete_inode(struct inode *inode) clear_inode(inode); } -static void hugetlbfs_forget_inode(struct inode *inode) +static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) { struct super_block *sb = inode->i_sb; diff --git a/fs/inode.c b/fs/inode.c index f5c04dd9ae8..abf77471e6c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -133,7 +133,6 @@ static struct inode *alloc_inode(struct super_block *sb) inode->i_bdev = NULL; inode->i_cdev = NULL; inode->i_rdev = 0; - inode->i_security = NULL; inode->dirtied_when = 0; if (security_inode_alloc(inode)) { if (inode->i_sb->s_op->destroy_inode) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 4527692f432..c34b862cdbf 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -960,30 +960,30 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, goto abort; } - if (nextblk) { - while (b_off >= (offset + sect_size)) { - struct inode *ninode; - - offset += sect_size; - if (nextblk == 0) - goto abort; - ninode = isofs_iget(inode->i_sb, nextblk, nextoff); - if (!ninode) - goto abort; - firstext = ISOFS_I(ninode)->i_first_extent; - sect_size = ISOFS_I(ninode)->i_section_size >> ISOFS_BUFFER_BITS(ninode); - nextblk = ISOFS_I(ninode)->i_next_section_block; - nextoff = ISOFS_I(ninode)->i_next_section_offset; - iput(ninode); - - if (++section > 100) { - printk("isofs_get_blocks: More than 100 file sections ?!?, aborting...\n"); - printk("isofs_get_blocks: block=%ld firstext=%u sect_size=%u " - "nextblk=%lu nextoff=%lu\n", - iblock, firstext, (unsigned) sect_size, - nextblk, nextoff); - goto abort; - } + /* On the last section, nextblk == 0, section size is likely to + * exceed sect_size by a partial block, and access beyond the + * end of the file will reach beyond the section size, too. + */ + while (nextblk && (b_off >= (offset + sect_size))) { + struct inode *ninode; + + offset += sect_size; + ninode = isofs_iget(inode->i_sb, nextblk, nextoff); + if (!ninode) + goto abort; + firstext = ISOFS_I(ninode)->i_first_extent; + sect_size = ISOFS_I(ninode)->i_section_size >> ISOFS_BUFFER_BITS(ninode); + nextblk = ISOFS_I(ninode)->i_next_section_block; + nextoff = ISOFS_I(ninode)->i_next_section_offset; + iput(ninode); + + if (++section > 100) { + printk("isofs_get_blocks: More than 100 file sections ?!?, aborting...\n"); + printk("isofs_get_blocks: block=%ld firstext=%u sect_size=%u " + "nextblk=%lu nextoff=%lu\n", + iblock, firstext, (unsigned) sect_size, + nextblk, nextoff); + goto abort; } } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 2fc66c3e668..7af6099c911 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -715,18 +715,8 @@ journal_t * journal_init_dev(struct block_device *bdev, if (!journal) return NULL; - journal->j_dev = bdev; - journal->j_fs_dev = fs_dev; - journal->j_blk_offset = start; - journal->j_maxlen = len; - journal->j_blocksize = blocksize; - - bh = __getblk(journal->j_dev, start, journal->j_blocksize); - J_ASSERT(bh != NULL); - journal->j_sb_buffer = bh; - journal->j_superblock = (journal_superblock_t *)bh->b_data; - /* journal descriptor can store up to n blocks -bzzz */ + journal->j_blocksize = blocksize; n = journal->j_blocksize / sizeof(journal_block_tag_t); journal->j_wbufsize = n; journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); @@ -736,6 +726,15 @@ journal_t * journal_init_dev(struct block_device *bdev, kfree(journal); journal = NULL; } + journal->j_dev = bdev; + journal->j_fs_dev = fs_dev; + journal->j_blk_offset = start; + journal->j_maxlen = len; + + bh = __getblk(journal->j_dev, start, journal->j_blocksize); + J_ASSERT(bh != NULL); + journal->j_sb_buffer = bh; + journal->j_superblock = (journal_superblock_t *)bh->b_data; return journal; } diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 445eed6ce5d..11563fe2a52 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -46,7 +46,7 @@ static int scan_revoke_records(journal_t *, struct buffer_head *, #ifdef __KERNEL__ /* Release readahead buffers after use */ -void journal_brelse_array(struct buffer_head *b[], int n) +static void journal_brelse_array(struct buffer_head *b[], int n) { while (--n >= 0) brelse (b[n]); diff --git a/fs/libfs.c b/fs/libfs.c index 8db5afb7b0a..3793aaa1457 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -317,17 +317,9 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, int simple_readpage(struct file *file, struct page *page) { - void *kaddr; - - if (PageUptodate(page)) - goto out; - - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr, 0, PAGE_CACHE_SIZE); - kunmap_atomic(kaddr, KM_USER0); + clear_highpage(page); flush_dcache_page(page); SetPageUptodate(page); -out: unlock_page(page); return 0; } diff --git a/fs/mbcache.c b/fs/mbcache.c index e4fde1ab22c..0ff71256e65 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -160,6 +160,7 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) static void __mb_cache_entry_release_unlock(struct mb_cache_entry *ce) + __releases(mb_cache_spinlock) { /* Wake up all processes queuing for this cache entry. */ if (ce->e_queued) diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 9e44158a754..d220165d491 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -280,7 +280,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { struct super_block *sb = dir->i_sb; - struct inode *inode; + struct inode *inode = NULL; struct fat_slot_info sinfo; struct timespec ts; unsigned char msdos_name[MSDOS_NAME]; @@ -316,6 +316,8 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, d_instantiate(dentry, inode); out: unlock_kernel(); + if (!err) + err = fat_flush_inodes(sb, dir, inode); return err; } @@ -348,6 +350,8 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) fat_detach(inode); out: unlock_kernel(); + if (!err) + err = fat_flush_inodes(inode->i_sb, dir, inode); return err; } @@ -401,6 +405,7 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); unlock_kernel(); + fat_flush_inodes(sb, dir, inode); return 0; out_free: @@ -430,6 +435,8 @@ static int msdos_unlink(struct inode *dir, struct dentry *dentry) fat_detach(inode); out: unlock_kernel(); + if (!err) + err = fat_flush_inodes(inode->i_sb, dir, inode); return err; } @@ -635,6 +642,8 @@ static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry, new_dir, new_msdos_name, new_dentry, is_hid); out: unlock_kernel(); + if (!err) + err = fat_flush_inodes(old_dir->i_sb, old_dir, new_dir); return err; } diff --git a/fs/namei.c b/fs/namei.c index 808e4ea2bb9..2892e68d3a8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -518,18 +518,20 @@ static int __emul_lookup_dentry(const char *, struct nameidata *); static __always_inline int walk_init_root(const char *name, struct nameidata *nd) { - read_lock(¤t->fs->lock); - if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { - nd->mnt = mntget(current->fs->altrootmnt); - nd->dentry = dget(current->fs->altroot); - read_unlock(¤t->fs->lock); + struct fs_struct *fs = current->fs; + + read_lock(&fs->lock); + if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) { + nd->mnt = mntget(fs->altrootmnt); + nd->dentry = dget(fs->altroot); + read_unlock(&fs->lock); if (__emul_lookup_dentry(name,nd)) return 0; - read_lock(¤t->fs->lock); + read_lock(&fs->lock); } - nd->mnt = mntget(current->fs->rootmnt); - nd->dentry = dget(current->fs->root); - read_unlock(¤t->fs->lock); + nd->mnt = mntget(fs->rootmnt); + nd->dentry = dget(fs->root); + read_unlock(&fs->lock); return 1; } @@ -724,17 +726,19 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry) static __always_inline void follow_dotdot(struct nameidata *nd) { + struct fs_struct *fs = current->fs; + while(1) { struct vfsmount *parent; struct dentry *old = nd->dentry; - read_lock(¤t->fs->lock); - if (nd->dentry == current->fs->root && - nd->mnt == current->fs->rootmnt) { - read_unlock(¤t->fs->lock); + read_lock(&fs->lock); + if (nd->dentry == fs->root && + nd->mnt == fs->rootmnt) { + read_unlock(&fs->lock); break; } - read_unlock(¤t->fs->lock); + read_unlock(&fs->lock); spin_lock(&dcache_lock); if (nd->dentry != nd->mnt->mnt_root) { nd->dentry = dget(nd->dentry->d_parent); @@ -1042,15 +1046,17 @@ static int __emul_lookup_dentry(const char *name, struct nameidata *nd) struct vfsmount *old_mnt = nd->mnt; struct qstr last = nd->last; int last_type = nd->last_type; + struct fs_struct *fs = current->fs; + /* - * NAME was not found in alternate root or it's a directory. Try to find - * it in the normal root: + * NAME was not found in alternate root or it's a directory. + * Try to find it in the normal root: */ nd->last_type = LAST_ROOT; - read_lock(¤t->fs->lock); - nd->mnt = mntget(current->fs->rootmnt); - nd->dentry = dget(current->fs->root); - read_unlock(¤t->fs->lock); + read_lock(&fs->lock); + nd->mnt = mntget(fs->rootmnt); + nd->dentry = dget(fs->root); + read_unlock(&fs->lock); if (path_walk(name, nd) == 0) { if (nd->dentry->d_inode) { dput(old_dentry); @@ -1074,6 +1080,7 @@ void set_fs_altroot(void) struct vfsmount *mnt = NULL, *oldmnt; struct dentry *dentry = NULL, *olddentry; int err; + struct fs_struct *fs = current->fs; if (!emul) goto set_it; @@ -1083,12 +1090,12 @@ void set_fs_altroot(void) dentry = nd.dentry; } set_it: - write_lock(¤t->fs->lock); - oldmnt = current->fs->altrootmnt; - olddentry = current->fs->altroot; - current->fs->altrootmnt = mnt; - current->fs->altroot = dentry; - write_unlock(¤t->fs->lock); + write_lock(&fs->lock); + oldmnt = fs->altrootmnt; + olddentry = fs->altroot; + fs->altrootmnt = mnt; + fs->altroot = dentry; + write_unlock(&fs->lock); if (olddentry) { dput(olddentry); mntput(oldmnt); @@ -1102,29 +1109,30 @@ static int fastcall do_path_lookup(int dfd, const char *name, int retval = 0; int fput_needed; struct file *file; + struct fs_struct *fs = current->fs; nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags; nd->depth = 0; if (*name=='/') { - read_lock(¤t->fs->lock); - if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { - nd->mnt = mntget(current->fs->altrootmnt); - nd->dentry = dget(current->fs->altroot); - read_unlock(¤t->fs->lock); + read_lock(&fs->lock); + if (fs->altroot && !(nd->flags & LOOKUP_NOALT)) { + nd->mnt = mntget(fs->altrootmnt); + nd->dentry = dget(fs->altroot); + read_unlock(&fs->lock); if (__emul_lookup_dentry(name,nd)) goto out; /* found in altroot */ - read_lock(¤t->fs->lock); + read_lock(&fs->lock); } - nd->mnt = mntget(current->fs->rootmnt); - nd->dentry = dget(current->fs->root); - read_unlock(¤t->fs->lock); + nd->mnt = mntget(fs->rootmnt); + nd->dentry = dget(fs->root); + read_unlock(&fs->lock); } else if (dfd == AT_FDCWD) { - read_lock(¤t->fs->lock); - nd->mnt = mntget(current->fs->pwdmnt); - nd->dentry = dget(current->fs->pwd); - read_unlock(¤t->fs->lock); + read_lock(&fs->lock); + nd->mnt = mntget(fs->pwdmnt); + nd->dentry = dget(fs->pwd); + read_unlock(&fs->lock); } else { struct dentry *dentry; diff --git a/fs/namespace.c b/fs/namespace.c index 36d18085813..6ede3a539ed 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -13,6 +13,7 @@ #include <linux/sched.h> #include <linux/smp_lock.h> #include <linux/init.h> +#include <linux/kernel.h> #include <linux/quotaops.h> #include <linux/acct.h> #include <linux/capability.h> @@ -1813,6 +1814,7 @@ void __init mnt_init(unsigned long mempages) struct list_head *d; unsigned int nr_hash; int i; + int err; init_rwsem(&namespace_sem); @@ -1853,8 +1855,14 @@ void __init mnt_init(unsigned long mempages) d++; i--; } while (i); - sysfs_init(); - subsystem_register(&fs_subsys); + err = sysfs_init(); + if (err) + printk(KERN_WARNING "%s: sysfs_init error: %d\n", + __FUNCTION__, err); + err = subsystem_register(&fs_subsys); + if (err) + printk(KERN_WARNING "%s: subsystem_register error: %d\n", + __FUNCTION__, err); init_rootfs(); init_mount_tree(); } diff --git a/fs/open.c b/fs/open.c index 303f06d2a7b..304c1c7814c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -546,7 +546,8 @@ asmlinkage long sys_chdir(const char __user * filename) struct nameidata nd; int error; - error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); + error = __user_walk(filename, + LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd); if (error) goto out; @@ -1172,6 +1173,7 @@ asmlinkage long sys_close(unsigned int fd) struct file * filp; struct files_struct *files = current->files; struct fdtable *fdt; + int retval; spin_lock(&files->file_lock); fdt = files_fdtable(files); @@ -1184,7 +1186,16 @@ asmlinkage long sys_close(unsigned int fd) FD_CLR(fd, fdt->close_on_exec); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); - return filp_close(filp, files); + retval = filp_close(filp, files); + + /* can't restart close syscall because file table entry was cleared */ + if (unlikely(retval == -ERESTARTSYS || + retval == -ERESTARTNOINTR || + retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK)) + retval = -EINTR; + + return retval; out_unlock: spin_unlock(&files->file_lock); diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c index 8f12587c312..4f8df71e49d 100644 --- a/fs/partitions/msdos.c +++ b/fs/partitions/msdos.c @@ -58,6 +58,31 @@ msdos_magic_present(unsigned char *p) return (p[0] == MSDOS_LABEL_MAGIC1 && p[1] == MSDOS_LABEL_MAGIC2); } +/* Value is EBCDIC 'IBMA' */ +#define AIX_LABEL_MAGIC1 0xC9 +#define AIX_LABEL_MAGIC2 0xC2 +#define AIX_LABEL_MAGIC3 0xD4 +#define AIX_LABEL_MAGIC4 0xC1 +static int aix_magic_present(unsigned char *p, struct block_device *bdev) +{ + Sector sect; + unsigned char *d; + int ret = 0; + + if (p[0] != AIX_LABEL_MAGIC1 && + p[1] != AIX_LABEL_MAGIC2 && + p[2] != AIX_LABEL_MAGIC3 && + p[3] != AIX_LABEL_MAGIC4) + return 0; + d = read_dev_sector(bdev, 7, §); + if (d) { + if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M') + ret = 1; + put_dev_sector(sect); + }; + return ret; +} + /* * Create devices for each logical partition in an extended partition. * The logical partitions form a linked list, with each entry being @@ -393,6 +418,12 @@ int msdos_partition(struct parsed_partitions *state, struct block_device *bdev) return 0; } + if (aix_magic_present(data, bdev)) { + put_dev_sector(sect); + printk( " [AIX]"); + return 0; + } + /* * Now that the 55aa signature is present, this is probably * either the boot sector of a FAT filesystem or a DOS-type diff --git a/fs/proc/array.c b/fs/proc/array.c index 0b615d62a15..c0e554971df 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -347,6 +347,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) sigemptyset(&sigign); sigemptyset(&sigcatch); cutime = cstime = utime = stime = cputime_zero; + + mutex_lock(&tty_mutex); read_lock(&tasklist_lock); if (task->sighand) { spin_lock_irq(&task->sighand->siglock); @@ -388,6 +390,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) } ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; read_unlock(&tasklist_lock); + mutex_unlock(&tty_mutex); if (!whole || num_threads<2) wchan = get_wchan(task); diff --git a/fs/proc/base.c b/fs/proc/base.c index fe8d55fb17c..89c20d9d50b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -797,7 +797,7 @@ out_no_task: static ssize_t mem_write(struct file * file, const char * buf, size_t count, loff_t *ppos) { - int copied = 0; + int copied; char *page; struct task_struct *task = get_proc_task(file->f_dentry->d_inode); unsigned long dst = *ppos; @@ -814,6 +814,7 @@ static ssize_t mem_write(struct file * file, const char * buf, if (!page) goto out; + copied = 0; while (count > 0) { int this_len, retval; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 3ceff385727..1294eda4aca 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -100,7 +100,7 @@ static int notesize(struct memelfnote *en) int sz; sz = sizeof(struct elf_note); - sz += roundup(strlen(en->name), 4); + sz += roundup((strlen(en->name) + 1), 4); sz += roundup(en->datasz, 4); return sz; @@ -116,7 +116,7 @@ static char *storenote(struct memelfnote *men, char *bufp) #define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0) - en.n_namesz = strlen(men->name); + en.n_namesz = strlen(men->name) + 1; en.n_descsz = men->datasz; en.n_type = men->type; diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile index 3a59309f3ca..0eb7ac08048 100644 --- a/fs/reiserfs/Makefile +++ b/fs/reiserfs/Makefile @@ -28,7 +28,7 @@ endif # will work around it. If any other architecture displays this behavior, # add it here. ifeq ($(CONFIG_PPC32),y) -EXTRA_CFLAGS := -O1 +EXTRA_CFLAGS := $(call cc-ifversion, -lt, 0400, -O1) endif TAGS: diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 1627edd5081..1cfbe857ba2 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -130,7 +130,7 @@ static int reiserfs_sync_file(struct file *p_s_filp, reiserfs_write_lock(p_s_inode->i_sb); barrier_done = reiserfs_commit_for_inode(p_s_inode); reiserfs_write_unlock(p_s_inode->i_sb); - if (barrier_done != 1) + if (barrier_done != 1 && reiserfs_barrier_flush(p_s_inode->i_sb)) blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL); if (barrier_done < 0) return barrier_done; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 8810fda0da4..7e5a2f5ebeb 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1127,9 +1127,9 @@ static void init_inode(struct inode *inode, struct path *path) REISERFS_I(inode)->i_prealloc_count = 0; REISERFS_I(inode)->i_trans_id = 0; REISERFS_I(inode)->i_jl = NULL; - REISERFS_I(inode)->i_acl_access = NULL; - REISERFS_I(inode)->i_acl_default = NULL; - init_rwsem(&REISERFS_I(inode)->xattr_sem); + reiserfs_init_acl_access(inode); + reiserfs_init_acl_default(inode); + reiserfs_init_xattr_rwsem(inode); if (stat_data_v1(ih)) { struct stat_data_v1 *sd = @@ -1834,9 +1834,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, REISERFS_I(inode)->i_attrs = REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); - REISERFS_I(inode)->i_acl_access = NULL; - REISERFS_I(inode)->i_acl_default = NULL; - init_rwsem(&REISERFS_I(inode)->xattr_sem); + reiserfs_init_acl_access(inode); + reiserfs_init_acl_default(inode); + reiserfs_init_xattr_rwsem(inode); if (old_format_only(sb)) make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET, @@ -1974,11 +1974,13 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, * iput doesn't deadlock in reiserfs_delete_xattrs. The locking * code really needs to be reworked, but this will take care of it * for now. -jeffm */ +#ifdef CONFIG_REISERFS_FS_POSIX_ACL if (REISERFS_I(dir)->i_acl_default && !IS_ERR(REISERFS_I(dir)->i_acl_default)) { reiserfs_write_unlock_xattrs(dir->i_sb); iput(inode); reiserfs_write_lock_xattrs(dir->i_sb); } else +#endif iput(inode); return err; } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 9b3672d6936..e6b5ccf23f1 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1186,6 +1186,21 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct return NULL; } +static int newer_jl_done(struct reiserfs_journal_cnode *cn) +{ + struct super_block *sb = cn->sb; + b_blocknr_t blocknr = cn->blocknr; + + cn = cn->hprev; + while (cn) { + if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist && + atomic_read(&cn->jlist->j_commit_left) != 0) + return 0; + cn = cn->hprev; + } + return 1; +} + static void remove_journal_hash(struct super_block *, struct reiserfs_journal_cnode **, struct reiserfs_journal_list *, unsigned long, @@ -1604,6 +1619,31 @@ static int flush_journal_list(struct super_block *s, return err; } +static int test_transaction(struct super_block *s, + struct reiserfs_journal_list *jl) +{ + struct reiserfs_journal_cnode *cn; + + if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) + return 1; + + cn = jl->j_realblock; + while (cn) { + /* if the blocknr == 0, this has been cleared from the hash, + ** skip it + */ + if (cn->blocknr == 0) { + goto next; + } + if (cn->bh && !newer_jl_done(cn)) + return 0; + next: + cn = cn->next; + cond_resched(); + } + return 0; +} + static int write_one_transaction(struct super_block *s, struct reiserfs_journal_list *jl, struct buffer_chunk *chunk) @@ -3433,16 +3473,6 @@ static void flush_async_commits(void *p) flush_commit_list(p_s_sb, jl, 1); } unlock_kernel(); - /* - * this is a little racey, but there's no harm in missing - * the filemap_fdata_write - */ - if (!atomic_read(&journal->j_async_throttle) - && !reiserfs_is_journal_aborted(journal)) { - atomic_inc(&journal->j_async_throttle); - filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping); - atomic_dec(&journal->j_async_throttle); - } } /* @@ -3844,7 +3874,9 @@ static void flush_old_journal_lists(struct super_block *s) entry = journal->j_journal_list.next; jl = JOURNAL_LIST_ENTRY(entry); /* this check should always be run, to send old lists to disk */ - if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) { + if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) && + atomic_read(&jl->j_commit_left) == 0 && + test_transaction(s, jl)) { flush_used_journal_lists(s, jl); } else { break; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b40d4d64d59..80fc3b32802 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -510,8 +510,10 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) SLAB_CTOR_CONSTRUCTOR) { INIT_LIST_HEAD(&ei->i_prealloc_list); inode_init_once(&ei->vfs_inode); +#ifdef CONFIG_REISERFS_FS_POSIX_ACL ei->i_acl_access = NULL; ei->i_acl_default = NULL; +#endif } } @@ -560,6 +562,7 @@ static void reiserfs_dirty_inode(struct inode *inode) reiserfs_write_unlock(inode->i_sb); } +#ifdef CONFIG_REISERFS_FS_POSIX_ACL static void reiserfs_clear_inode(struct inode *inode) { struct posix_acl *acl; @@ -574,6 +577,9 @@ static void reiserfs_clear_inode(struct inode *inode) posix_acl_release(acl); REISERFS_I(inode)->i_acl_default = NULL; } +#else +#define reiserfs_clear_inode NULL +#endif #ifdef CONFIG_QUOTA static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, diff --git a/fs/select.c b/fs/select.c index 33b72ba0f86..dcbc1112b7e 100644 --- a/fs/select.c +++ b/fs/select.c @@ -658,8 +658,6 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) unsigned int i; struct poll_list *head; struct poll_list *walk; - struct fdtable *fdt; - int max_fdset; /* Allocate small arguments on the stack to save memory and be faster - use long to make sure the buffer is aligned properly on 64 bit archs to avoid unaligned access */ @@ -667,11 +665,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) struct poll_list *stack_pp = NULL; /* Do a sanity check on nfds ... */ - rcu_read_lock(); - fdt = files_fdtable(current->files); - max_fdset = fdt->max_fdset; - rcu_read_unlock(); - if (nfds > max_fdset && nfds > OPEN_MAX) + if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur) return -EINVAL; poll_initwait(&table); diff --git a/fs/super.c b/fs/super.c index 5c4c94d5495..6987824d0dc 100644 --- a/fs/super.c +++ b/fs/super.c @@ -199,7 +199,7 @@ EXPORT_SYMBOL(deactivate_super); * success, 0 if we had failed (superblock contents was already dead or * dying when grab_super() had been called). */ -static int grab_super(struct super_block *s) +static int grab_super(struct super_block *s) __releases(sb_lock) { s->s_count++; spin_unlock(&sb_lock); diff --git a/fs/udf/super.c b/fs/udf/super.c index 5dd356cbbda..1d3b5d2070e 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1621,6 +1621,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) goto error_out; } + if (UDF_SB_PARTFLAGS(sb, UDF_SB_PARTITION(sb)) & UDF_PART_FLAG_READ_ONLY) + printk("UDF-fs: Partition marked readonly; forcing readonly mount\n"); + sb->s_flags |= MS_RDONLY; + if ( udf_find_fileset(sb, &fileset, &rootdir) ) { printk("UDF-fs: No fileset found\n"); |