From f52111b1546943545e67573c4dde1c7613ca33d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 18:19:16 -0400 Subject: [PATCH] take init_files to fs/file.c Signed-off-by: Al Viro --- arch/alpha/kernel/init_task.c | 1 - arch/arm/kernel/init_task.c | 1 - arch/avr32/kernel/init_task.c | 1 - arch/blackfin/kernel/init_task.c | 1 - arch/cris/kernel/process.c | 1 - arch/frv/kernel/init_task.c | 1 - arch/h8300/kernel/init_task.c | 1 - arch/ia64/kernel/init_task.c | 1 - arch/m32r/kernel/init_task.c | 1 - arch/m68k/kernel/process.c | 1 - arch/m68knommu/kernel/init_task.c | 1 - arch/mips/kernel/init_task.c | 1 - arch/mn10300/kernel/init_task.c | 1 - arch/parisc/kernel/init_task.c | 1 - arch/powerpc/kernel/init_task.c | 1 - arch/s390/kernel/init_task.c | 1 - arch/sh/kernel/init_task.c | 1 - arch/sparc/kernel/init_task.c | 1 - arch/sparc64/kernel/init_task.c | 1 - arch/um/kernel/init_task.c | 1 - arch/v850/kernel/init_task.c | 1 - arch/x86/kernel/init_task.c | 1 - arch/xtensa/kernel/init_task.c | 1 - fs/file.c | 13 +++++++++++++ include/linux/init_task.h | 23 +---------------------- 25 files changed, 14 insertions(+), 45 deletions(-) diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c index 835d09a7b33..1f762189fa6 100644 --- a/arch/alpha/kernel/init_task.c +++ b/arch/alpha/kernel/init_task.c @@ -9,7 +9,6 @@ static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c index bd4ef53bc6b..8b8c9d38a76 100644 --- a/arch/arm/kernel/init_task.c +++ b/arch/arm/kernel/init_task.c @@ -13,7 +13,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c index effcacf9d1a..44058469c6e 100644 --- a/arch/avr32/kernel/init_task.c +++ b/arch/avr32/kernel/init_task.c @@ -14,7 +14,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/blackfin/kernel/init_task.c b/arch/blackfin/kernel/init_task.c index c640154030e..6bdba7b2110 100644 --- a/arch/blackfin/kernel/init_task.c +++ b/arch/blackfin/kernel/init_task.c @@ -34,7 +34,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index ef2db8fd102..5933656db5a 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -38,7 +38,6 @@ */ static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c index 22993932b3f..e2198815b63 100644 --- a/arch/frv/kernel/init_task.c +++ b/arch/frv/kernel/init_task.c @@ -11,7 +11,6 @@ static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c index 19272c2ac56..93a4899e46c 100644 --- a/arch/h8300/kernel/init_task.c +++ b/arch/h8300/kernel/init_task.c @@ -13,7 +13,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index bc8efcad28b..9d7e1c66faf 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -18,7 +18,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c index 9e508fd9d97..0d658dbb676 100644 --- a/arch/m32r/kernel/init_task.c +++ b/arch/m32r/kernel/init_task.c @@ -12,7 +12,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 5de4e4ed76a..7888cdf91f5 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -41,7 +41,6 @@ * setup. */ static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c index 3897043a126..344c01aede0 100644 --- a/arch/m68knommu/kernel/init_task.c +++ b/arch/m68knommu/kernel/init_task.c @@ -13,7 +13,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c index aeda7f58391..d72487ad7c1 100644 --- a/arch/mips/kernel/init_task.c +++ b/arch/mips/kernel/init_task.c @@ -10,7 +10,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c index 39fe6882dd1..af16f6e5c91 100644 --- a/arch/mn10300/kernel/init_task.c +++ b/arch/mn10300/kernel/init_task.c @@ -19,7 +19,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c index 26198a074d6..f5941c08655 100644 --- a/arch/parisc/kernel/init_task.c +++ b/arch/parisc/kernel/init_task.c @@ -35,7 +35,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c index 941043ae040..4c85b8d5647 100644 --- a/arch/powerpc/kernel/init_task.c +++ b/arch/powerpc/kernel/init_task.c @@ -8,7 +8,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c index d494161b05b..7ad00396925 100644 --- a/arch/s390/kernel/init_task.c +++ b/arch/s390/kernel/init_task.c @@ -17,7 +17,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c index f9bcc606127..b151a25cb14 100644 --- a/arch/sh/kernel/init_task.c +++ b/arch/sh/kernel/init_task.c @@ -8,7 +8,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct pt_regs fake_swapper_regs; diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c index d9d4f96360c..8e64ebc445e 100644 --- a/arch/sparc/kernel/init_task.c +++ b/arch/sparc/kernel/init_task.c @@ -9,7 +9,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/sparc64/kernel/init_task.c b/arch/sparc64/kernel/init_task.c index 90007cf88ba..d2b312381c1 100644 --- a/arch/sparc64/kernel/init_task.c +++ b/arch/sparc64/kernel/init_task.c @@ -10,7 +10,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c index dcfceca9505..910eda8fca1 100644 --- a/arch/um/kernel/init_task.c +++ b/arch/um/kernel/init_task.c @@ -12,7 +12,6 @@ static struct fs_struct init_fs = INIT_FS; struct mm_struct init_mm = INIT_MM(init_mm); -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); EXPORT_SYMBOL(init_mm); diff --git a/arch/v850/kernel/init_task.c b/arch/v850/kernel/init_task.c index ed2f93cf7c6..44b274dff33 100644 --- a/arch/v850/kernel/init_task.c +++ b/arch/v850/kernel/init_task.c @@ -21,7 +21,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS (init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM (init_mm); diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 3d01e47777d..a4f93b4120c 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -11,7 +11,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c index 021b4f46ff9..3df469dbe81 100644 --- a/arch/xtensa/kernel/init_task.c +++ b/arch/xtensa/kernel/init_task.c @@ -22,7 +22,6 @@ #include static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/fs/file.c b/fs/file.c index 4c6f0ea12c4..754cd05b06a 100644 --- a/fs/file.c +++ b/fs/file.c @@ -275,3 +275,16 @@ void __init files_defer_init(void) for_each_possible_cpu(i) fdtable_defer_list_init(i); } + +struct files_struct init_files = { + .count = ATOMIC_INIT(1), + .fdt = &init_files.fdtab, + .fdtab = { + .max_fds = NR_OPEN_DEFAULT, + .fd = &init_files.fd_array[0], + .close_on_exec = (fd_set *)&init_files.close_on_exec_init, + .open_fds = (fd_set *)&init_files.open_fds_init, + .rcu = RCU_HEAD_INIT, + }, + .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), +}; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index b24c2875aa0..9927a88674a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -1,7 +1,6 @@ #ifndef _LINUX__INIT_TASK_H #define _LINUX__INIT_TASK_H -#include #include #include #include @@ -12,27 +11,7 @@ #include #include -#define INIT_FDTABLE \ -{ \ - .max_fds = NR_OPEN_DEFAULT, \ - .fd = &init_files.fd_array[0], \ - .close_on_exec = (fd_set *)&init_files.close_on_exec_init, \ - .open_fds = (fd_set *)&init_files.open_fds_init, \ - .rcu = RCU_HEAD_INIT, \ - .next = NULL, \ -} - -#define INIT_FILES \ -{ \ - .count = ATOMIC_INIT(1), \ - .fdt = &init_files.fdtab, \ - .fdtab = INIT_FDTABLE, \ - .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), \ - .next_fd = 0, \ - .close_on_exec_init = { { 0, } }, \ - .open_fds_init = { { 0, } }, \ - .fd_array = { NULL, } \ -} +extern struct files_struct init_files; #define INIT_KIOCTX(name, which_mm) \ { \ -- cgit v1.2.3-18-g5258 From 02afc6267f6d55d47aba9fcafdbd1b7230d2294a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 19:42:56 -0400 Subject: [PATCH] dup_fd() fixes, part 1 Move the sucker to fs/file.c in preparation to the rest Signed-off-by: Al Viro --- fs/file.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fdtable.h | 1 + kernel/fork.c | 130 ------------------------------------------------ 3 files changed, 131 insertions(+), 130 deletions(-) diff --git a/fs/file.c b/fs/file.c index 754cd05b06a..7dbadaaf00f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -261,6 +261,136 @@ int expand_files(struct files_struct *files, int nr) return expand_fdtable(files, nr); } +static int count_open_files(struct fdtable *fdt) +{ + int size = fdt->max_fds; + int i; + + /* Find the last open fd */ + for (i = size/(8*sizeof(long)); i > 0; ) { + if (fdt->open_fds->fds_bits[--i]) + break; + } + i = (i+1) * 8 * sizeof(long); + return i; +} + +static struct files_struct *alloc_files(void) +{ + struct files_struct *newf; + struct fdtable *fdt; + + newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); + if (!newf) + goto out; + + atomic_set(&newf->count, 1); + + spin_lock_init(&newf->file_lock); + newf->next_fd = 0; + fdt = &newf->fdtab; + fdt->max_fds = NR_OPEN_DEFAULT; + fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; + fdt->open_fds = (fd_set *)&newf->open_fds_init; + fdt->fd = &newf->fd_array[0]; + INIT_RCU_HEAD(&fdt->rcu); + fdt->next = NULL; + rcu_assign_pointer(newf->fdt, fdt); +out: + return newf; +} + +/* + * Allocate a new files structure and copy contents from the + * passed in files structure. + * errorp will be valid only when the returned files_struct is NULL. + */ +struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) +{ + struct files_struct *newf; + struct file **old_fds, **new_fds; + int open_files, size, i; + struct fdtable *old_fdt, *new_fdt; + + *errorp = -ENOMEM; + newf = alloc_files(); + if (!newf) + goto out; + + spin_lock(&oldf->file_lock); + old_fdt = files_fdtable(oldf); + new_fdt = files_fdtable(newf); + open_files = count_open_files(old_fdt); + + /* + * Check whether we need to allocate a larger fd array and fd set. + * Note: we're not a clone task, so the open count won't change. + */ + if (open_files > new_fdt->max_fds) { + new_fdt->max_fds = 0; + spin_unlock(&oldf->file_lock); + spin_lock(&newf->file_lock); + *errorp = expand_files(newf, open_files-1); + spin_unlock(&newf->file_lock); + if (*errorp < 0) + goto out_release; + new_fdt = files_fdtable(newf); + /* + * Reacquire the oldf lock and a pointer to its fd table + * who knows it may have a new bigger fd table. We need + * the latest pointer. + */ + spin_lock(&oldf->file_lock); + old_fdt = files_fdtable(oldf); + } + + old_fds = old_fdt->fd; + new_fds = new_fdt->fd; + + memcpy(new_fdt->open_fds->fds_bits, + old_fdt->open_fds->fds_bits, open_files/8); + memcpy(new_fdt->close_on_exec->fds_bits, + old_fdt->close_on_exec->fds_bits, open_files/8); + + for (i = open_files; i != 0; i--) { + struct file *f = *old_fds++; + if (f) { + get_file(f); + } else { + /* + * The fd may be claimed in the fd bitmap but not yet + * instantiated in the files array if a sibling thread + * is partway through open(). So make sure that this + * fd is available to the new process. + */ + FD_CLR(open_files - i, new_fdt->open_fds); + } + rcu_assign_pointer(*new_fds++, f); + } + spin_unlock(&oldf->file_lock); + + /* compute the remainder to be cleared */ + size = (new_fdt->max_fds - open_files) * sizeof(struct file *); + + /* This is long word aligned thus could use a optimized version */ + memset(new_fds, 0, size); + + if (new_fdt->max_fds > open_files) { + int left = (new_fdt->max_fds-open_files)/8; + int start = open_files / (8 * sizeof(unsigned long)); + + memset(&new_fdt->open_fds->fds_bits[start], 0, left); + memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); + } + + return newf; + +out_release: + kmem_cache_free(files_cachep, newf); +out: + return NULL; +} + static void __devinit fdtable_defer_list_init(int cpu) { struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index a118f3c0b24..4aab6f12cfa 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -93,6 +93,7 @@ struct files_struct *get_files_struct(struct task_struct *); void put_files_struct(struct files_struct *fs); void reset_files_struct(struct files_struct *); int unshare_files(struct files_struct **); +struct files_struct *dup_fd(struct files_struct *, int *); extern struct kmem_cache *files_cachep; diff --git a/kernel/fork.c b/kernel/fork.c index 933e60ebcca..19908b26cf8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -660,136 +660,6 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) return 0; } -static int count_open_files(struct fdtable *fdt) -{ - int size = fdt->max_fds; - int i; - - /* Find the last open fd */ - for (i = size/(8*sizeof(long)); i > 0; ) { - if (fdt->open_fds->fds_bits[--i]) - break; - } - i = (i+1) * 8 * sizeof(long); - return i; -} - -static struct files_struct *alloc_files(void) -{ - struct files_struct *newf; - struct fdtable *fdt; - - newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); - if (!newf) - goto out; - - atomic_set(&newf->count, 1); - - spin_lock_init(&newf->file_lock); - newf->next_fd = 0; - fdt = &newf->fdtab; - fdt->max_fds = NR_OPEN_DEFAULT; - fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; - fdt->open_fds = (fd_set *)&newf->open_fds_init; - fdt->fd = &newf->fd_array[0]; - INIT_RCU_HEAD(&fdt->rcu); - fdt->next = NULL; - rcu_assign_pointer(newf->fdt, fdt); -out: - return newf; -} - -/* - * Allocate a new files structure and copy contents from the - * passed in files structure. - * errorp will be valid only when the returned files_struct is NULL. - */ -static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) -{ - struct files_struct *newf; - struct file **old_fds, **new_fds; - int open_files, size, i; - struct fdtable *old_fdt, *new_fdt; - - *errorp = -ENOMEM; - newf = alloc_files(); - if (!newf) - goto out; - - spin_lock(&oldf->file_lock); - old_fdt = files_fdtable(oldf); - new_fdt = files_fdtable(newf); - open_files = count_open_files(old_fdt); - - /* - * Check whether we need to allocate a larger fd array and fd set. - * Note: we're not a clone task, so the open count won't change. - */ - if (open_files > new_fdt->max_fds) { - new_fdt->max_fds = 0; - spin_unlock(&oldf->file_lock); - spin_lock(&newf->file_lock); - *errorp = expand_files(newf, open_files-1); - spin_unlock(&newf->file_lock); - if (*errorp < 0) - goto out_release; - new_fdt = files_fdtable(newf); - /* - * Reacquire the oldf lock and a pointer to its fd table - * who knows it may have a new bigger fd table. We need - * the latest pointer. - */ - spin_lock(&oldf->file_lock); - old_fdt = files_fdtable(oldf); - } - - old_fds = old_fdt->fd; - new_fds = new_fdt->fd; - - memcpy(new_fdt->open_fds->fds_bits, - old_fdt->open_fds->fds_bits, open_files/8); - memcpy(new_fdt->close_on_exec->fds_bits, - old_fdt->close_on_exec->fds_bits, open_files/8); - - for (i = open_files; i != 0; i--) { - struct file *f = *old_fds++; - if (f) { - get_file(f); - } else { - /* - * The fd may be claimed in the fd bitmap but not yet - * instantiated in the files array if a sibling thread - * is partway through open(). So make sure that this - * fd is available to the new process. - */ - FD_CLR(open_files - i, new_fdt->open_fds); - } - rcu_assign_pointer(*new_fds++, f); - } - spin_unlock(&oldf->file_lock); - - /* compute the remainder to be cleared */ - size = (new_fdt->max_fds - open_files) * sizeof(struct file *); - - /* This is long word aligned thus could use a optimized version */ - memset(new_fds, 0, size); - - if (new_fdt->max_fds > open_files) { - int left = (new_fdt->max_fds-open_files)/8; - int start = open_files / (8 * sizeof(unsigned long)); - - memset(&new_fdt->open_fds->fds_bits[start], 0, left); - memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); - } - - return newf; - -out_release: - kmem_cache_free(files_cachep, newf); -out: - return NULL; -} - static int copy_files(unsigned long clone_flags, struct task_struct * tsk) { struct files_struct *oldf, *newf; -- cgit v1.2.3-18-g5258 From 9dec3c4d306b09b31331e475e895bb9674e16d81 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 21:02:45 -0400 Subject: [PATCH] dup_fd() part 2 use alloc_fdtable() instead of expand_files(), get rid of pointless grabbing newf->file_lock, kill magic in copy_fdtable() that used to be there only to skip copying when called from dup_fd(). Signed-off-by: Al Viro --- fs/file.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/file.c b/fs/file.c index 7dbadaaf00f..6491b2b5bc3 100644 --- a/fs/file.c +++ b/fs/file.c @@ -119,8 +119,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) unsigned int cpy, set; BUG_ON(nfdt->max_fds < ofdt->max_fds); - if (ofdt->max_fds == 0) - return; cpy = ofdt->max_fds * sizeof(struct file *); set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); @@ -327,14 +325,24 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) * Note: we're not a clone task, so the open count won't change. */ if (open_files > new_fdt->max_fds) { - new_fdt->max_fds = 0; spin_unlock(&oldf->file_lock); - spin_lock(&newf->file_lock); - *errorp = expand_files(newf, open_files-1); - spin_unlock(&newf->file_lock); - if (*errorp < 0) + + new_fdt = alloc_fdtable(open_files - 1); + if (!new_fdt) { + *errorp = -ENOMEM; + goto out_release; + } + + /* beyond sysctl_nr_open; nothing to do */ + if (unlikely(new_fdt->max_fds < open_files)) { + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); + *errorp = -EMFILE; goto out_release; - new_fdt = files_fdtable(newf); + } + rcu_assign_pointer(files->fdt, new_fdt); + /* * Reacquire the oldf lock and a pointer to its fd table * who knows it may have a new bigger fd table. We need -- cgit v1.2.3-18-g5258 From afbec7fff4928c273a1f1bb14dfdfdf62688a193 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 21:11:17 -0400 Subject: [PATCH] dup_fd() - part 3 merge alloc_files() into dup_fd(), leave setting newf->fdt until the end Signed-off-by: Al Viro --- fs/file.c | 43 +++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/fs/file.c b/fs/file.c index 6491b2b5bc3..689d2b6947e 100644 --- a/fs/file.c +++ b/fs/file.c @@ -273,31 +273,6 @@ static int count_open_files(struct fdtable *fdt) return i; } -static struct files_struct *alloc_files(void) -{ - struct files_struct *newf; - struct fdtable *fdt; - - newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); - if (!newf) - goto out; - - atomic_set(&newf->count, 1); - - spin_lock_init(&newf->file_lock); - newf->next_fd = 0; - fdt = &newf->fdtab; - fdt->max_fds = NR_OPEN_DEFAULT; - fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; - fdt->open_fds = (fd_set *)&newf->open_fds_init; - fdt->fd = &newf->fd_array[0]; - INIT_RCU_HEAD(&fdt->rcu); - fdt->next = NULL; - rcu_assign_pointer(newf->fdt, fdt); -out: - return newf; -} - /* * Allocate a new files structure and copy contents from the * passed in files structure. @@ -311,13 +286,24 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) struct fdtable *old_fdt, *new_fdt; *errorp = -ENOMEM; - newf = alloc_files(); + newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); if (!newf) goto out; + atomic_set(&newf->count, 1); + + spin_lock_init(&newf->file_lock); + newf->next_fd = 0; + new_fdt = &newf->fdtab; + new_fdt->max_fds = NR_OPEN_DEFAULT; + new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; + new_fdt->open_fds = (fd_set *)&newf->open_fds_init; + new_fdt->fd = &newf->fd_array[0]; + INIT_RCU_HEAD(&new_fdt->rcu); + new_fdt->next = NULL; + spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); - new_fdt = files_fdtable(newf); open_files = count_open_files(old_fdt); /* @@ -341,7 +327,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) *errorp = -EMFILE; goto out_release; } - rcu_assign_pointer(files->fdt, new_fdt); /* * Reacquire the oldf lock and a pointer to its fd table @@ -391,6 +376,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); } + rcu_assign_pointer(newf->fdt, new_fdt); + return newf; out_release: -- cgit v1.2.3-18-g5258 From adbecb128cd2cc5d14b0ebef6d020ced0efd0ec6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 21:19:42 -0400 Subject: [PATCH] dup_fd() part 4 - race fix Parent _can_ be a clone task, contrary to the comment. Moreover, more files could be opened while we allocate a copy, in which case we end up copying only part into new descriptor table. Since what we get _is_ affected by all changes in the old range, we can get rather weird effects - e.g. dup2(0, 1024); close(0); in parallel with fork() resulting in child that sees the effect of close(), but not that of dup2() done just before that close(). What we need is to recalculate the open_count after having reacquired ->file_lock and if external fdtable we'd just allocated is too small for it, free the sucker and redo allocation. Signed-off-by: Al Viro --- fs/file.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/file.c b/fs/file.c index 689d2b6947e..0f705c7cfef 100644 --- a/fs/file.c +++ b/fs/file.c @@ -308,11 +308,16 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) /* * Check whether we need to allocate a larger fd array and fd set. - * Note: we're not a clone task, so the open count won't change. */ - if (open_files > new_fdt->max_fds) { + while (unlikely(open_files > new_fdt->max_fds)) { spin_unlock(&oldf->file_lock); + if (new_fdt != &newf->fdtab) { + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); + } + new_fdt = alloc_fdtable(open_files - 1); if (!new_fdt) { *errorp = -ENOMEM; @@ -335,6 +340,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) */ spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); + open_files = count_open_files(old_fdt); } old_fds = old_fdt->fd; -- cgit v1.2.3-18-g5258 From eceea0b3df05ed262ae32e0c6340cc7a3626632d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 May 2008 10:08:32 -0400 Subject: [PATCH] avoid multiplication overflows and signedness issues for max_fds Limit sysctl_nr_open - we don't want ->max_fds to exceed MAX_INT and we don't want size calculation for ->fd[] to overflow. Signed-off-by: Al Viro --- fs/file.c | 4 ++++ kernel/sysctl.c | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/file.c b/fs/file.c index 0f705c7cfef..7b3887e054d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -26,6 +26,8 @@ struct fdtable_defer { }; int sysctl_nr_open __read_mostly = 1024*1024; +int sysctl_nr_open_min = BITS_PER_LONG; +int sysctl_nr_open_max = 1024 * 1024; /* raised later */ /* * We use this list to defer free fdtables that have vmalloced @@ -405,6 +407,8 @@ void __init files_defer_init(void) int i; for_each_possible_cpu(i) fdtable_defer_list_init(i); + sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & + -BITS_PER_LONG; } struct files_struct init_files = { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d7ffdc59816..29116652dca 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -81,6 +81,7 @@ extern int compat_log; extern int maps_protect; extern int sysctl_stat_interval; extern int latencytop_enabled; +extern int sysctl_nr_open_min, sysctl_nr_open_max; /* Constants used for minimum and maximum */ #if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) @@ -1190,7 +1191,9 @@ static struct ctl_table fs_table[] = { .data = &sysctl_nr_open, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &sysctl_nr_open_min, + .extra2 = &sysctl_nr_open_max, }, { .ctl_name = FS_DENTRY, -- cgit v1.2.3-18-g5258 From 5f719558edf9c84bfbb1f7ad37e84c483282d09f Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 6 May 2008 12:45:35 +0800 Subject: [Patch] fs/binfmt_elf.c: fix a wrong free In kmalloc failing path, we shouldn't free pointers in 'info', because the struct 'info' is uninitilized when kmalloc is called. And when kmalloc returns NULL, it's needless to kfree it. Signed-off-by: WANG Cong Cc: Alexander Viro Reviewed-by: Pekka Enberg -- Signed-off-by: Al Viro --- fs/binfmt_elf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b25707fee2c..bd08332079c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1900,7 +1900,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un /* alloc memory for large data structures: too large to be on stack */ elf = kmalloc(sizeof(*elf), GFP_KERNEL); if (!elf) - goto cleanup; + goto out; segs = current->mm->map_count; #ifdef ELF_CORE_EXTRA_PHDRS @@ -2034,8 +2034,9 @@ end_coredump: set_fs(fs); cleanup: - kfree(elf); free_note_info(&info); + kfree(elf); +out: return has_dumped; } -- cgit v1.2.3-18-g5258 From 08a6fac1c63233c87eec129938022f1a9a4d51f6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 May 2008 16:38:25 -0400 Subject: [PATCH] get rid of leak in compat_execve() Even though copy_compat_strings() doesn't cache the pages, copy_strings_kernel() and stuff indirectly called by e.g. ->load_binary() is doing that, so we need to drop the cache contents in the end. [found by WANG Cong ] Signed-off-by: Al Viro --- fs/compat.c | 4 ++-- fs/exec.c | 12 ++++++++---- include/linux/binfmts.h | 1 + 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/compat.c b/fs/compat.c index 332a869d2c5..ed43e17a5dc 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1405,7 +1405,7 @@ int compat_do_execve(char * filename, /* execve success */ security_bprm_free(bprm); acct_update_integrals(current); - kfree(bprm); + free_bprm(bprm); return retval; } @@ -1424,7 +1424,7 @@ out_file: } out_kfree: - kfree(bprm); + free_bprm(bprm); out_ret: return retval; diff --git a/fs/exec.c b/fs/exec.c index 1f8a24aa1f8..3c2ba7ce11d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1251,6 +1251,12 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) EXPORT_SYMBOL(search_binary_handler); +void free_bprm(struct linux_binprm *bprm) +{ + free_arg_pages(bprm); + kfree(bprm); +} + /* * sys_execve() executes a new program. */ @@ -1320,17 +1326,15 @@ int do_execve(char * filename, retval = search_binary_handler(bprm,regs); if (retval >= 0) { /* execve success */ - free_arg_pages(bprm); security_bprm_free(bprm); acct_update_integrals(current); - kfree(bprm); + free_bprm(bprm); if (displaced) put_files_struct(displaced); return retval; } out: - free_arg_pages(bprm); if (bprm->security) security_bprm_free(bprm); @@ -1344,7 +1348,7 @@ out_file: fput(bprm->file); } out_kfree: - kfree(bprm); + free_bprm(bprm); out_files: if (displaced) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index b512e48f6d8..ee0ed48e834 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -99,6 +99,7 @@ extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); extern void compute_creds(struct linux_binprm *binprm); extern int do_coredump(long signr, int exit_code, struct pt_regs * regs); extern int set_binfmt(struct linux_binfmt *new); +extern void free_bprm(struct linux_binprm *); #endif /* __KERNEL__ */ #endif /* _LINUX_BINFMTS_H */ -- cgit v1.2.3-18-g5258 From 23c4971e3d97de4e1b7961ca6eacee35aa15ce5f Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 8 May 2008 21:52:33 +0800 Subject: [Patch] fs/binfmt_elf.c: fix wrong return values create_elf_tables() returns 0 on success. But when strnlen_user() "fails", it returns 0 directly. So this is wrong. Signed-off-by: WANG Cong Cc: Alexander Viro Signed-off-by: Al Viro --- fs/binfmt_elf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index bd08332079c..0fa95b198e6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -256,7 +256,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, return -EFAULT; len = strnlen_user((void __user *)p, MAX_ARG_STRLEN); if (!len || len > MAX_ARG_STRLEN) - return 0; + return -EINVAL; p += len; } if (__put_user(0, argv)) @@ -268,7 +268,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, return -EFAULT; len = strnlen_user((void __user *)p, MAX_ARG_STRLEN); if (!len || len > MAX_ARG_STRLEN) - return 0; + return -EINVAL; p += len; } if (__put_user(0, envp)) -- cgit v1.2.3-18-g5258 From e9baf6e59842285bcf9570f5094e4c27674a0f7c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 15 May 2008 04:49:12 -0400 Subject: [PATCH] return to old errno choice in mkdir() et.al. In case when both EEXIST and EROFS would apply we used to return the former in mkdir(2) and friends. Lest anyone suspects us of being consistent, in the same situation knfsd gave clients nfs_erofs... ro-bind series had switched the syscall side of things to returning -EROFS and immediately broke an application - namely, mkdir -p. Patch restores the original behaviour... Signed-off-by: Al Viro --- fs/namei.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 32fd9655485..c7e43536c49 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2003,18 +2003,22 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) if (IS_ERR(dentry)) goto fail; + if (dentry->d_inode) + goto eexist; /* * Special case - lookup gave negative, but... we had foo/bar/ * From the vfs_mknod() POV we just have a negative dentry - * all is fine. Let's be bastards - you had / on the end, you've * been asking for (non-existent) directory. -ENOENT for you. */ - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) - goto enoent; + if (unlikely(!is_dir && nd->last.name[nd->last.len])) { + dput(dentry); + dentry = ERR_PTR(-ENOENT); + } return dentry; -enoent: +eexist: dput(dentry); - dentry = ERR_PTR(-ENOENT); + dentry = ERR_PTR(-EEXIST); fail: return dentry; } -- cgit v1.2.3-18-g5258