Diffstat (limited to 'fs/file.c')
| -rw-r--r-- | fs/file.c | 237 |
1 file changed, 70 insertions, 167 deletions
diff --git a/fs/file.c b/fs/file.c
index 7cb71b99260..66923fe3176 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -23,23 +23,12 @@
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
 
-struct fdtable_defer {
-	spinlock_t lock;
-	struct work_struct wq;
-	struct fdtable *next;
-};
-
 int sysctl_nr_open __read_mostly = 1024*1024;
 int sysctl_nr_open_min = BITS_PER_LONG;
-int sysctl_nr_open_max = 1024 * 1024; /* raised later */
-
-/*
- * We use this list to defer free fdtables that have vmalloced
- * sets/arrays. By keeping a per-cpu list, we avoid having to embed
- * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
- * this per-task structure.
- */
-static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
+/* our max() is unusable in constant expressions ;-/ */
+#define __const_max(x, y) ((x) < (y) ? (x) : (y))
+int sysctl_nr_open_max = __const_max(INT_MAX, ~(size_t)0/sizeof(void *)) &
+			 -BITS_PER_LONG;
 
 static void *alloc_fdmem(size_t size)
 {
@@ -48,65 +37,23 @@ static void *alloc_fdmem(size_t size)
 	 * vmalloc() if the allocation size will be considered "large" by the VM.
 	 */
 	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-		void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
+		void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY);
 		if (data != NULL)
 			return data;
 	}
 	return vmalloc(size);
 }
 
-static void free_fdmem(void *ptr)
-{
-	is_vmalloc_addr(ptr) ? vfree(ptr) : kfree(ptr);
-}
-
 static void __free_fdtable(struct fdtable *fdt)
 {
-	free_fdmem(fdt->fd);
-	free_fdmem(fdt->open_fds);
+	kvfree(fdt->fd);
+	kvfree(fdt->open_fds);
 	kfree(fdt);
 }
 
-static void free_fdtable_work(struct work_struct *work)
-{
-	struct fdtable_defer *f =
-		container_of(work, struct fdtable_defer, wq);
-	struct fdtable *fdt;
-
-	spin_lock_bh(&f->lock);
-	fdt = f->next;
-	f->next = NULL;
-	spin_unlock_bh(&f->lock);
-	while(fdt) {
-		struct fdtable *next = fdt->next;
-
-		__free_fdtable(fdt);
-		fdt = next;
-	}
-}
-
 static void free_fdtable_rcu(struct rcu_head *rcu)
 {
-	struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
-	struct fdtable_defer *fddef;
-
-	BUG_ON(!fdt);
-	BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT);
-
-	if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) {
-		kfree(fdt->fd);
-		kfree(fdt->open_fds);
-		kfree(fdt);
-	} else {
-		fddef = &get_cpu_var(fdtable_defer_list);
-		spin_lock(&fddef->lock);
-		fdt->next = fddef->next;
-		fddef->next = fdt;
-		/* vmallocs are handled from the workqueue context */
-		schedule_work(&fddef->wq);
-		spin_unlock(&fddef->lock);
-		put_cpu_var(fdtable_defer_list);
-	}
+	__free_fdtable(container_of(rcu, struct fdtable, rcu));
 }
 
 /*
@@ -174,12 +121,11 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
 	fdt->open_fds = data;
 	data += nr / BITS_PER_BYTE;
 	fdt->close_on_exec = data;
-	fdt->next = NULL;
 
 	return fdt;
 
 out_arr:
-	free_fdmem(fdt->fd);
+	kvfree(fdt->fd);
 out_fdt:
 	kfree(fdt);
 out:
@@ -221,7 +167,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
 		/* Continue as planned */
 		copy_fdtable(new_fdt, cur_fdt);
 		rcu_assign_pointer(files->fdt, new_fdt);
-		if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
+		if (cur_fdt != &files->fdtab)
 			call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
 	} else {
 		/* Somebody else expanded, so undo our attempt */
@@ -316,7 +262,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 	new_fdt->close_on_exec = newf->close_on_exec_init;
 	new_fdt->open_fds = newf->open_fds_init;
 	new_fdt->fd = &newf->fd_array[0];
-	new_fdt->next = NULL;
 
 	spin_lock(&oldf->file_lock);
 	old_fdt = files_fdtable(oldf);
@@ -401,21 +346,16 @@ out:
 	return NULL;
 }
 
-static void close_files(struct files_struct * files)
+static struct fdtable *close_files(struct files_struct * files)
 {
-	int i, j;
-	struct fdtable *fdt;
-
-	j = 0;
-
 	/*
 	 * It is safe to dereference the fd table without RCU or
 	 * ->file_lock because this is the last reference to the
-	 * files structure.  But use RCU to shut RCU-lockdep up.
+	 * files structure.
 	 */
-	rcu_read_lock();
-	fdt = files_fdtable(files);
-	rcu_read_unlock();
+	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+	int i, j = 0;
+
 	for (;;) {
 		unsigned long set;
 		i = j * BITS_PER_LONG;
@@ -434,6 +374,8 @@ static void close_files(struct files_struct * files)
 			set >>= 1;
 		}
 	}
+
+	return fdt;
 }
 
 struct files_struct *get_files_struct(struct task_struct *task)
@@ -451,14 +393,9 @@ struct files_struct *get_files_struct(struct task_struct *task)
 
 void put_files_struct(struct files_struct *files)
 {
-	struct fdtable *fdt;
-
 	if (atomic_dec_and_test(&files->count)) {
-		close_files(files);
-		/* not really needed, since nobody can see us */
-		rcu_read_lock();
-		fdt = files_fdtable(files);
-		rcu_read_unlock();
+		struct fdtable *fdt = close_files(files);
+
 		/* free the arrays if they are not embedded */
 		if (fdt != &files->fdtab)
 			__free_fdtable(fdt);
@@ -490,23 +427,6 @@ void exit_files(struct task_struct *tsk)
 	}
 }
 
-static void __devinit fdtable_defer_list_init(int cpu)
-{
-	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
-	spin_lock_init(&fddef->lock);
-	INIT_WORK(&fddef->wq, free_fdtable_work);
-	fddef->next = NULL;
-}
-
-void __init files_defer_init(void)
-{
-	int i;
-	for_each_possible_cpu(i)
-		fdtable_defer_list_init(i);
-	sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
-			     -BITS_PER_LONG;
-}
-
 struct files_struct init_files = {
 	.count		= ATOMIC_INIT(1),
 	.fdt		= &init_files.fdtab,
@@ -516,15 +436,9 @@ struct files_struct init_files = {
 		.close_on_exec	= init_files.close_on_exec_init,
 		.open_fds	= init_files.open_fds_init,
 	},
-	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock),
+	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
 };
 
-void daemonize_descriptors(void)
-{
-	atomic_inc(&init_files.count);
-	reset_files_struct(&init_files);
-}
-
 /*
  * allocate a file descriptor, mark it busy.
  */
@@ -575,7 +489,7 @@ repeat:
 	error = fd;
 #if 1
 	/* Sanity check */
-	if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {
+	if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
 		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
 		rcu_assign_pointer(fdt->fd[fd], NULL);
 	}
@@ -715,16 +629,16 @@ void do_close_on_exec(struct files_struct *files)
 	spin_unlock(&files->file_lock);
 }
 
-struct file *fget(unsigned int fd)
+static struct file *__fget(unsigned int fd, fmode_t mask)
 {
-	struct file *file;
 	struct files_struct *files = current->files;
+	struct file *file;
 
 	rcu_read_lock();
 	file = fcheck_files(files, fd);
 	if (file) {
 		/* File object ref couldn't be taken */
-		if (file->f_mode & FMODE_PATH ||
+		if ((file->f_mode & mask) ||
 		    !atomic_long_inc_not_zero(&file->f_count))
 			file = NULL;
 	}
@@ -733,25 +647,16 @@ struct file *fget(unsigned int fd)
 	return file;
 }
 
+struct file *fget(unsigned int fd)
+{
+	return __fget(fd, FMODE_PATH);
+}
 EXPORT_SYMBOL(fget);
 
 struct file *fget_raw(unsigned int fd)
 {
-	struct file *file;
-	struct files_struct *files = current->files;
-
-	rcu_read_lock();
-	file = fcheck_files(files, fd);
-	if (file) {
-		/* File object ref couldn't be taken */
-		if (!atomic_long_inc_not_zero(&file->f_count))
-			file = NULL;
-	}
-	rcu_read_unlock();
-
-	return file;
+	return __fget(fd, 0);
 }
-
 EXPORT_SYMBOL(fget_raw);
 
 /*
@@ -770,58 +675,54 @@ EXPORT_SYMBOL(fget_raw);
  * The fput_needed flag returned by fget_light should be passed to the
  * corresponding fput_light.
  */
-struct file *fget_light(unsigned int fd, int *fput_needed)
+static unsigned long __fget_light(unsigned int fd, fmode_t mask)
 {
-	struct file *file;
 	struct files_struct *files = current->files;
+	struct file *file;
 
-	*fput_needed = 0;
 	if (atomic_read(&files->count) == 1) {
-		file = fcheck_files(files, fd);
-		if (file && (file->f_mode & FMODE_PATH))
-			file = NULL;
+		file = __fcheck_files(files, fd);
+		if (!file || unlikely(file->f_mode & mask))
+			return 0;
+		return (unsigned long)file;
 	} else {
-		rcu_read_lock();
-		file = fcheck_files(files, fd);
-		if (file) {
-			if (!(file->f_mode & FMODE_PATH) &&
-			    atomic_long_inc_not_zero(&file->f_count))
-				*fput_needed = 1;
-			else
-				/* Didn't get the reference, someone's freed */
-				file = NULL;
-		}
-		rcu_read_unlock();
+		file = __fget(fd, mask);
+		if (!file)
+			return 0;
+		return FDPUT_FPUT | (unsigned long)file;
 	}
+}
+unsigned long __fdget(unsigned int fd)
+{
+	return __fget_light(fd, FMODE_PATH);
+}
+EXPORT_SYMBOL(__fdget);
 
-	return file;
+unsigned long __fdget_raw(unsigned int fd)
+{
+	return __fget_light(fd, 0);
 }
-EXPORT_SYMBOL(fget_light);
 
-struct file *fget_raw_light(unsigned int fd, int *fput_needed)
+unsigned long __fdget_pos(unsigned int fd)
 {
-	struct file *file;
-	struct files_struct *files = current->files;
+	unsigned long v = __fdget(fd);
+	struct file *file = (struct file *)(v & ~3);
 
-	*fput_needed = 0;
-	if (atomic_read(&files->count) == 1) {
-		file = fcheck_files(files, fd);
-	} else {
-		rcu_read_lock();
-		file = fcheck_files(files, fd);
-		if (file) {
-			if (atomic_long_inc_not_zero(&file->f_count))
-				*fput_needed = 1;
-			else
-				/* Didn't get the reference, someone's freed */
-				file = NULL;
+	if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
+		if (file_count(file) > 1) {
+			v |= FDPUT_POS_UNLOCK;
+			mutex_lock(&file->f_pos_lock);
 		}
-		rcu_read_unlock();
 	}
-
-	return file;
+	return v;
 }
 
+/*
+ * We only lock f_pos if we have threads or if the file might be
+ * shared with another process. In both cases we'll have an elevated
+ * file count (done either by fdget() or by fork()).
+ */
+
 void set_close_on_exec(unsigned int fd, int flag)
 {
 	struct files_struct *files = current->files;
@@ -994,16 +895,18 @@ int iterate_fd(struct files_struct *files, unsigned n,
 		const void *p)
 {
 	struct fdtable *fdt;
-	struct file *file;
 	int res = 0;
 	if (!files)
 		return 0;
 	spin_lock(&files->file_lock);
-	fdt = files_fdtable(files);
-	while (!res && n < fdt->max_fds) {
-		file = rcu_dereference_check_fdtable(files, fdt->fd[n++]);
-		if (file)
-			res = f(p, file, n);
+	for (fdt = files_fdtable(files); n < fdt->max_fds; n++) {
+		struct file *file;
+		file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
+		if (!file)
+			continue;
+		res = f(p, file, n);
+		if (res)
+			break;
	}
 	spin_unlock(&files->file_lock);
 	return res;
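The __const_max hunk at the top of the diff is worth unpacking: the kernel's max() expands to a statement expression (hence the ";-/" in the comment), which is not a constant expression and so cannot appear in a file-scope initializer. Open-coding the ternary lets sysctl_nr_open_max be computed statically, which is what allows files_defer_init(), where the same value used to be computed at boot with min(), to be deleted. A minimal userspace sketch of the same arithmetic (the stdio harness, the BITS_PER_LONG definition, and the variable name are assumptions of this example, not kernel code):

#include <stdio.h>
#include <limits.h>
#include <stddef.h>

/* stand-in for the kernel's BITS_PER_LONG (64 on LP64, 32 on ILP32) */
#define BITS_PER_LONG ((int)(sizeof(long) * CHAR_BIT))
/* same open-coded ternary as the diff: usable in constant expressions */
#define __const_max(x, y) ((x) < (y) ? (x) : (y))

/* smaller of INT_MAX and the largest element count of a pointer array,
 * rounded down to a multiple of BITS_PER_LONG (& -BITS_PER_LONG clears
 * the low bits); legal at file scope precisely because every operand
 * here is a constant expression, which a statement-expression max()
 * would not be */
static const size_t nr_open_max =
	__const_max(INT_MAX, ~(size_t)0 / sizeof(void *)) & -BITS_PER_LONG;

int main(void)
{
	printf("%zu\n", nr_open_max);	/* prints 2147483584 on LP64 */
	return 0;
}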
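The new __fdget() family packs its result into a single unsigned long: the struct file pointer plus up to two flag bits (FDPUT_FPUT, FDPUT_POS_UNLOCK), which fit because struct file is at least 4-byte aligned, leaving bits 0-1 of the pointer free. The decode side is not part of this diff; the kernel-context sketch below is modeled on the fdget()/fdput() helpers in include/linux/file.h of this kernel generation, simplified for illustration rather than copied verbatim:

/* kernel-context sketch, not standalone code: assumes struct file,
 * __fdget() and fput() as declared in this diff's tree */
#define FDPUT_FPUT        1	/* a reference was taken; caller must fput() */
#define FDPUT_POS_UNLOCK  2	/* caller must unlock file->f_pos_lock */

struct fd {
	struct file *file;
	unsigned int flags;
};

static inline struct fd __to_fd(unsigned long v)
{
	/* split the packed word back into pointer and flag bits */
	return (struct fd){ (struct file *)(v & ~3), v & 3 };
}

static inline struct fd fdget(unsigned int fd)
{
	return __to_fd(__fdget(fd));
}

static inline void fdput(struct fd fd)
{
	if (fd.flags & FDPUT_FPUT)
		fput(fd.file);
}

The payoff is on the single-threaded fast path: when files->count == 1, __fget_light() returns the bare pointer with no flag bits set, so fdput() is a no-op and no atomic reference-count traffic occurs at all.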
