Diffstat (limited to 'fs/file.c')
| -rw-r--r-- | fs/file.c | 700 |
1 file changed, 566 insertions, 134 deletions
diff --git a/fs/file.c b/fs/file.c
index 0be344755c0..66923fe3176 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,9 +6,11 @@
  *  Manage the dynamic fd arrays in the process files_struct.
  */
 
-#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/export.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/mmzone.h>
 #include <linux/time.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -21,95 +23,37 @@
 #include <linux/rcupdate.h>
 #include <linux/workqueue.h>
 
-struct fdtable_defer {
-	spinlock_t lock;
-	struct work_struct wq;
-	struct fdtable *next;
-};
-
 int sysctl_nr_open __read_mostly = 1024*1024;
 int sysctl_nr_open_min = BITS_PER_LONG;
-int sysctl_nr_open_max = 1024 * 1024; /* raised later */
-
-/*
- * We use this list to defer free fdtables that have vmalloced
- * sets/arrays. By keeping a per-cpu list, we avoid having to embed
- * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
- * this per-task structure.
- */
-static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
+/* our max() is unusable in constant expressions ;-/ */
+#define __const_max(x, y) ((x) < (y) ? (x) : (y))
+int sysctl_nr_open_max = __const_max(INT_MAX, ~(size_t)0/sizeof(void *)) &
+			 -BITS_PER_LONG;
 
-static inline void *alloc_fdmem(unsigned int size)
+static void *alloc_fdmem(size_t size)
 {
-	void *data;
-
-	data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
-	if (data != NULL)
-		return data;
-
+	/*
+	 * Very large allocations can stress page reclaim, so fall back to
+	 * vmalloc() if the allocation size will be considered "large" by the VM.
+	 */
+	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+		void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY);
+		if (data != NULL)
+			return data;
+	}
 	return vmalloc(size);
 }
 
-static void free_fdmem(void *ptr)
-{
-	is_vmalloc_addr(ptr) ? vfree(ptr) : kfree(ptr);
-}
-
 static void __free_fdtable(struct fdtable *fdt)
 {
-	free_fdmem(fdt->fd);
-	free_fdmem(fdt->open_fds);
+	kvfree(fdt->fd);
+	kvfree(fdt->open_fds);
 	kfree(fdt);
 }
 
-static void free_fdtable_work(struct work_struct *work)
+static void free_fdtable_rcu(struct rcu_head *rcu)
 {
-	struct fdtable_defer *f =
-		container_of(work, struct fdtable_defer, wq);
-	struct fdtable *fdt;
-
-	spin_lock_bh(&f->lock);
-	fdt = f->next;
-	f->next = NULL;
-	spin_unlock_bh(&f->lock);
-	while(fdt) {
-		struct fdtable *next = fdt->next;
-
-		__free_fdtable(fdt);
-		fdt = next;
-	}
-}
-
-void free_fdtable_rcu(struct rcu_head *rcu)
-{
-	struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
-	struct fdtable_defer *fddef;
-
-	BUG_ON(!fdt);
-
-	if (fdt->max_fds <= NR_OPEN_DEFAULT) {
-		/*
-		 * This fdtable is embedded in the files structure and that
-		 * structure itself is getting destroyed.
-		 */
-		kmem_cache_free(files_cachep,
-				container_of(fdt, struct files_struct, fdtab));
-		return;
-	}
-	if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) {
-		kfree(fdt->fd);
-		kfree(fdt->open_fds);
-		kfree(fdt);
-	} else {
-		fddef = &get_cpu_var(fdtable_defer_list);
-		spin_lock(&fddef->lock);
-		fdt->next = fddef->next;
-		fddef->next = fdt;
-		/* vmallocs are handled from the workqueue context */
-		schedule_work(&fddef->wq);
-		spin_unlock(&fddef->lock);
-		put_cpu_var(fdtable_defer_list);
-	}
+	__free_fdtable(container_of(rcu, struct fdtable, rcu));
 }
 
 /*
@@ -138,7 +82,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
 static struct fdtable * alloc_fdtable(unsigned int nr)
 {
 	struct fdtable *fdt;
-	char *data;
+	void *data;
 
 	/*
 	 * Figure out how many fds we actually want to support in this fdtable.
@@ -168,20 +112,20 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
 	data = alloc_fdmem(nr * sizeof(struct file *));
 	if (!data)
 		goto out_fdt;
-	fdt->fd = (struct file **)data;
-	data = alloc_fdmem(max_t(unsigned int,
+	fdt->fd = data;
+
+	data = alloc_fdmem(max_t(size_t,
 				 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
 	if (!data)
 		goto out_arr;
-	fdt->open_fds = (fd_set *)data;
+	fdt->open_fds = data;
 	data += nr / BITS_PER_BYTE;
-	fdt->close_on_exec = (fd_set *)data;
-	fdt->next = NULL;
+	fdt->close_on_exec = data;
 
 	return fdt;
 
 out_arr:
-	free_fdmem(fdt->fd);
+	kvfree(fdt->fd);
 out_fdt:
 	kfree(fdt);
 out:
@@ -223,8 +167,8 @@ static int expand_fdtable(struct files_struct *files, int nr)
 		/* Continue as planned */
 		copy_fdtable(new_fdt, cur_fdt);
 		rcu_assign_pointer(files->fdt, new_fdt);
-		if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
-			free_fdtable(cur_fdt);
+		if (cur_fdt != &files->fdtab)
+			call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
 	} else {
 		/* Somebody else expanded, so undo our attempt */
 		__free_fdtable(new_fdt);
@@ -240,19 +184,12 @@ static int expand_fdtable(struct files_struct *files, int nr)
  * expanded and execution may have blocked.
  * The files->file_lock should be held on entry, and will be held on exit.
  */
-int expand_files(struct files_struct *files, int nr)
+static int expand_files(struct files_struct *files, int nr)
 {
 	struct fdtable *fdt;
 
 	fdt = files_fdtable(files);
 
-	/*
-	 * N.B. For clone tasks sharing a files structure, this test
-	 * will limit the total number of files that can be opened.
-	 */
-	if (nr >= rlimit(RLIMIT_NOFILE))
-		return -EMFILE;
-
 	/* Do we need to expand? */
 	if (nr < fdt->max_fds)
 		return 0;
@@ -265,17 +202,37 @@ int expand_files(struct files_struct *files, int nr)
 	return expand_fdtable(files, nr);
 }
 
+static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
+{
+	__set_bit(fd, fdt->close_on_exec);
+}
+
+static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
+{
+	__clear_bit(fd, fdt->close_on_exec);
+}
+
+static inline void __set_open_fd(int fd, struct fdtable *fdt)
+{
+	__set_bit(fd, fdt->open_fds);
+}
+
+static inline void __clear_open_fd(int fd, struct fdtable *fdt)
+{
+	__clear_bit(fd, fdt->open_fds);
+}
+
 static int count_open_files(struct fdtable *fdt)
 {
 	int size = fdt->max_fds;
 	int i;
 
 	/* Find the last open fd */
-	for (i = size/(8*sizeof(long)); i > 0; ) {
-		if (fdt->open_fds->fds_bits[--i])
+	for (i = size / BITS_PER_LONG; i > 0; ) {
+		if (fdt->open_fds[--i])
 			break;
 	}
-	i = (i+1) * 8 * sizeof(long);
+	i = (i + 1) * BITS_PER_LONG;
 	return i;
 }
@@ -302,10 +259,9 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 	newf->next_fd = 0;
 	new_fdt = &newf->fdtab;
 	new_fdt->max_fds = NR_OPEN_DEFAULT;
-	new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
-	new_fdt->open_fds = (fd_set *)&newf->open_fds_init;
+	new_fdt->close_on_exec = newf->close_on_exec_init;
+	new_fdt->open_fds = newf->open_fds_init;
 	new_fdt->fd = &newf->fd_array[0];
-	new_fdt->next = NULL;
 
 	spin_lock(&oldf->file_lock);
 	old_fdt = files_fdtable(oldf);
@@ -346,10 +302,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 	old_fds = old_fdt->fd;
 	new_fds = new_fdt->fd;
 
-	memcpy(new_fdt->open_fds->fds_bits,
-		old_fdt->open_fds->fds_bits, open_files/8);
-	memcpy(new_fdt->close_on_exec->fds_bits,
-		old_fdt->close_on_exec->fds_bits, open_files/8);
+	memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8);
+	memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8);
 
 	for (i = open_files; i != 0; i--) {
 		struct file *f = *old_fds++;
@@ -362,7 +316,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 			 * is partway through open().  So make sure that this
 			 * fd is available to the new process.
 			 */
-			FD_CLR(open_files - i, new_fdt->open_fds);
+			__clear_open_fd(open_files - i, new_fdt);
 		}
 		rcu_assign_pointer(*new_fds++, f);
 	}
@@ -375,11 +329,11 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 	memset(new_fds, 0, size);
 
 	if (new_fdt->max_fds > open_files) {
-		int left = (new_fdt->max_fds-open_files)/8;
-		int start = open_files / (8 * sizeof(unsigned long));
+		int left = (new_fdt->max_fds - open_files) / 8;
+		int start = open_files / BITS_PER_LONG;
 
-		memset(&new_fdt->open_fds->fds_bits[start], 0, left);
-		memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
+		memset(&new_fdt->open_fds[start], 0, left);
+		memset(&new_fdt->close_on_exec[start], 0, left);
 	}
 
 	rcu_assign_pointer(newf->fdt, new_fdt);
@@ -392,21 +346,85 @@ out:
 	return NULL;
 }
 
-static void __devinit fdtable_defer_list_init(int cpu)
+static struct fdtable *close_files(struct files_struct * files)
 {
-	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
-	spin_lock_init(&fddef->lock);
-	INIT_WORK(&fddef->wq, free_fdtable_work);
-	fddef->next = NULL;
+	/*
+	 * It is safe to dereference the fd table without RCU or
+	 * ->file_lock because this is the last reference to the
+	 * files structure.
+	 */
+	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
+	int i, j = 0;
+
+	for (;;) {
+		unsigned long set;
+		i = j * BITS_PER_LONG;
+		if (i >= fdt->max_fds)
+			break;
+		set = fdt->open_fds[j++];
+		while (set) {
+			if (set & 1) {
+				struct file * file = xchg(&fdt->fd[i], NULL);
+				if (file) {
+					filp_close(file, files);
+					cond_resched();
+				}
+			}
+			i++;
+			set >>= 1;
+		}
+	}
+
+	return fdt;
 }
 
-void __init files_defer_init(void)
+struct files_struct *get_files_struct(struct task_struct *task)
 {
-	int i;
-	for_each_possible_cpu(i)
-		fdtable_defer_list_init(i);
-	sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
-			     -BITS_PER_LONG;
+	struct files_struct *files;
+
+	task_lock(task);
+	files = task->files;
+	if (files)
+		atomic_inc(&files->count);
+	task_unlock(task);
+
+	return files;
+}
+
+void put_files_struct(struct files_struct *files)
+{
+	if (atomic_dec_and_test(&files->count)) {
+		struct fdtable *fdt = close_files(files);
+
+		/* free the arrays if they are not embedded */
+		if (fdt != &files->fdtab)
+			__free_fdtable(fdt);
+		kmem_cache_free(files_cachep, files);
+	}
+}
+
+void reset_files_struct(struct files_struct *files)
+{
+	struct task_struct *tsk = current;
+	struct files_struct *old;
+
+	old = tsk->files;
+	task_lock(tsk);
+	tsk->files = files;
+	task_unlock(tsk);
+	put_files_struct(old);
+}
+
+void exit_files(struct task_struct *tsk)
+{
+	struct files_struct * files = tsk->files;
+
+	if (files) {
+		task_lock(tsk);
+		tsk->files = NULL;
+		task_unlock(tsk);
+		put_files_struct(files);
+	}
 }
 
 struct files_struct init_files = {
@@ -415,18 +433,18 @@ struct files_struct init_files = {
 	.fdtab		= {
 		.max_fds	= NR_OPEN_DEFAULT,
 		.fd		= &init_files.fd_array[0],
-		.close_on_exec	= (fd_set *)&init_files.close_on_exec_init,
-		.open_fds	= (fd_set *)&init_files.open_fds_init,
+		.close_on_exec	= init_files.close_on_exec_init,
+		.open_fds	= init_files.open_fds_init,
 	},
-	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock),
+	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
 };
 
 /*
  * allocate a file descriptor, mark it busy.
  */
-int alloc_fd(unsigned start, unsigned flags)
+int __alloc_fd(struct files_struct *files,
+	       unsigned start, unsigned end, unsigned flags)
 {
-	struct files_struct *files = current->files;
 	unsigned int fd;
 	int error;
 	struct fdtable *fdt;
@@ -439,8 +457,15 @@ repeat:
 		fd = files->next_fd;
 
 	if (fd < fdt->max_fds)
-		fd = find_next_zero_bit(fdt->open_fds->fds_bits,
-					   fdt->max_fds, fd);
+		fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
+
+	/*
+	 * N.B. For clone tasks sharing a files structure, this test
+	 * will limit the total number of files that can be opened.
+	 */
+	error = -EMFILE;
+	if (fd >= end)
+		goto out;
 
 	error = expand_files(files, fd);
 	if (error < 0)
@@ -456,15 +481,15 @@ repeat:
 	if (start <= files->next_fd)
 		files->next_fd = fd + 1;
 
-	FD_SET(fd, fdt->open_fds);
+	__set_open_fd(fd, fdt);
 	if (flags & O_CLOEXEC)
-		FD_SET(fd, fdt->close_on_exec);
+		__set_close_on_exec(fd, fdt);
 	else
-		FD_CLR(fd, fdt->close_on_exec);
+		__clear_close_on_exec(fd, fdt);
 	error = fd;
 #if 1
 	/* Sanity check */
-	if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {
+	if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
 		printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
 		rcu_assign_pointer(fdt->fd[fd], NULL);
 	}
@@ -475,8 +500,415 @@ out:
 	return error;
 }
 
-int get_unused_fd(void)
+static int alloc_fd(unsigned start, unsigned flags)
+{
+	return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
+}
+
+int get_unused_fd_flags(unsigned flags)
+{
+	return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
+}
+EXPORT_SYMBOL(get_unused_fd_flags);
+
+static void __put_unused_fd(struct files_struct *files, unsigned int fd)
 {
-	return alloc_fd(0, 0);
+	struct fdtable *fdt = files_fdtable(files);
+	__clear_open_fd(fd, fdt);
+	if (fd < files->next_fd)
+		files->next_fd = fd;
+}
+
+void put_unused_fd(unsigned int fd)
+{
+	struct files_struct *files = current->files;
+	spin_lock(&files->file_lock);
+	__put_unused_fd(files, fd);
+	spin_unlock(&files->file_lock);
+}
+
+EXPORT_SYMBOL(put_unused_fd);
+
+/*
+ * Install a file pointer in the fd array.
+ *
+ * The VFS is full of places where we drop the files lock between
+ * setting the open_fds bitmap and installing the file in the file
+ * array.  At any such point, we are vulnerable to a dup2() race
+ * installing a file in the array before us.  We need to detect this and
+ * fput() the struct file we are about to overwrite in this case.
+ *
+ * It should never happen - if we allow dup2() do it, _really_ bad things
+ * will follow.
+ *
+ * NOTE: __fd_install() variant is really, really low-level; don't
+ * use it unless you are forced to by truly lousy API shoved down
+ * your throat.  'files' *MUST* be either current->files or obtained
+ * by get_files_struct(current) done by whoever had given it to you,
+ * or really bad things will happen.  Normally you want to use
+ * fd_install() instead.
+ */
+
+void __fd_install(struct files_struct *files, unsigned int fd,
+		struct file *file)
+{
+	struct fdtable *fdt;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	BUG_ON(fdt->fd[fd] != NULL);
+	rcu_assign_pointer(fdt->fd[fd], file);
+	spin_unlock(&files->file_lock);
+}
+
+void fd_install(unsigned int fd, struct file *file)
+{
+	__fd_install(current->files, fd, file);
+}
+
+EXPORT_SYMBOL(fd_install);
+
+/*
+ * The same warnings as for __alloc_fd()/__fd_install() apply here...
+ */
+int __close_fd(struct files_struct *files, unsigned fd)
+{
+	struct file *file;
+	struct fdtable *fdt;
+
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	if (fd >= fdt->max_fds)
+		goto out_unlock;
+	file = fdt->fd[fd];
+	if (!file)
+		goto out_unlock;
+	rcu_assign_pointer(fdt->fd[fd], NULL);
+	__clear_close_on_exec(fd, fdt);
+	__put_unused_fd(files, fd);
+	spin_unlock(&files->file_lock);
+	return filp_close(file, files);
+
+out_unlock:
+	spin_unlock(&files->file_lock);
+	return -EBADF;
+}
+
+void do_close_on_exec(struct files_struct *files)
+{
+	unsigned i;
+	struct fdtable *fdt;
+
+	/* exec unshares first */
+	spin_lock(&files->file_lock);
+	for (i = 0; ; i++) {
+		unsigned long set;
+		unsigned fd = i * BITS_PER_LONG;
+		fdt = files_fdtable(files);
+		if (fd >= fdt->max_fds)
+			break;
+		set = fdt->close_on_exec[i];
+		if (!set)
+			continue;
+		fdt->close_on_exec[i] = 0;
+		for ( ; set ; fd++, set >>= 1) {
+			struct file *file;
+			if (!(set & 1))
+				continue;
+			file = fdt->fd[fd];
+			if (!file)
+				continue;
+			rcu_assign_pointer(fdt->fd[fd], NULL);
+			__put_unused_fd(files, fd);
+			spin_unlock(&files->file_lock);
+			filp_close(file, files);
+			cond_resched();
+			spin_lock(&files->file_lock);
+		}
+
+	}
+	spin_unlock(&files->file_lock);
+}
+
+static struct file *__fget(unsigned int fd, fmode_t mask)
+{
+	struct files_struct *files = current->files;
+	struct file *file;
+
+	rcu_read_lock();
+	file = fcheck_files(files, fd);
+	if (file) {
+		/* File object ref couldn't be taken */
+		if ((file->f_mode & mask) ||
+		    !atomic_long_inc_not_zero(&file->f_count))
+			file = NULL;
+	}
+	rcu_read_unlock();
+
+	return file;
+}
+
+struct file *fget(unsigned int fd)
+{
+	return __fget(fd, FMODE_PATH);
+}
+EXPORT_SYMBOL(fget);
+
+struct file *fget_raw(unsigned int fd)
+{
+	return __fget(fd, 0);
+}
+EXPORT_SYMBOL(fget_raw);
+
+/*
+ * Lightweight file lookup - no refcnt increment if fd table isn't shared.
+ *
+ * You can use this instead of fget if you satisfy all of the following
+ * conditions:
+ * 1) You must call fput_light before exiting the syscall and returning control
+ *    to userspace (i.e. you cannot remember the returned struct file * after
+ *    returning to userspace).
+ * 2) You must not call filp_close on the returned struct file * in between
+ *    calls to fget_light and fput_light.
+ * 3) You must not clone the current task in between the calls to fget_light
+ *    and fput_light.
+ *
+ * The fput_needed flag returned by fget_light should be passed to the
+ * corresponding fput_light.
+ */
+static unsigned long __fget_light(unsigned int fd, fmode_t mask)
+{
+	struct files_struct *files = current->files;
+	struct file *file;
+
+	if (atomic_read(&files->count) == 1) {
+		file = __fcheck_files(files, fd);
+		if (!file || unlikely(file->f_mode & mask))
+			return 0;
+		return (unsigned long)file;
+	} else {
+		file = __fget(fd, mask);
+		if (!file)
+			return 0;
+		return FDPUT_FPUT | (unsigned long)file;
+	}
+}
+unsigned long __fdget(unsigned int fd)
+{
+	return __fget_light(fd, FMODE_PATH);
+}
+EXPORT_SYMBOL(__fdget);
+
+unsigned long __fdget_raw(unsigned int fd)
+{
+	return __fget_light(fd, 0);
+}
+
+unsigned long __fdget_pos(unsigned int fd)
+{
+	unsigned long v = __fdget(fd);
+	struct file *file = (struct file *)(v & ~3);
+
+	if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
+		if (file_count(file) > 1) {
+			v |= FDPUT_POS_UNLOCK;
+			mutex_lock(&file->f_pos_lock);
+		}
+	}
+	return v;
+}
+
+/*
+ * We only lock f_pos if we have threads or if the file might be
+ * shared with another process. In both cases we'll have an elevated
+ * file count (done either by fdget() or by fork()).
+ */
+
+void set_close_on_exec(unsigned int fd, int flag)
+{
+	struct files_struct *files = current->files;
+	struct fdtable *fdt;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	if (flag)
+		__set_close_on_exec(fd, fdt);
+	else
+		__clear_close_on_exec(fd, fdt);
+	spin_unlock(&files->file_lock);
+}
+
+bool get_close_on_exec(unsigned int fd)
+{
+	struct files_struct *files = current->files;
+	struct fdtable *fdt;
+	bool res;
+	rcu_read_lock();
+	fdt = files_fdtable(files);
+	res = close_on_exec(fd, fdt);
+	rcu_read_unlock();
+	return res;
+}
+
+static int do_dup2(struct files_struct *files,
+	struct file *file, unsigned fd, unsigned flags)
+{
+	struct file *tofree;
+	struct fdtable *fdt;
+
+	/*
+	 * We need to detect attempts to do dup2() over allocated but still
+	 * not finished descriptor.  NB: OpenBSD avoids that at the price of
+	 * extra work in their equivalent of fget() - they insert struct
+	 * file immediately after grabbing descriptor, mark it larval if
+	 * more work (e.g. actual opening) is needed and make sure that
+	 * fget() treats larval files as absent.  Potentially interesting,
+	 * but while extra work in fget() is trivial, locking implications
+	 * and amount of surgery on open()-related paths in VFS are not.
+	 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
+	 * deadlocks in rather amusing ways, AFAICS.  All of that is out of
+	 * scope of POSIX or SUS, since neither considers shared descriptor
+	 * tables and this condition does not arise without those.
+	 */
+	fdt = files_fdtable(files);
+	tofree = fdt->fd[fd];
+	if (!tofree && fd_is_open(fd, fdt))
+		goto Ebusy;
+	get_file(file);
+	rcu_assign_pointer(fdt->fd[fd], file);
+	__set_open_fd(fd, fdt);
+	if (flags & O_CLOEXEC)
+		__set_close_on_exec(fd, fdt);
+	else
+		__clear_close_on_exec(fd, fdt);
+	spin_unlock(&files->file_lock);
+
+	if (tofree)
+		filp_close(tofree, files);
+
+	return fd;
+
+Ebusy:
+	spin_unlock(&files->file_lock);
+	return -EBUSY;
+}
+
+int replace_fd(unsigned fd, struct file *file, unsigned flags)
+{
+	int err;
+	struct files_struct *files = current->files;
+
+	if (!file)
+		return __close_fd(files, fd);
+
+	if (fd >= rlimit(RLIMIT_NOFILE))
+		return -EBADF;
+
+	spin_lock(&files->file_lock);
+	err = expand_files(files, fd);
+	if (unlikely(err < 0))
+		goto out_unlock;
+	return do_dup2(files, file, fd, flags);
+
+out_unlock:
+	spin_unlock(&files->file_lock);
+	return err;
+}
+
+SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
+{
+	int err = -EBADF;
+	struct file *file;
+	struct files_struct *files = current->files;
+
+	if ((flags & ~O_CLOEXEC) != 0)
+		return -EINVAL;
+
+	if (unlikely(oldfd == newfd))
+		return -EINVAL;
+
+	if (newfd >= rlimit(RLIMIT_NOFILE))
+		return -EBADF;
+
+	spin_lock(&files->file_lock);
+	err = expand_files(files, newfd);
+	file = fcheck(oldfd);
+	if (unlikely(!file))
+		goto Ebadf;
+	if (unlikely(err < 0)) {
+		if (err == -EMFILE)
+			goto Ebadf;
+		goto out_unlock;
+	}
+	return do_dup2(files, file, newfd, flags);
+
+Ebadf:
+	err = -EBADF;
+out_unlock:
+	spin_unlock(&files->file_lock);
+	return err;
+}
+
+SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
+{
+	if (unlikely(newfd == oldfd)) { /* corner case */
+		struct files_struct *files = current->files;
+		int retval = oldfd;
+
+		rcu_read_lock();
+		if (!fcheck_files(files, oldfd))
+			retval = -EBADF;
+		rcu_read_unlock();
+		return retval;
+	}
+	return sys_dup3(oldfd, newfd, 0);
+}
+
+SYSCALL_DEFINE1(dup, unsigned int, fildes)
+{
+	int ret = -EBADF;
+	struct file *file = fget_raw(fildes);
+
+	if (file) {
+		ret = get_unused_fd();
+		if (ret >= 0)
+			fd_install(ret, file);
+		else
+			fput(file);
+	}
+	return ret;
+}
+
+int f_dupfd(unsigned int from, struct file *file, unsigned flags)
+{
+	int err;
+	if (from >= rlimit(RLIMIT_NOFILE))
+		return -EINVAL;
+	err = alloc_fd(from, flags);
+	if (err >= 0) {
+		get_file(file);
+		fd_install(err, file);
+	}
+	return err;
+}
+
+int iterate_fd(struct files_struct *files, unsigned n,
+		int (*f)(const void *, struct file *, unsigned),
+		const void *p)
+{
+	struct fdtable *fdt;
+	int res = 0;
+	if (!files)
+		return 0;
+	spin_lock(&files->file_lock);
+	for (fdt = files_fdtable(files); n < fdt->max_fds; n++) {
+		struct file *file;
+		file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
+		if (!file)
+			continue;
+		res = f(p, file, n);
+		if (res)
+			break;
+	}
+	spin_unlock(&files->file_lock);
+	return res;
 }
-EXPORT_SYMBOL(get_unused_fd);
+EXPORT_SYMBOL(iterate_fd);
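The split of alloc_fd() into __alloc_fd()/get_unused_fd_flags() plus fd_install() gives callers a two-phase pattern: reserve a descriptor first, publish the struct file only once nothing else can fail (dup(), f_dupfd() and replace_fd() above all follow it). A minimal sketch of a caller; the helper name install_my_file() is hypothetical and not part of this diff:

	#include <linux/file.h>
	#include <linux/fs.h>

	/*
	 * Hypothetical helper: hand a fully constructed struct file to
	 * userspace.  The descriptor slot is reserved first; fd_install()
	 * publishes the file only once nothing else can fail.
	 */
	static int install_my_file(struct file *file, unsigned flags)
	{
		int fd = get_unused_fd_flags(flags);	/* reserve a slot, may fail */

		if (fd < 0) {
			fput(file);	/* drop our reference; nothing was published */
			return fd;
		}
		fd_install(fd, file);	/* publish; cannot fail */
		return fd;
	}

If a failure can still occur between the two calls, put_unused_fd() releases the reserved slot instead of installing anything.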
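close_files() and do_close_on_exec() share the same word-at-a-time bitmap walk: load one long of open_fds (or close_on_exec), skip it wholesale when zero, otherwise peel set bits off the low end. The same loop distilled into a self-contained, userspace-style C function for illustration; for_each_set_fd() is a made-up name:

	/* Invoke fn(fd) for every bit set in the first max_fds bits of the
	 * bitmap, one unsigned long at a time.  Zero words cost one test. */
	static void for_each_set_fd(const unsigned long *open_fds, unsigned max_fds,
				    void (*fn)(unsigned fd))
	{
		const unsigned bits = 8 * sizeof(unsigned long);
		unsigned i, j;

		for (j = 0; ; j++) {
			unsigned long set;

			i = j * bits;
			if (i >= max_fds)
				break;
			/* peel set bits off the low end of this word */
			for (set = open_fds[j]; set; i++, set >>= 1)
				if (set & 1)
					fn(i);
		}
	}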
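The unsigned long returned by __fdget()/__fdget_raw()/__fdget_pos() packs the struct file pointer and status flags into one word: struct file allocations are at least word-aligned, so the two low bits are free to carry FDPUT_FPUT (a reference was taken and must be dropped) and FDPUT_POS_UNLOCK (f_pos_lock is held). A sketch of the decode and release steps, mirroring what the struct fd wrappers in <linux/file.h> do; struct fd_cookie, decode_fd_word() and release_fd() are invented names for illustration:

	struct fd_cookie {		/* mirrors struct fd from <linux/file.h> */
		struct file *file;
		unsigned int flags;	/* FDPUT_FPUT and/or FDPUT_POS_UNLOCK */
	};

	static inline struct fd_cookie decode_fd_word(unsigned long v)
	{
		/* low two bits are flags, the rest is the pointer */
		return (struct fd_cookie){ (struct file *)(v & ~3UL), v & 3 };
	}

	static inline void release_fd(struct fd_cookie f)
	{
		if (f.flags & FDPUT_POS_UNLOCK)
			mutex_unlock(&f.file->f_pos_lock);
		if (f.flags & FDPUT_FPUT)
			fput(f.file);	/* drop the reference __fget() took */
	}

When the fd table is unshared, __fget_light() returns the bare pointer with both bits clear, so the common single-threaded case skips the refcount round-trip entirely.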
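iterate_fd() holds files->file_lock across the walk and stops at the first callback returning non-zero, propagating that value to the caller. A small sketch built on those semantics; match_file() and find_fd_of_file() are hypothetical names:

	#include <linux/fdtable.h>
	#include <linux/file.h>

	/* Callback for iterate_fd(): a non-zero return stops the walk and is
	 * propagated back, so report fd + 1 to keep fd 0 distinguishable
	 * from "not found". */
	static int match_file(const void *p, struct file *file, unsigned fd)
	{
		return file == p ? fd + 1 : 0;
	}

	/* Returns fd + 1 if @file is installed somewhere in @files, else 0.
	 * iterate_fd() takes and releases files->file_lock itself. */
	static int find_fd_of_file(struct files_struct *files, struct file *file)
	{
		return iterate_fd(files, 0, match_file, file);
	}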
