diff options
Diffstat (limited to 'fs/select.c')
| -rw-r--r-- | fs/select.c | 160 | 
1 files changed, 100 insertions, 60 deletions
diff --git a/fs/select.c b/fs/select.c index b7b10aa3086..467bb1cb3ea 100644 --- a/fs/select.c +++ b/fs/select.c @@ -17,7 +17,7 @@  #include <linux/kernel.h>  #include <linux/sched.h>  #include <linux/syscalls.h> -#include <linux/module.h> +#include <linux/export.h>  #include <linux/slab.h>  #include <linux/poll.h>  #include <linux/personality.h> /* for STICKY_TIMEOUTS */ @@ -26,6 +26,9 @@  #include <linux/fs.h>  #include <linux/rcupdate.h>  #include <linux/hrtimer.h> +#include <linux/sched/rt.h> +#include <linux/freezer.h> +#include <net/busy_poll.h>  #include <asm/uaccess.h> @@ -220,10 +223,9 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,  	struct poll_table_entry *entry = poll_get_entry(pwq);  	if (!entry)  		return; -	get_file(filp); -	entry->filp = filp; +	entry->filp = get_file(filp);  	entry->wait_address = wait_address; -	entry->key = p->key; +	entry->key = p->_key;  	init_waitqueue_func_entry(&entry->wait, pollwake);  	entry->wait.private = pwq;  	add_wait_queue(wait_address, &entry->wait); @@ -306,6 +308,8 @@ static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,  		rts.tv_sec = rts.tv_nsec = 0;  	if (timeval) { +		if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec)) +			memset(&rtv, 0, sizeof(rtv));  		rtv.tv_sec = rts.tv_sec;  		rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC; @@ -343,10 +347,10 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)  	struct fdtable *fdt;  	/* handle last in-complete long-word first */ -	set = ~(~0UL << (n & (__NFDBITS-1))); -	n /= __NFDBITS; +	set = ~(~0UL << (n & (BITS_PER_LONG-1))); +	n /= BITS_PER_LONG;  	fdt = files_fdtable(current->files); -	open_fds = fdt->open_fds->fds_bits+n; +	open_fds = fdt->open_fds + n;  	max = 0;  	if (set) {  		set &= BITS(fds, n); @@ -371,7 +375,7 @@ get_max:  			max++;  			set >>= 1;  		} while (set); -		max += n * __NFDBITS; +		max += n * BITS_PER_LONG;  	}  	return max; @@ -382,15 +386,14 @@ get_max:  #define POLLEX_SET (POLLPRI)  static inline void wait_key_set(poll_table *wait, unsigned long in, -				unsigned long out, unsigned long bit) +				unsigned long out, unsigned long bit, +				unsigned int ll_flag)  { -	if (wait) { -		wait->key = POLLEX_SET; -		if (in & bit) -			wait->key |= POLLIN_SET; -		if (out & bit) -			wait->key |= POLLOUT_SET; -	} +	wait->_key = POLLEX_SET | ll_flag; +	if (in & bit) +		wait->_key |= POLLIN_SET; +	if (out & bit) +		wait->_key |= POLLOUT_SET;  }  int do_select(int n, fd_set_bits *fds, struct timespec *end_time) @@ -400,6 +403,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)  	poll_table *wait;  	int retval, i, timed_out = 0;  	unsigned long slack = 0; +	unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; +	unsigned long busy_end = 0;  	rcu_read_lock();  	retval = max_select_fd(n, fds); @@ -412,7 +417,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)  	poll_initwait(&table);  	wait = &table.pt;  	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { -		wait = NULL; +		wait->_qproc = NULL;  		timed_out = 1;  	} @@ -422,6 +427,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)  	retval = 0;  	for (;;) {  		unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; +		bool can_busy_loop = false;  		inp = fds->in; outp = fds->out; exp = fds->ex;  		rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; @@ -429,46 +435,58 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)  		for (i = 0; i < n; ++rinp, ++routp, ++rexp) {  			unsigned long in, out, ex, all_bits, bit = 1, mask, j;  			unsigned long res_in = 0, res_out = 0, res_ex = 0; -			const struct file_operations *f_op = NULL; -			struct file *file = NULL;  			in = *inp++; out = *outp++; ex = *exp++;  			all_bits = in | out | ex;  			if (all_bits == 0) { -				i += __NFDBITS; +				i += BITS_PER_LONG;  				continue;  			} -			for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) { -				int fput_needed; +			for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { +				struct fd f;  				if (i >= n)  					break;  				if (!(bit & all_bits))  					continue; -				file = fget_light(i, &fput_needed); -				if (file) { -					f_op = file->f_op; +				f = fdget(i); +				if (f.file) { +					const struct file_operations *f_op; +					f_op = f.file->f_op;  					mask = DEFAULT_POLLMASK; -					if (f_op && f_op->poll) { -						wait_key_set(wait, in, out, bit); -						mask = (*f_op->poll)(file, wait); +					if (f_op->poll) { +						wait_key_set(wait, in, out, +							     bit, busy_flag); +						mask = (*f_op->poll)(f.file, wait);  					} -					fput_light(file, fput_needed); +					fdput(f);  					if ((mask & POLLIN_SET) && (in & bit)) {  						res_in |= bit;  						retval++; -						wait = NULL; +						wait->_qproc = NULL;  					}  					if ((mask & POLLOUT_SET) && (out & bit)) {  						res_out |= bit;  						retval++; -						wait = NULL; +						wait->_qproc = NULL;  					}  					if ((mask & POLLEX_SET) && (ex & bit)) {  						res_ex |= bit;  						retval++; -						wait = NULL; +						wait->_qproc = NULL;  					} +					/* got something, stop busy polling */ +					if (retval) { +						can_busy_loop = false; +						busy_flag = 0; + +					/* +					 * only remember a returned +					 * POLL_BUSY_LOOP if we asked for it +					 */ +					} else if (busy_flag & mask) +						can_busy_loop = true; +  				}  			}  			if (res_in) @@ -479,7 +497,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)  				*rexp = res_ex;  			cond_resched();  		} -		wait = NULL; +		wait->_qproc = NULL;  		if (retval || timed_out || signal_pending(current))  			break;  		if (table.error) { @@ -487,6 +505,17 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)  			break;  		} +		/* only if found POLL_BUSY_LOOP sockets && not out of time */ +		if (can_busy_loop && !need_resched()) { +			if (!busy_end) { +				busy_end = busy_loop_end_time(); +				continue; +			} +			if (!busy_loop_timeout(busy_end)) +				continue; +		} +		busy_flag = 0; +  		/*  		 * If this is the first loop and we have a timeout  		 * given, then we convert to ktime_t and set the to @@ -515,9 +544,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)   * Update: ERESTARTSYS breaks at least the xview clock binary, so   * I'm trying ERESTARTNOHAND which restart only when you want to.   */ -#define MAX_SELECT_SECONDS \ -	((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) -  int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,  			   fd_set __user *exp, struct timespec *end_time)  { @@ -617,7 +643,6 @@ SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,  	return ret;  } -#ifdef HAVE_SET_RESTORE_SIGMASK  static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,  		       fd_set __user *exp, struct timespec __user *tsp,  		       const sigset_t __user *sigmask, size_t sigsetsize) @@ -689,7 +714,6 @@ SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,  	return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize);  } -#endif /* HAVE_SET_RESTORE_SIGMASK */  #ifdef __ARCH_WANT_SYS_OLD_SELECT  struct sel_arg_struct { @@ -721,9 +745,11 @@ struct poll_list {   * interested in events matching the pollfd->events mask, and the result   * matching that mask is both recorded in pollfd->revents and returned. The   * pwait poll_table will be used by the fd-provided poll handler for waiting, - * if non-NULL. + * if pwait->_qproc is non-NULL.   */ -static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) +static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait, +				     bool *can_busy_poll, +				     unsigned int busy_flag)  {  	unsigned int mask;  	int fd; @@ -731,22 +757,20 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)  	mask = 0;  	fd = pollfd->fd;  	if (fd >= 0) { -		int fput_needed; -		struct file * file; - -		file = fget_light(fd, &fput_needed); +		struct fd f = fdget(fd);  		mask = POLLNVAL; -		if (file != NULL) { +		if (f.file) {  			mask = DEFAULT_POLLMASK; -			if (file->f_op && file->f_op->poll) { -				if (pwait) -					pwait->key = pollfd->events | -							POLLERR | POLLHUP; -				mask = file->f_op->poll(file, pwait); +			if (f.file->f_op->poll) { +				pwait->_key = pollfd->events|POLLERR|POLLHUP; +				pwait->_key |= busy_flag; +				mask = f.file->f_op->poll(f.file, pwait); +				if (mask & busy_flag) +					*can_busy_poll = true;  			}  			/* Mask out unneeded events. */  			mask &= pollfd->events | POLLERR | POLLHUP; -			fput_light(file, fput_needed); +			fdput(f);  		}  	}  	pollfd->revents = mask; @@ -761,10 +785,12 @@ static int do_poll(unsigned int nfds,  struct poll_list *list,  	ktime_t expire, *to = NULL;  	int timed_out = 0, count = 0;  	unsigned long slack = 0; +	unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; +	unsigned long busy_end = 0;  	/* Optimise the no-wait case */  	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { -		pt = NULL; +		pt->_qproc = NULL;  		timed_out = 1;  	} @@ -773,6 +799,7 @@ static int do_poll(unsigned int nfds,  struct poll_list *list,  	for (;;) {  		struct poll_list *walk; +		bool can_busy_loop = false;  		for (walk = list; walk != NULL; walk = walk->next) {  			struct pollfd * pfd, * pfd_end; @@ -782,22 +809,26 @@ static int do_poll(unsigned int nfds,  struct poll_list *list,  			for (; pfd != pfd_end; pfd++) {  				/*  				 * Fish for events. If we found one, record it -				 * and kill the poll_table, so we don't +				 * and kill poll_table->_qproc, so we don't  				 * needlessly register any other waiters after  				 * this. They'll get immediately deregistered  				 * when we break out and return.  				 */ -				if (do_pollfd(pfd, pt)) { +				if (do_pollfd(pfd, pt, &can_busy_loop, +					      busy_flag)) {  					count++; -					pt = NULL; +					pt->_qproc = NULL; +					/* found something, stop busy polling */ +					busy_flag = 0; +					can_busy_loop = false;  				}  			}  		}  		/*  		 * All waiters have already been registered, so don't provide -		 * a poll_table to them on the next loop iteration. +		 * a poll_table->_qproc to them on the next loop iteration.  		 */ -		pt = NULL; +		pt->_qproc = NULL;  		if (!count) {  			count = wait->error;  			if (signal_pending(current)) @@ -806,6 +837,17 @@ static int do_poll(unsigned int nfds,  struct poll_list *list,  		if (count || timed_out)  			break; +		/* only if found POLL_BUSY_LOOP sockets && not out of time */ +		if (can_busy_loop && !need_resched()) { +			if (!busy_end) { +				busy_end = busy_loop_end_time(); +				continue; +			} +			if (!busy_loop_timeout(busy_end)) +				continue; +		} +		busy_flag = 0; +  		/*  		 * If this is the first loop and we have a timeout  		 * given, then we convert to ktime_t and set the to @@ -913,7 +955,7 @@ static long do_restart_poll(struct restart_block *restart_block)  }  SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, -		long, timeout_msecs) +		int, timeout_msecs)  {  	struct timespec end_time, *to = NULL;  	int ret; @@ -946,7 +988,6 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,  	return ret;  } -#ifdef HAVE_SET_RESTORE_SIGMASK  SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,  		struct timespec __user *, tsp, const sigset_t __user *, sigmask,  		size_t, sigsetsize) @@ -997,4 +1038,3 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,  	return ret;  } -#endif /* HAVE_SET_RESTORE_SIGMASK */  | 
