diff options
Diffstat (limited to 'fs/select.c')
| -rw-r--r-- | fs/select.c | 185 |
1 files changed, 121 insertions, 64 deletions
diff --git a/fs/select.c b/fs/select.c index fd38ce2e32e..467bb1cb3ea 100644 --- a/fs/select.c +++ b/fs/select.c @@ -17,7 +17,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/syscalls.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/personality.h> /* for STICKY_TIMEOUTS */ @@ -26,6 +26,9 @@ #include <linux/fs.h> #include <linux/rcupdate.h> #include <linux/hrtimer.h> +#include <linux/sched/rt.h> +#include <linux/freezer.h> +#include <net/busy_poll.h> #include <asm/uaccess.h> @@ -67,7 +70,7 @@ static long __estimate_accuracy(struct timespec *tv) return slack; } -static long estimate_accuracy(struct timespec *tv) +long select_estimate_accuracy(struct timespec *tv) { unsigned long ret; struct timespec now; @@ -220,10 +223,9 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, struct poll_table_entry *entry = poll_get_entry(pwq); if (!entry) return; - get_file(filp); - entry->filp = filp; + entry->filp = get_file(filp); entry->wait_address = wait_address; - entry->key = p->key; + entry->key = p->_key; init_waitqueue_func_entry(&entry->wait, pollwake); entry->wait.private = pwq; add_wait_queue(wait_address, &entry->wait); @@ -306,6 +308,8 @@ static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, rts.tv_sec = rts.tv_nsec = 0; if (timeval) { + if (sizeof(rtv) > sizeof(rtv.tv_sec) + sizeof(rtv.tv_usec)) + memset(&rtv, 0, sizeof(rtv)); rtv.tv_sec = rts.tv_sec; rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC; @@ -343,10 +347,10 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) struct fdtable *fdt; /* handle last in-complete long-word first */ - set = ~(~0UL << (n & (__NFDBITS-1))); - n /= __NFDBITS; + set = ~(~0UL << (n & (BITS_PER_LONG-1))); + n /= BITS_PER_LONG; fdt = files_fdtable(current->files); - open_fds = fdt->open_fds->fds_bits+n; + open_fds = fdt->open_fds + n; max = 0; if (set) { set &= BITS(fds, n); @@ -371,7 +375,7 @@ get_max: max++; set >>= 1; } while (set); - max += n * __NFDBITS; + max += n * BITS_PER_LONG; } return max; @@ -382,15 +386,14 @@ get_max: #define POLLEX_SET (POLLPRI) static inline void wait_key_set(poll_table *wait, unsigned long in, - unsigned long out, unsigned long bit) + unsigned long out, unsigned long bit, + unsigned int ll_flag) { - if (wait) { - wait->key = POLLEX_SET; - if (in & bit) - wait->key |= POLLIN_SET; - if (out & bit) - wait->key |= POLLOUT_SET; - } + wait->_key = POLLEX_SET | ll_flag; + if (in & bit) + wait->_key |= POLLIN_SET; + if (out & bit) + wait->_key |= POLLOUT_SET; } int do_select(int n, fd_set_bits *fds, struct timespec *end_time) @@ -400,6 +403,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) poll_table *wait; int retval, i, timed_out = 0; unsigned long slack = 0; + unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; + unsigned long busy_end = 0; rcu_read_lock(); retval = max_select_fd(n, fds); @@ -412,16 +417,17 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) poll_initwait(&table); wait = &table.pt; if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { - wait = NULL; + wait->_qproc = NULL; timed_out = 1; } if (end_time && !timed_out) - slack = estimate_accuracy(end_time); + slack = select_estimate_accuracy(end_time); retval = 0; for (;;) { unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; + bool can_busy_loop = false; inp = fds->in; outp = fds->out; exp = fds->ex; rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; @@ -429,46 +435,58 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) for (i = 0; i < n; ++rinp, ++routp, ++rexp) { unsigned long in, out, ex, all_bits, bit = 1, mask, j; unsigned long res_in = 0, res_out = 0, res_ex = 0; - const struct file_operations *f_op = NULL; - struct file *file = NULL; in = *inp++; out = *outp++; ex = *exp++; all_bits = in | out | ex; if (all_bits == 0) { - i += __NFDBITS; + i += BITS_PER_LONG; continue; } - for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) { - int fput_needed; + for (j = 0; j < BITS_PER_LONG; ++j, ++i, bit <<= 1) { + struct fd f; if (i >= n) break; if (!(bit & all_bits)) continue; - file = fget_light(i, &fput_needed); - if (file) { - f_op = file->f_op; + f = fdget(i); + if (f.file) { + const struct file_operations *f_op; + f_op = f.file->f_op; mask = DEFAULT_POLLMASK; - if (f_op && f_op->poll) { - wait_key_set(wait, in, out, bit); - mask = (*f_op->poll)(file, wait); + if (f_op->poll) { + wait_key_set(wait, in, out, + bit, busy_flag); + mask = (*f_op->poll)(f.file, wait); } - fput_light(file, fput_needed); + fdput(f); if ((mask & POLLIN_SET) && (in & bit)) { res_in |= bit; retval++; - wait = NULL; + wait->_qproc = NULL; } if ((mask & POLLOUT_SET) && (out & bit)) { res_out |= bit; retval++; - wait = NULL; + wait->_qproc = NULL; } if ((mask & POLLEX_SET) && (ex & bit)) { res_ex |= bit; retval++; - wait = NULL; + wait->_qproc = NULL; } + /* got something, stop busy polling */ + if (retval) { + can_busy_loop = false; + busy_flag = 0; + + /* + * only remember a returned + * POLL_BUSY_LOOP if we asked for it + */ + } else if (busy_flag & mask) + can_busy_loop = true; + } } if (res_in) @@ -479,7 +497,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) *rexp = res_ex; cond_resched(); } - wait = NULL; + wait->_qproc = NULL; if (retval || timed_out || signal_pending(current)) break; if (table.error) { @@ -487,6 +505,17 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) break; } + /* only if found POLL_BUSY_LOOP sockets && not out of time */ + if (can_busy_loop && !need_resched()) { + if (!busy_end) { + busy_end = busy_loop_end_time(); + continue; + } + if (!busy_loop_timeout(busy_end)) + continue; + } + busy_flag = 0; + /* * If this is the first loop and we have a timeout * given, then we convert to ktime_t and set the to @@ -515,9 +544,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) * Update: ERESTARTSYS breaks at least the xview clock binary, so * I'm trying ERESTARTNOHAND which restart only when you want to. */ -#define MAX_SELECT_SECONDS \ - ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) - int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec *end_time) { @@ -617,7 +643,6 @@ SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp, return ret; } -#ifdef HAVE_SET_RESTORE_SIGMASK static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize) @@ -689,7 +714,23 @@ SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp, return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize); } -#endif /* HAVE_SET_RESTORE_SIGMASK */ + +#ifdef __ARCH_WANT_SYS_OLD_SELECT +struct sel_arg_struct { + unsigned long n; + fd_set __user *inp, *outp, *exp; + struct timeval __user *tvp; +}; + +SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg) +{ + struct sel_arg_struct a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); +} +#endif struct poll_list { struct poll_list *next; @@ -704,9 +745,11 @@ struct poll_list { * interested in events matching the pollfd->events mask, and the result * matching that mask is both recorded in pollfd->revents and returned. The * pwait poll_table will be used by the fd-provided poll handler for waiting, - * if non-NULL. + * if pwait->_qproc is non-NULL. */ -static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) +static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait, + bool *can_busy_poll, + unsigned int busy_flag) { unsigned int mask; int fd; @@ -714,22 +757,20 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) mask = 0; fd = pollfd->fd; if (fd >= 0) { - int fput_needed; - struct file * file; - - file = fget_light(fd, &fput_needed); + struct fd f = fdget(fd); mask = POLLNVAL; - if (file != NULL) { + if (f.file) { mask = DEFAULT_POLLMASK; - if (file->f_op && file->f_op->poll) { - if (pwait) - pwait->key = pollfd->events | - POLLERR | POLLHUP; - mask = file->f_op->poll(file, pwait); + if (f.file->f_op->poll) { + pwait->_key = pollfd->events|POLLERR|POLLHUP; + pwait->_key |= busy_flag; + mask = f.file->f_op->poll(f.file, pwait); + if (mask & busy_flag) + *can_busy_poll = true; } /* Mask out unneeded events. */ mask &= pollfd->events | POLLERR | POLLHUP; - fput_light(file, fput_needed); + fdput(f); } } pollfd->revents = mask; @@ -744,18 +785,21 @@ static int do_poll(unsigned int nfds, struct poll_list *list, ktime_t expire, *to = NULL; int timed_out = 0, count = 0; unsigned long slack = 0; + unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; + unsigned long busy_end = 0; /* Optimise the no-wait case */ if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { - pt = NULL; + pt->_qproc = NULL; timed_out = 1; } if (end_time && !timed_out) - slack = estimate_accuracy(end_time); + slack = select_estimate_accuracy(end_time); for (;;) { struct poll_list *walk; + bool can_busy_loop = false; for (walk = list; walk != NULL; walk = walk->next) { struct pollfd * pfd, * pfd_end; @@ -765,22 +809,26 @@ static int do_poll(unsigned int nfds, struct poll_list *list, for (; pfd != pfd_end; pfd++) { /* * Fish for events. If we found one, record it - * and kill the poll_table, so we don't + * and kill poll_table->_qproc, so we don't * needlessly register any other waiters after * this. They'll get immediately deregistered * when we break out and return. */ - if (do_pollfd(pfd, pt)) { + if (do_pollfd(pfd, pt, &can_busy_loop, + busy_flag)) { count++; - pt = NULL; + pt->_qproc = NULL; + /* found something, stop busy polling */ + busy_flag = 0; + can_busy_loop = false; } } } /* * All waiters have already been registered, so don't provide - * a poll_table to them on the next loop iteration. + * a poll_table->_qproc to them on the next loop iteration. */ - pt = NULL; + pt->_qproc = NULL; if (!count) { count = wait->error; if (signal_pending(current)) @@ -789,6 +837,17 @@ static int do_poll(unsigned int nfds, struct poll_list *list, if (count || timed_out) break; + /* only if found POLL_BUSY_LOOP sockets && not out of time */ + if (can_busy_loop && !need_resched()) { + if (!busy_end) { + busy_end = busy_loop_end_time(); + continue; + } + if (!busy_loop_timeout(busy_end)) + continue; + } + busy_flag = 0; + /* * If this is the first loop and we have a timeout * given, then we convert to ktime_t and set the to @@ -821,7 +880,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, struct poll_list *walk = head; unsigned long todo = nfds; - if (nfds > current->signal->rlim[RLIMIT_NOFILE].rlim_cur) + if (nfds > rlimit(RLIMIT_NOFILE)) return -EINVAL; len = min_t(unsigned int, nfds, N_STACK_PPS); @@ -896,7 +955,7 @@ static long do_restart_poll(struct restart_block *restart_block) } SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, - long, timeout_msecs) + int, timeout_msecs) { struct timespec end_time, *to = NULL; int ret; @@ -929,7 +988,6 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, return ret; } -#ifdef HAVE_SET_RESTORE_SIGMASK SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, struct timespec __user *, tsp, const sigset_t __user *, sigmask, size_t, sigsetsize) @@ -980,4 +1038,3 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, return ret; } -#endif /* HAVE_SET_RESTORE_SIGMASK */ |
