aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-04-30 17:37:43 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-04-30 17:37:43 -0700
commit5f56886521d6ddd3648777fae44d82382dd8c87f (patch)
treeaa0db6331cdb01c23f1884439840aadd31bbcca4 /fs
parentf1e9a236e5ddab6c349611ee86f54291916f226c (diff)
parente2a8b0a779787314eca1061308a8182e6c5bfabd (diff)
Merge branch 'akpm' (incoming from Andrew)
Merge third batch of fixes from Andrew Morton: "Most of the rest. I still have two large patchsets against AIO and IPC, but they're a bit stuck behind other trees and I'm about to vanish for six days. - random fixlets - inotify - more of the MM queue - show_stack() cleanups - DMI update - kthread/workqueue things - compat cleanups - epoll udpates - binfmt updates - nilfs2 - hfs - hfsplus - ptrace - kmod - coredump - kexec - rbtree - pids - pidns - pps - semaphore tweaks - some w1 patches - relay updates - core Kconfig changes - sysrq tweaks" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (109 commits) Documentation/sysrq: fix inconstistent help message of sysrq key ethernet/emac/sysrq: fix inconstistent help message of sysrq key sparc/sysrq: fix inconstistent help message of sysrq key powerpc/xmon/sysrq: fix inconstistent help message of sysrq key ARM/etm/sysrq: fix inconstistent help message of sysrq key power/sysrq: fix inconstistent help message of sysrq key kgdb/sysrq: fix inconstistent help message of sysrq key lib/decompress.c: fix initconst notifier-error-inject: fix module names in Kconfig kernel/sys.c: make prctl(PR_SET_MM) generally available UAPI: remove empty Kbuild files menuconfig: print more info for symbol without prompts init/Kconfig: re-order CONFIG_EXPERT options to fix menuconfig display kconfig menu: move Virtualization drivers near other virtualization options Kconfig: consolidate CONFIG_DEBUG_STRICT_USER_COPY_CHECKS relay: use macro PAGE_ALIGN instead of FIX_SIZE kernel/relay.c: move FIX_SIZE macro into relay.c kernel/relay.c: remove unused function argument actor drivers/w1/slaves/w1_ds2760.c: fix the error handling in w1_ds2760_add_slave() drivers/w1/slaves/w1_ds2781.c: fix the error handling in w1_ds2781_add_slave() ...
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig.binfmt14
-rw-r--r--fs/Makefile5
-rw-r--r--fs/binfmt_aout.c1
-rw-r--r--fs/binfmt_elf.c6
-rw-r--r--fs/binfmt_elf_fdpic.c2
-rw-r--r--fs/binfmt_misc.c24
-rw-r--r--fs/block_dev.c8
-rw-r--r--fs/coredump.c84
-rw-r--r--fs/dcache.c4
-rw-r--r--fs/eventpoll.c128
-rw-r--r--fs/exec.c13
-rw-r--r--fs/fs-writeback.c1
-rw-r--r--fs/hfs/bfind.c10
-rw-r--r--fs/hfs/bitmap.c4
-rw-r--r--fs/hfs/bnode.c39
-rw-r--r--fs/hfs/brec.c19
-rw-r--r--fs/hfs/btree.c31
-rw-r--r--fs/hfs/catalog.c24
-rw-r--r--fs/hfs/dir.c20
-rw-r--r--fs/hfs/extent.c68
-rw-r--r--fs/hfs/hfs_fs.h22
-rw-r--r--fs/hfs/inode.c15
-rw-r--r--fs/hfs/mdb.c23
-rw-r--r--fs/hfs/super.c47
-rw-r--r--fs/hfsplus/attributes.c26
-rw-r--r--fs/hfsplus/bfind.c14
-rw-r--r--fs/hfsplus/bitmap.c13
-rw-r--r--fs/hfsplus/bnode.c36
-rw-r--r--fs/hfsplus/brec.c14
-rw-r--r--fs/hfsplus/btree.c29
-rw-r--r--fs/hfsplus/catalog.c11
-rw-r--r--fs/hfsplus/dir.c14
-rw-r--r--fs/hfsplus/extents.c53
-rw-r--r--fs/hfsplus/hfsplus_fs.h20
-rw-r--r--fs/hfsplus/inode.c4
-rw-r--r--fs/hfsplus/options.c22
-rw-r--r--fs/hfsplus/super.c56
-rw-r--r--fs/hfsplus/wrapper.c8
-rw-r--r--fs/hfsplus/xattr.c41
-rw-r--r--fs/nilfs2/inode.c17
-rw-r--r--fs/nilfs2/mdt.c19
-rw-r--r--fs/nilfs2/page.c70
-rw-r--r--fs/nilfs2/page.h3
-rw-r--r--fs/notify/inotify/inotify_user.c6
-rw-r--r--fs/proc/base.c5
45 files changed, 652 insertions, 441 deletions
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 0efd1524b97..370b24cee4d 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -65,6 +65,20 @@ config CORE_DUMP_DEFAULT_ELF_HEADERS
This config option changes the default setting of coredump_filter
seen at boot time. If unsure, say Y.
+config BINFMT_SCRIPT
+ tristate "Kernel support for scripts starting with #!"
+ default y
+ help
+ Say Y here if you want to execute interpreted scripts starting with
+ #! followed by the path to an interpreter.
+
+ You can build this support as a module; however, until that module
+ gets loaded, you cannot run scripts. Thus, if you want to load this
+ module from an initramfs, the portion of the initramfs before loading
+ this module must consist of compiled binaries only.
+
+ Most systems will not boot if you say M or N here. If unsure, say Y.
+
config BINFMT_FLAT
bool "Kernel support for flat binaries"
depends on !MMU && (!FRV || BROKEN)
diff --git a/fs/Makefile b/fs/Makefile
index 3b2c76759ec..5e67e57b59d 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -34,10 +34,7 @@ obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
-
-# binfmt_script is always there
-obj-y += binfmt_script.o
-
+obj-$(CONFIG_BINFMT_SCRIPT) += binfmt_script.o
obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o
obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o
obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index bbc8f8827ea..02fe378fc50 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -62,7 +62,6 @@ static int aout_core_dump(struct coredump_params *cprm)
fs = get_fs();
set_fs(KERNEL_DS);
has_dumped = 1;
- current->flags |= PF_DUMPCORE;
strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
dump.u_ar0 = offsetof(struct user, regs);
dump.signal = cprm->siginfo->si_signo;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 86af964c242..34a9771eaa6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -803,7 +803,8 @@ static int load_elf_binary(struct linux_binprm *bprm)
* follow the loader, and is not movable. */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
/* Memory randomization might have been switched off
- * in runtime via sysctl.
+ * in runtime via sysctl or explicit setting of
+ * personality flags.
* If that is the case, retain the original non-zero
* load_bias value in order to establish proper
* non-randomized mappings.
@@ -2091,8 +2092,7 @@ static int elf_core_dump(struct coredump_params *cprm)
goto cleanup;
has_dumped = 1;
- current->flags |= PF_DUMPCORE;
-
+
fs = get_fs();
set_fs(KERNEL_DS);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 9c13e023e2b..c1cc06aed60 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1687,8 +1687,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
fill_elf_fdpic_header(elf, e_phnum);
has_dumped = 1;
- current->flags |= PF_DUMPCORE;
-
/*
* Set up the notes in similar form to SVR4 core dumps made
* with info from their /proc.
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 751df5e4f61..1c740e152f3 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -23,6 +23,7 @@
#include <linux/binfmts.h>
#include <linux/slab.h>
#include <linux/ctype.h>
+#include <linux/string_helpers.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/namei.h>
@@ -234,24 +235,6 @@ static char *scanarg(char *s, char del)
return s;
}
-static int unquote(char *from)
-{
- char c = 0, *s = from, *p = from;
-
- while ((c = *s++) != '\0') {
- if (c == '\\' && *s == 'x') {
- s++;
- c = toupper(*s++);
- *p = (c - (isdigit(c) ? '0' : 'A' - 10)) << 4;
- c = toupper(*s++);
- *p++ |= c - (isdigit(c) ? '0' : 'A' - 10);
- continue;
- }
- *p++ = c;
- }
- return p - from;
-}
-
static char * check_special_flags (char * sfs, Node * e)
{
char * p = sfs;
@@ -354,8 +337,9 @@ static Node *create_entry(const char __user *buffer, size_t count)
p[-1] = '\0';
if (!e->mask[0])
e->mask = NULL;
- e->size = unquote(e->magic);
- if (e->mask && unquote(e->mask) != e->size)
+ e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX);
+ if (e->mask &&
+ string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size)
goto Einval;
if (e->size + e->offset > BINPRM_BUF_SIZE)
goto Einval;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index aae187a7f94..ce08de7467a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -617,11 +617,9 @@ void bd_forget(struct inode *inode)
struct block_device *bdev = NULL;
spin_lock(&bdev_lock);
- if (inode->i_bdev) {
- if (!sb_is_blkdev_sb(inode->i_sb))
- bdev = inode->i_bdev;
- __bd_forget(inode);
- }
+ if (!sb_is_blkdev_sb(inode->i_sb))
+ bdev = inode->i_bdev;
+ __bd_forget(inode);
spin_unlock(&bdev_lock);
if (bdev)
diff --git a/fs/coredump.c b/fs/coredump.c
index c6479658d48..ec306cc9a28 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -263,7 +263,6 @@ static int zap_process(struct task_struct *start, int exit_code)
struct task_struct *t;
int nr = 0;
- start->signal->flags = SIGNAL_GROUP_EXIT;
start->signal->group_exit_code = exit_code;
start->signal->group_stop_count = 0;
@@ -280,8 +279,8 @@ static int zap_process(struct task_struct *start, int exit_code)
return nr;
}
-static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
- struct core_state *core_state, int exit_code)
+static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
+ struct core_state *core_state, int exit_code)
{
struct task_struct *g, *p;
unsigned long flags;
@@ -291,11 +290,16 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
if (!signal_group_exit(tsk->signal)) {
mm->core_state = core_state;
nr = zap_process(tsk, exit_code);
+ tsk->signal->group_exit_task = tsk;
+ /* ignore all signals except SIGKILL, see prepare_signal() */
+ tsk->signal->flags = SIGNAL_GROUP_COREDUMP;
+ clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
}
spin_unlock_irq(&tsk->sighand->siglock);
if (unlikely(nr < 0))
return nr;
+ tsk->flags = PF_DUMPCORE;
if (atomic_read(&mm->mm_users) == nr + 1)
goto done;
/*
@@ -340,6 +344,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
if (unlikely(p->mm == mm)) {
lock_task_sighand(p, &flags);
nr += zap_process(p, exit_code);
+ p->signal->flags = SIGNAL_GROUP_EXIT;
unlock_task_sighand(p, &flags);
}
break;
@@ -386,11 +391,18 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
return core_waiters;
}
-static void coredump_finish(struct mm_struct *mm)
+static void coredump_finish(struct mm_struct *mm, bool core_dumped)
{
struct core_thread *curr, *next;
struct task_struct *task;
+ spin_lock_irq(&current->sighand->siglock);
+ if (core_dumped && !__fatal_signal_pending(current))
+ current->signal->group_exit_code |= 0x80;
+ current->signal->group_exit_task = NULL;
+ current->signal->flags = SIGNAL_GROUP_EXIT;
+ spin_unlock_irq(&current->sighand->siglock);
+
next = mm->core_state->dumper.next;
while ((curr = next) != NULL) {
next = curr->next;
@@ -407,6 +419,17 @@ static void coredump_finish(struct mm_struct *mm)
mm->core_state = NULL;
}
+static bool dump_interrupted(void)
+{
+ /*
+ * SIGKILL or freezing() interrupt the coredumping. Perhaps we
+ * can do try_to_freeze() and check __fatal_signal_pending(),
+ * but then we need to teach dump_write() to restart and clear
+ * TIF_SIGPENDING.
+ */
+ return signal_pending(current);
+}
+
static void wait_for_dump_helpers(struct file *file)
{
struct pipe_inode_info *pipe;
@@ -416,17 +439,20 @@ static void wait_for_dump_helpers(struct file *file)
pipe_lock(pipe);
pipe->readers++;
pipe->writers--;
+ wake_up_interruptible_sync(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ pipe_unlock(pipe);
- while ((pipe->readers > 1) && (!signal_pending(current))) {
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- pipe_wait(pipe);
- }
+ /*
+ * We actually want wait_event_freezable() but then we need
+ * to clear TIF_SIGPENDING and improve dump_interrupted().
+ */
+ wait_event_interruptible(pipe->wait, pipe->readers == 1);
+ pipe_lock(pipe);
pipe->readers--;
pipe->writers++;
pipe_unlock(pipe);
-
}
/*
@@ -471,6 +497,7 @@ void do_coredump(siginfo_t *siginfo)
int ispipe;
struct files_struct *displaced;
bool need_nonrelative = false;
+ bool core_dumped = false;
static atomic_t core_dump_count = ATOMIC_INIT(0);
struct coredump_params cprm = {
.siginfo = siginfo,
@@ -514,17 +541,12 @@ void do_coredump(siginfo_t *siginfo)
old_cred = override_creds(cred);
- /*
- * Clear any false indication of pending signals that might
- * be seen by the filesystem code called to write the core file.
- */
- clear_thread_flag(TIF_SIGPENDING);
-
ispipe = format_corename(&cn, &cprm);
- if (ispipe) {
+ if (ispipe) {
int dump_count;
char **helper_argv;
+ struct subprocess_info *sub_info;
if (ispipe < 0) {
printk(KERN_WARNING "format_corename failed\n");
@@ -571,15 +593,20 @@ void do_coredump(siginfo_t *siginfo)
goto fail_dropcount;
}
- retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
- NULL, UMH_WAIT_EXEC, umh_pipe_setup,
- NULL, &cprm);
+ retval = -ENOMEM;
+ sub_info = call_usermodehelper_setup(helper_argv[0],
+ helper_argv, NULL, GFP_KERNEL,
+ umh_pipe_setup, NULL, &cprm);
+ if (sub_info)
+ retval = call_usermodehelper_exec(sub_info,
+ UMH_WAIT_EXEC);
+
argv_free(helper_argv);
if (retval) {
- printk(KERN_INFO "Core dump to %s pipe failed\n",
+ printk(KERN_INFO "Core dump to %s pipe failed\n",
cn.corename);
goto close_fail;
- }
+ }
} else {
struct inode *inode;
@@ -629,9 +656,7 @@ void do_coredump(siginfo_t *siginfo)
goto close_fail;
if (displaced)
put_files_struct(displaced);
- retval = binfmt->core_dump(&cprm);
- if (retval)
- current->signal->group_exit_code |= 0x80;
+ core_dumped = !dump_interrupted() && binfmt->core_dump(&cprm);
if (ispipe && core_pipe_limit)
wait_for_dump_helpers(cprm.file);
@@ -644,7 +669,7 @@ fail_dropcount:
fail_unlock:
kfree(cn.corename);
fail_corename:
- coredump_finish(mm);
+ coredump_finish(mm, core_dumped);
revert_creds(old_cred);
fail_creds:
put_cred(cred);
@@ -659,7 +684,9 @@ fail:
*/
int dump_write(struct file *file, const void *addr, int nr)
{
- return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+ return !dump_interrupted() &&
+ access_ok(VERIFY_READ, addr, nr) &&
+ file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}
EXPORT_SYMBOL(dump_write);
@@ -668,7 +695,8 @@ int dump_seek(struct file *file, loff_t off)
int ret = 1;
if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
- if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
+ if (dump_interrupted() ||
+ file->f_op->llseek(file, off, SEEK_CUR) < 0)
return 0;
} else {
char *buf = (char *)get_zeroed_page(GFP_KERNEL);
diff --git a/fs/dcache.c b/fs/dcache.c
index e8bc3420d63..e689268046c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1230,8 +1230,10 @@ void shrink_dcache_parent(struct dentry * parent)
LIST_HEAD(dispose);
int found;
- while ((found = select_parent(parent, &dispose)) != 0)
+ while ((found = select_parent(parent, &dispose)) != 0) {
shrink_dentry_list(&dispose);
+ cond_resched();
+ }
}
EXPORT_SYMBOL(shrink_dcache_parent);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9fec1836057..277cc38aeda 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -104,7 +104,7 @@
struct epoll_filefd {
struct file *file;
int fd;
-};
+} __packed;
/*
* Structure used to track possible nested calls, for too deep recursions
@@ -128,6 +128,8 @@ struct nested_calls {
/*
* Each file descriptor added to the eventpoll interface will
* have an entry of this type linked to the "rbr" RB tree.
+ * Avoid increasing the size of this struct, there can be many thousands
+ * of these on a server and we do not want this to take another cache line.
*/
struct epitem {
/* RB tree node used to link this structure to the eventpoll RB tree */
@@ -158,7 +160,7 @@ struct epitem {
struct list_head fllink;
/* wakeup_source used when EPOLLWAKEUP is set */
- struct wakeup_source *ws;
+ struct wakeup_source __rcu *ws;
/* The structure that describe the interested events and the source fd */
struct epoll_event event;
@@ -536,6 +538,38 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
}
}
+/* call only when ep->mtx is held */
+static inline struct wakeup_source *ep_wakeup_source(struct epitem *epi)
+{
+ return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx));
+}
+
+/* call only when ep->mtx is held */
+static inline void ep_pm_stay_awake(struct epitem *epi)
+{
+ struct wakeup_source *ws = ep_wakeup_source(epi);
+
+ if (ws)
+ __pm_stay_awake(ws);
+}
+
+static inline bool ep_has_wakeup_source(struct epitem *epi)
+{
+ return rcu_access_pointer(epi->ws) ? true : false;
+}
+
+/* call when ep->mtx cannot be held (ep_poll_callback) */
+static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
+{
+ struct wakeup_source *ws;
+
+ rcu_read_lock();
+ ws = rcu_dereference(epi->ws);
+ if (ws)
+ __pm_stay_awake(ws);
+ rcu_read_unlock();
+}
+
/**
* ep_scan_ready_list - Scans the ready list in a way that makes possible for
* the scan code, to call f_op->poll(). Also allows for
@@ -599,7 +633,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
*/
if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
- __pm_stay_awake(epi->ws);
+ ep_pm_stay_awake(epi);
}
}
/*
@@ -668,7 +702,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
list_del_init(&epi->rdllink);
spin_unlock_irqrestore(&ep->lock, flags);
- wakeup_source_unregister(epi->ws);
+ wakeup_source_unregister(ep_wakeup_source(epi));
/* At this point it is safe to free the eventpoll item */
kmem_cache_free(epi_cache, epi);
@@ -711,11 +745,15 @@ static void ep_free(struct eventpoll *ep)
* point we are sure no poll callbacks will be lingering around, and also by
* holding "epmutex" we can be sure that no file cleanup code will hit
* us during this operation. So we can avoid the lock on "ep->lock".
+ * We do not need to lock ep->mtx, either, we only do it to prevent
+ * a lockdep warning.
*/
+ mutex_lock(&ep->mtx);
while ((rbp = rb_first(&ep->rbr)) != NULL) {
epi = rb_entry(rbp, struct epitem, rbn);
ep_remove(ep, epi);
}
+ mutex_unlock(&ep->mtx);
mutex_unlock(&epmutex);
mutex_destroy(&ep->mtx);
@@ -734,6 +772,13 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file)
return 0;
}
+static inline unsigned int ep_item_poll(struct epitem *epi, poll_table *pt)
+{
+ pt->_key = epi->event.events;
+
+ return epi->ffd.file->f_op->poll(epi->ffd.file, pt) & epi->event.events;
+}
+
static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
void *priv)
{
@@ -741,10 +786,9 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
poll_table pt;
init_poll_funcptr(&pt, NULL);
+
list_for_each_entry_safe(epi, tmp, head, rdllink) {
- pt._key = epi->event.events;
- if (epi->ffd.file->f_op->poll(epi->ffd.file, &pt) &
- epi->event.events)
+ if (ep_item_poll(epi, &pt))
return POLLIN | POLLRDNORM;
else {
/*
@@ -752,7 +796,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
* callback, but it's not actually ready, as far as
* caller requested events goes. We can remove it here.
*/
- __pm_relax(epi->ws);
+ __pm_relax(ep_wakeup_source(epi));
list_del_init(&epi->rdllink);
}
}
@@ -984,7 +1028,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
/* If this file is already in the ready list we exit soon */
if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
- __pm_stay_awake(epi->ws);
+ ep_pm_stay_awake_rcu(epi);
}
/*
@@ -1146,6 +1190,7 @@ static int reverse_path_check(void)
static int ep_create_wakeup_source(struct epitem *epi)
{
const char *name;
+ struct wakeup_source *ws;
if (!epi->ep->ws) {
epi->ep->ws = wakeup_source_register("eventpoll");
@@ -1154,17 +1199,29 @@ static int ep_create_wakeup_source(struct epitem *epi)
}
name = epi->ffd.file->f_path.dentry->d_name.name;
- epi->ws = wakeup_source_register(name);
- if (!epi->ws)
+ ws = wakeup_source_register(name);
+
+ if (!ws)
return -ENOMEM;
+ rcu_assign_pointer(epi->ws, ws);
return 0;
}
-static void ep_destroy_wakeup_source(struct epitem *epi)
+/* rare code path, only used when EPOLL_CTL_MOD removes a wakeup source */
+static noinline void ep_destroy_wakeup_source(struct epitem *epi)
{
- wakeup_source_unregister(epi->ws);
- epi->ws = NULL;
+ struct wakeup_source *ws = ep_wakeup_source(epi);
+
+ RCU_INIT_POINTER(epi->ws, NULL);
+
+ /*
+ * wait for ep_pm_stay_awake_rcu to finish, synchronize_rcu is
+ * used internally by wakeup_source_remove, too (called by
+ * wakeup_source_unregister), so we cannot use call_rcu
+ */
+ synchronize_rcu();
+ wakeup_source_unregister(ws);
}
/*
@@ -1199,13 +1256,12 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
if (error)
goto error_create_wakeup_source;
} else {
- epi->ws = NULL;
+ RCU_INIT_POINTER(epi->ws, NULL);
}
/* Initialize the poll table using the queue callback */
epq.epi = epi;
init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
- epq.pt._key = event->events;
/*
* Attach the item to the poll hooks and get current event bits.
@@ -1214,7 +1270,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
* this operation completes, the poll callback can start hitting
* the new item.
*/
- revents = tfile->f_op->poll(tfile, &epq.pt);
+ revents = ep_item_poll(epi, &epq.pt);
/*
* We have to check if something went wrong during the poll wait queue
@@ -1247,7 +1303,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* If the file is already "ready" we drop it inside the ready list */
if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
- __pm_stay_awake(epi->ws);
+ ep_pm_stay_awake(epi);
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
@@ -1288,7 +1344,7 @@ error_unregister:
list_del_init(&epi->rdllink);
spin_unlock_irqrestore(&ep->lock, flags);
- wakeup_source_unregister(epi->ws);
+ wakeup_source_unregister(ep_wakeup_source(epi));
error_create_wakeup_source:
kmem_cache_free(epi_cache, epi);
@@ -1314,12 +1370,11 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
* f_op->poll() call and the new event set registering.
*/
epi->event.events = event->events; /* need barrier below */
- pt._key = event->events;
epi->event.data = event->data; /* protected by mtx */
if (epi->event.events & EPOLLWAKEUP) {
- if (!epi->ws)
+ if (!ep_has_wakeup_source(epi))
ep_create_wakeup_source(epi);
- } else if (epi->ws) {
+ } else if (ep_has_wakeup_source(epi)) {
ep_destroy_wakeup_source(epi);
}
@@ -1347,7 +1402,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
* Get current event bits. We can safely use the file* here because
* its usage count has been increased by the caller of this function.
*/
- revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt);
+ revents = ep_item_poll(epi, &pt);
/*
* If the item is "hot" and it is not registered inside the ready
@@ -1357,7 +1412,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
spin_lock_irq(&ep->lock);
if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
- __pm_stay_awake(epi->ws);
+ ep_pm_stay_awake(epi);
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
@@ -1383,6 +1438,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
unsigned int revents;
struct epitem *epi;
struct epoll_event __user *uevent;
+ struct wakeup_source *ws;
poll_table pt;
init_poll_funcptr(&pt, NULL);
@@ -1405,14 +1461,16 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
* instead, but then epi->ws would temporarily be out of sync
* with ep_is_linked().
*/
- if (epi->ws && epi->ws->active)
- __pm_stay_awake(ep->ws);
- __pm_relax(epi->ws);
+ ws = ep_wakeup_source(epi);
+ if (ws) {
+ if (ws->active)
+ __pm_stay_awake(ep->ws);
+ __pm_relax(ws);
+ }
+
list_del_init(&epi->rdllink);
- pt._key = epi->event.events;
- revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt) &
- epi->event.events;
+ revents = ep_item_poll(epi, &pt);
/*
* If the event mask intersect the caller-requested one,
@@ -1424,7 +1482,7 @@ static int ep_send_events_proc(struct ev