aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/async.c27
-rw-r--r--kernel/compat.c23
-rw-r--r--kernel/context_tracking.c75
-rw-r--r--kernel/debug/kdb/kdb_main.c2
-rw-r--r--kernel/events/core.c20
-rw-r--r--kernel/fork.c6
-rw-r--r--kernel/module.c154
-rw-r--r--kernel/pid.c2
-rw-r--r--kernel/printk.c9
-rw-r--r--kernel/ptrace.c74
-rw-r--r--kernel/rcu.h7
-rw-r--r--kernel/rcupdate.c60
-rw-r--r--kernel/rcutiny.c8
-rw-r--r--kernel/rcutiny_plugin.h56
-rw-r--r--kernel/rcutorture.c66
-rw-r--r--kernel/rcutree.c260
-rw-r--r--kernel/rcutree.h11
-rw-r--r--kernel/rcutree_plugin.h13
-rw-r--r--kernel/sched/core.c3
-rw-r--r--kernel/sched/debug.c4
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/sched/rt.c2
-rw-r--r--kernel/signal.c24
-rw-r--r--kernel/smp.c13
-rw-r--r--kernel/srcu.c37
-rw-r--r--kernel/trace/ftrace.c2
-rw-r--r--kernel/trace/trace_clock.c1
27 files changed, 714 insertions, 247 deletions
diff --git a/kernel/async.c b/kernel/async.c
index a1d585c351d..6f34904a0b5 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -86,18 +86,27 @@ static atomic_t entry_count;
*/
static async_cookie_t __lowest_in_progress(struct async_domain *running)
{
+ async_cookie_t first_running = next_cookie; /* infinity value */
+ async_cookie_t first_pending = next_cookie; /* ditto */
struct async_entry *entry;
+ /*
+ * Both running and pending lists are sorted but not disjoint.
+ * Take the first cookies from both and return the min.
+ */
if (!list_empty(&running->domain)) {
entry = list_first_entry(&running->domain, typeof(*entry), list);
- return entry->cookie;
+ first_running = entry->cookie;
}
- list_for_each_entry(entry, &async_pending, list)
- if (entry->running == running)
- return entry->cookie;
+ list_for_each_entry(entry, &async_pending, list) {
+ if (entry->running == running) {
+ first_pending = entry->cookie;
+ break;
+ }
+ }
- return next_cookie; /* "infinity" value */
+ return min(first_running, first_pending);
}
static async_cookie_t lowest_in_progress(struct async_domain *running)
@@ -118,13 +127,17 @@ static void async_run_entry_fn(struct work_struct *work)
{
struct async_entry *entry =
container_of(work, struct async_entry, work);
+ struct async_entry *pos;
unsigned long flags;
ktime_t uninitialized_var(calltime), delta, rettime;
struct async_domain *running = entry->running;
- /* 1) move self to the running queue */
+ /* 1) move self to the running queue, make sure it stays sorted */
spin_lock_irqsave(&async_lock, flags);
- list_move_tail(&entry->list, &running->domain);
+ list_for_each_entry_reverse(pos, &running->domain, list)
+ if (entry->cookie < pos->cookie)
+ break;
+ list_move_tail(&entry->list, &pos->list);
spin_unlock_irqrestore(&async_lock, flags);
/* 2) run (and print duration) */
diff --git a/kernel/compat.c b/kernel/compat.c
index f6150e92dfc..36700e9e2be 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -535,9 +535,11 @@ asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru)
return 0;
}
-asmlinkage long
-compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,
- struct compat_rusage __user *ru)
+COMPAT_SYSCALL_DEFINE4(wait4,
+ compat_pid_t, pid,
+ compat_uint_t __user *, stat_addr,
+ int, options,
+ struct compat_rusage __user *, ru)
{
if (!ru) {
return sys_wait4(pid, stat_addr, options, NULL);
@@ -564,9 +566,10 @@ compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,
}
}
-asmlinkage long compat_sys_waitid(int which, compat_pid_t pid,
- struct compat_siginfo __user *uinfo, int options,
- struct compat_rusage __user *uru)
+COMPAT_SYSCALL_DEFINE5(waitid,
+ int, which, compat_pid_t, pid,
+ struct compat_siginfo __user *, uinfo, int, options,
+ struct compat_rusage __user *, uru)
{
siginfo_t info;
struct rusage ru;
@@ -584,7 +587,11 @@ asmlinkage long compat_sys_waitid(int which, compat_pid_t pid,
return ret;
if (uru) {
- ret = put_compat_rusage(&ru, uru);
+ /* sys_waitid() overwrites everything in ru */
+ if (COMPAT_USE_64BIT_TIME)
+ ret = copy_to_user(uru, &ru, sizeof(ru));
+ else
+ ret = put_compat_rusage(&ru, uru);
if (ret)
return ret;
}
@@ -994,7 +1001,7 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
sigset_from_compat(&s, &s32);
if (uts) {
- if (get_compat_timespec(&t, uts))
+ if (compat_get_timespec(&t, uts))
return -EFAULT;
}
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index e0e07fd5550..d566aba7e80 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -1,3 +1,19 @@
+/*
+ * Context tracking: Probe on high level context boundaries such as kernel
+ * and userspace. This includes syscalls and exceptions entry/exit.
+ *
+ * This is used by RCU to remove its dependency on the timer tick while a CPU
+ * runs in userspace.
+ *
+ * Started by Frederic Weisbecker:
+ *
+ * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
+ *
+ * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
+ * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
+ *
+ */
+
#include <linux/context_tracking.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
@@ -6,8 +22,8 @@
struct context_tracking {
/*
- * When active is false, hooks are not set to
- * minimize overhead: TIF flags are cleared
+ * When active is false, probes are unset in order
+ * to minimize overhead: TIF flags are cleared
* and calls to user_enter/exit are ignored. This
* may be further optimized using static keys.
*/
@@ -24,6 +40,15 @@ static DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
#endif
};
+/**
+ * user_enter - Inform the context tracking that the CPU is going to
+ * enter userspace mode.
+ *
+ * This function must be called right before we switch from the kernel
+ * to userspace, when it's guaranteed the remaining kernel instructions
+ * to execute won't use any RCU read side critical section because this
+ * function sets RCU in extended quiescent state.
+ */
void user_enter(void)
{
unsigned long flags;
@@ -39,40 +64,70 @@ void user_enter(void)
if (in_interrupt())
return;
+ /* Kernel threads aren't supposed to go to userspace */
WARN_ON_ONCE(!current->mm);
local_irq_save(flags);
if (__this_cpu_read(context_tracking.active) &&
__this_cpu_read(context_tracking.state) != IN_USER) {
__this_cpu_write(context_tracking.state, IN_USER);
+ /*
+ * At this stage, only low level arch entry code remains and
+ * then we'll run in userspace. We can assume there won't be
+ * any RCU read-side critical section until the next call to
+ * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
+ * on the tick.
+ */
rcu_user_enter();
}
local_irq_restore(flags);
}
+
+/**
+ * user_exit - Inform the context tracking that the CPU is
+ * exiting userspace mode and entering the kernel.
+ *
+ * This function must be called after we entered the kernel from userspace
+ * before any use of RCU read side critical section. This potentially include
+ * any high level kernel code like syscalls, exceptions, signal handling, etc...
+ *
+ * This call supports re-entrancy. This way it can be called from any exception
+ * handler without needing to know if we came from userspace or not.
+ */
void user_exit(void)
{
unsigned long flags;
- /*
- * Some contexts may involve an exception occuring in an irq,
- * leading to that nesting:
- * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
- * This would mess up the dyntick_nesting count though. And rcu_irq_*()
- * helpers are enough to protect RCU uses inside the exception. So
- * just return immediately if we detect we are in an IRQ.
- */
if (in_interrupt())
return;
local_irq_save(flags);
if (__this_cpu_read(context_tracking.state) == IN_USER) {
__this_cpu_write(context_tracking.state, IN_KERNEL);
+ /*
+ * We are going to run code that may use RCU. Inform
+ * RCU core about that (ie: we may need the tick again).
+ */
rcu_user_exit();
}
local_irq_restore(flags);
}
+
+/**
+ * context_tracking_task_switch - context switch the syscall callbacks
+ * @prev: the task that is being switched out
+ * @next: the task that is being switched in
+ *
+ * The context tracking uses the syscall slow path to implement its user-kernel
+ * boundaries probes on syscalls. This way it doesn't impact the syscall fast
+ * path on CPUs that don't do context tracking.
+ *
+ * But we need to clear the flag on the previous task because it may later
+ * migrate to some CPU that doesn't do the context tracking. As such the TIF
+ * flag may not be desired there.
+ */
void context_tracking_task_switch(struct task_struct *prev,
struct task_struct *next)
{
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 4d5f8d5612f..8875254120b 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1970,6 +1970,8 @@ static int kdb_lsmod(int argc, const char **argv)
kdb_printf("Module Size modstruct Used by\n");
list_for_each_entry(mod, kdb_modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
kdb_printf("%-20s%8u 0x%p ", mod->name,
mod->core_size, (void *)mod);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 301079d06f2..7b6646a8c06 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -908,6 +908,15 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
}
/*
+ * Initialize event state based on the perf_event_attr::disabled.
+ */
+static inline void perf_event__state_init(struct perf_event *event)
+{
+ event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF :
+ PERF_EVENT_STATE_INACTIVE;
+}
+
+/*
* Called at perf_event creation and when events are attached/detached from a
* group.
*/
@@ -6179,8 +6188,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
event->overflow_handler = overflow_handler;
event->overflow_handler_context = context;
- if (attr->disabled)
- event->state = PERF_EVENT_STATE_OFF;
+ perf_event__state_init(event);
pmu = NULL;
@@ -6609,9 +6617,17 @@ SYSCALL_DEFINE5(perf_event_open,
mutex_lock(&gctx->mutex);
perf_remove_from_context(group_leader);
+
+ /*
+ * Removing from the context ends up with disabled
+ * event. What we want here is event in the initial
+ * startup state, ready to be add into new context.
+ */
+ perf_event__state_init(group_leader);
list_for_each_entry(sibling, &group_leader->sibling_list,
group_entry) {
perf_remove_from_context(sibling);
+ perf_event__state_init(sibling);
put_ctx(gctx);
}
mutex_unlock(&gctx->mutex);
diff --git a/kernel/fork.c b/kernel/fork.c
index 65ca6d27f24..c535f33bbb9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1668,8 +1668,10 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int, tls_val)
#endif
{
- return do_fork(clone_flags, newsp, 0,
- parent_tidptr, child_tidptr);
+ long ret = do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
+ asmlinkage_protect(5, ret, clone_flags, newsp,
+ parent_tidptr, child_tidptr, tls_val);
+ return ret;
}
#endif
diff --git a/kernel/module.c b/kernel/module.c
index b10b048367e..eab08274ec9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -188,6 +188,7 @@ struct load_info {
ongoing or failed initialization etc. */
static inline int strong_try_module_get(struct module *mod)
{
+ BUG_ON(mod && mod->state == MODULE_STATE_UNFORMED);
if (mod && mod->state == MODULE_STATE_COMING)
return -EBUSY;
if (try_module_get(mod))
@@ -343,6 +344,9 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
#endif
};
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
+
if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))
return true;
}
@@ -450,16 +454,24 @@ const struct kernel_symbol *find_symbol(const char *name,
EXPORT_SYMBOL_GPL(find_symbol);
/* Search for module by name: must hold module_mutex. */
-struct module *find_module(const char *name)
+static struct module *find_module_all(const char *name,
+ bool even_unformed)
{
struct module *mod;
list_for_each_entry(mod, &modules, list) {
+ if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
+ continue;
if (strcmp(mod->name, name) == 0)
return mod;
}
return NULL;
}
+
+struct module *find_module(const char *name)
+{
+ return find_module_all(name, false);
+}
EXPORT_SYMBOL_GPL(find_module);
#ifdef CONFIG_SMP
@@ -525,6 +537,8 @@ bool is_module_percpu_address(unsigned long addr)
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
if (!mod->percpu_size)
continue;
for_each_possible_cpu(cpu) {
@@ -1048,6 +1062,8 @@ static ssize_t show_initstate(struct module_attribute *mattr,
case MODULE_STATE_GOING:
state = "going";
break;
+ default:
+ BUG();
}
return sprintf(buffer, "%s\n", state);
}
@@ -1786,6 +1802,8 @@ void set_all_modules_text_rw(void)
mutex_lock(&module_mutex);
list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
if ((mod->module_core) && (mod->core_text_size)) {
set_page_attributes(mod->module_core,
mod->module_core + mod->core_text_size,
@@ -1807,6 +1825,8 @@ void set_all_modules_text_ro(void)
mutex_lock(&module_mutex);
list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
if ((mod->module_core) && (mod->core_text_size)) {
set_page_attributes(mod->module_core,
mod->module_core + mod->core_text_size,
@@ -2527,6 +2547,13 @@ static int copy_module_from_fd(int fd, struct load_info *info)
err = -EFBIG;
goto out;
}
+
+ /* Don't hand 0 to vmalloc, it whines. */
+ if (stat.size == 0) {
+ err = -EINVAL;
+ goto out;
+ }
+
info->hdr = vmalloc(stat.size);
if (!info->hdr) {
err = -ENOMEM;
@@ -2990,8 +3017,9 @@ static bool finished_loading(const char *name)
bool ret;
mutex_lock(&module_mutex);
- mod = find_module(name);
- ret = !mod || mod->state != MODULE_STATE_COMING;
+ mod = find_module_all(name, true);
+ ret = !mod || mod->state == MODULE_STATE_LIVE
+ || mod->state == MODULE_STATE_GOING;
mutex_unlock(&module_mutex);
return ret;
@@ -3136,6 +3164,32 @@ static int load_module(struct load_info *info, const char __user *uargs,
goto free_copy;
}
+ /*
+ * We try to place it in the list now to make sure it's unique
+ * before we dedicate too many resources. In particular,
+ * temporary percpu memory exhaustion.
+ */
+ mod->state = MODULE_STATE_UNFORMED;
+again:
+ mutex_lock(&module_mutex);
+ if ((old = find_module_all(mod->name, true)) != NULL) {
+ if (old->state == MODULE_STATE_COMING
+ || old->state == MODULE_STATE_UNFORMED) {
+ /* Wait in case it fails to load. */
+ mutex_unlock(&module_mutex);
+ err = wait_event_interruptible(module_wq,
+ finished_loading(mod->name));
+ if (err)
+ goto free_module;
+ goto again;
+ }
+ err = -EEXIST;
+ mutex_unlock(&module_mutex);
+ goto free_module;
+ }
+ list_add_rcu(&mod->list, &modules);
+ mutex_unlock(&module_mutex);
+
#ifdef CONFIG_MODULE_SIG
mod->sig_ok = info->sig_ok;
if (!mod->sig_ok)
@@ -3145,7 +3199,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
/* Now module is in final location, initialize linked lists, etc. */
err = module_unload_init(mod);
if (err)
- goto free_module;
+ goto unlink_mod;
/* Now we've got everything in the final locations, we can
* find optional sections. */
@@ -3180,54 +3234,33 @@ static int load_module(struct load_info *info, const char __user *uargs,
goto free_arch_cleanup;
}
- /* Mark state as coming so strong_try_module_get() ignores us. */
- mod->state = MODULE_STATE_COMING;
-
- /* Now sew it into the lists so we can get lockdep and oops
- * info during argument parsing. No one should access us, since
- * strong_try_module_get() will fail.
- * lockdep/oops can run asynchronous, so use the RCU list insertion
- * function to insert in a way safe to concurrent readers.
- * The mutex protects against concurrent writers.
- */
-again:
- mutex_lock(&module_mutex);
- if ((old = find_module(mod->name)) != NULL) {
- if (old->state == MODULE_STATE_COMING) {
- /* Wait in case it fails to load. */
- mutex_unlock(&module_mutex);
- err = wait_event_interruptible(module_wq,
- finished_loading(mod->name));
- if (err)
- goto free_arch_cleanup;
- goto again;
- }
- err = -EEXIST;
- goto unlock;
- }
-
- /* This has to be done once we're sure module name is unique. */
dynamic_debug_setup(info->debug, info->num_debug);
- /* Find duplicate symbols */
+ mutex_lock(&module_mutex);
+ /* Find duplicate symbols (must be called under lock). */
err = verify_export_symbols(mod);
if (err < 0)
- goto ddebug;
+ goto ddebug_cleanup;
+ /* This relies on module_mutex for list integrity. */
module_bug_finalize(info->hdr, info->sechdrs, mod);
- list_add_rcu(&mod->list, &modules);
+
+ /* Mark state as coming so strong_try_module_get() ignores us,
+ * but kallsyms etc. can see us. */
+ mod->state = MODULE_STATE_COMING;
+
mutex_unlock(&module_mutex);
/* Module is ready to execute: parsing args may do that. */
err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-32768, 32767, &ddebug_dyndbg_module_param_cb);
if (err < 0)
- goto unlink;
+ goto bug_cleanup;
/* Link in to syfs. */
err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);
if (err < 0)
- goto unlink;
+ goto bug_cleanup;
/* Get rid of temporary copy. */
free_copy(info);
@@ -3237,16 +3270,13 @@ again:
return do_init_module(mod);
- unlink:
+ bug_cleanup:
+ /* module_bug_cleanup needs module_mutex protection */
mutex_lock(&module_mutex);
- /* Unlink carefully: kallsyms could be walking list. */
- list_del_rcu(&mod->list);
module_bug_cleanup(mod);
- wake_up_all(&module_wq);
- ddebug:
- dynamic_debug_remove(info->debug);
- unlock:
+ ddebug_cleanup:
mutex_unlock(&module_mutex);
+ dynamic_debug_remove(info->debug);
synchronize_sched();
kfree(mod->args);
free_arch_cleanup:
@@ -3255,6 +3285,12 @@ again:
free_modinfo(mod);
free_unload:
module_unload_free(mod);
+ unlink_mod:
+ mutex_lock(&module_mutex);
+ /* Unlink carefully: kallsyms could be walking list. */
+ list_del_rcu(&mod->list);
+ wake_up_all(&module_wq);
+ mutex_unlock(&module_mutex);
free_module:
module_deallocate(mod, info);
free_copy:
@@ -3377,6 +3413,8 @@ const char *module_address_lookup(unsigned long addr,
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
if (within_module_init(addr, mod) ||
within_module_core(addr, mod)) {
if (modname)
@@ -3400,6 +3438,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
if (within_module_init(addr, mod) ||
within_module_core(addr, mod)) {
const char *sym;
@@ -3424,6 +3464,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
if (within_module_init(addr, mod) ||
within_module_core(addr, mod)) {
const char *sym;
@@ -3451,6 +3493,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
if (symnum < mod->num_symtab) {
*value = mod->symtab[symnum].st_value;
*type = mod->symtab[symnum].st_info;
@@ -3493,9 +3537,12 @@ unsigned long module_kallsyms_lookup_name(const char *name)
ret = mod_find_symname(mod, colon+1);
*colon = ':';
} else {
- list_for_each_entry_rcu(mod, &modules, list)
+ list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
if ((ret = mod_find_symname(mod, name)) != 0)
break;
+ }
}
preempt_enable();
return ret;
@@ -3510,6 +3557,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
int ret;
list_for_each_entry(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
for (i = 0; i < mod->num_symtab; i++) {
ret = fn(data, mod->strtab + mod->symtab[i].st_name,
mod, mod->symtab[i].st_value);
@@ -3525,6 +3574,7 @@ static char *module_flags(struct module *mod, char *buf)
{
int bx = 0;
+ BUG_ON(mod->state == MODULE_STATE_UNFORMED);
if (mod->taints ||
mod->state == MODULE_STATE_GOING ||
mod->state == MODULE_STATE_COMING) {
@@ -3566,6 +3616,10 @@ static int m_show(struct seq_file *m, void *p)
struct module *mod = list_entry(p, struct module, list);
char buf[8];
+ /* We always ignore unformed modules. */
+ if (mod->state == MODULE_STATE_UNFORMED)
+ return 0;
+
seq_printf(m, "%s %u",
mod->name, mod->init_size + mod->core_size);
print_unload_info(m, mod);
@@ -3626,6 +3680,8 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
if (mod->num_exentries == 0)
continue;
@@ -3674,10 +3730,13 @@ struct module *__module_address(unsigned long addr)
if (addr < module_addr_min || addr > module_addr_max)
return NULL;
- list_for_each_entry_rcu(mod, &modules, list)
+ list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
if (within_module_core(addr, mod)
|| within_module_init(addr, mod))
return mod;
+ }
return NULL;
}
EXPORT_SYMBOL_GPL(__module_address);
@@ -3730,8 +3789,11 @@ void print_modules(void)
printk(KERN_DEFAULT "Modules linked in:");
/* Most callers should already have preempt disabled, but make sure */
preempt_disable();
- list_for_each_entry_rcu(mod, &modules, list)
+ list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
printk(" %s%s", mod->name, module_flags(mod, buf));
+ }
preempt_enable();
if (last_unloaded_module[0])
printk(" [last unloaded: %s]", last_unloaded_module);
diff --git a/kernel/pid.c b/kernel/pid.c
index de9af600006..f2c6a682509 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -331,7 +331,7 @@ out:
return pid;
out_unlock:
- spin_unlock(&pidmap_lock);
+ spin_unlock_irq(&pidmap_lock);
out_free:
while (++i <= ns->level)
free_pidmap(pid->numbers + i);
diff --git a/kernel/printk.c b/kernel/printk.c
index 0b31715f335..f24633afa46 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -88,12 +88,6 @@ static DEFINE_SEMAPHORE(console_sem);
struct console *console_drivers;
EXPORT_SYMBOL_GPL(console_drivers);
-#ifdef CONFIG_LOCKDEP
-static struct lockdep_map console_lock_dep_map = {
- .name = "console_lock"
-};
-#endif
-
/*
* This is used for debugging the mess that is the VT code by
* keeping track if we have the console semaphore held. It's
@@ -1925,7 +1919,6 @@ void console_lock(void)
return;
console_locked = 1;
console_may_schedule = 1;
- mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);
}
EXPORT_SYMBOL(console_lock);
@@ -1947,7 +1940,6 @@ int console_trylock(void)
}
console_locked = 1;
console_may_schedule = 0;
- mutex_acquire(&console_lock_dep_map, 0, 1, _RET_IP_);
return 1;
}
EXPORT_SYMBOL(console_trylock);
@@ -2110,7 +2102,6 @@ skip:
local_irq_restore(flags);
}
console_locked = 0;
- mutex_release(&console_lock_dep_map, 1, _RET_IP_);
/* Release the exclusive_console once it is used */
if (unlikely(exclusive_console))
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1599157336a..6cbeaae4406 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -117,11 +117,45 @@ void __ptrace_unlink(struct task_struct *child)
* TASK_KILLABLE sleeps.
*/
if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child))
- signal_wake_up(child, task_is_traced(child));
+ ptrace_signal_wake_up(child, true);
spin_unlock(&child->sighand->siglock);
}
+/* Ensure that nothing can wake it up, even SIGKILL */
+static bool ptrace_freeze_traced(struct task_struct *task)
+{
+ bool ret = false;
+
+ /* Lockless, nobody but us can set this flag */
+ if (task->jobctl & JOBCTL_LISTENING)
+ return ret;
+
+ spin_lock_irq(&task->sighand->siglock);
+ if (task_is_traced(task) && !__fatal_signal_pending(task)) {
+ task->state = __TASK_TRACED;
+ ret = true;
+ }
+ spin_unlock_irq(&task->sighand->siglock);
+
+ return ret;
+}
+
+static void ptrace_unfreeze_traced(struct task_struct *task)
+{
+ if (task->state != __TASK_TRACED)
+ return;
+
+ WARN_ON(!task->ptrace || task->parent != current);
+
+ spin_lock_irq(&task->sighand->siglock);
+ if (__fatal_signal_pending(task))
+ wake_up_state(task, __TASK_TRACED);
+ else
+ task->state = TASK_TRACED;
+ spin_unlock_irq(&task->sighand->siglock);
+}
+
/**
* ptrace_check_attach - check whether ptracee is ready for ptrace operation
* @child: ptracee to check for
@@ -139,7 +173,7 @@ void __ptrace_unlink(struct task_struct *child)
* RETURNS:
* 0 on success, -ESRCH if %child is not ready.
*/
-int ptrace_check_attach(struct task_struct *child, bool ignore_state)
+static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
{
int ret = -ESRCH;
@@ -151,24 +185,29 @@ int ptrace_check_attach(struct task_struct *child, bool ignore_state)
* be changed by us so it's not changing right after this.
*/
read_lock(&tasklist_lock);
- if ((child->ptrace & PT_PTRACED) && child->parent == current) {
+ if (child->ptrace && child->parent == current) {
+ WARN_ON(child->state == __TASK_TRACED);
/*
* child->sighand can't be NULL, release_task()
* does ptrace_unlink() before __exit_signal().
*/
- spin_lock_irq(&child->sighand->siglock);
- WARN_ON_ONCE(task_is_stopped(child));
- if (ignore_state || (task_is_traced(child) &&
- !(child->jobctl & JOBCTL_LISTENING)))
+ if (ignore_state || ptrace_freeze_traced(child))
ret = 0;
- spin_unlock_irq(&child->sighand->siglock);
}
read_unlock(&tasklist_lock);
- if (!ret && !ignore_state)
- ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH;
+ if (!ret && !ignore_state) {
+ if (!wait_task_inactive(child, __TASK_TRACED)) {
+ /*
+ * This can only happen if may_ptrace_stop() fails and
+ * ptrace_stop() changes ->state back to TASK_RUNNING,
+ * so we should not worry about leaking __TASK_TRACED.
+ */
+ WARN_ON(child->state == __TASK_TRACED);
+ ret = -ESRCH;
+ }
+ }
- /* All systems go.. */
return ret;
}
@@ -317,7 +356,7 @@ static int ptrace_attach(struct task_struct *task, long request,
*/
if (task_is_stopped(task) &&
task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
- signal_wake_up(task, 1);
+ signal_wake_up_state(task, __TASK_STOPPED);
spin_unlock(&task->sighand->siglock);
@@ -737,7 +776,7 @@ int ptrace_request(struct task_struct *child, long request,
* tracee into STOP.
*/
if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP)))
- signal_wake_up(child, child->jobctl & JOBCTL_LISTENING);
+ ptrace_signal_wake_up(child, child->jobctl & JOBCTL_LISTENING);
unlock_task_sighand(child, &flags);
ret = 0;
@@ -763,7 +802,7 @@ int ptrace_request(struct task_struct *child, long request,
* start of this trap and now. Trigger re-trap.
*/
if (child->jobctl & JOBCTL_TRAP_NOTIFY)
- signal_wake_up(child, true);
+ ptrace_signal_wake_up(child, true);
ret = 0;
}
unlock_task_sighand(child, &flags);
@@ -900,6 +939,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
goto out_put_task_struct;
ret = arch_ptrace(child, request, addr, data);
+ if (ret || request != PTRACE_DETACH)
+ ptrace_unfreeze_traced(child);
out_put_ta