diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/async.c | 30 | ||||
| -rw-r--r-- | kernel/audit.c | 40 | ||||
| -rw-r--r-- | kernel/audit_tree.c | 26 | ||||
| -rw-r--r-- | kernel/audit_watch.c | 2 | ||||
| -rw-r--r-- | kernel/auditfilter.c | 1 | ||||
| -rw-r--r-- | kernel/auditsc.c | 20 | ||||
| -rw-r--r-- | kernel/compat.c | 23 | ||||
| -rw-r--r-- | kernel/debug/kdb/kdb_main.c | 2 | ||||
| -rw-r--r-- | kernel/fork.c | 6 | ||||
| -rw-r--r-- | kernel/module.c | 181 | ||||
| -rw-r--r-- | kernel/ptrace.c | 74 | ||||
| -rw-r--r-- | kernel/rwsem.c | 10 | ||||
| -rw-r--r-- | kernel/sched/core.c | 3 | ||||
| -rw-r--r-- | kernel/signal.c | 24 | ||||
| -rw-r--r-- | kernel/trace/ftrace.c | 2 | ||||
| -rw-r--r-- | kernel/trace/trace.c | 17 | 
16 files changed, 335 insertions, 126 deletions
| diff --git a/kernel/async.c b/kernel/async.c index 9d311838485..6f34904a0b5 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -86,18 +86,27 @@ static atomic_t entry_count;   */  static async_cookie_t  __lowest_in_progress(struct async_domain *running)  { +	async_cookie_t first_running = next_cookie;	/* infinity value */ +	async_cookie_t first_pending = next_cookie;	/* ditto */  	struct async_entry *entry; +	/* +	 * Both running and pending lists are sorted but not disjoint. +	 * Take the first cookies from both and return the min. +	 */  	if (!list_empty(&running->domain)) {  		entry = list_first_entry(&running->domain, typeof(*entry), list); -		return entry->cookie; +		first_running = entry->cookie;  	} -	list_for_each_entry(entry, &async_pending, list) -		if (entry->running == running) -			return entry->cookie; +	list_for_each_entry(entry, &async_pending, list) { +		if (entry->running == running) { +			first_pending = entry->cookie; +			break; +		} +	} -	return next_cookie;	/* "infinity" value */ +	return min(first_running, first_pending);  }  static async_cookie_t  lowest_in_progress(struct async_domain *running) @@ -118,13 +127,17 @@ static void async_run_entry_fn(struct work_struct *work)  {  	struct async_entry *entry =  		container_of(work, struct async_entry, work); +	struct async_entry *pos;  	unsigned long flags;  	ktime_t uninitialized_var(calltime), delta, rettime;  	struct async_domain *running = entry->running; -	/* 1) move self to the running queue */ +	/* 1) move self to the running queue, make sure it stays sorted */  	spin_lock_irqsave(&async_lock, flags); -	list_move_tail(&entry->list, &running->domain); +	list_for_each_entry_reverse(pos, &running->domain, list) +		if (entry->cookie < pos->cookie) +			break; +	list_move_tail(&entry->list, &pos->list);  	spin_unlock_irqrestore(&async_lock, flags);  	/* 2) run (and print duration) */ @@ -196,6 +209,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a  	atomic_inc(&entry_count);  	spin_unlock_irqrestore(&async_lock, flags); +	/* mark that this task has queued an async job, used by module init */ +	current->flags |= PF_USED_ASYNC; +  	/* schedule for execution */  	queue_work(system_unbound_wq, &entry->work); diff --git a/kernel/audit.c b/kernel/audit.c index 40414e9143d..d596e5355f1 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -272,6 +272,8 @@ static int audit_log_config_change(char *function_name, int new, int old,  	int rc = 0;  	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); +	if (unlikely(!ab)) +		return rc;  	audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new,  			 old, from_kuid(&init_user_ns, loginuid), sessionid);  	if (sid) { @@ -619,6 +621,8 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,  	}  	*ab = audit_log_start(NULL, GFP_KERNEL, msg_type); +	if (unlikely(!*ab)) +		return rc;  	audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u",  			 task_tgid_vnr(current),  			 from_kuid(&init_user_ns, current_uid()), @@ -1097,6 +1101,23 @@ static inline void audit_get_stamp(struct audit_context *ctx,  	}  } +/* + * Wait for auditd to drain the queue a little + */ +static void wait_for_auditd(unsigned long sleep_time) +{ +	DECLARE_WAITQUEUE(wait, current); +	set_current_state(TASK_INTERRUPTIBLE); +	add_wait_queue(&audit_backlog_wait, &wait); + +	if (audit_backlog_limit && +	    skb_queue_len(&audit_skb_queue) > audit_backlog_limit) +		schedule_timeout(sleep_time); + +	__set_current_state(TASK_RUNNING); +	remove_wait_queue(&audit_backlog_wait, &wait); +} +  /* Obtain an audit buffer.  This routine does locking to obtain the   * audit buffer, but then no locking is required for calls to   * audit_log_*format.  If the tsk is a task that is currently in a @@ -1142,20 +1163,13 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,  	while (audit_backlog_limit  	       && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) { -		if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time -		    && time_before(jiffies, timeout_start + audit_backlog_wait_time)) { +		if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time) { +			unsigned long sleep_time; -			/* Wait for auditd to drain the queue a little */ -			DECLARE_WAITQUEUE(wait, current); -			set_current_state(TASK_INTERRUPTIBLE); -			add_wait_queue(&audit_backlog_wait, &wait); - -			if (audit_backlog_limit && -			    skb_queue_len(&audit_skb_queue) > audit_backlog_limit) -				schedule_timeout(timeout_start + audit_backlog_wait_time - jiffies); - -			__set_current_state(TASK_RUNNING); -			remove_wait_queue(&audit_backlog_wait, &wait); +			sleep_time = timeout_start + audit_backlog_wait_time - +					jiffies; +			if ((long)sleep_time > 0) +				wait_for_auditd(sleep_time);  			continue;  		}  		if (audit_rate_check() && printk_ratelimit()) diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index e81175ef25f..642a89c4f3d 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -449,11 +449,26 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)  	return 0;  } +static void audit_log_remove_rule(struct audit_krule *rule) +{ +	struct audit_buffer *ab; + +	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); +	if (unlikely(!ab)) +		return; +	audit_log_format(ab, "op="); +	audit_log_string(ab, "remove rule"); +	audit_log_format(ab, " dir="); +	audit_log_untrustedstring(ab, rule->tree->pathname); +	audit_log_key(ab, rule->filterkey); +	audit_log_format(ab, " list=%d res=1", rule->listnr); +	audit_log_end(ab); +} +  static void kill_rules(struct audit_tree *tree)  {  	struct audit_krule *rule, *next;  	struct audit_entry *entry; -	struct audit_buffer *ab;  	list_for_each_entry_safe(rule, next, &tree->rules, rlist) {  		entry = container_of(rule, struct audit_entry, rule); @@ -461,14 +476,7 @@ static void kill_rules(struct audit_tree *tree)  		list_del_init(&rule->rlist);  		if (rule->tree) {  			/* not a half-baked one */ -			ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE); -			audit_log_format(ab, "op="); -			audit_log_string(ab, "remove rule"); -			audit_log_format(ab, " dir="); -			audit_log_untrustedstring(ab, rule->tree->pathname); -			audit_log_key(ab, rule->filterkey); -			audit_log_format(ab, " list=%d res=1", rule->listnr); -			audit_log_end(ab); +			audit_log_remove_rule(rule);  			rule->tree = NULL;  			list_del_rcu(&entry->list);  			list_del(&entry->rule.list); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 4a599f699ad..22831c4d369 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -240,6 +240,8 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc  	if (audit_enabled) {  		struct audit_buffer *ab;  		ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE); +		if (unlikely(!ab)) +			return;  		audit_log_format(ab, "auid=%u ses=%u op=",  				 from_kuid(&init_user_ns, audit_get_loginuid(current)),  				 audit_get_sessionid(current)); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 7f19f23d38a..f9fc54bbe06 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1144,7 +1144,6 @@ static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid,   * audit_receive_filter - apply all rules to the specified message type   * @type: audit message type   * @pid: target pid for netlink audit messages - * @uid: target uid for netlink audit messages   * @seq: netlink audit message sequence (serial) number   * @data: payload data   * @datasz: size of payload data diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e37e6a12c5e..a371f857a0a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1464,14 +1464,14 @@ static void show_special(struct audit_context *context, int *call_panic)  			audit_log_end(ab);  			ab = audit_log_start(context, GFP_KERNEL,  					     AUDIT_IPC_SET_PERM); +			if (unlikely(!ab)) +				return;  			audit_log_format(ab,  				"qbytes=%lx ouid=%u ogid=%u mode=%#ho",  				context->ipc.qbytes,  				context->ipc.perm_uid,  				context->ipc.perm_gid,  				context->ipc.perm_mode); -			if (!ab) -				return;  		}  		break; }  	case AUDIT_MQ_OPEN: { @@ -2675,7 +2675,7 @@ void __audit_mmap_fd(int fd, int flags)  	context->type = AUDIT_MMAP;  } -static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) +static void audit_log_task(struct audit_buffer *ab)  {  	kuid_t auid, uid;  	kgid_t gid; @@ -2693,6 +2693,11 @@ static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)  	audit_log_task_context(ab);  	audit_log_format(ab, " pid=%d comm=", current->pid);  	audit_log_untrustedstring(ab, current->comm); +} + +static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) +{ +	audit_log_task(ab);  	audit_log_format(ab, " reason=");  	audit_log_string(ab, reason);  	audit_log_format(ab, " sig=%ld", signr); @@ -2715,6 +2720,8 @@ void audit_core_dumps(long signr)  		return;  	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); +	if (unlikely(!ab)) +		return;  	audit_log_abend(ab, "memory violation", signr);  	audit_log_end(ab);  } @@ -2723,8 +2730,11 @@ void __audit_seccomp(unsigned long syscall, long signr, int code)  {  	struct audit_buffer *ab; -	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); -	audit_log_abend(ab, "seccomp", signr); +	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP); +	if (unlikely(!ab)) +		return; +	audit_log_task(ab); +	audit_log_format(ab, " sig=%ld", signr);  	audit_log_format(ab, " syscall=%ld", syscall);  	audit_log_format(ab, " compat=%d", is_compat_task());  	audit_log_format(ab, " ip=0x%lx", KSTK_EIP(current)); diff --git a/kernel/compat.c b/kernel/compat.c index f6150e92dfc..36700e9e2be 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -535,9 +535,11 @@ asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru)  	return 0;  } -asmlinkage long -compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options, -	struct compat_rusage __user *ru) +COMPAT_SYSCALL_DEFINE4(wait4, +	compat_pid_t, pid, +	compat_uint_t __user *, stat_addr, +	int, options, +	struct compat_rusage __user *, ru)  {  	if (!ru) {  		return sys_wait4(pid, stat_addr, options, NULL); @@ -564,9 +566,10 @@ compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,  	}  } -asmlinkage long compat_sys_waitid(int which, compat_pid_t pid, -		struct compat_siginfo __user *uinfo, int options, -		struct compat_rusage __user *uru) +COMPAT_SYSCALL_DEFINE5(waitid, +		int, which, compat_pid_t, pid, +		struct compat_siginfo __user *, uinfo, int, options, +		struct compat_rusage __user *, uru)  {  	siginfo_t info;  	struct rusage ru; @@ -584,7 +587,11 @@ asmlinkage long compat_sys_waitid(int which, compat_pid_t pid,  		return ret;  	if (uru) { -		ret = put_compat_rusage(&ru, uru); +		/* sys_waitid() overwrites everything in ru */ +		if (COMPAT_USE_64BIT_TIME) +			ret = copy_to_user(uru, &ru, sizeof(ru)); +		else +			ret = put_compat_rusage(&ru, uru);  		if (ret)  			return ret;  	} @@ -994,7 +1001,7 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,  	sigset_from_compat(&s, &s32);  	if (uts) { -		if (get_compat_timespec(&t, uts)) +		if (compat_get_timespec(&t, uts))  			return -EFAULT;  	} diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 4d5f8d5612f..8875254120b 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1970,6 +1970,8 @@ static int kdb_lsmod(int argc, const char **argv)  	kdb_printf("Module                  Size  modstruct     Used by\n");  	list_for_each_entry(mod, kdb_modules, list) { +		if (mod->state == MODULE_STATE_UNFORMED) +			continue;  		kdb_printf("%-20s%8u  0x%p ", mod->name,  			   mod->core_size, (void *)mod); diff --git a/kernel/fork.c b/kernel/fork.c index 65ca6d27f24..c535f33bbb9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1668,8 +1668,10 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,  		 int, tls_val)  #endif  { -	return do_fork(clone_flags, newsp, 0, -		parent_tidptr, child_tidptr); +	long ret = do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr); +	asmlinkage_protect(5, ret, clone_flags, newsp, +			parent_tidptr, child_tidptr, tls_val); +	return ret;  }  #endif diff --git a/kernel/module.c b/kernel/module.c index 250092c1d57..eab08274ec9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -188,6 +188,7 @@ struct load_info {     ongoing or failed initialization etc. */  static inline int strong_try_module_get(struct module *mod)  { +	BUG_ON(mod && mod->state == MODULE_STATE_UNFORMED);  	if (mod && mod->state == MODULE_STATE_COMING)  		return -EBUSY;  	if (try_module_get(mod)) @@ -343,6 +344,9 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr,  #endif  		}; +		if (mod->state == MODULE_STATE_UNFORMED) +			continue; +  		if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))  			return true;  	} @@ -450,16 +454,24 @@ const struct kernel_symbol *find_symbol(const char *name,  EXPORT_SYMBOL_GPL(find_symbol);  /* Search for module by name: must hold module_mutex. */ -struct module *find_module(const char *name) +static struct module *find_module_all(const char *name, +				      bool even_unformed)  {  	struct module *mod;  	list_for_each_entry(mod, &modules, list) { +		if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) +			continue;  		if (strcmp(mod->name, name) == 0)  			return mod;  	}  	return NULL;  } + +struct module *find_module(const char *name) +{ +	return find_module_all(name, false); +}  EXPORT_SYMBOL_GPL(find_module);  #ifdef CONFIG_SMP @@ -525,6 +537,8 @@ bool is_module_percpu_address(unsigned long addr)  	preempt_disable();  	list_for_each_entry_rcu(mod, &modules, list) { +		if (mod->state == MODULE_STATE_UNFORMED) +			continue;  		if (!mod->percpu_size)  			continue;  		for_each_possible_cpu(cpu) { @@ -1048,6 +1062,8 @@ static ssize_t show_initstate(struct module_attribute *mattr,  	case MODULE_STATE_GOING:  		state = "going";  		break; +	default: +		BUG();  	}  	return sprintf(buffer, "%s\n", state);  } @@ -1786,6 +1802,8 @@ void set_all_modules_text_rw(void)  	mutex_lock(&module_mutex);  	list_for_each_entry_rcu(mod, &modules, list) { +		if (mod->state == MODULE_STATE_UNFORMED) +			continue;  		if ((mod->module_core) && (mod->core_text_size)) {  			set_page_attributes(mod->module_core,  						mod->module_core + mod->core_text_size, @@ -1807,6 +1825,8 @@ void set_all_modules_text_ro(void)  	mutex_lock(&module_mutex);  	list_for_each_entry_rcu(mod, &modules, list) { +		if (mod->state == MODULE_STATE_UNFORMED) +			continue;  		if ((mod->module_core) && (mod->core_text_size)) {  			set_page_attributes(mod->module_core,  						mod->module_core + mod->core_text_size, @@ -2527,6 +2547,13 @@ static int copy_module_from_fd(int fd, struct load_info *info)  		err = -EFBIG;  		goto out;  	} + +	/* Don't hand 0 to vmalloc, it whines. */ +	if (stat.size == 0) { +		err = -EINVAL; +		goto out; +	} +  	info->hdr = vmalloc(stat.size);  	if (!info->hdr) {  		err = -ENOMEM; @@ -2990,8 +3017,9 @@ static bool finished_loading(const char *name)  	bool ret;  	mutex_lock(&module_mutex); -	mod = find_module(name); -	ret = !mod || mod->state != MODULE_STATE_COMING; +	mod = find_module_all(name, true); +	ret = !mod || mod->state == MODULE_STATE_LIVE +		|| mod->state == MODULE_STATE_GOING;  	mutex_unlock(&module_mutex);  	return ret; @@ -3013,6 +3041,12 @@ static int do_init_module(struct module *mod)  {  	int ret = 0; +	/* +	 * We want to find out whether @mod uses async during init.  Clear +	 * PF_USED_ASYNC.  async_schedule*() will set it. +	 */ +	current->flags &= ~PF_USED_ASYNC; +  	blocking_notifier_call_chain(&module_notify_list,  			MODULE_STATE_COMING, mod); @@ -3058,8 +3092,25 @@ static int do_init_module(struct module *mod)  	blocking_notifier_call_chain(&module_notify_list,  				     MODULE_STATE_LIVE, mod); -	/* We need to finish all async code before the module init sequence is done */ -	async_synchronize_full(); +	/* +	 * We need to finish all async code before the module init sequence +	 * is done.  This has potential to deadlock.  For example, a newly +	 * detected block device can trigger request_module() of the +	 * default iosched from async probing task.  Once userland helper +	 * reaches here, async_synchronize_full() will wait on the async +	 * task waiting on request_module() and deadlock. +	 * +	 * This deadlock is avoided by perfomring async_synchronize_full() +	 * iff module init queued any async jobs.  This isn't a full +	 * solution as it will deadlock the same if module loading from +	 * async jobs nests more than once; however, due to the various +	 * constraints, this hack seems to be the best option for now. +	 * Please refer to the following thread for details. +	 * +	 * http://thread.gmane.org/gmane.linux.kernel/1420814 +	 */ +	if (current->flags & PF_USED_ASYNC) +		async_synchronize_full();  	mutex_lock(&module_mutex);  	/* Drop initial reference. */ @@ -3113,6 +3164,32 @@ static int load_module(struct load_info *info, const char __user *uargs,  		goto free_copy;  	} +	/* +	 * We try to place it in the list now to make sure it's unique +	 * before we dedicate too many resources.  In particular, +	 * temporary percpu memory exhaustion. +	 */ +	mod->state = MODULE_STATE_UNFORMED; +again: +	mutex_lock(&module_mutex); +	if ((old = find_module_all(mod->name, true)) != NULL) { +		if (old->state == MODULE_STATE_COMING +		    || old->state == MODULE_STATE_UNFORMED) { +			/* Wait in case it fails to load. */ +			mutex_unlock(&module_mutex); +			err = wait_event_interruptible(module_wq, +					       finished_loading(mod->name)); +			if (err) +				goto free_module; +			goto again; +		} +		err = -EEXIST; +		mutex_unlock(&module_mutex); +		goto free_module; +	} +	list_add_rcu(&mod->list, &modules); +	mutex_unlock(&module_mutex); +  #ifdef CONFIG_MODULE_SIG  	mod->sig_ok = info->sig_ok;  	if (!mod->sig_ok) @@ -3122,7 +3199,7 @@ static int load_module(struct load_info *info, const char __user *uargs,  	/* Now module is in final location, initialize linked lists, etc. */  	err = module_unload_init(mod);  	if (err) -		goto free_module; +		goto unlink_mod;  	/* Now we've got everything in the final locations, we can  	 * find optional sections. */ @@ -3157,54 +3234,33 @@ static int load_module(struct load_info *info, const char __user *uargs,  		goto free_arch_cleanup;  	} -	/* Mark state as coming so strong_try_module_get() ignores us. */ -	mod->state = MODULE_STATE_COMING; - -	/* Now sew it into the lists so we can get lockdep and oops -	 * info during argument parsing.  No one should access us, since -	 * strong_try_module_get() will fail. -	 * lockdep/oops can run asynchronous, so use the RCU list insertion -	 * function to insert in a way safe to concurrent readers. -	 * The mutex protects against concurrent writers. -	 */ -again: -	mutex_lock(&module_mutex); -	if ((old = find_module(mod->name)) != NULL) { -		if (old->state == MODULE_STATE_COMING) { -			/* Wait in case it fails to load. */ -			mutex_unlock(&module_mutex); -			err = wait_event_interruptible(module_wq, -					       finished_loading(mod->name)); -			if (err) -				goto free_arch_cleanup; -			goto again; -		} -		err = -EEXIST; -		goto unlock; -	} - -	/* This has to be done once we're sure module name is unique. */  	dynamic_debug_setup(info->debug, info->num_debug); -	/* Find duplicate symbols */ +	mutex_lock(&module_mutex); +	/* Find duplicate symbols (must be called under lock). */  	err = verify_export_symbols(mod);  	if (err < 0) -		goto ddebug; +		goto ddebug_cleanup; +	/* This relies on module_mutex for list integrity. */  	module_bug_finalize(info->hdr, info->sechdrs, mod); -	list_add_rcu(&mod->list, &modules); + +	/* Mark state as coming so strong_try_module_get() ignores us, +	 * but kallsyms etc. can see us. */ +	mod->state = MODULE_STATE_COMING; +  	mutex_unlock(&module_mutex);  	/* Module is ready to execute: parsing args may do that. */  	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,  			 -32768, 32767, &ddebug_dyndbg_module_param_cb);  	if (err < 0) -		goto unlink; +		goto bug_cleanup;  	/* Link in to syfs. */  	err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);  	if (err < 0) -		goto unlink; +		goto bug_cleanup;  	/* Get rid of temporary copy. */  	free_copy(info); @@ -3214,16 +3270,13 @@ again:  	return do_init_module(mod); - unlink: + bug_cleanup: +	/* module_bug_cleanup needs module_mutex protection */  	mutex_lock(&module_mutex); -	/* Unlink carefully: kallsyms could be walking list. */ -	list_del_rcu(&mod->list);  	module_bug_cleanup(mod); -	wake_up_all(&module_wq); - ddebug: -	dynamic_debug_remove(info->debug); - unlock: + ddebug_cleanup:  	mutex_unlock(&module_mutex); +	dynamic_debug_remove(info->debug);  	synchronize_sched();  	kfree(mod->args);   free_arch_cleanup: @@ -3232,6 +3285,12 @@ again:  	free_modinfo(mod);   free_unload:  	module_unload_free(mod); + unlink_mod: +	mutex_lock(&module_mutex); +	/* Unlink carefully: kallsyms could be walking list. */ +	list_del_rcu(&mod->list); +	wake_up_all(&module_wq); +	mutex_unlock(&module_mutex);   free_module:  	module_deallocate(mod, info);   free_copy: @@ -3354,6 +3413,8 @@ const char *module_address_lookup(unsigned long addr,  	preempt_disable();  	list_for_each_entry_rcu(mod, &modules, list) { +		if (mod->state == MODULE_STATE_UNFORMED) +			continue;  		if (within_module_init(addr, mod) ||  		    within_module_core(addr, mod)) {  			if (modname) @@ -3377,6 +3438,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)  	preempt_disable();  	list_for_each_entry_rcu(mod, &modules, list) { +		if (mod->state == MODULE_STATE_UNFORMED) +			continue;  		if (within_module_init(addr, mod) ||  		    within_module_core(addr, mod)) {  			const char *sym; @@ -3401,6 +3464,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,  	preempt_disable();  	list_for_each_entry_rcu(mod, &modules, list) { +		if (mod->state == MODULE_STATE_UNFORMED) +			continue;  		if (within_module_init(addr, mod) ||  		    within_module_core(addr, mod)) {  			const char *sym; @@ -3428,6 +3493,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,  	preempt_disable();  	list_for_each_entry_rcu(mod, &modules, list) { +		if (mod->state == MODULE_STATE_UNFORMED) +			continue;  		if (symnum < mod->num_symtab) {  			*value = mod->symtab[symnum].st_value;  			*type = mod->symtab[symnum].st_info; @@ -3470,9 +3537,12 @@ unsigned long module_kallsyms_lookup_name(const char *name)  			ret = mod_find_symname(mod, colon+1);  		*colon = ':';  	} else { -		list_for_each_entry_rcu(mod, &modules, list) +		list_for_each_entry_rcu(mod, &modules, list) { +			if (mod->state == MODULE_STATE_UNFORMED) +				continue;  			if ((ret = mod_find_symname(mod, name)) != 0)  				break; +		}  	}  	preempt_enable();  	return ret; @@ -3487,6 +3557,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,  	int ret;  	list_for_each_entry(mod, &modules, list) { +		if (mod->state == MODULE_STATE_UNFORMED) +			continue;  		for (i = 0; i < mod->num_symtab; i++) {  			ret = fn(data, mod->strtab + mod->symtab[i].st_name,  				 mod, mod->symtab[i].st_value); @@ -3502,6 +3574,7 @@ static char *module_flags(struct module *mod, char *buf)  {  	int bx = 0; +	BUG_ON(mod->state == MODULE_STATE_UNFORMED);  	if (mod->taints ||  	    mod->state == MODULE_STATE_GOING ||  	    mod->state == MODULE_STATE_COMING) { @@ -3543,6 +3616,10 @@ static int m_show(struct seq_file *m, void *p)  	struct module *mod = list_entry(p, struct module, list);  	char buf[8]; +	/* We always ignore unformed modules. */ +	if (mod->state == MODULE_STATE_UNFORMED) +		return 0; +  	seq_printf(m, "%s %u",  		   mod->name, mod->init_size + mod->core_size);  	print_unload_info(m, mod); @@ -3603,6 +3680,8 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)  	preempt_disable();  	list_for_each_entry_rcu(mod, &modules, list) { +		if (mod->state == MODULE_STATE_UNFORMED) +			continue;  		if (mod->num_exentries == 0)  			continue; @@ -3651,10 +3730,13 @@ struct module *__module_address(unsigned long addr)  	if (addr < module_addr_min || addr > module_addr_max)  		return NULL; -	list_for_each_entry_rcu(mod, &modules, list) +	list_for_each_entry_rcu(mod, &modules, list) { +		if (mod->state == MODULE_STATE_UNFORMED) +			continue;  		if (within_module_core(addr, mod)  		    || within_module_init(addr, mod))  			return mod; +	}  	return NULL;  }  EXPORT_SYMBOL_GPL(__module_address); @@ -3707,8 +3789,11 @@ void print_modules(void)  	printk(KERN_DEFAULT "Modules linked in:");  	/* Most callers should already have preempt disabled, but make sure */  	preempt_disable(); -	list_for_each_entry_rcu(mod, &modules, list) +	list_for_each_entry_rcu(mod, &modules, list) { +		if (mod->state == MODULE_STATE_UNFORMED) +			continue;  		printk(" %s%s", mod->name, module_flags(mod, buf)); +	}  	preempt_enable();  	if (last_unloaded_module[0])  		printk(" [last unloaded: %s]", last_unloaded_module); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1599157336a..6cbeaae4406 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -117,11 +117,45 @@ void __ptrace_unlink(struct task_struct *child)  	 * TASK_KILLABLE sleeps.  	 */  	if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child)) -		signal_wake_up(child, task_is_traced(child)); +		ptrace_signal_wake_up(child, true);  	spin_unlock(&child->sighand->siglock);  } +/* Ensure that nothing can wake it up, even SIGKILL */ +static bool ptrace_freeze_traced(struct task_struct *task) +{ +	bool ret = false; + +	/* Lockless, nobody but us can set this flag */ +	if (task->jobctl & JOBCTL_LISTENING) +		return ret; + +	spin_lock_irq(&task->sighand->siglock); +	if (task_is_traced(task) && !__fatal_signal_pending(task)) { +		task->state = __TASK_TRACED; +		ret = true; +	} +	spin_unlock_irq(&task->sighand->siglock); + +	return ret; +} + +static void ptrace_unfreeze_traced(struct task_struct *task) +{ +	if (task->state != __TASK_TRACED) +		return; + +	WARN_ON(!task->ptrace || task->parent != current); + +	spin_lock_irq(&task->sighand->siglock); +	if (__fatal_signal_pending(task)) +		wake_up_state(task, __TASK_TRACED); +	else +		task->state = TASK_TRACED; +	spin_unlock_irq(&task->sighand->siglock); +} +  /**   * ptrace_check_attach - check whether ptracee is ready for ptrace operation   * @child: ptracee to check for @@ -139,7 +173,7 @@ void __ptrace_unlink(struct task_struct *child)   * RETURNS:   * 0 on success, -ESRCH if %child is not ready.   */ -int ptrace_check_attach(struct task_struct *child, bool ignore_state) +static int ptrace_check_attach(struct task_struct *child, bool ignore_state)  {  	int ret = -ESRCH; @@ -151,24 +185,29 @@ int ptrace_check_attach(struct task_struct *child, bool ignore_state)  	 * be changed by us so it's not changing right after this.  	 */  	read_lock(&tasklist_lock); -	if ((child->ptrace & PT_PTRACED) && child->parent == current) { +	if (child->ptrace && child->parent == current) { +		WARN_ON(child->state == __TASK_TRACED);  		/*  		 * child->sighand can't be NULL, release_task()  		 * does ptrace_unlink() before __exit_signal().  		 */ -		spin_lock_irq(&child->sighand->siglock); -		WARN_ON_ONCE(task_is_stopped(child)); -		if (ignore_state || (task_is_traced(child) && -				     !(child->jobctl & JOBCTL_LISTENING))) +		if (ignore_state || ptrace_freeze_traced(child))  			ret = 0; -		spin_unlock_irq(&child->sighand->siglock);  	}  	read_unlock(&tasklist_lock); -	if (!ret && !ignore_state) -		ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; +	if (!ret && !ignore_state) { +		if (!wait_task_inactive(child, __TASK_TRACED)) { +			/* +			 * This can only happen if may_ptrace_stop() fails and +			 * ptrace_stop() changes ->state back to TASK_RUNNING, +			 * so we should not worry about leaking __TASK_TRACED. +			 */ +			WARN_ON(child->state == __TASK_TRACED); +			ret = -ESRCH; +		} +	} -	/* All systems go.. */  	return ret;  } @@ -317,7 +356,7 @@ static int ptrace_attach(struct task_struct *task, long request,  	 */  	if (task_is_stopped(task) &&  	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) -		signal_wake_up(task, 1); +		signal_wake_up_state(task, __TASK_STOPPED);  	spin_unlock(&task->sighand->siglock); @@ -737,7 +776,7 @@ int ptrace_request(struct task_struct *child, long request,  		 * tracee into STOP.  		 */  		if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP))) -			signal_wake_up(child, child->jobctl & JOBCTL_LISTENING); +			ptrace_signal_wake_up(child, child->jobctl & JOBCTL_LISTENING);  		unlock_task_sighand(child, &flags);  		ret = 0; @@ -763,7 +802,7 @@ int ptrace_request(struct task_struct *child, long request,  			 * start of this trap and now.  Trigger re-trap.  			 */  			if (child->jobctl & JOBCTL_TRAP_NOTIFY) -				signal_wake_up(child, true); +				ptrace_signal_wake_up(child, true);  			ret = 0;  		}  		unlock_task_sighand(child, &flags); @@ -900,6 +939,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,  		goto out_put_task_struct;  	ret = arch_ptrace(child, request, addr, data); +	if (ret || request != PTRACE_DETACH) +		ptrace_unfreeze_traced(child);   out_put_task_struct:  	put_task_struct(child); @@ -1039,8 +1080,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,  	ret = ptrace_check_attach(child, request == PTRACE_KILL ||  				  request == PTRACE_INTERRUPT); -	if (!ret) +	if (!ret) {  		ret = compat_arch_ptrace(child, request, addr, data); +		if (ret || request != PTRACE_DETACH) +			ptrace_unfreeze_traced(child); +	}   out_put_task_struct:  	put_task_struct(child); diff --git a/kernel/rwsem.c b/kernel/rwsem.c index 6850f53e02d..b3c6c3fcd84 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c @@ -116,6 +116,16 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)  EXPORT_SYMBOL(down_read_nested); +void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) +{ +	might_sleep(); +	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); + +	LOCK_CONTENDED(sem, __down_write_trylock, __down_write); +} + +EXPORT_SYMBOL(_down_write_nest_lock); +  void down_write_nested(struct rw_semaphore *sem, int subclass)  {  	might_sleep(); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 257002c13bb..26058d0bebb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1523,7 +1523,8 @@ out:   */  int wake_up_process(struct task_struct *p)  { -	return try_to_wake_up(p, TASK_ALL, 0); +	WARN_ON(task_is_stopped_or_traced(p)); +	return try_to_wake_up(p, TASK_NORMAL, 0);  }  EXPORT_SYMBOL(wake_up_process); diff --git a/kernel/signal.c b/kernel/signal.c index 372771e948c..3d09cf6cde7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -680,23 +680,17 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)   * No need to set need_resched since signal event passing   * goes through ->blocked   */ -void signal_wake_up(struct task_struct *t, int resume) +void signal_wake_up_state(struct task_struct *t, unsigned int state)  { -	unsigned int mask; -  	set_tsk_thread_flag(t, TIF_SIGPENDING); -  	/* -	 * For SIGKILL, we want to wake it up in the stopped/traced/killable +	 * TASK_WAKEKILL also means wake it up in the stopped/traced/killable  	 * case. We don't check t->state here because there is a race with it  	 * executing another processor and just now entering stopped state.  	 * By using wake_up_state, we ensure the process will wake up and  	 * handle its death signal.  	 */ -	mask = TASK_INTERRUPTIBLE; -	if (resume) -		mask |= TASK_WAKEKILL; -	if (!wake_up_state(t, mask)) +	if (!wake_up_state(t, state | TASK_INTERRUPTIBLE))  		kick_process(t);  } @@ -844,7 +838,7 @@ static void ptrace_trap_notify(struct task_struct *t)  	assert_spin_locked(&t->sighand->siglock);  	task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); -	signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); +	ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING);  }  /* @@ -1800,6 +1794,10 @@ static inline int may_ptrace_stop(void)  	 * If SIGKILL was already sent before the caller unlocked  	 * ->siglock we must see ->core_state != NULL. Otherwise it  	 * is safe to enter schedule(). +	 * +	 * This is almost outdated, a task with the pending SIGKILL can't +	 * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported +	 * after SIGKILL was already dequeued.  	 */  	if (unlikely(current->mm->core_state) &&  	    unlikely(current->mm == current->parent->mm)) @@ -1925,6 +1923,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)  		if (gstop_done)  			do_notify_parent_cldstop(current, false, why); +		/* tasklist protects us from ptrace_freeze_traced() */  		__set_current_state(TASK_RUNNING);  		if (clear_code)  			current->exit_code = 0; @@ -3116,8 +3115,9 @@ int __save_altstack(stack_t __user *uss, unsigned long sp)  #ifdef CONFIG_COMPAT  #ifdef CONFIG_GENERIC_SIGALTSTACK -asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, -				       compat_stack_t __user *uoss_ptr) +COMPAT_SYSCALL_DEFINE2(sigaltstack, +			const compat_stack_t __user *, uss_ptr, +			compat_stack_t __user *, uoss_ptr)  {  	stack_t uss, uoss;  	int ret; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3ffe4c5ad3f..41473b4ad7a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3998,7 +3998,7 @@ static int ftrace_module_notify(struct notifier_block *self,  struct notifier_block ftrace_module_nb = {  	.notifier_call = ftrace_module_notify, -	.priority = 0, +	.priority = INT_MAX,	/* Run before anything that can use kprobes */  };  extern unsigned long __start_mcount_loc[]; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e5125677efa..3c13e46d7d2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2899,6 +2899,8 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,  	if (copy_from_user(&buf, ubuf, cnt))  		return -EFAULT; +	buf[cnt] = 0; +  	trace_set_options(buf);  	*ppos += cnt; @@ -3452,7 +3454,7 @@ static int tracing_wait_pipe(struct file *filp)  			return -EINTR;  		/* -		 * We block until we read something and tracing is enabled. +		 * We block until we read something and tracing is disabled.  		 * We still block if tracing is disabled, but we have never  		 * read anything. This allows a user to cat this file, and  		 * then enable tracing. But after we have read something, @@ -3460,7 +3462,7 @@ static int tracing_wait_pipe(struct file *filp)  		 *  		 * iter->pos will be 0 if we haven't read anything.  		 */ -		if (tracing_is_enabled() && iter->pos) +		if (!tracing_is_enabled() && iter->pos)  			break;  	} @@ -4815,10 +4817,17 @@ rb_simple_write(struct file *filp, const char __user *ubuf,  		return ret;  	if (buffer) { -		if (val) +		mutex_lock(&trace_types_lock); +		if (val) {  			ring_buffer_record_on(buffer); -		else +			if (current_trace->start) +				current_trace->start(tr); +		} else {  			ring_buffer_record_off(buffer); +			if (current_trace->stop) +				current_trace->stop(tr); +		} +		mutex_unlock(&trace_types_lock);  	}  	(*ppos)++; | 
