Diffstat (limited to 'kernel')
| Mode | File | Lines changed |
| --- | --- | --- |
| -rw-r--r-- | kernel/capability.c | 2 |
| -rw-r--r-- | kernel/cred.c | 16 |
| -rw-r--r-- | kernel/irq/migration.c | 14 |
| -rw-r--r-- | kernel/module.c | 16 |
| -rw-r--r-- | kernel/perf_event.c | 56 |
| -rw-r--r-- | kernel/printk.c | 154 |
| -rw-r--r-- | kernel/ptrace.c | 2 |
| -rw-r--r-- | kernel/sched_fair.c | 91 |
| -rw-r--r-- | kernel/sched_rt.c | 2 |
| -rw-r--r-- | kernel/sys.c | 3 |
| -rw-r--r-- | kernel/sysctl.c | 3 |
| -rw-r--r-- | kernel/time/tick-sched.c | 7 |
| -rw-r--r-- | kernel/time/timer_list.c | 4 |
| -rw-r--r-- | kernel/timer.c | 8 |
| -rw-r--r-- | kernel/trace/blktrace.c | 7 |
| -rw-r--r-- | kernel/trace/trace_events.c | 12 |
| -rw-r--r-- | kernel/trace/trace_export.c | 6 |
| -rw-r--r-- | kernel/trace/trace_syscalls.c | 19 |
| -rw-r--r-- | kernel/tracepoint.c | 31 |
| -rw-r--r-- | kernel/watchdog.c | 53 |
20 files changed, 295 insertions(+), 211 deletions(-)
| diff --git a/kernel/capability.c b/kernel/capability.c index 2f05303715a..9e9385f132c 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -306,7 +306,7 @@ int capable(int cap)  		BUG();  	} -	if (security_capable(cap) == 0) { +	if (security_capable(current_cred(), cap) == 0) {  		current->flags |= PF_SUPERPRIV;  		return 1;  	} diff --git a/kernel/cred.c b/kernel/cred.c index 6a1aa004e37..3a9d6dd53a6 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -252,13 +252,13 @@ struct cred *cred_alloc_blank(void)  #endif  	atomic_set(&new->usage, 1); +#ifdef CONFIG_DEBUG_CREDENTIALS +	new->magic = CRED_MAGIC; +#endif  	if (security_cred_alloc_blank(new, GFP_KERNEL) < 0)  		goto error; -#ifdef CONFIG_DEBUG_CREDENTIALS -	new->magic = CRED_MAGIC; -#endif  	return new;  error: @@ -657,6 +657,8 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)  	validate_creds(old);  	*new = *old; +	atomic_set(&new->usage, 1); +	set_cred_subscribers(new, 0);  	get_uid(new->user);  	get_group_info(new->group_info); @@ -674,8 +676,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)  	if (security_prepare_creds(new, old, GFP_KERNEL) < 0)  		goto error; -	atomic_set(&new->usage, 1); -	set_cred_subscribers(new, 0);  	put_cred(old);  	validate_creds(new);  	return new; @@ -748,7 +748,11 @@ bool creds_are_invalid(const struct cred *cred)  	if (cred->magic != CRED_MAGIC)  		return true;  #ifdef CONFIG_SECURITY_SELINUX -	if (selinux_is_enabled()) { +	/* +	 * cred->security == NULL if security_cred_alloc_blank() or +	 * security_prepare_creds() returned an error. +	 */ +	if (selinux_is_enabled() && cred->security) {  		if ((unsigned long) cred->security < PAGE_SIZE)  			return true;  		if ((*(u32 *)cred->security & 0xffffff00) == diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 1d254194048..441fd629ff0 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -56,6 +56,7 @@ void move_masked_irq(int irq)  void move_native_irq(int irq)  {  	struct irq_desc *desc = irq_to_desc(irq); +	bool masked;  	if (likely(!(desc->status & IRQ_MOVE_PENDING)))  		return; @@ -63,8 +64,15 @@ void move_native_irq(int irq)  	if (unlikely(desc->status & IRQ_DISABLED))  		return; -	desc->irq_data.chip->irq_mask(&desc->irq_data); +	/* +	 * Be careful vs. already masked interrupts. If this is a +	 * threaded interrupt with ONESHOT set, we can end up with an +	 * interrupt storm. 
+	 */ +	masked = desc->status & IRQ_MASKED; +	if (!masked) +		desc->irq_data.chip->irq_mask(&desc->irq_data);  	move_masked_irq(irq); -	desc->irq_data.chip->irq_unmask(&desc->irq_data); +	if (!masked) +		desc->irq_data.chip->irq_unmask(&desc->irq_data);  } - diff --git a/kernel/module.c b/kernel/module.c index 34e00b708fa..efa290ea94b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2460,9 +2460,9 @@ static void find_module_sections(struct module *mod, struct load_info *info)  #endif  #ifdef CONFIG_TRACEPOINTS -	mod->tracepoints = section_objs(info, "__tracepoints", -					sizeof(*mod->tracepoints), -					&mod->num_tracepoints); +	mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs", +					     sizeof(*mod->tracepoints_ptrs), +					     &mod->num_tracepoints);  #endif  #ifdef HAVE_JUMP_LABEL  	mod->jump_entries = section_objs(info, "__jump_table", @@ -3393,7 +3393,7 @@ void module_layout(struct module *mod,  		   struct modversion_info *ver,  		   struct kernel_param *kp,  		   struct kernel_symbol *ks, -		   struct tracepoint *tp) +		   struct tracepoint * const *tp)  {  }  EXPORT_SYMBOL(module_layout); @@ -3407,8 +3407,8 @@ void module_update_tracepoints(void)  	mutex_lock(&module_mutex);  	list_for_each_entry(mod, &modules, list)  		if (!mod->taints) -			tracepoint_update_probe_range(mod->tracepoints, -				mod->tracepoints + mod->num_tracepoints); +			tracepoint_update_probe_range(mod->tracepoints_ptrs, +				mod->tracepoints_ptrs + mod->num_tracepoints);  	mutex_unlock(&module_mutex);  } @@ -3432,8 +3432,8 @@ int module_get_iter_tracepoints(struct tracepoint_iter *iter)  			else if (iter_mod > iter->module)  				iter->tracepoint = NULL;  			found = tracepoint_get_iter_range(&iter->tracepoint, -				iter_mod->tracepoints, -				iter_mod->tracepoints +				iter_mod->tracepoints_ptrs, +				iter_mod->tracepoints_ptrs  					+ iter_mod->num_tracepoints);  			if (found) {  				iter->module = iter_mod; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 84522c79698..999835b6112 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1901,11 +1901,12 @@ static void __perf_event_read(void *info)  		return;  	raw_spin_lock(&ctx->lock); -	update_context_time(ctx); +	if (ctx->is_active) +		update_context_time(ctx);  	update_event_times(event); +	if (event->state == PERF_EVENT_STATE_ACTIVE) +		event->pmu->read(event);  	raw_spin_unlock(&ctx->lock); - -	event->pmu->read(event);  }  static inline u64 perf_event_count(struct perf_event *event) @@ -1999,8 +2000,7 @@ static int alloc_callchain_buffers(void)  	 * accessed from NMI. Use a temporary manual per cpu allocation  	 * until that gets sorted out.  	 */ -	size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * -		num_possible_cpus(); +	size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);  	entries = kzalloc(size, GFP_KERNEL);  	if (!entries) @@ -2201,13 +2201,6 @@ find_lively_task_by_vpid(pid_t vpid)  	if (!task)  		return ERR_PTR(-ESRCH); -	/* -	 * Can't attach events to a dying task. -	 */ -	err = -ESRCH; -	if (task->flags & PF_EXITING) -		goto errout; -  	/* Reuse ptrace permission checks for now. */  	err = -EACCES;  	if (!ptrace_may_access(task, PTRACE_MODE_READ)) @@ -2268,14 +2261,27 @@ retry:  		get_ctx(ctx); -		if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { -			/* -			 * We raced with some other task; use -			 * the context they set. -			 */ +		err = 0; +		mutex_lock(&task->perf_event_mutex); +		/* +		 * If it has already passed perf_event_exit_task(). 
+		 * we must see PF_EXITING, it takes this mutex too. +		 */ +		if (task->flags & PF_EXITING) +			err = -ESRCH; +		else if (task->perf_event_ctxp[ctxn]) +			err = -EAGAIN; +		else +			rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); +		mutex_unlock(&task->perf_event_mutex); + +		if (unlikely(err)) {  			put_task_struct(task);  			kfree(ctx); -			goto retry; + +			if (err == -EAGAIN) +				goto retry; +			goto errout;  		}  	} @@ -5374,6 +5380,8 @@ free_dev:  	goto out;  } +static struct lock_class_key cpuctx_mutex; +  int perf_pmu_register(struct pmu *pmu, char *name, int type)  {  	int cpu, ret; @@ -5422,6 +5430,7 @@ skip_type:  		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);  		__perf_event_init_context(&cpuctx->ctx); +		lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);  		cpuctx->ctx.type = cpu_context;  		cpuctx->ctx.pmu = pmu;  		cpuctx->jiffies_interval = 1; @@ -6127,7 +6136,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)  	 * scheduled, so we are now safe from rescheduling changing  	 * our context.  	 */ -	child_ctx = child->perf_event_ctxp[ctxn]; +	child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);  	task_ctx_sched_out(child_ctx, EVENT_ALL);  	/* @@ -6440,11 +6449,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn)  	unsigned long flags;  	int ret = 0; -	child->perf_event_ctxp[ctxn] = NULL; - -	mutex_init(&child->perf_event_mutex); -	INIT_LIST_HEAD(&child->perf_event_list); -  	if (likely(!parent->perf_event_ctxp[ctxn]))  		return 0; @@ -6533,6 +6537,10 @@ int perf_event_init_task(struct task_struct *child)  {  	int ctxn, ret; +	memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp)); +	mutex_init(&child->perf_event_mutex); +	INIT_LIST_HEAD(&child->perf_event_list); +  	for_each_task_context_nr(ctxn) {  		ret = perf_event_init_context(child, ctxn);  		if (ret) diff --git a/kernel/printk.c b/kernel/printk.c index 53d9a9ec88e..36231525e22 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -97,7 +97,7 @@ static int console_locked, console_suspended;  /*   * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars   * It is also used in interesting ways to provide interlocking in - * release_console_sem(). + * console_unlock();.   */  static DEFINE_SPINLOCK(logbuf_lock); @@ -262,25 +262,47 @@ int dmesg_restrict = 1;  int dmesg_restrict;  #endif +static int syslog_action_restricted(int type) +{ +	if (dmesg_restrict) +		return 1; +	/* Unless restricted, we allow "read all" and "get buffer size" for everybody */ +	return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER; +} + +static int check_syslog_permissions(int type, bool from_file) +{ +	/* +	 * If this is from /proc/kmsg and we've already opened it, then we've +	 * already done the capabilities checks at open time. +	 */ +	if (from_file && type != SYSLOG_ACTION_OPEN) +		return 0; + +	if (syslog_action_restricted(type)) { +		if (capable(CAP_SYSLOG)) +			return 0; +		/* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ +		if (capable(CAP_SYS_ADMIN)) { +			WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " +				 "but no CAP_SYSLOG (deprecated).\n"); +			return 0; +		} +		return -EPERM; +	} +	return 0; +} +  int do_syslog(int type, char __user *buf, int len, bool from_file)  {  	unsigned i, j, limit, count;  	int do_clear = 0;  	char c; -	int error = 0; +	int error; -	/* -	 * If this is from /proc/kmsg we only do the capabilities checks -	 * at open time. 
-	 */ -	if (type == SYSLOG_ACTION_OPEN || !from_file) { -		if (dmesg_restrict && !capable(CAP_SYSLOG)) -			goto warn; /* switch to return -EPERM after 2.6.39 */ -		if ((type != SYSLOG_ACTION_READ_ALL && -		     type != SYSLOG_ACTION_SIZE_BUFFER) && -		    !capable(CAP_SYSLOG)) -			goto warn; /* switch to return -EPERM after 2.6.39 */ -	} +	error = check_syslog_permissions(type, from_file); +	if (error) +		goto out;  	error = security_syslog(type);  	if (error) @@ -423,12 +445,6 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)  	}  out:  	return error; -warn: -	/* remove after 2.6.39 */ -	if (capable(CAP_SYS_ADMIN)) -		WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " -		  "but no CAP_SYSLOG (deprecated and denied).\n"); -	return -EPERM;  }  SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) @@ -501,7 +517,7 @@ static void _call_console_drivers(unsigned start,  /*   * Call the console drivers, asking them to write out   * log_buf[start] to log_buf[end - 1]. - * The console_sem must be held. + * The console_lock must be held.   */  static void call_console_drivers(unsigned start, unsigned end)  { @@ -604,11 +620,11 @@ static int have_callable_console(void)   *   * This is printk().  It can be called from any context.  We want it to work.   * - * We try to grab the console_sem.  If we succeed, it's easy - we log the output and + * We try to grab the console_lock.  If we succeed, it's easy - we log the output and   * call the console drivers.  If we fail to get the semaphore we place the output   * into the log buffer and return.  The current holder of the console_sem will - * notice the new output in release_console_sem() and will send it to the - * consoles before releasing the semaphore. + * notice the new output in console_unlock(); and will send it to the + * consoles before releasing the lock.   *   * One effect of this deferred printing is that code which calls printk() and   * then changes console_loglevel may break. This is because console_loglevel @@ -659,19 +675,19 @@ static inline int can_use_console(unsigned int cpu)  /*   * Try to get console ownership to actually show the kernel   * messages from a 'printk'. Return true (and with the - * console_semaphore held, and 'console_locked' set) if it + * console_lock held, and 'console_locked' set) if it   * is successful, false otherwise.   *   * This gets called with the 'logbuf_lock' spinlock held and   * interrupts disabled. It should return with 'lockbuf_lock'   * released but interrupts still disabled.   */ -static int acquire_console_semaphore_for_printk(unsigned int cpu) +static int console_trylock_for_printk(unsigned int cpu)  	__releases(&logbuf_lock)  {  	int retval = 0; -	if (!try_acquire_console_sem()) { +	if (console_trylock()) {  		retval = 1;  		/* @@ -827,12 +843,12 @@ asmlinkage int vprintk(const char *fmt, va_list args)  	 * actual magic (print out buffers, wake up klogd,  	 * etc).   	 * -	 * The acquire_console_semaphore_for_printk() function +	 * The console_trylock_for_printk() function  	 * will release 'logbuf_lock' regardless of whether it  	 * actually gets the semaphore or not.  	 
*/ -	if (acquire_console_semaphore_for_printk(this_cpu)) -		release_console_sem(); +	if (console_trylock_for_printk(this_cpu)) +		console_unlock();  	lockdep_on();  out_restore_irqs: @@ -993,7 +1009,7 @@ void suspend_console(void)  	if (!console_suspend_enabled)  		return;  	printk("Suspending console(s) (use no_console_suspend to debug)\n"); -	acquire_console_sem(); +	console_lock();  	console_suspended = 1;  	up(&console_sem);  } @@ -1004,7 +1020,7 @@ void resume_console(void)  		return;  	down(&console_sem);  	console_suspended = 0; -	release_console_sem(); +	console_unlock();  }  /** @@ -1027,21 +1043,21 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self,  	case CPU_DYING:  	case CPU_DOWN_FAILED:  	case CPU_UP_CANCELED: -		acquire_console_sem(); -		release_console_sem(); +		console_lock(); +		console_unlock();  	}  	return NOTIFY_OK;  }  /** - * acquire_console_sem - lock the console system for exclusive use. + * console_lock - lock the console system for exclusive use.   * - * Acquires a semaphore which guarantees that the caller has + * Acquires a lock which guarantees that the caller has   * exclusive access to the console system and the console_drivers list.   *   * Can sleep, returns nothing.   */ -void acquire_console_sem(void) +void console_lock(void)  {  	BUG_ON(in_interrupt());  	down(&console_sem); @@ -1050,21 +1066,29 @@ void acquire_console_sem(void)  	console_locked = 1;  	console_may_schedule = 1;  } -EXPORT_SYMBOL(acquire_console_sem); +EXPORT_SYMBOL(console_lock); -int try_acquire_console_sem(void) +/** + * console_trylock - try to lock the console system for exclusive use. + * + * Tried to acquire a lock which guarantees that the caller has + * exclusive access to the console system and the console_drivers list. + * + * returns 1 on success, and 0 on failure to acquire the lock. + */ +int console_trylock(void)  {  	if (down_trylock(&console_sem)) -		return -1; +		return 0;  	if (console_suspended) {  		up(&console_sem); -		return -1; +		return 0;  	}  	console_locked = 1;  	console_may_schedule = 0; -	return 0; +	return 1;  } -EXPORT_SYMBOL(try_acquire_console_sem); +EXPORT_SYMBOL(console_trylock);  int is_console_locked(void)  { @@ -1095,20 +1119,20 @@ void wake_up_klogd(void)  }  /** - * release_console_sem - unlock the console system + * console_unlock - unlock the console system   * - * Releases the semaphore which the caller holds on the console system + * Releases the console_lock which the caller holds on the console system   * and the console driver list.   * - * While the semaphore was held, console output may have been buffered - * by printk().  If this is the case, release_console_sem() emits - * the output prior to releasing the semaphore. + * While the console_lock was held, console output may have been buffered + * by printk().  If this is the case, console_unlock(); emits + * the output prior to releasing the lock.   *   * If there is output waiting for klogd, we wake it up.   * - * release_console_sem() may be called from any context. + * console_unlock(); may be called from any context.   
*/ -void release_console_sem(void) +void console_unlock(void)  {  	unsigned long flags;  	unsigned _con_start, _log_end; @@ -1141,7 +1165,7 @@ void release_console_sem(void)  	if (wake_klogd)  		wake_up_klogd();  } -EXPORT_SYMBOL(release_console_sem); +EXPORT_SYMBOL(console_unlock);  /**   * console_conditional_schedule - yield the CPU if required @@ -1150,7 +1174,7 @@ EXPORT_SYMBOL(release_console_sem);   * if this CPU should yield the CPU to another task, do   * so here.   * - * Must be called within acquire_console_sem(). + * Must be called within console_lock();.   */  void __sched console_conditional_schedule(void)  { @@ -1171,14 +1195,14 @@ void console_unblank(void)  		if (down_trylock(&console_sem) != 0)  			return;  	} else -		acquire_console_sem(); +		console_lock();  	console_locked = 1;  	console_may_schedule = 0;  	for_each_console(c)  		if ((c->flags & CON_ENABLED) && c->unblank)  			c->unblank(); -	release_console_sem(); +	console_unlock();  }  /* @@ -1189,7 +1213,7 @@ struct tty_driver *console_device(int *index)  	struct console *c;  	struct tty_driver *driver = NULL; -	acquire_console_sem(); +	console_lock();  	for_each_console(c) {  		if (!c->device)  			continue; @@ -1197,7 +1221,7 @@ struct tty_driver *console_device(int *index)  		if (driver)  			break;  	} -	release_console_sem(); +	console_unlock();  	return driver;  } @@ -1208,17 +1232,17 @@ struct tty_driver *console_device(int *index)   */  void console_stop(struct console *console)  { -	acquire_console_sem(); +	console_lock();  	console->flags &= ~CON_ENABLED; -	release_console_sem(); +	console_unlock();  }  EXPORT_SYMBOL(console_stop);  void console_start(struct console *console)  { -	acquire_console_sem(); +	console_lock();  	console->flags |= CON_ENABLED; -	release_console_sem(); +	console_unlock();  }  EXPORT_SYMBOL(console_start); @@ -1340,7 +1364,7 @@ void register_console(struct console *newcon)  	 *	Put this console in the list - keep the  	 *	preferred driver at the head of the list.  	 */ -	acquire_console_sem(); +	console_lock();  	if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) {  		newcon->next = console_drivers;  		console_drivers = newcon; @@ -1352,14 +1376,14 @@ void register_console(struct console *newcon)  	}  	if (newcon->flags & CON_PRINTBUFFER) {  		/* -		 * release_console_sem() will print out the buffered messages +		 * console_unlock(); will print out the buffered messages  		 * for us.  		 
*/  		spin_lock_irqsave(&logbuf_lock, flags);  		con_start = log_start;  		spin_unlock_irqrestore(&logbuf_lock, flags);  	} -	release_console_sem(); +	console_unlock();  	console_sysfs_notify();  	/* @@ -1396,7 +1420,7 @@ int unregister_console(struct console *console)  		return braille_unregister_console(console);  #endif -	acquire_console_sem(); +	console_lock();  	if (console_drivers == console) {  		console_drivers=console->next;  		res = 0; @@ -1418,7 +1442,7 @@ int unregister_console(struct console *console)  	if (console_drivers != NULL && console->flags & CON_CONSDEV)  		console_drivers->flags |= CON_CONSDEV; -	release_console_sem(); +	console_unlock();  	console_sysfs_notify();  	return res;  } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 99bbaa3e5b0..1708b1e2972 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -313,7 +313,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data)  		child->exit_code = data;  		dead = __ptrace_detach(current, child);  		if (!child->exit_state) -			wake_up_process(child); +			wake_up_state(child, TASK_TRACED | TASK_STOPPED);  	}  	write_unlock_irq(&tasklist_lock); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 77e9166d7bb..0c26e2df450 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -699,7 +699,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)  	cfs_rq->nr_running--;  } -#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_FAIR_GROUP_SCHED +# ifdef CONFIG_SMP  static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,  					    int global_update)  { @@ -721,10 +722,10 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)  	u64 now, delta;  	unsigned long load = cfs_rq->load.weight; -	if (!cfs_rq) +	if (cfs_rq->tg == &root_task_group)  		return; -	now = rq_of(cfs_rq)->clock; +	now = rq_of(cfs_rq)->clock_task;  	delta = now - cfs_rq->load_stamp;  	/* truncate load history at 4 idle periods */ @@ -762,6 +763,51 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)  		list_del_leaf_cfs_rq(cfs_rq);  } +static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, +				long weight_delta) +{ +	long load_weight, load, shares; + +	load = cfs_rq->load.weight + weight_delta; + +	load_weight = atomic_read(&tg->load_weight); +	load_weight -= cfs_rq->load_contribution; +	load_weight += load; + +	shares = (tg->shares * load); +	if (load_weight) +		shares /= load_weight; + +	if (shares < MIN_SHARES) +		shares = MIN_SHARES; +	if (shares > tg->shares) +		shares = tg->shares; + +	return shares; +} + +static void update_entity_shares_tick(struct cfs_rq *cfs_rq) +{ +	if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { +		update_cfs_load(cfs_rq, 0); +		update_cfs_shares(cfs_rq, 0); +	} +} +# else /* CONFIG_SMP */ +static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) +{ +} + +static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, +				long weight_delta) +{ +	return tg->shares; +} + +static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq) +{ +} +# endif /* CONFIG_SMP */  static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,  			    unsigned long weight)  { @@ -782,41 +828,20 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)  {  	struct task_group *tg;  	struct sched_entity *se; -	long load_weight, load, shares; - -	if (!cfs_rq) -		return; +	long shares;  	tg = cfs_rq->tg;  	se = 
tg->se[cpu_of(rq_of(cfs_rq))];  	if (!se)  		return; - -	load = cfs_rq->load.weight + weight_delta; - -	load_weight = atomic_read(&tg->load_weight); -	load_weight -= cfs_rq->load_contribution; -	load_weight += load; - -	shares = (tg->shares * load); -	if (load_weight) -		shares /= load_weight; - -	if (shares < MIN_SHARES) -		shares = MIN_SHARES; -	if (shares > tg->shares) -		shares = tg->shares; +#ifndef CONFIG_SMP +	if (likely(se->load.weight == tg->shares)) +		return; +#endif +	shares = calc_cfs_shares(cfs_rq, tg, weight_delta);  	reweight_entity(cfs_rq_of(se), se, shares);  } - -static void update_entity_shares_tick(struct cfs_rq *cfs_rq) -{ -	if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { -		update_cfs_load(cfs_rq, 0); -		update_cfs_shares(cfs_rq, 0); -	} -}  #else /* CONFIG_FAIR_GROUP_SCHED */  static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)  { @@ -1404,7 +1429,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,  static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)  { -	unsigned long this_load, load; +	s64 this_load, load;  	int idx, this_cpu, prev_cpu;  	unsigned long tl_per_task;  	struct task_group *tg; @@ -1443,8 +1468,8 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)  	 * Otherwise check if either cpus are near enough in load to allow this  	 * task to be woken on this_cpu.  	 */ -	if (this_load) { -		unsigned long this_eff_load, prev_eff_load; +	if (this_load > 0) { +		s64 this_eff_load, prev_eff_load;  		this_eff_load = 100;  		this_eff_load *= power_of(prev_cpu); diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index c914ec747ca..ad6267714c8 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -625,7 +625,7 @@ static void update_curr_rt(struct rq *rq)  	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);  	u64 delta_exec; -	if (!task_has_rt_policy(curr)) +	if (curr->sched_class != &rt_sched_class)  		return;  	delta_exec = rq->clock_task - curr->se.exec_start; diff --git a/kernel/sys.c b/kernel/sys.c index 31b71a276b4..18da702ec81 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1385,7 +1385,8 @@ static int check_prlimit_permission(struct task_struct *task)  	const struct cred *cred = current_cred(), *tcred;  	tcred = __task_cred(task); -	if ((cred->uid != tcred->euid || +	if (current != task && +	    (cred->uid != tcred->euid ||  	     cred->uid != tcred->suid ||  	     cred->uid != tcred->uid  ||  	     cred->gid != tcred->egid || diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bc86bb32e12..0f1bd83db98 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -170,7 +170,8 @@ static int proc_taint(struct ctl_table *table, int write,  #endif  #ifdef CONFIG_MAGIC_SYSRQ -static int __sysrq_enabled; /* Note: sysrq code ises it's own private copy */ +/* Note: sysrq code uses it's own private copy */ +static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;  static int sysrq_sysctl_handler(ctl_table *table, int write,  				void __user *buffer, size_t *lenp, diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3e216e01bbd..c55ea243347 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -642,8 +642,7 @@ static void tick_nohz_switch_to_nohz(void)  	}  	local_irq_enable(); -	printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", -	       smp_processor_id()); +	printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id());  }  /* @@ -795,8 +794,10 @@ void tick_setup_sched_timer(void)  	}  #ifdef CONFIG_NO_HZ -	if 
(tick_nohz_enabled) +	if (tick_nohz_enabled) {  		ts->nohz_mode = NOHZ_MODE_HIGHRES; +		printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); +	}  #endif  }  #endif /* HIGH_RES_TIMERS */ diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 32a19f9397f..3258455549f 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -41,7 +41,7 @@ static void print_name_offset(struct seq_file *m, void *sym)  	char symname[KSYM_NAME_LEN];  	if (lookup_symbol_name((unsigned long)sym, symname) < 0) -		SEQ_printf(m, "<%p>", sym); +		SEQ_printf(m, "<%pK>", sym);  	else  		SEQ_printf(m, "%s", symname);  } @@ -112,7 +112,7 @@ next_one:  static void  print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now)  { -	SEQ_printf(m, "  .base:       %p\n", base); +	SEQ_printf(m, "  .base:       %pK\n", base);  	SEQ_printf(m, "  .index:      %d\n",  			base->index);  	SEQ_printf(m, "  .resolution: %Lu nsecs\n", diff --git a/kernel/timer.c b/kernel/timer.c index 43ca9936f2d..d6459923d24 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -959,7 +959,7 @@ EXPORT_SYMBOL(try_to_del_timer_sync);   *   * Synchronization rules: Callers must prevent restarting of the timer,   * otherwise this function is meaningless. It must not be called from - * hardirq contexts. The caller must not hold locks which would prevent + * interrupt contexts. The caller must not hold locks which would prevent   * completion of the timer's handler. The timer's handler must not call   * add_timer_on(). Upon exit the timer is not queued and the handler is   * not running on any CPU. @@ -969,10 +969,12 @@ EXPORT_SYMBOL(try_to_del_timer_sync);  int del_timer_sync(struct timer_list *timer)  {  #ifdef CONFIG_LOCKDEP -	local_bh_disable(); +	unsigned long flags; + +	local_irq_save(flags);  	lock_map_acquire(&timer->lockdep_map);  	lock_map_release(&timer->lockdep_map); -	local_bh_enable(); +	local_irq_restore(flags);  #endif  	/*  	 * don't use it in hardirq context, because it diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 153562d0b93..d95721f3370 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -138,6 +138,13 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)  		     !blk_tracer_enabled))  		return; +	/* +	 * If the BLK_TC_NOTIFY action mask isn't set, don't send any note +	 * message to the trace. 
+	 */ +	if (!(bt->act_mask & BLK_TC_NOTIFY)) +		return; +  	local_irq_save(flags);  	buf = per_cpu_ptr(bt->msg_data, smp_processor_id());  	va_start(args, fmt); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 35fde09b81d..5f499e0438a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1284,7 +1284,7 @@ trace_create_file_ops(struct module *mod)  static void trace_module_add_events(struct module *mod)  {  	struct ftrace_module_file_ops *file_ops = NULL; -	struct ftrace_event_call *call, *start, *end; +	struct ftrace_event_call **call, **start, **end;  	start = mod->trace_events;  	end = mod->trace_events + mod->num_trace_events; @@ -1297,7 +1297,7 @@ static void trace_module_add_events(struct module *mod)  		return;  	for_each_event(call, start, end) { -		__trace_add_event_call(call, mod, +		__trace_add_event_call(*call, mod,  				       &file_ops->id, &file_ops->enable,  				       &file_ops->filter, &file_ops->format);  	} @@ -1367,8 +1367,8 @@ static struct notifier_block trace_module_nb = {  	.priority = 0,  }; -extern struct ftrace_event_call __start_ftrace_events[]; -extern struct ftrace_event_call __stop_ftrace_events[]; +extern struct ftrace_event_call *__start_ftrace_events[]; +extern struct ftrace_event_call *__stop_ftrace_events[];  static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; @@ -1384,7 +1384,7 @@ __setup("trace_event=", setup_trace_event);  static __init int event_trace_init(void)  { -	struct ftrace_event_call *call; +	struct ftrace_event_call **call;  	struct dentry *d_tracer;  	struct dentry *entry;  	struct dentry *d_events; @@ -1430,7 +1430,7 @@ static __init int event_trace_init(void)  		pr_warning("tracing: Failed to allocate common fields");  	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { -		__trace_add_event_call(call, NULL, &ftrace_event_id_fops, +		__trace_add_event_call(*call, NULL, &ftrace_event_id_fops,  				       &ftrace_enable_fops,  				       &ftrace_event_filter_fops,  				       &ftrace_event_format_fops); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4b74d71705c..bbeec31e0ae 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -161,13 +161,13 @@ struct ftrace_event_class event_class_ftrace_##call = {			\  	.fields			= LIST_HEAD_INIT(event_class_ftrace_##call.fields),\  };									\  									\ -struct ftrace_event_call __used						\ -__attribute__((__aligned__(4)))						\ -__attribute__((section("_ftrace_events"))) event_##call = {		\ +struct ftrace_event_call __used event_##call = {			\  	.name			= #call,				\  	.event.type		= etype,				\  	.class			= &event_class_ftrace_##call,		\  	.print_fmt		= print,				\  };									\ +struct ftrace_event_call __used						\ +__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;  #include "trace_entries.h" diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index b706529b4fc..5c9fe08d209 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -55,20 +55,21 @@ struct ftrace_event_class event_class_syscall_exit = {  	.raw_init	= init_syscall_trace,  }; -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; +extern struct syscall_metadata *__start_syscalls_metadata[]; +extern struct syscall_metadata *__stop_syscalls_metadata[];  static struct syscall_metadata **syscalls_metadata; -static struct syscall_metadata *find_syscall_meta(unsigned long syscall) +static 
__init struct syscall_metadata * +find_syscall_meta(unsigned long syscall)  { -	struct syscall_metadata *start; -	struct syscall_metadata *stop; +	struct syscall_metadata **start; +	struct syscall_metadata **stop;  	char str[KSYM_SYMBOL_LEN]; -	start = (struct syscall_metadata *)__start_syscalls_metadata; -	stop = (struct syscall_metadata *)__stop_syscalls_metadata; +	start = __start_syscalls_metadata; +	stop = __stop_syscalls_metadata;  	kallsyms_lookup(syscall, NULL, NULL, NULL, str);  	for ( ; start < stop; start++) { @@ -78,8 +79,8 @@ static struct syscall_metadata *find_syscall_meta(unsigned long syscall)  		 * with "SyS" instead of "sys", leading to an unwanted  		 * mismatch.  		 */ -		if (start->name && !strcmp(start->name + 3, str + 3)) -			return start; +		if ((*start)->name && !strcmp((*start)->name + 3, str + 3)) +			return *start;  	}  	return NULL;  } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index e95ee7f31d4..68187af4889 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -27,8 +27,8 @@  #include <linux/sched.h>  #include <linux/jump_label.h> -extern struct tracepoint __start___tracepoints[]; -extern struct tracepoint __stop___tracepoints[]; +extern struct tracepoint * const __start___tracepoints_ptrs[]; +extern struct tracepoint * const __stop___tracepoints_ptrs[];  /* Set to 1 to enable tracepoint debug output */  static const int tracepoint_debug; @@ -298,10 +298,10 @@ static void disable_tracepoint(struct tracepoint *elem)   *   * Updates the probe callback corresponding to a range of tracepoints.   */ -void -tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) +void tracepoint_update_probe_range(struct tracepoint * const *begin, +				   struct tracepoint * const *end)  { -	struct tracepoint *iter; +	struct tracepoint * const *iter;  	struct tracepoint_entry *mark_entry;  	if (!begin) @@ -309,12 +309,12 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end)  	mutex_lock(&tracepoints_mutex);  	for (iter = begin; iter < end; iter++) { -		mark_entry = get_tracepoint(iter->name); +		mark_entry = get_tracepoint((*iter)->name);  		if (mark_entry) { -			set_tracepoint(&mark_entry, iter, +			set_tracepoint(&mark_entry, *iter,  					!!mark_entry->refcount);  		} else { -			disable_tracepoint(iter); +			disable_tracepoint(*iter);  		}  	}  	mutex_unlock(&tracepoints_mutex); @@ -326,8 +326,8 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end)  static void tracepoint_update_probes(void)  {  	/* Core kernel tracepoints */ -	tracepoint_update_probe_range(__start___tracepoints, -		__stop___tracepoints); +	tracepoint_update_probe_range(__start___tracepoints_ptrs, +		__stop___tracepoints_ptrs);  	/* tracepoints in modules. */  	module_update_tracepoints();  } @@ -514,8 +514,8 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);   * Will return the first tracepoint in the range if the input tracepoint is   * NULL.   
*/ -int tracepoint_get_iter_range(struct tracepoint **tracepoint, -	struct tracepoint *begin, struct tracepoint *end) +int tracepoint_get_iter_range(struct tracepoint * const **tracepoint, +	struct tracepoint * const *begin, struct tracepoint * const *end)  {  	if (!*tracepoint && begin != end) {  		*tracepoint = begin; @@ -534,7 +534,8 @@ static void tracepoint_get_iter(struct tracepoint_iter *iter)  	/* Core kernel tracepoints */  	if (!iter->module) {  		found = tracepoint_get_iter_range(&iter->tracepoint, -				__start___tracepoints, __stop___tracepoints); +				__start___tracepoints_ptrs, +				__stop___tracepoints_ptrs);  		if (found)  			goto end;  	} @@ -585,8 +586,8 @@ int tracepoint_module_notify(struct notifier_block *self,  	switch (val) {  	case MODULE_STATE_COMING:  	case MODULE_STATE_GOING: -		tracepoint_update_probe_range(mod->tracepoints, -			mod->tracepoints + mod->num_tracepoints); +		tracepoint_update_probe_range(mod->tracepoints_ptrs, +			mod->tracepoints_ptrs + mod->num_tracepoints);  		break;  	}  	return 0; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d7ebdf4cea9..18bb15776c5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -27,7 +27,7 @@  #include <asm/irq_regs.h>  #include <linux/perf_event.h> -int watchdog_enabled; +int watchdog_enabled = 1;  int __read_mostly softlockup_thresh = 60;  static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -43,9 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);  static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);  #endif -static int no_watchdog; - -  /* boot commands */  /*   * Should we panic when a soft-lockup or hard-lockup occurs: @@ -58,7 +55,7 @@ static int __init hardlockup_panic_setup(char *str)  	if (!strncmp(str, "panic", 5))  		hardlockup_panic = 1;  	else if (!strncmp(str, "0", 1)) -		no_watchdog = 1; +		watchdog_enabled = 0;  	return 1;  }  __setup("nmi_watchdog=", hardlockup_panic_setup); @@ -77,7 +74,7 @@ __setup("softlockup_panic=", softlockup_panic_setup);  static int __init nowatchdog_setup(char *str)  { -	no_watchdog = 1; +	watchdog_enabled = 0;  	return 1;  }  __setup("nowatchdog", nowatchdog_setup); @@ -85,7 +82,7 @@ __setup("nowatchdog", nowatchdog_setup);  /* deprecated */  static int __init nosoftlockup_setup(char *str)  { -	no_watchdog = 1; +	watchdog_enabled = 0;  	return 1;  }  __setup("nosoftlockup", nosoftlockup_setup); @@ -366,8 +363,14 @@ static int watchdog_nmi_enable(int cpu)  		goto out_save;  	} -	printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n", -	       cpu, PTR_ERR(event)); + +	/* vary the KERN level based on the returned errno */ +	if (PTR_ERR(event) == -EOPNOTSUPP) +		printk(KERN_INFO "NMI watchdog disabled (cpu%i): not supported (no LAPIC?)\n", cpu); +	else if (PTR_ERR(event) == -ENOENT) +		printk(KERN_WARNING "NMI watchdog disabled (cpu%i): hardware events not enabled\n", cpu); +	else +		printk(KERN_ERR "NMI watchdog disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event));  	return PTR_ERR(event);  	/* success path */ @@ -432,9 +435,6 @@ static int watchdog_enable(int cpu)  		wake_up_process(p);  	} -	/* if any cpu succeeds, watchdog is considered enabled for the system */ -	watchdog_enabled = 1; -  	return 0;  } @@ -462,12 +462,16 @@ static void watchdog_disable(int cpu)  static void watchdog_enable_all_cpus(void)  {  	int cpu; -	int result = 0; + +	watchdog_enabled = 0;  	for_each_online_cpu(cpu) -		result += watchdog_enable(cpu); +		if (!watchdog_enable(cpu)) +			/* if any cpu 
succeeds, watchdog is considered +			   enabled for the system */ +			watchdog_enabled = 1; -	if (result) +	if (!watchdog_enabled)  		printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");  } @@ -476,9 +480,6 @@ static void watchdog_disable_all_cpus(void)  {  	int cpu; -	if (no_watchdog) -		return; -  	for_each_online_cpu(cpu)  		watchdog_disable(cpu); @@ -498,10 +499,12 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write,  {  	proc_dointvec(table, write, buffer, length, ppos); -	if (watchdog_enabled) -		watchdog_enable_all_cpus(); -	else -		watchdog_disable_all_cpus(); +	if (write) { +		if (watchdog_enabled) +			watchdog_enable_all_cpus(); +		else +			watchdog_disable_all_cpus(); +	}  	return 0;  } @@ -530,7 +533,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)  		break;  	case CPU_ONLINE:  	case CPU_ONLINE_FROZEN: -		err = watchdog_enable(hotcpu); +		if (watchdog_enabled) +			err = watchdog_enable(hotcpu);  		break;  #ifdef CONFIG_HOTPLUG_CPU  	case CPU_UP_CANCELED: @@ -555,9 +559,6 @@ void __init lockup_detector_init(void)  	void *cpu = (void *)(long)smp_processor_id();  	int err; -	if (no_watchdog) -		return; -  	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);  	WARN_ON(notifier_to_errno(err)); | 
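The printk.c hunks above fold the old open-coded capability tests into two helpers, syslog_action_restricted() and check_syslog_permissions(). The sketch below is a rough, self-contained model of the resulting policy, not the kernel code: it is plain userspace C, the SYSLOG_ACTION_* numbering is a stand-in, and the capability checks are passed in as booleans instead of going through the kernel's capable().

```c
#include <stdio.h>
#include <stdbool.h>

/* Stand-in action codes for this sketch; the kernel defines the real ones. */
enum {
	SYSLOG_ACTION_OPEN        = 1,
	SYSLOG_ACTION_READ_ALL    = 3,
	SYSLOG_ACTION_SIZE_BUFFER = 10,
};

static bool dmesg_restrict = true;

/* Unless restricted, "read all" and "get buffer size" are open to everybody. */
static bool syslog_action_restricted(int type)
{
	if (dmesg_restrict)
		return true;
	return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER;
}

/* Model of check_syslog_permissions(): 0 = allowed, -1 = permission denied.
 * Capabilities are plain booleans here rather than the kernel's capable(). */
static int check_syslog_permissions(int type, bool from_file,
				    bool has_cap_syslog, bool has_cap_sys_admin)
{
	/* /proc/kmsg readers were already checked at open time. */
	if (from_file && type != SYSLOG_ACTION_OPEN)
		return 0;

	if (!syslog_action_restricted(type))
		return 0;
	if (has_cap_syslog)
		return 0;
	if (has_cap_sys_admin) {
		fprintf(stderr, "warning: CAP_SYS_ADMIN without CAP_SYSLOG is deprecated\n");
		return 0;
	}
	return -1;
}

int main(void)
{
	/* A reader of /proc/kmsg that already passed the open-time check is
	 * allowed through; a fresh unprivileged "read all" request is not. */
	printf("%d\n", check_syslog_permissions(SYSLOG_ACTION_READ_ALL, true, false, false));
	printf("%d\n", check_syslog_permissions(SYSLOG_ACTION_READ_ALL, false, false, false));
	return 0;
}
```

The point of the refactor in the patch is that the open-time check for /proc/kmsg is not repeated on every read, and CAP_SYS_ADMIN is still accepted in place of CAP_SYSLOG, but only with a one-time deprecation warning.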

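The sched_fair.c change moves the group-share computation into a new calc_cfs_shares() helper. The sketch below is a standalone rendering of that arithmetic with made-up load figures and a stand-in MIN_SHARES value; the function name and parameters are invented for the illustration, and it only mirrors the formula in the hunk above.

```c
#include <stdio.h>

#define MIN_SHARES 2	/* stand-in; the kernel defines its own minimum */

/*
 * Mirror of the share formula added to sched_fair.c:
 *   shares = tg->shares * (cfs_rq load + delta) / adjusted group load,
 * clamped to the range [MIN_SHARES, tg->shares].
 */
static long calc_cfs_shares_model(long tg_shares, long tg_load_weight,
				  long cfs_load_contribution,
				  long cfs_load_weight, long weight_delta)
{
	long load = cfs_load_weight + weight_delta;
	long load_weight = tg_load_weight - cfs_load_contribution + load;
	long shares = tg_shares * load;

	if (load_weight)
		shares /= load_weight;

	if (shares < MIN_SHARES)
		shares = MIN_SHARES;
	if (shares > tg_shares)
		shares = tg_shares;
	return shares;
}

int main(void)
{
	/* A group entitled to 1024 shares whose runqueue carries half of the
	 * group's total load ends up with half of the shares (512). */
	printf("%ld\n", calc_cfs_shares_model(1024, 2048, 1024, 1024, 0));
	return 0;
}
```

In the patch the same helper serves both the SMP and !SMP configurations (the !SMP variant simply returns tg->shares), which is what the #ifdef restructuring in that file is about.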