diff options
Diffstat (limited to 'kernel')
65 files changed, 6165 insertions, 1556 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 6c07f30fa9b..c0cc67ad764 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -5,12 +5,12 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ - signal.o sys.o kmod.o workqueue.o pid.o \ + signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ - async.o range.o groups.o + async.o range.o groups.o lglock.o ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files @@ -25,6 +25,9 @@ endif obj-y += sched/ obj-y += power/ +ifeq ($(CONFIG_CHECKPOINT_RESTORE),y) +obj-$(CONFIG_X86) += kcmp.o +endif obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/kernel/capability.c b/kernel/capability.c index 3f1adb6c647..493d9725948 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -419,3 +419,24 @@ bool nsown_capable(int cap) { return ns_capable(current_user_ns(), cap); } + +/** + * inode_capable - Check superior capability over inode + * @inode: The inode in question + * @cap: The capability in question + * + * Return true if the current task has the given superior capability + * targeted at it's own user namespace and that the given inode is owned + * by the current user namespace or a child namespace. + * + * Currently we check to see if an inode is owned by the current + * user namespace by seeing if the inode's owner maps into the + * current user namespace. + * + */ +bool inode_capable(const struct inode *inode, int cap) +{ + struct user_namespace *ns = current_user_ns(); + + return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid); +} diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e887b55f1f2..72fcd3069a9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2225,9 +2225,9 @@ retry_find_task: * only need to check permissions on one of them. */ tcred = __task_cred(tsk); - if (cred->euid && - cred->euid != tcred->uid && - cred->euid != tcred->suid) { + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && + !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->euid, tcred->suid)) { rcu_read_unlock(); ret = -EACCES; goto out_unlock_cgroup; @@ -5143,7 +5143,7 @@ EXPORT_SYMBOL_GPL(css_depth); * @root: the css supporsed to be an ancestor of the child. * * Returns true if "root" is an ancestor of "child" in its hierarchy. Because - * this function reads css->id, this use rcu_dereference() and rcu_read_lock(). + * this function reads css->id, the caller must hold rcu_read_lock(). * But, considering usual usage, the csses should be valid objects after test. * Assuming that the caller will do some action to the child if this returns * returns true, the caller must take "child";s reference count. @@ -5155,18 +5155,18 @@ bool css_is_ancestor(struct cgroup_subsys_state *child, { struct css_id *child_id; struct css_id *root_id; - bool ret = true; - rcu_read_lock(); child_id = rcu_dereference(child->id); + if (!child_id) + return false; root_id = rcu_dereference(root->id); - if (!child_id - || !root_id - || (child_id->depth < root_id->depth) - || (child_id->stack[root_id->depth] != root_id->id)) - ret = false; - rcu_read_unlock(); - return ret; + if (!root_id) + return false; + if (child_id->depth < root_id->depth) + return false; + if (child_id->stack[root_id->depth] != root_id->id) + return false; + return true; } void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) diff --git a/kernel/compat.c b/kernel/compat.c index d2c67aa49ae..c28a306ae05 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -1073,15 +1073,7 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat if (copy_from_user(&newset32, unewset, sizeof(compat_sigset_t))) return -EFAULT; sigset_from_compat(&newset, &newset32); - sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP)); - - current->saved_sigmask = current->blocked; - set_current_blocked(&newset); - - current->state = TASK_INTERRUPTIBLE; - schedule(); - set_restore_sigmask(); - return -ERESTARTNOHAND; + return sigsuspend(&newset); } #endif /* __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 0e6353cf147..a4eb5227a19 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -10,7 +10,10 @@ #include <linux/sched.h> #include <linux/unistd.h> #include <linux/cpu.h> +#include <linux/oom.h> +#include <linux/rcupdate.h> #include <linux/export.h> +#include <linux/bug.h> #include <linux/kthread.h> #include <linux/stop_machine.h> #include <linux/mutex.h> @@ -173,6 +176,47 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_cpu_notifier); +/** + * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU + * @cpu: a CPU id + * + * This function walks all processes, finds a valid mm struct for each one and + * then clears a corresponding bit in mm's cpumask. While this all sounds + * trivial, there are various non-obvious corner cases, which this function + * tries to solve in a safe manner. + * + * Also note that the function uses a somewhat relaxed locking scheme, so it may + * be called only for an already offlined CPU. + */ +void clear_tasks_mm_cpumask(int cpu) +{ + struct task_struct *p; + + /* + * This function is called after the cpu is taken down and marked + * offline, so its not like new tasks will ever get this cpu set in + * their mm mask. -- Peter Zijlstra + * Thus, we may use rcu_read_lock() here, instead of grabbing + * full-fledged tasklist_lock. + */ + WARN_ON(cpu_online(cpu)); + rcu_read_lock(); + for_each_process(p) { + struct task_struct *t; + + /* + * Main thread might exit, but other threads may still have + * a valid mm. Find one. + */ + t = find_lock_task_mm(p); + if (!t) + continue; + cpumask_clear_cpu(cpu, mm_cpumask(t->mm)); + task_unlock(t); + } + rcu_read_unlock(); +} + static inline void check_for_tasks(int cpu) { struct task_struct *p; diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c index 249152e1530..9656a3c3650 100644 --- a/kernel/cpu_pm.c +++ b/kernel/cpu_pm.c @@ -81,7 +81,7 @@ int cpu_pm_unregister_notifier(struct notifier_block *nb) EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier); /** - * cpm_pm_enter - CPU low power entry notifier + * cpu_pm_enter - CPU low power entry notifier * * Notifies listeners that a single CPU is entering a low power state that may * cause some blocks in the same power domain as the cpu to reset. @@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier); * Must be called on the affected CPU with interrupts disabled. Platform is * responsible for ensuring that cpu_pm_enter is not called twice on the same * CPU before cpu_pm_exit is called. Notified drivers can include VFP - * co-processor, interrupt controller and it's PM extensions, local CPU + * co-processor, interrupt controller and its PM extensions, local CPU * timers context save/restore which shouldn't be interrupted. Hence it * must be called with interrupts disabled. * @@ -115,13 +115,13 @@ int cpu_pm_enter(void) EXPORT_SYMBOL_GPL(cpu_pm_enter); /** - * cpm_pm_exit - CPU low power exit notifier + * cpu_pm_exit - CPU low power exit notifier * * Notifies listeners that a single CPU is exiting a low power state that may * have caused some blocks in the same power domain as the cpu to reset. * * Notified drivers can include VFP co-processor, interrupt controller - * and it's PM extensions, local CPU timers context save/restore which + * and its PM extensions, local CPU timers context save/restore which * shouldn't be interrupted. Hence it must be called with interrupts disabled. * * Return conditions are same as __raw_notifier_call_chain. @@ -139,7 +139,7 @@ int cpu_pm_exit(void) EXPORT_SYMBOL_GPL(cpu_pm_exit); /** - * cpm_cluster_pm_enter - CPU cluster low power entry notifier + * cpu_cluster_pm_enter - CPU cluster low power entry notifier * * Notifies listeners that all cpus in a power domain are entering a low power * state that may cause some blocks in the same power domain to reset. @@ -147,7 +147,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_exit); * Must be called after cpu_pm_enter has been called on all cpus in the power * domain, and before cpu_pm_exit has been called on any cpu in the power * domain. Notified drivers can include VFP co-processor, interrupt controller - * and it's PM extensions, local CPU timers context save/restore which + * and its PM extensions, local CPU timers context save/restore which * shouldn't be interrupted. Hence it must be called with interrupts disabled. * * Must be called with interrupts disabled. @@ -174,7 +174,7 @@ int cpu_cluster_pm_enter(void) EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); /** - * cpm_cluster_pm_exit - CPU cluster low power exit notifier + * cpu_cluster_pm_exit - CPU cluster low power exit notifier * * Notifies listeners that all cpus in a power domain are exiting form a * low power state that may have caused some blocks in the same power domain @@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); * Must be called after cpu_pm_exit has been called on all cpus in the power * domain, and before cpu_pm_exit has been called on any cpu in the power * domain. Notified drivers can include VFP co-processor, interrupt controller - * and it's PM extensions, local CPU timers context save/restore which + * and its PM extensions, local CPU timers context save/restore which * shouldn't be interrupted. Hence it must be called with interrupts disabled. * * Return conditions are same as __raw_notifier_call_chain. diff --git a/kernel/cred.c b/kernel/cred.c index e70683d9ec3..de728ac50d8 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -49,6 +49,14 @@ struct cred init_cred = { .subscribers = ATOMIC_INIT(2), .magic = CRED_MAGIC, #endif + .uid = GLOBAL_ROOT_UID, + .gid = GLOBAL_ROOT_GID, + .suid = GLOBAL_ROOT_UID, + .sgid = GLOBAL_ROOT_GID, + .euid = GLOBAL_ROOT_UID, + .egid = GLOBAL_ROOT_GID, + .fsuid = GLOBAL_ROOT_UID, + .fsgid = GLOBAL_ROOT_GID, .securebits = SECUREBITS_DEFAULT, .cap_inheritable = CAP_EMPTY_SET, .cap_permitted = CAP_FULL_SET, @@ -148,6 +156,7 @@ static void put_cred_rcu(struct rcu_head *rcu) if (cred->group_info) put_group_info(cred->group_info); free_uid(cred->user); + put_user_ns(cred->user_ns); kmem_cache_free(cred_jar, cred); } @@ -198,13 +207,6 @@ void exit_creds(struct task_struct *tsk) validate_creds(cred); alter_cred_subscribers(cred, -1); put_cred(cred); - - cred = (struct cred *) tsk->replacement_session_keyring; - if (cred) { - tsk->replacement_session_keyring = NULL; - validate_creds(cred); - put_cred(cred); - } } /** @@ -303,6 +305,7 @@ struct cred *prepare_creds(void) set_cred_subscribers(new, 0); get_group_info(new->group_info); get_uid(new->user); + get_user_ns(new->user_ns); #ifdef CONFIG_KEYS key_get(new->thread_keyring); @@ -386,8 +389,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct cred *new; int ret; - p->replacement_session_keyring = NULL; - if ( #ifdef CONFIG_KEYS !p->cred->thread_keyring && @@ -414,11 +415,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) goto error_put; } - /* cache user_ns in cred. Doesn't need a refcount because it will - * stay pinned by cred->user - */ - new->user_ns = new->user->user_ns; - #ifdef CONFIG_KEYS /* new threads get their own thread keyrings if their parent already * had one */ @@ -493,10 +489,10 @@ int commit_creds(struct cred *new) get_cred(new); /* we will require a ref for the subj creds too */ /* dumpability changes */ - if (old->euid != new->euid || - old->egid != new->egid || - old->fsuid != new->fsuid || - old->fsgid != new->fsgid || + if (!uid_eq(old->euid, new->euid) || + !gid_eq(old->egid, new->egid) || + !uid_eq(old->fsuid, new->fsuid) || + !gid_eq(old->fsgid, new->fsgid) || !cap_issubset(new->cap_permitted, old->cap_permitted)) { if (task->mm) set_dumpable(task->mm, suid_dumpable); @@ -505,9 +501,9 @@ int commit_creds(struct cred *new) } /* alter the thread keyring */ - if (new->fsuid != old->fsuid) + if (!uid_eq(new->fsuid, old->fsuid)) key_fsuid_changed(task); - if (new->fsgid != old->fsgid) + if (!gid_eq(new->fsgid, old->fsgid)) key_fsgid_changed(task); /* do it @@ -524,16 +520,16 @@ int commit_creds(struct cred *new) alter_cred_subscribers(old, -2); /* send notifications */ - if (new->uid != old->uid || - new->euid != old->euid || - new->suid != old->suid || - new->fsuid != old->fsuid) + if (!uid_eq(new->uid, old->uid) || + !uid_eq(new->euid, old->euid) || + !uid_eq(new->suid, old->suid) || + !uid_eq(new->fsuid, old->fsuid)) proc_id_connector(task, PROC_EVENT_UID); - if (new->gid != old->gid || - new->egid != old->egid || - new->sgid != old->sgid || - new->fsgid != old->fsgid) + if (!gid_eq(new->gid, old->gid) || + !gid_eq(new->egid, old->egid) || + !gid_eq(new->sgid, old->sgid) || + !gid_eq(new->fsgid, old->fsgid)) proc_id_connector(task, PROC_EVENT_GID); /* release the old obj and subj refs both */ @@ -678,6 +674,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) atomic_set(&new->usage, 1); set_cred_subscribers(new, 0); get_uid(new->user); + get_user_ns(new->user_ns); get_group_info(new->group_info); #ifdef CONFIG_KEYS diff --git a/kernel/events/Makefile b/kernel/events/Makefile index 22d901f9caf..103f5d147b2 100644 --- a/kernel/events/Makefile +++ b/kernel/events/Makefile @@ -3,4 +3,7 @@ CFLAGS_REMOVE_core.o = -pg endif obj-y := core.o ring_buffer.o callchain.o + obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o +obj-$(CONFIG_UPROBES) += uprobes.o + diff --git a/kernel/events/core.c b/kernel/events/core.c index 91a44592585..5b06cbbf693 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2039,8 +2039,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, * accessing the event control register. If a NMI hits, then it will * not restart the event. */ -static void __perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next) +void __perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next) { int ctxn; @@ -2279,8 +2279,8 @@ static void perf_branch_stack_sched_in(struct task_struct *prev, * accessing the event control register. If a NMI hits, then it will * keep the event running. */ -static void __perf_event_task_sched_in(struct task_struct *prev, - struct task_struct *task) +void __perf_event_task_sched_in(struct task_struct *prev, + struct task_struct *task) { struct perf_event_context *ctx; int ctxn; @@ -2305,12 +2305,6 @@ static void __perf_event_task_sched_in(struct task_struct *prev, perf_branch_stack_sched_in(prev, task); } -void __perf_event_task_sched(struct task_struct *prev, struct task_struct *next) -{ - __perf_event_task_sched_out(prev, next); - __perf_event_task_sched_in(prev, next); -} - static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) { u64 frequency = event->attr.sample_freq; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c new file mode 100644 index 00000000000..985be4d80fe --- /dev/null +++ b/kernel/events/uprobes.c @@ -0,0 +1,1667 @@ +/* + * User-space Probes (UProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2008-2012 + * Authors: + * Srikar Dronamraju + * Jim Keniston + * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + */ + +#include <linux/kernel.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> /* read_mapping_page */ +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/rmap.h> /* anon_vma_prepare */ +#include <linux/mmu_notifier.h> /* set_pte_at_notify */ +#include <linux/swap.h> /* try_to_free_swap */ +#include <linux/ptrace.h> /* user_enable_single_step */ +#include <linux/kdebug.h> /* notifier mechanism */ + +#include <linux/uprobes.h> + +#define UINSNS_PER_PAGE (PAGE_SIZE/UPROBE_XOL_SLOT_BYTES) +#define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE + +static struct srcu_struct uprobes_srcu; +static struct rb_root uprobes_tree = RB_ROOT; + +static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ + +#define UPROBES_HASH_SZ 13 +< |