aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/capability.c13
-rw-r--r--kernel/cgroup.c33
-rw-r--r--kernel/extable.c2
-rw-r--r--kernel/fork.c38
-rw-r--r--kernel/groups.c2
-rw-r--r--kernel/kexec.c5
-rw-r--r--kernel/kprobes.c95
-rw-r--r--kernel/modsign_pubkey.c6
-rw-r--r--kernel/module.c30
-rw-r--r--kernel/nsproxy.c36
-rw-r--r--kernel/padata.c32
-rw-r--r--kernel/panic.c8
-rw-r--r--kernel/params.c22
-rw-r--r--kernel/pid.c1
-rw-r--r--kernel/pid_namespace.c2
-rw-r--r--kernel/power/snapshot.c12
-rw-r--r--kernel/ptrace.c2
-rw-r--r--kernel/rcupdate.c2
-rw-r--r--kernel/sched/core.c15
-rw-r--r--kernel/sched/cputime.c19
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/signal.c4
-rw-r--r--kernel/smp.c16
-rw-r--r--kernel/spinlock.c14
-rw-r--r--kernel/sys.c20
-rw-r--r--kernel/sysctl.c2
-rw-r--r--kernel/task_work.c40
-rw-r--r--kernel/trace/ftrace.c17
-rw-r--r--kernel/trace/trace.c37
-rw-r--r--kernel/trace/trace.h1
-rw-r--r--kernel/trace/trace_events.c207
-rw-r--r--kernel/trace/trace_syscalls.c10
-rw-r--r--kernel/uid16.c2
-rw-r--r--kernel/up.c58
-rw-r--r--kernel/user.c2
-rw-r--r--kernel/user_namespace.c2
-rw-r--r--kernel/utsname.c2
-rw-r--r--kernel/workqueue.c107
38 files changed, 423 insertions, 495 deletions
diff --git a/kernel/capability.c b/kernel/capability.c
index f6c2ce5701e..4e66bf9275b 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -433,18 +433,6 @@ bool capable(int cap)
EXPORT_SYMBOL(capable);
/**
- * nsown_capable - Check superior capability to one's own user_ns
- * @cap: The capability in question
- *
- * Return true if the current task has the given superior capability
- * targeted at its own user namespace.
- */
-bool nsown_capable(int cap)
-{
- return ns_capable(current_user_ns(), cap);
-}
-
-/**
* inode_capable - Check superior capability over inode
* @inode: The inode in question
* @cap: The capability in question
@@ -464,3 +452,4 @@ bool inode_capable(const struct inode *inode, int cap)
return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid);
}
+EXPORT_SYMBOL(inode_capable);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e0aeb32415f..2418b6e71a8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -60,6 +60,7 @@
#include <linux/poll.h>
#include <linux/flex_array.h> /* used in cgroup_attach_task */
#include <linux/kthread.h>
+#include <linux/file.h>
#include <linux/atomic.h>
@@ -4034,8 +4035,8 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
struct cgroup_event *event;
struct cgroup_subsys_state *cfile_css;
unsigned int efd, cfd;
- struct file *efile;
- struct file *cfile;
+ struct fd efile;
+ struct fd cfile;
char *endp;
int ret;
@@ -4058,31 +4059,31 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
INIT_WORK(&event->remove, cgroup_event_remove);
- efile = eventfd_fget(efd);
- if (IS_ERR(efile)) {
- ret = PTR_ERR(efile);
+ efile = fdget(efd);
+ if (!efile.file) {
+ ret = -EBADF;
goto out_kfree;
}
- event->eventfd = eventfd_ctx_fileget(efile);
+ event->eventfd = eventfd_ctx_fileget(efile.file);
if (IS_ERR(event->eventfd)) {
ret = PTR_ERR(event->eventfd);
goto out_put_efile;
}
- cfile = fget(cfd);
- if (!cfile) {
+ cfile = fdget(cfd);
+ if (!cfile.file) {
ret = -EBADF;
goto out_put_eventfd;
}
/* the process need read permission on control file */
/* AV: shouldn't we check that it's been opened for read instead? */
- ret = inode_permission(file_inode(cfile), MAY_READ);
+ ret = inode_permission(file_inode(cfile.file), MAY_READ);
if (ret < 0)
goto out_put_cfile;
- event->cft = __file_cft(cfile);
+ event->cft = __file_cft(cfile.file);
if (IS_ERR(event->cft)) {
ret = PTR_ERR(event->cft);
goto out_put_cfile;
@@ -4103,7 +4104,7 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
ret = -EINVAL;
event->css = cgroup_css(cgrp, event->cft->ss);
- cfile_css = css_from_dir(cfile->f_dentry->d_parent, event->cft->ss);
+ cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
if (event->css && event->css == cfile_css && css_tryget(event->css))
ret = 0;
@@ -4121,25 +4122,25 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
if (ret)
goto out_put_css;
- efile->f_op->poll(efile, &event->pt);
+ efile.file->f_op->poll(efile.file, &event->pt);
spin_lock(&cgrp->event_list_lock);
list_add(&event->list, &cgrp->event_list);
spin_unlock(&cgrp->event_list_lock);
- fput(cfile);
- fput(efile);
+ fdput(cfile);
+ fdput(efile);
return 0;
out_put_css:
css_put(event->css);
out_put_cfile:
- fput(cfile);
+ fdput(cfile);
out_put_eventfd:
eventfd_ctx_put(event->eventfd);
out_put_efile:
- fput(efile);
+ fdput(efile);
out_kfree:
kfree(event);
diff --git a/kernel/extable.c b/kernel/extable.c
index 67460b93b1a..832cb28105b 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -41,7 +41,7 @@ u32 __initdata main_extable_sort_needed = 1;
/* Sort the kernel's built-in exception table */
void __init sort_main_extable(void)
{
- if (main_extable_sort_needed) {
+ if (main_extable_sort_needed && __stop___ex_table > __start___ex_table) {
pr_notice("Sorting __ex_table...\n");
sort_extable(__start___ex_table, __stop___ex_table);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index bf46287c91a..81ccb4f010c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -351,7 +351,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
struct rb_node **rb_link, *rb_parent;
int retval;
unsigned long charge;
- struct mempolicy *pol;
uprobe_start_dup_mmap();
down_write(&oldmm->mmap_sem);
@@ -400,11 +399,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
goto fail_nomem;
*tmp = *mpnt;
INIT_LIST_HEAD(&tmp->anon_vma_chain);
- pol = mpol_dup(vma_policy(mpnt));
- retval = PTR_ERR(pol);
- if (IS_ERR(pol))
+ retval = vma_dup_policy(mpnt, tmp);
+ if (retval)
goto fail_nomem_policy;
- vma_set_policy(tmp, pol);
tmp->vm_mm = mm;
if (anon_vma_fork(tmp, mpnt))
goto fail_nomem_anon_vma_fork;
@@ -472,7 +469,7 @@ out:
uprobe_end_dup_mmap();
return retval;
fail_nomem_anon_vma_fork:
- mpol_put(pol);
+ mpol_put(vma_policy(tmp));
fail_nomem_policy:
kmem_cache_free(vm_area_cachep, tmp);
fail_nomem:
@@ -1173,13 +1170,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
return ERR_PTR(-EINVAL);
/*
- * If the new process will be in a different pid namespace
- * don't allow the creation of threads.
+ * If the new process will be in a different pid or user namespace
+ * do not allow it to share a thread group or signal handlers or
+ * parent with the forking task.
*/
- if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) &&
- (task_active_pid_ns(current) !=
- current->nsproxy->pid_ns_for_children))
- return ERR_PTR(-EINVAL);
+ if (clone_flags & (CLONE_SIGHAND | CLONE_PARENT)) {
+ if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
+ (task_active_pid_ns(current) !=
+ current->nsproxy->pid_ns_for_children))
+ return ERR_PTR(-EINVAL);
+ }
retval = security_task_create(clone_flags);
if (retval)
@@ -1576,15 +1576,6 @@ long do_fork(unsigned long clone_flags,
long nr;
/*
- * Do some preliminary argument and permissions checking before we
- * actually start allocating stuff
- */
- if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) {
- if (clone_flags & (CLONE_THREAD|CLONE_PARENT))
- return -EINVAL;
- }
-
- /*
* Determine whether and which event to report to ptracer. When
* called from kernel_thread or CLONE_UNTRACED is explicitly
* requested, no event is reported; otherwise, report if the event
@@ -1825,11 +1816,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
if (unshare_flags & CLONE_NEWUSER)
unshare_flags |= CLONE_THREAD | CLONE_FS;
/*
- * If unsharing a pid namespace must also unshare the thread.
- */
- if (unshare_flags & CLONE_NEWPID)
- unshare_flags |= CLONE_THREAD;
- /*
* If unsharing a thread from a thread group, must also unshare vm.
*/
if (unshare_flags & CLONE_THREAD)
diff --git a/kernel/groups.c b/kernel/groups.c
index 6b2588dd04f..90cf1c38c8e 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -233,7 +233,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
struct group_info *group_info;
int retval;
- if (!nsown_capable(CAP_SETGID))
+ if (!ns_capable(current_user_ns(), CAP_SETGID))
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 59f7b55ba74..2a74f307c5e 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1474,11 +1474,8 @@ static int __init __parse_crashkernel(char *cmdline,
if (first_colon && (!first_space || first_colon < first_space))
return parse_crashkernel_mem(ck_cmdline, system_ram,
crash_size, crash_base);
- else
- return parse_crashkernel_simple(ck_cmdline, crash_size,
- crash_base);
- return 0;
+ return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
}
/*
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 6e33498d665..a0d367a4912 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -112,6 +112,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = {
struct kprobe_insn_page {
struct list_head list;
kprobe_opcode_t *insns; /* Page of instruction slots */
+ struct kprobe_insn_cache *cache;
int nused;
int ngarbage;
char slot_used[];
@@ -121,12 +122,6 @@ struct kprobe_insn_page {
(offsetof(struct kprobe_insn_page, slot_used) + \
(sizeof(char) * (slots)))
-struct kprobe_insn_cache {
- struct list_head pages; /* list of kprobe_insn_page */
- size_t insn_size; /* size of instruction slot */
- int nr_garbage;
-};
-
static int slots_per_page(struct kprobe_insn_cache *c)
{
return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
@@ -138,8 +133,20 @@ enum kprobe_slot_state {
SLOT_USED = 2,
};
-static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_slots */
-static struct kprobe_insn_cache kprobe_insn_slots = {
+static void *alloc_insn_page(void)
+{
+ return module_alloc(PAGE_SIZE);
+}
+
+static void free_insn_page(void *page)
+{
+ module_free(NULL, page);
+}
+
+struct kprobe_insn_cache kprobe_insn_slots = {
+ .mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
+ .alloc = alloc_insn_page,
+ .free = free_insn_page,
.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
.insn_size = MAX_INSN_SIZE,
.nr_garbage = 0,
@@ -150,10 +157,12 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c);
* __get_insn_slot() - Find a slot on an executable page for an instruction.
* We allocate an executable page if there's no room on existing ones.
*/
-static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
+kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
{
struct kprobe_insn_page *kip;
+ kprobe_opcode_t *slot = NULL;
+ mutex_lock(&c->mutex);
retry:
list_for_each_entry(kip, &c->pages, list) {
if (kip->nused < slots_per_page(c)) {
@@ -162,7 +171,8 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
if (kip->slot_used[i] == SLOT_CLEAN) {
kip->slot_used[i] = SLOT_USED;
kip->nused++;
- return kip->insns + (i * c->insn_size);
+ slot = kip->insns + (i * c->insn_size);
+ goto out;
}
}
/* kip->nused is broken. Fix it. */
@@ -178,37 +188,29 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
/* All out of space. Need to allocate a new page. */
kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
if (!kip)
- return NULL;
+ goto out;
/*
* Use module_alloc so this page is within +/- 2GB of where the
* kernel image and loaded module images reside. This is required
* so x86_64 can correctly handle the %rip-relative fixups.
*/
- kip->insns = module_alloc(PAGE_SIZE);
+ kip->insns = c->alloc();
if (!kip->insns) {
kfree(kip);
- return NULL;
+ goto out;
}
INIT_LIST_HEAD(&kip->list);
memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
kip->slot_used[0] = SLOT_USED;
kip->nused = 1;
kip->ngarbage = 0;
+ kip->cache = c;
list_add(&kip->list, &c->pages);
- return kip->insns;
-}
-
-
-kprobe_opcode_t __kprobes *get_insn_slot(void)
-{
- kprobe_opcode_t *ret = NULL;
-
- mutex_lock(&kprobe_insn_mutex);
- ret = __get_insn_slot(&kprobe_insn_slots);
- mutex_unlock(&kprobe_insn_mutex);
-
- return ret;
+ slot = kip->insns;
+out:
+ mutex_unlock(&c->mutex);
+ return slot;
}
/* Return 1 if all garbages are collected, otherwise 0. */
@@ -225,7 +227,7 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
*/
if (!list_is_singular(&kip->list)) {
list_del(&kip->list);
- module_free(NULL, kip->insns);
+ kip->cache->free(kip->insns);
kfree(kip);
}
return 1;
@@ -255,11 +257,12 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
return 0;
}
-static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
- kprobe_opcode_t *slot, int dirty)
+void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
+ kprobe_opcode_t *slot, int dirty)
{
struct kprobe_insn_page *kip;
+ mutex_lock(&c->mutex);
list_for_each_entry(kip, &c->pages, list) {
long idx = ((long)slot - (long)kip->insns) /
(c->insn_size * sizeof(kprobe_opcode_t));
@@ -272,45 +275,25 @@ static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
collect_garbage_slots(c);
} else
collect_one_slot(kip, idx);
- return;
+ goto out;
}
}
/* Could not free this slot. */
WARN_ON(1);
+out:
+ mutex_unlock(&c->mutex);
}
-void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
-{
- mutex_lock(&kprobe_insn_mutex);
- __free_insn_slot(&kprobe_insn_slots, slot, dirty);
- mutex_unlock(&kprobe_insn_mutex);
-}
#ifdef CONFIG_OPTPROBES
/* For optimized_kprobe buffer */
-static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */
-static struct kprobe_insn_cache kprobe_optinsn_slots = {
+struct kprobe_insn_cache kprobe_optinsn_slots = {
+ .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
+ .alloc = alloc_insn_page,
+ .free = free_insn_page,
.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
/* .insn_size is initialized later */
.nr_garbage = 0,
};
-/* Get a slot for optimized_kprobe buffer */
-kprobe_opcode_t __kprobes *get_optinsn_slot(void)
-{
- kprobe_opcode_t *ret = NULL;
-
- mutex_lock(&kprobe_optinsn_mutex);
- ret = __get_insn_slot(&kprobe_optinsn_slots);
- mutex_unlock(&kprobe_optinsn_mutex);
-
- return ret;
-}
-
-void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty)
-{
- mutex_lock(&kprobe_optinsn_mutex);
- __free_insn_slot(&kprobe_optinsn_slots, slot, dirty);
- mutex_unlock(&kprobe_optinsn_mutex);
-}
#endif
#endif
diff --git a/kernel/modsign_pubkey.c b/kernel/modsign_pubkey.c
index 2b6e69909c3..7cbd4507a7e 100644
--- a/kernel/modsign_pubkey.c
+++ b/kernel/modsign_pubkey.c
@@ -18,14 +18,14 @@
struct key *modsign_keyring;
-extern __initdata const u8 modsign_certificate_list[];
-extern __initdata const u8 modsign_certificate_list_end[];
+extern __initconst const u8 modsign_certificate_list[];
+extern __initconst const u8 modsign_certificate_list_end[];
/*
* We need to make sure ccache doesn't cache the .o file as it doesn't notice
* if modsign.pub changes.
*/
-static __initdata const char annoy_ccache[] = __TIME__ "foo";
+static __initconst const char annoy_ccache[] = __TIME__ "foo";
/*
* Load the compiled-in keys
diff --git a/kernel/module.c b/kernel/module.c
index 206915830d2..dc582749fa1 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -136,6 +136,7 @@ static int param_set_bool_enable_only(const char *val,
}
static const struct kernel_param_ops param_ops_bool_enable_only = {
+ .flags = KERNEL_PARAM_FL_NOARG,
.set = param_set_bool_enable_only,
.get = param_get_bool,
};
@@ -603,7 +604,7 @@ static void setup_modinfo_##field(struct module *mod, const char *s) \
static ssize_t show_modinfo_##field(struct module_attribute *mattr, \
struct module_kobject *mk, char *buffer) \
{ \
- return sprintf(buffer, "%s\n", mk->mod->field); \
+ return scnprintf(buffer, PAGE_SIZE, "%s\n", mk->mod->field); \
} \
static int modinfo_##field##_exists(struct module *mod) \
{ \
@@ -1611,6 +1612,14 @@ static void module_remove_modinfo_attrs(struct module *mod)
kfree(mod->modinfo_attrs);
}
+static void mod_kobject_put(struct module *mod)
+{
+ DECLARE_COMPLETION_ONSTACK(c);
+ mod->mkobj.kobj_completion = &c;
+ kobject_put(&mod->mkobj.kobj);
+ wait_for_completion(&c);
+}
+
static int mod_sysfs_init(struct module *mod)
{
int err;
@@ -1638,7 +1647,7 @@ static int mod_sysfs_init(struct module *mod)
err = kobject_init_and_add(&mod->mkobj.kobj, &module_ktype, NULL,
"%s", mod->name);
if (err)
- kobject_put(&mod->mkobj.kobj);
+ mod_kobject_put(mod);
/* delay uevent until full sysfs population */
out:
@@ -1682,7 +1691,7 @@ out_unreg_param:
out_unreg_holders:
kobject_put(mod->holders_dir);
out_unreg:
- kobject_put(&mod->mkobj.kobj);
+ mod_kobject_put(mod);
out:
return err;
}
@@ -1691,7 +1700,7 @@ static void mod_sysfs_fini(struct module *mod)
{
remove_notes_attrs(mod);
remove_sect_attrs(mod);
- kobject_put(&mod->mkobj.kobj);
+ mod_kobject_put(mod);
}
#else /* !CONFIG_SYSFS */
@@ -2540,21 +2549,20 @@ static int copy_module_from_user(const void __user *umod, unsigned long len,
/* Sets info->hdr and info->len. */
static int copy_module_from_fd(int fd, struct load_info *info)
{
- struct file *file;
+ struct fd f = fdget(fd);
int err;
struct kstat stat;
loff_t pos;
ssize_t bytes = 0;
- file = fget(fd);
- if (!file)
+ if (!f.file)
return -ENOEXEC;
- err = security_kernel_module_from_file(file);
+ err = security_kernel_module_from_file(f.file);
if (err)
goto out;
- err = vfs_getattr(&file->f_path, &stat);
+ err = vfs_getattr(&f.file->f_path, &stat);
if (err)
goto out;
@@ -2577,7 +2585,7 @@ static int copy_module_from_fd(int fd, struct load_info *info)
pos = 0;
while (pos < stat.size) {
- bytes = kernel_read(file, pos, (char *)(info->hdr) + pos,
+ bytes = kernel_read(f.file, pos, (char *)(info->hdr) + pos,
stat.size - pos);
if (bytes < 0) {
vfree(info->hdr);
@@ -2591,7 +2599,7 @@ static int copy_module_from_fd(int fd, struct load_info *info)
info->len = pos;
out:
- fput(file);
+ fdput(f);
return err;
}
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 997cbb951a3..8e7811086b8 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -126,22 +126,16 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
struct nsproxy *old_ns = tsk->nsproxy;
struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
struct nsproxy *new_ns;
- int err = 0;
- if (!old_ns)
+ if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+ CLONE_NEWPID | CLONE_NEWNET)))) {
+ get_nsproxy(old_ns);
return 0;
-
- get_nsproxy(old_ns);
-
- if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWPID | CLONE_NEWNET)))
- return 0;
-
- if (!ns_capable(user_ns, CAP_SYS_ADMIN)) {
- err = -EPERM;
- goto out;
}
+ if (!ns_capable(user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+
/*
* CLONE_NEWIPC must detach from the undolist: after switching
* to a new ipc namespace, the semaphore arrays from the old
@@ -149,22 +143,16 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
* means share undolist with parent, so we must forbid using
* it along with CLONE_NEWIPC.
*/
- if ((flags & CLONE_NEWIPC) && (flags & CLONE_SYSVSEM)) {
- err = -EINVAL;
- goto out;
- }
+ if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) ==
+ (CLONE_NEWIPC | CLONE_SYSVSEM))
+ return -EINVAL;
new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs);
- if (IS_ERR(new_ns)) {
- err = PTR_ERR(new_ns);
- goto out;
- }
+ if (IS_ERR(new_ns))
+ return PTR_ERR(new_ns);
tsk->nsproxy = new_ns;
-
-out:
- put_nsproxy(old_ns);
- return err;
+ return 0;
}
void free_nsproxy(struct nsproxy *ns)
diff --git a/kernel/padata.c b/kernel/padata.c
index 072f4ee4eb8..07af2c95dcf 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -846,6 +846,8 @@ static int padata_cpu_callback(struct notifier_block *nfb,
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
+ case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
if (!pinst_has_cpu(pinst, cpu))
break;
mutex_lock(&pinst->lock);
@@ -857,6 +859,8 @@ static int padata_cpu_callback(struct notifier_block *nfb,
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
if (!pinst_has_cpu(pinst, cpu))
break;
mutex_lock(&pinst->lock);
@@ -865,22 +869,6 @@ static int padata_cpu_callback(struct notifier_block *nfb,
if (err)
return notifier_from_errno(err);
break;
-
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- if (!pinst_has_cpu(pinst, cpu))
- break;
- mutex_lock(&pinst->lock);
- __padata_remove_cpu(pinst, cpu);
- mutex_unlock(&pinst->lock);
-
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- if (!pinst_has_cpu(pinst, cpu))
- break;
- mutex_lock(&pinst->lock);
- __padata_add_cpu(pinst, cpu);
- mutex_unlock(&pinst->lock);
}
return NOTIFY_OK;
@@ -1086,18 +1074,18 @@ struct padata_instance *padata_alloc(struct workqueue_struct *wq,
pinst->flags = 0;
-#ifdef CONFIG_HOTPLUG_CPU
- pinst->cpu_notifier.notifier_call = padata_cpu_callback;
- pinst->cpu_notifier.priority = 0;
- register_hotcpu_notifier(&pinst->cpu_notifier);
-#endif
-
put_online_cpus();
BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
kobject_init(&pinst->kobj, &padata_attr_type);
mutex_init(&pinst->lock);
+#ifdef CONFIG_HOTPLUG_CPU
+ pinst->cpu_notifier.notifier_call = padata_cpu_callback;
+ pinst->cpu_notifier.priority = 0;
+ register_hotcpu_notifier(&pinst->cpu_notifier);
+#endif
+
return pinst;
err_free_masks:
diff --git a/kernel/panic.c b/kernel/panic.c
index 80186460051..b6c482ccc5d 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -123,10 +123,14 @@ void panic(const char *fmt, ...)
*/
smp_send_stop();
- kmsg_dump(KMSG_DUMP_PANIC);
-
+ /*
+ * Run any panic handlers, including those that might need to
+ * add information to the kmsg dump output.
+ */
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
+ kmsg_dump(KMSG_DUMP_PANIC);
+
bust_spinlocks(0);
if (!panic_blink)
diff --git a/kernel/params.c b/kernel/params.c
index 440e65d1a54..501bde4f3be 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -103,8 +103,8 @@ static int parse_one(char *param,
|| params[i].level > max_level)
return 0;
/* No one handled NULL, so do it here. */
- if (!val && params[i].ops->set != param_set_bool
- && params[i].ops->set != param_set_bint)
+ if (!val &&
+ !(params[i].ops->flags & KERNEL_PARAM_FL_NOARG))
return -EINVAL;
pr_debug("handling %s with %p\n", param,
params[i].ops->set);
@@ -241,7 +241,8 @@ int parse_args(const char *doing,
} \
int param_get_##name(char *buffer, const struct kernel_param *kp) \
{ \
- return sprintf(buffer, format, *((type *)kp->arg)); \
+ return scnprintf(buffer, PAGE_SIZE, format, \
+ *((type *)kp->arg)); \
} \
struct kernel_param_ops param_ops_##name = { \
.set = param_set_##name, \
@@ -252,7 +253,7 @@ int parse_args(const char *doing,
EXPORT_SYMBOL(param_ops_##name)
-STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, strict_strtoul);
+STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", unsigned long, strict_strtoul);
STANDARD_PARAM_DEF(short, short, "%hi", long, strict_strtol);
STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, strict_strtoul);
STANDARD_PARAM_DEF(int, int, "%i", long, strict_strtol);
@@ -285,7 +286,7 @@ EXPORT_SYMBOL(param_set_charp);
int param_get_charp(char *buffer, const struct kernel_param *kp)
{
- return sprintf(buffer, "%s", *((char **)kp->arg));
+ return scnprintf(buffer, PAGE_SIZE, "%s", *((char **)kp->arg));
}
EXPORT_SYMBOL(param_get_charp);
@@ -320,6 +321,7 @@ int param_get_bool(char *buffer, const struct kernel_param *kp)
EXPORT_SYMBOL(param_get_bool);
struct kernel_param_ops param_ops_bool = {
+ .flags = KERNEL_PARAM_FL_NOARG,
.set = param_set_bool,
.get = param_get_bool,
};
@@ -370,6 +372,7 @@ int param_set_bint(const char *val, const struct kernel_param *kp)
EXPORT_SYMBOL(param_set_bint);
struct kernel_param_ops param_ops_bint = {
+ .flags = KERNEL_PARAM_FL_NOARG,
.set = param_set_bint,
.get = param_get_int,
};
@@ -827,7 +830,7 @@ ssize_t __modver_version_show(struct module_attribute *mattr,
struct module_version_attribute *vattr =
container_of(mattr, struct module_version_attribute, mattr);
- return sprintf(buf, "%s\n", vattr->version);
+ return scnprintf(buf, PAGE_SIZE, "%s\n", vattr->version);
}
extern const struct module_version_attribute *__start___modver[];
@@ -912,7 +915,14 @@ static const struct kset_uevent_ops module_uevent_ops = {
struct kset *module_kset;
int module_sysfs_initialized;
+static void module_kobj_release(struct kobject *kobj)
+{
+ struct module_kobject *mk = to_module_kobject(kobj);
+ complete(mk->kobj_completion);
+}
+
struct kobj_type module_ktype = {
+ .release = module_kobj_release,
.sysfs_ops = &module_sysfs_ops,
};
diff --git a/kernel/pid.c b/kernel/pid.c
index 66505c1dfc5..ebe5e80b10f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -265,6 +265,7 @@ void free_pid(struct pid *pid)
struct pid_namespace *ns = upid->ns;
hlist_del_rcu(&upid->pid_chain);
switch(--ns->nr_hashed) {
+ case 2:
case 1:
/* When all that is left in the pid namespace
* is the reaper wake up the reaper. The reaper
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 601bb361c23..42086551a24 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -329,7 +329,7 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns)
struct pid_namespace *ancestor, *new = ns;
if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
- !nsown_capable(CAP_SYS_ADMIN))
+ !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;
/*
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 349587bb03e..358a146fd4d 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -352,7 +352,7 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
struct mem_extent *ext, *cur, *aux;
zone_start = zone->zone_start_pfn;
- zone_end = zone->zone_start_pfn + zone->spanned_pages;
+ zone_end = zone_end_pfn(zone);
list_for_each_entry(ext, list, hook)
if (zone_start <= ext->end)
@@ -884,7 +884,7 @@ static unsigned int count_highmem_pages(void)
continue;
mark_free_pages(zone);
- max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ max_zone_pfn = zone_end_pfn(zone);
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (saveable_highmem_page(zone, pfn))
n++;
@@ -948,7 +948,7 @@ static unsigned int count_data_pages(void)
continue;
mark_free_pages(zone);
- max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ max_zone_pfn = zone_end_pfn(zone);
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (saveable_page(zone, pfn))
n++;
@@ -1041,7 +1041,7 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
unsigned long max_zone_pfn;
mark_free_pages(zone);
- max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ max_zone_pfn = zone_end_pfn(zone);
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (page_is_saveable(zone, pfn))
memory_bm_set_bit(orig_bm, pfn);
@@ -1093,7 +1093,7 @@ void swsusp_free(void)
unsigned long pfn, max_zone_pfn;
for_each_populated_zone(zone) {
- max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ max_zone_pfn = zone_end_pfn(zone);
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (pfn_valid(pfn)) {
struct page *page = pfn_to_page(pfn);
@@ -1755,7 +1755,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm)
/* Clear page flags */
for_each_populated_zone(zone) {
- max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ max_zone_pfn = zone_end_pfn(zone);
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (pfn_valid(pfn))
swsusp_unset_page_free(pfn_to_page(pfn));
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index a146ee327f6..dd562e9aa2c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -236,7 +236,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
*/
int dumpable = 0;
/* Don't let security modules deny introspection */
- if (task == current)
+ if (same_thread_group(task, current))
return 0;
rcu_read_lock();
tcred = __task_cred(task);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 33eb4620aa1..b02a339836b 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -122,7 +122,7 @@ struct lockdep_map rcu_sched_lock_map =
STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
-int debug_lockdep_rcu_enabled(void)
+int notrace debug_lockdep_rcu_enabled(void)
{
return rcu_scheduler_active && debug_locks &&
current->lockdep_recursion == 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 725aa067ad6..5ac63c9a995 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -978,13 +978,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
rq->skip_clock_update = 1;
}
-static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
-
-void register_task_migration_notifier(struct notifier_block *n)
-{
- atomic_notifier_chain_register(&task_migration_notifier, n);
-}
-
#ifdef CONFIG_SMP
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
@@ -1015,18 +1008,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
trace_sched_migrate_task(p, new_cpu);
if (task_cpu(p) != new_cpu) {
- struct task_migration_notifier tmn;
-
if (p->sched_class->migrate_task_rq)
p->sched_class->migrate_task_rq(p, new_cpu);
p->se.nr_migrations++;
perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
-
- tmn.task = p;
- tmn.from_cpu = task_cpu(p);
- tmn.to_cpu = new_cpu;
-
- atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
}
__set_task_cpu(p, new_cpu);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index ace34f95e20..99947919e30 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -551,10 +551,7 @@ static void cputime_adjust(struct task_cputime *curr,
struct cputime *prev,
cputime_t *ut, cputime_t *st)
{
- cputime_t rtime, stime, utime, total;
-
- stime = curr->stime;
- total = stime + curr->utime;
+ cputime_t rtime, stime, utime;
/*
* Tick based cputime accounting depend on random scheduling
@@ -576,13 +573,19 @@ static void cputime_adjust(struct task_cputime *curr,
if (prev->stime + prev->utime >= rtime)
goto out;
- if (total) {
+ stime = curr->stime;
+ utime = curr->utime;
+
+ if (utime == 0) {
+ stime = rtime;
+ } else if (stime == 0) {
+ utime = rtime;
+ } else {
+ cputime_t total = stime + utime;
+
stime = scale_stime((__force u64)stime,
(__force u64)rtime, (__force u64)total);
utime = rtime - stime;
- } else {
- stime = rtime;
- utime = 0;
}
/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7f0a5e6cdae..9b3fe1cd8f4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5151,7 +5151,7 @@ static int should_we_balance(struct lb_env *env)
* First idle cpu or the first cpu(busiest) in this sched group
* is eligible for doing load balancing at this and above domains.
*/
- return balance_cpu != env->dst_cpu;
+ return balance_cpu == env->dst_cpu;
}
/*
diff --git a/kernel/signal.c b/kernel/signal.c
index 50e41075ac7..ded28b91fa5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3394,7 +3394,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
new_ka.sa.sa_restorer = compat_ptr(restorer);
#endif
ret |= copy_from_user(&mask, &act->sa_mask, sizeof(mask));
- ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+ ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags);
if (ret)
return -EFAULT;
sigset_from_compat(&new_ka.sa.sa_mask, &mask);
@@ -3406,7 +3406,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
ret = put_user(ptr_to_compat(old_ka.sa.sa_handler),
&oact->sa_handler);
ret |= copy_to_user(&oact->sa_mask, &mask, sizeof(mask));
- ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+ ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags);
#ifdef __ARCH_HAS_SA_RESTORER
ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer),
&oact->sa_restorer);
diff --git a/kernel/smp.c b/kernel/smp.c
index 449b707fc20..0564571dcdf 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -48,10 +48,13 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
cpu_to_node(cpu)))
return notifier_from_errno(-ENOMEM);
if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
- cpu_to_node(cpu)))
+ cpu_to_node(cpu))) {
+ free_cpumask_var(cfd->cpumask);
return notifier_from_errno(-ENOMEM);
+ }
cfd->csd = alloc_percpu(struct call_single_data);
if (!cfd->csd) {
+ free_cpumask_var(cfd->cpumask_ipi);
free_cpumask_var(cfd->cpumask);
return notifier_from_errno(-ENOMEM);
}
@@ -572,8 +575,10 @@ EXPORT_SYMBOL(on_each_cpu);
*
* If @wait is true, then returns once @func has returned.
*
- * You must not call this function with disabled interrupts or
- * from a hardware interrupt handler or from a bottom half handler.
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler. The
+ * exception is that it may be used during early boot while
+ * early_boot_irqs_disabled is set.
*/
void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
void *info, bool wait)
@@ -582,9 +587,10 @@ void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
smp_call_function_many(mask, func, info, wait);
if (cpumask_test_cpu(cpu, mask)) {
- local_irq_disable();
+ unsigned long flags;
+ local_irq_save(flags);
func(info);
- local_irq_enable();
+ local_irq_restore(flags);
}
put_cpu();
}
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 5cdd8065a3c..4b082b5cac9 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -34,6 +34,20 @@
#else
#define raw_read_can_lock(l) read_can_lock(l)
#define raw_write_can_lock(l) write_can_lock(l)
+
+/*
+ * Some architectures can relax in favour of the CPU owning the lock.
+ */
+#ifndef arch_read_relax
+# define arch_read_relax(l) cpu_relax()
+#endif
+#ifndef arch_write_relax
+# define arch_write_relax(l) cpu_relax()
+#endif
+#ifndef arch_spin_relax
+# define arch_spin_relax(l) cpu_relax()
+#endif
+
/*
* We build the __lock_function inlines here. They are too large for
* inlining all over the place, but here is only one user per function
diff --git a/kernel/sys.c b/kernel/sys.c
index 771129b299f..c18ecca575b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -337,7 +337,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
if (rgid != (gid_t) -1) {
if (gid_eq(old->gid, krgid) ||
gid_eq(old->egid, krgid) ||
- nsown_capable(CAP_SETGID))
+ ns_capable(old->user_ns, CAP_SETGID))
new->gid = krgid;
else
goto error;
@@ -346,7 +346,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
if (gid_eq(old->gid, kegid) ||
gid_eq(old->egid, kegid) ||
gid_eq(old->sgid, kegid) ||
- nsown_capable(CAP_SETGID))
+ ns_capable(old->user_ns, CAP_SETGID))
new->egid = kegid;
else
goto error;
@@ -387,7 +387,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
old = current_cred();
retval = -EPERM;
- if (nsown_capable(CAP_SETGID))
+ if (ns_capable(old->user_ns, CAP_SETGID))
new->gid = new->egid = new->sgid = new->fsgid = kgid;
else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
new->egid = new->fsgid = kgid;
@@ -471,7 +471,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
new->uid = kruid;
if (!uid_eq(old->uid, kruid) &&
!uid_eq(old->euid, kruid) &&
- !nsown_capable(CAP_SETUID))
+ !ns_capable(old->user_ns, CAP_SETUID))
goto error;
}
@@ -480,7 +480,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
if (!uid_eq(old->uid, keuid) &&
!uid_eq(old->euid, keuid) &&
!uid_eq(old->suid, keuid) &&
- !nsown_capable(CAP_SETUID))
+ !ns_capable(old->user_ns, CAP_SETUID))
goto error;
}
@@ -534,7 +534,7 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
old = current_cred();
retval = -EPERM;
- if (nsown_capable(CAP_SETUID)) {
+ if (ns_capable(old->user_ns, CAP_SETUID)) {
new->suid = new->uid = kuid;
if (!uid_eq(kuid, old->uid)) {
retval = set_user(new);
@@ -591,7 +591,7 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
old = current_cred();
retval = -EPERM;
- if (!nsown_capable(CAP_SETUID)) {
+ if (!ns_capable(old->user_ns, CAP_SETUID)) {
if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
!uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
goto error;
@@ -673,7 +673,7 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
old = current_cred();
retval = -EPERM;
- if (!nsown_capable(CAP_SETGID)) {
+ if (!ns_capable(old->user_ns, CAP_SETGID)) {
if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
!gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
goto error;
@@ -744,7 +744,7 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) ||
uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
- nsown_capable(CAP_SETUID)) {
+ ns_capable(old->user_ns, CAP_SETUID)) {
if (!uid_eq(kuid, old->fsuid)) {
new->fsuid = kuid;
if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
@@ -783,7 +783,7 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) ||
gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
- nsown_capable(CAP_SETGID)) {
+ ns_capable(old->user_ns, CAP_SETGID)) {
if (!gid_eq(kgid, old->fsgid)) {
new->fsgid = kgid;
goto change_okay;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 07f6fc468e1..dc69093a8ec 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1225,7 +1225,7 @@ static struct ctl_table vm_table[] = {
.data = &hugepages_treat_as_movable,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = hugetlb_treat_movable_handler,
+ .proc_handler = proc_dointvec,
},
{
.procname = "nr_overcommit_hugepages",
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 65bd3c92d6f..8727032e3a6 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -4,6 +4,23 @@
static struct callback_head work_exited; /* all we need is ->next == NULL */
+/**
+ * task_work_add - ask the @task to execute @work->func()
+ * @task: the task which should run the callback
+ * @work: the callback to run
+ * @notify: send the notification if true
+ *
+ * Queue @work for task_work_run() below and notify the @task if @notify.
+ * Fails if the @task is exiting/exited and thus it can't process this @work.
+ * Otherwise @work->func() will be called when the @task returns from kernel
+ * mode or exits.
+ *
+ * This is like the signal handler which runs in kernel mode, but it doesn't
+ * try to wake up the @task.
+ *
+ * RETURNS:
+ * 0 if succeeds or -ESRCH.
+ */
int
task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
{
@@ -21,11 +38,22 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
return 0;
}
+/**
+ * task_work_cancel - cancel a pending work added by task_work_add()
+ * @task: the task which should execute the work
+ * @func: identifies the work to remove
+ *
+ * Find the last queued pending work with ->func == @func and remove
+ * it from queue.
+ *
+ * RETURNS:
+ * The found work or NULL if not found.
+ */
struct callback_head *
task_work_cancel(struct task_struct *task, task_work_func_t func)
{
struct callback_head **pprev = &task->task_works;
- struct callback_head *work = NULL;
+ struct callback_head *work;
unsigned long flags;
/*
* If cmpxchg() fails we continue without updating pprev.
@@ -35,7 +63,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
*/
raw_spin_lock_irqsave(&task->pi_lock, flags);
while ((work = ACCESS_ONCE(*pprev))) {
- read_barrier_depends();
+ smp_read_barrier_depends();
if (work->func != func)
pprev = &work->next;
else if (cmpxchg(pprev, work, work->next) == work)
@@ -46,6 +74,14 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
return work;
}
+/**
+ * task_work_run - execute the works added by task_work_add()
+ *
+ * Flush the pending works. Should be used by the core kernel code.
+ * Called before the task returns to the user-mode or stops, or when
+ * it exits. In the latter case task_work_add() can no longer add the
+ * new work after task_work_run() returns.
+ */
void task_work_run(void)
{
struct task_struct *task = current;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a6d098c6df3..03cf44ac54d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1978,12 +1978,27 @@ int __weak ftrace_arch_code_modify_post_process(void)
void ftrace_modify_all_code(int command)
{
+ int update = command & FTRACE_UPDATE_TRACE_FUNC;
+
+ /*
+ * If the ftrace_caller calls a ftrace_ops func directly,
+ * we need to make sure that it only traces functions it
+ * expects to trace. When doing the switch of functions,
+ * we need to update to the ftrace_ops_list_func first
+ * before the transition between old and new calls are set,
+ * as the ftrace_ops_list_func will check the ops hashes
+ * to make sure the ops are having the right functions
+ * traced.
+ */
+ if (update)
+ ftrace_update_ftrace_func(ftrace_ops_list_func);
+
if (command & FTRACE_UPDATE_CALLS)
ftrace_replace_code(1);
else if (command & FTRACE_DISABLE_CALLS)
ftrace_replace_code(0);
- if (command & FTRACE_UPDATE_TRACE_FUNC)
+ if (update && ftrace_trace_function != ftrace_ops_list_func)
ftrace_update_ftrace_func(ftrace_trace_function);
if (command & FTRACE_START_FUNC_RET)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 496f94d5769..7974ba20557 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3166,11 +3166,6 @@ static const struct file_operations show_traces_fops = {
};
/*
- * Only trace on a CPU if the bitmask is set:
- */
-static cpumask_var_t tracing_cpumask;
-
-/*
* The tracer itself will not take this lock, but still we want
* to provide a consistent cpumask to user-space:
*/
@@ -3186,11 +3181,12 @@ static ssize_t
tracing_cpumask_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *ppos)
{
+ struct trace_array *tr = file_inode(filp)->i_private;
int len;
mutex_lock(&tracing_cpumask_update_lock);
- len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
+ len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
if (count - len < 2) {
count = -EINVAL;
goto out_err;
@@ -3208,7 +3204,7 @@ static ssize_t
tracing_cpumask_write(struct file *filp, const char __user *ubuf,
size_t count, loff_t *ppos)
{
- struct trace_array *tr = filp->private_data;
+ struct trace_array *tr = file_inode(filp)->i_private;
cpumask_var_t tracing_cpumask_new;
int err, cpu;
@@ -3228,12 +3224,12 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
* Increase/decrease the disabled counter if we are
* about to flip a bit in the cpumask:
*/
- if (cpumask_test_cpu(cpu, tracing_cpumask) &&
+ if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
}
- if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
+ if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
@@ -3242,7 +3238,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
arch_spin_unlock(&ftrace_max_lock);
local_irq_enable();
- cpumask_copy(tracing_cpumask, tracing_cpumask_new);
+ cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
mutex_unlock(&tracing_cpumask_update_lock);
free_cpumask_var(tracing_cpumask_new);
@@ -3256,9 +3252,10 @@ err_unlock:
}
static const struct file_operations tracing_cpumask_fops = {
- .open = tracing_open_generic,
+ .open = tracing_open_generic_tr,
.read = tracing_cpumask_read,
.write = tracing_cpumask_write,
+ .release = tracing_release_generic_tr,
.llseek = generic_file_llseek,
};
@@ -5938,6 +5935,11 @@ static int new_instance_create(const char *name)
if (!tr->name)
goto out_free_tr;
+ if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
+ goto out_free_tr;
+
+ cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
+
raw_spin_lock_init(&tr->start_lock);
tr->current_trace = &nop_trace;
@@ -5969,6 +5971,7 @@ static int new_instance_create(const char *name)
out_free_tr:
if (tr->trace_buffer.buffer)
ring_buffer_free(tr->trace_buffer.buffer);
+ free_cpumask_var(tr->tracing_cpumask);
kfree(tr->name);
kfree(tr);
@@ -6098,6 +6101,9 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
{
int cpu;
+ trace_create_file("tracing_cpumask", 0644, d_tracer,
+ tr, &tracing_cpumask_fops);
+
trace_create_file("trace_options", 0644, d_tracer,
tr, &tracing_iter_fops);
@@ -6147,9 +6153,6 @@ static __init int tracer_init_debugfs(void)
init_tracer_debugfs(&global_trace, d_tracer);
- trace_create_file("tracing_cpumask", 0644, d_tracer,
- &global_trace, &tracing_cpumask_fops);
-
trace_create_file("available_tracers", 0444, d_tracer,
&global_trace, &show_traces_fops);
@@ -6371,7 +6374,7 @@ __init static int tracer_alloc_buffers(void)
if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
goto out;
- if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
+ if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
goto out_free_buffer_mask;
/* Only allocate trace_printk buffers if a trace_printk exists */
@@ -6386,7 +6389,7 @@ __init static int tracer_alloc_buffers(void)
ring_buf_size = 1;
cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
- cpumask_copy(tracing_cpumask, cpu_all_mask);
+ cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
raw_spin_lock_init(&global_trace.start_lock);
@@ -6441,7 +6444,7 @@ out_free_cpumask:
#ifdef CONFIG_TRACER_MAX_TRACE
free_percpu(global_trace.max_buffer.data);
#endif
- free_cpumask_var(tracing_cpumask);
+ free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
free_cpumask_var(tracing_buffer_mask);
out:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fe39acd4c1a..10c86fb7a2b 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -206,6 +206,7 @@ struct trace_array {
struct dentry *event_dir;
struct list_head systems;
struct list_head events;
+ cpumask_var_t tracing_cpumask; /* only trace on set CPUs */
int ref;
};
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 29a7ebcfb42..368a4d50cc3 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1489,12 +1489,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
}
static int
-event_create_dir(struct dentry *parent,
- struct ftrace_event_file *file,
- const struct file_operations *id,
- const struct file_operations *enable,
- const struct file_operations *filter,
- const struct file_operations *format)
+event_create_dir(struct dentry *parent, struct ftrace_event_file *file)
{
struct ftrace_event_call *call = file->event_call;
struct trace_array *tr = file->tr;
@@ -1522,12 +1517,13 @@ event_create_dir(struct dentry *parent,
if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
trace_create_file("enable", 0644, file->dir, file,
- enable);
+ &ftrace_enable_fops);
#ifdef CONFIG_PERF_EVENTS
if (call->event.type && call->class->reg)
trace_create_file("id", 0444, file->dir,
- (void *)(long)call->event.type, id);
+ (void *)(long)call->event.type,
+ &ftrace_event_id_fops);
#endif
/*
@@ -1544,10 +1540,10 @@ event_create_dir(struct dentry *parent,
}
}
trace_create_file("filter", 0644, file->dir, call,
- filter);
+ &ftrace_event_filter_fops);
trace_create_file("format", 0444, file->dir, call,
- format);
+ &ftrace_event_format_fops);
return 0;
}
@@ -1648,12 +1644,7 @@ trace_create_new_event(struct ftrace_event_call *call,
/* Add an event to a trace directory */
static int
-__trace_add_new_event(struct ftrace_event_call *call,
- struct trace_array *tr,
- const struct file_operations *id,
- const struct file_operations *enable,
- const struct file_operations *filter,
- const struct file_operations *format)
+__trace_add_new_event(struct ftrace_event_call *call, struct trace_array *tr)
{
struct ftrace_event_file *file;
@@ -1661,7 +1652,7 @@ __trace_add_new_event(struct ftrace_event_call *call,
if (!file)
return -ENOMEM;
- return event_create_dir(tr->event_dir, file, id, enable, filter, format);
+ return event_create_dir(tr->event_dir, file);
}
/*
@@ -1683,8 +1674,7 @@ __trace_early_add_new_event(struct ftrace_event_call *call,
}
struct ftrace_module_file_ops;
-static void __add_event_to_tracers(struct ftrace_event_call *call,
- struct ftrace_module_file_ops *file_ops);
+static void __add_event_to_tracers(struct ftrace_event_call *call);
/* Add an additional event_call dynamically */
int trace_add_event_call(struct ftrace_event_call *call)
@@ -1695,7 +1685,7 @@ int trace_add_event_call(struct ftrace_event_call *call)
ret = __register_event(call, NULL);
if (ret >= 0)
- __add_event_to_tracers(call, NULL);
+ __add_event_to_tracers(call);
mutex_unlock(&event_mutex);
mutex_unlock(&trace_types_lock);
@@ -1769,100 +1759,21 @@ int trace_remove_event_call(struct ftrace_event_call *call)
#ifdef CONFIG_MODULES
-static LIST_HEAD(ftrace_module_file_list);
-
-/*
- * Modules must own their file_operations to keep up with
- * reference counting.
- */
-struct ftrace_module_file_ops {
- struct list_head list;
- struct module *mod;
- struct file_operations id;
- struct file_operations enable;
- struct file_operations format;
- struct file_operations filter;
-};
-
-static struct ftrace_module_file_ops *
-find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod)
-{
- /*
- * As event_calls are added in groups by module,
- * when we find one file_ops, we don't need to search for
- * each call in that module, as the rest should be the
- * same. Only search for a new one if the last one did
- * not match.
- */
- if (file_ops && mod == file_ops->mod)
- return file_ops;
-
- list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
- if (file_ops->mod == mod)
- return file_ops;
- }
- return NULL;
-}
-
-static struct ftrace_module_file_ops *
-trace_create_file_ops(struct module *mod)
-{
- struct ftrace_module_file_ops *file_ops;
-
- /*
- * This is a bit of a PITA. To allow for correct reference
- * counting, modules must "own" their file_operations.
- * To do this, we allocate the file operations that will be
- * used in the event directory.
- */
-
- file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
- if (!file_ops)
- return NULL;
-
- file_ops->mod = mod;
-
- file_ops->id = ftrace_event_id_fops;
- file_ops->id.owner = mod;
-
- file_ops->enable = ftrace_enable_fops;
- file_ops->enable.owner = mod;
-
- file_ops->filter = ftrace_event_filter_fops;
- file_ops->filter.owner = mod;
-
- file_ops->format = ftrace_event_format_fops;
- file_ops->format.owner = mod;
-
- list_add(&file_ops->list, &ftrace_module_file_list);
-
- return file_ops;
-}
-
static void trace_module_add_events(struct module *mod)
{
- struct ftrace_module_file_ops *file_ops = NULL;
struct ftrace_event_call **call, **start, **end;
start = mod->trace_events;
end = mod->trace_events + mod->num_trace_events;
- if (start == end)
- return;
-
- file_ops = trace_create_file_ops(mod);
- if (!file_ops)
- return;
-
for_each_event(call, start, end) {
__register_event(*call, mod);
- __add_event_to_tracers(*call, file_ops);
+ __add_event_to_tracers(*call);
}
}
static void trace_module_remove_events(struct module *mod)
{
- struct ftrace_module_file_ops *file_ops;
struct ftrace_event_call *call, *p;
bool clear_trace = false;
@@ -1874,16 +1785,6 @@ static void trace_module_remove_events(struct module *mod)
__trace_remove_event_call(call);
}
}
-
- /* Now free the file_operations */
- list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
- if (file_ops->mod == mod)
- break;
- }
- if (&file_ops->list != &ftrace_module_file_list) {
- list_del(&file_ops->list);
- kfree(file_ops);
- }
up_write(&trace_event_sem);
/*
@@ -1919,67 +1820,21 @@ static int trace_module_notify(struct notifier_block *self,
return 0;
}
-static int
-__trace_add_new_mod_event(struct ftrace_event_call *call,
- struct trace_array *tr,
- struct ftrace_module_file_ops *file_ops)
-{
- return __trace_add_new_event(call, tr,
- &file_ops->id, &file_ops->enable,
- &file_ops->filter, &file_ops->format);
-}
-
-#else
-static inline struct ftrace_module_file_ops *
-find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod)
-{
- return NULL;
-}
-static inline int trace_module_notify(struct notifier_block *self,
- unsigned long val, void *data)
-{
- return 0;
-}
-static inline int
-__trace_add_new_mod_event(struct ftrace_event_call *call,
- struct trace_array *tr,
- struct ftrace_module_file_ops *file_ops)
-{
- return -ENODEV;
-}
+static struct notifier_block trace_module_nb = {
+ .notifier_call = trace_module_notify,
+ .priority = 0,
+};
#endif /* CONFIG_MODULES */
/* Create a new event directory structure for a trace directory. */
static void
__trace_add_event_dirs(struct trace_array *tr)
{
- struct ftrace_module_file_ops *file_ops = NULL;
struct ftrace_event_call *call;
int ret;
list_for_each_entry(call, &ftrace_events, list) {
- if (call->mod) {
- /*
- * Directories for events by modules need to
- * keep module ref counts when opened (as we don't
- * want the module to disappear when reading one
- * of these files). The file_ops keep account of
- * the module ref count.
- */
- file_ops = find_ftrace_file_ops(file_ops, call->mod);
- if (!file_ops)
- continue; /* Warn? */
- ret = __trace_add_new_mod_event(call, tr, file_ops);
- if (ret < 0)
- pr_warning("Could not create directory for event %s\n",
- call->name);
- continue;
- }
- ret = __trace_add_new_event(call, tr,
- &ftrace_event_id_fops,
- &ftrace_enable_fops,
- &ftrace_event_filter_fops,
- &ftrace_event_format_fops);
+ ret = __trace_add_new_event(call, tr);
if (ret < 0)
pr_warning("Could not create directory for event %s\n",
call->name);
@@ -2287,11 +2142,7 @@ __trace_early_add_event_dirs(struct trace_array *tr)
list_for_each_entry(file, &tr->events, list) {
- ret = event_create_dir(tr->event_dir, file,
- &ftrace_event_id_fops,
- &ftrace_enable_fops,
- &ftrace_event_filter_fops,
- &ftrace_event_format_fops);
+ ret = event_create_dir(tr->event_dir, file);
if (ret < 0)
pr_warning("Could not create directory for event %s\n",
file->event_call->name);
@@ -2332,29 +2183,14 @@ __trace_remove_event_dirs(struct trace_array *tr)
remove_event_file_dir(file);
}
-static void
-__add_event_to_tracers(struct ftrace_event_call *call,
- struct ftrace_module_file_ops *file_ops)
+static void __add_event_to_tracers(struct ftrace_event_call *call)
{
struct trace_array *tr;
- list_for_each_entry(tr, &ftrace_trace_arrays, list) {
- if (file_ops)
- __trace_add_new_mod_event(call, tr, file_ops);
- else
- __trace_add_new_event(call, tr,
- &ftrace_event_id_fops,
- &ftrace_enable_fops,
- &ftrace_event_filter_fops,
- &ftrace_event_format_fops);
- }
+ list_for_each_entry(tr, &ftrace_trace_arrays, list)
+ __trace_add_new_event(call, tr);
}
-static struct notifier_block trace_module_nb = {
- .notifier_call = trace_module_notify,
- .priority = 0,
-};
-
extern struct ftrace_event_call *__start_ftrace_events[];
extern struct ftrace_event_call *__stop_ftrace_events[];
@@ -2559,10 +2395,11 @@ static __init int event_trace_init(void)
if (ret)
return ret;
+#ifdef CONFIG_MODULES
ret = register_module_notifier(&trace_module_nb);
if (ret)
pr_warning("Failed to register trace events module notifier\n");
-
+#endif
return 0;
}
early_initcall(event_trace_memsetup);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8fd03657bc7..559329d9bd2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -200,8 +200,8 @@ extern char *__bad_type_size(void);
#type, #name, offsetof(typeof(trace), name), \
sizeof(trace.name), is_signed_type(type)
-static
-int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
+static int __init
+__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
int i;
int pos = 0;
@@ -228,7 +228,7 @@ int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
return pos;
}
-static int set_syscall_print_fmt(struct ftrace_event_call *call)
+static int __init set_syscall_print_fmt(struct ftrace_event_call *call)
{
char *print_fmt;
int len;
@@ -253,7 +253,7 @@ static int set_syscall_print_fmt(struct ftrace_event_call *call)
return 0;
}
-static void free_syscall_print_fmt(struct ftrace_event_call *call)
+static void __init free_syscall_print_fmt(struct ftrace_event_call *call)
{
struct syscall_metadata *entry = call->data;
@@ -459,7 +459,7 @@ static void unreg_event_syscall_exit(struct ftrace_event_file *file,
mutex_unlock(&syscall_trace_lock);
}
-static int init_syscall_trace(struct ftrace_event_call *call)
+static int __init init_syscall_trace(struct ftrace_event_call *call)
{
int id;
int num;
diff --git a/kernel/uid16.c b/kernel/uid16.c
index f6c83d7ef00..602e5bbbcef 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -176,7 +176,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
struct group_info *group_info;
int retval;
- if (!nsown_capable(CAP_SETGID))
+ if (!ns_capable(current_user_ns(), CAP_SETGID))
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
diff --git a/kernel/up.c b/kernel/up.c
index c54c75e9faf..630d72bf7e4 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -10,12 +10,64 @@
int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
int wait)
{
+ unsigned long flags;
+
WARN_ON(cpu != 0);
- local_irq_disable();
- (func)(info);
- local_irq_enable();
+ local_irq_save(flags);
+ func(info);
+ local_irq_restore(flags);
return 0;
}
EXPORT_SYMBOL(smp_call_function_single);
+
+int on_each_cpu(smp_call_func_t func, void *info, int wait)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ func(info);
+ local_irq_restore(flags);
+ return 0;
+}
+EXPORT_SYMBOL(on_each_cpu);
+
+/*
+ * Note we still need to test the mask even for UP
+ * because we actually can get an empty mask from
+ * code that on SMP might call us without the local
+ * CPU in the mask.
+ */
+void on_each_cpu_mask(const struct cpumask *mask,
+ smp_call_func_t func, void *info, bool wait)
+{
+ unsigned long flags;
+
+ if (cpumask_test_cpu(0, mask)) {
+ local_irq_save(flags);
+ func(info);
+ local_irq_restore(flags);
+ }
+}
+EXPORT_SYMBOL(on_each_cpu_mask);
+
+/*
+ * Preemption is disabled here to make sure the cond_func is called under the
+ * same condtions in UP and SMP.
+ */
+void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+ smp_call_func_t func, void *info, bool wait,
+ gfp_t gfp_flags)
+{
+ unsigned long flags;
+
+ preempt_disable();
+ if (cond_func(0, info)) {
+ local_irq_save(flags);
+ func(info);
+ local_irq_restore(flags);
+ }
+ preempt_enable();
+}
+EXPORT_SYMBOL(on_each_cpu_cond);
diff --git a/kernel/user.c b/kernel/user.c
index 69b4c3d48cd..5bbb91988e6 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,8 +51,6 @@ struct user_namespace init_user_ns = {
.owner = GLOBAL_ROOT_UID,
.group = GLOBAL_ROOT_GID,
.proc_inum = PROC_USER_INIT_INO,
- .may_mount_sysfs = true,
- .may_mount_proc = true,
};
EXPORT_SYMBOL_GPL(init_user_ns);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 9064b919a40..13fb1134ba5 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -101,8 +101,6 @@ int create_user_ns(struct cred *new)
set_cred_user_ns(new, ns);
- update_mnt_policy(ns);
-
return 0;
}
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 2fc8576efaa..fd393124e50 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -114,7 +114,7 @@ static int utsns_install(struct nsproxy *nsproxy, void *new)
struct uts_namespace *ns = new;
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
- !nsown_capable(CAP_SYS_ADMIN))
+ !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;
get_uts_ns(ns);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 29b79852a84..987293d03eb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -541,6 +541,8 @@ static int worker_pool_assign_id(struct worker_pool *pool)
* This must be called either with pwq_lock held or sched RCU read locked.
* If the pwq needs to be used beyond the locking in effect, the caller is
* responsible for guaranteeing that the pwq stays online.
+ *
+ * Return: The unbound pool_workqueue for @node.
*/
static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
int node)
@@ -639,8 +641,6 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work)
* get_work_pool - return the worker_pool a given work was associated with
* @work: the work item of interest
*
- * Return the worker_pool @work was last associated with. %NULL if none.
- *
* Pools are created and destroyed under wq_pool_mutex, and allows read
* access under sched-RCU read lock. As such, this function should be
* called under wq_pool_mutex or with preemption disabled.
@@ -649,6 +649,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work)
* mentioned locking is in effect. If the returned pool needs to be used
* beyond the critical section, the caller is responsible for ensuring the
* returned pool is and stays online.
+ *
+ * Return: The worker_pool @work was last associated with. %NULL if none.
*/
static struct worker_pool *get_work_pool(struct work_struct *work)
{
@@ -672,7 +674,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
* get_work_pool_id - return the worker pool ID a given work is associated with
* @work: the work item of interest
*
- * Return the worker_pool ID @work was last associated with.
+ * Return: The worker_pool ID @work was last associated with.
* %WORK_OFFQ_POOL_NONE if none.
*/
static int get_work_pool_id(struct work_struct *work)
@@ -831,7 +833,7 @@ void wq_worker_waking_up(struct task_struct *task, int cpu)
* CONTEXT:
* spin_lock_irq(rq->lock)
*
- * RETURNS:
+ * Return:
* Worker task on @cpu to wake up, %NULL if none.
*/
struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
@@ -966,8 +968,8 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
* CONTEXT:
* spin_lock_irq(pool->lock).
*
- * RETURNS:
- * Pointer to worker which is executing @work if found, NULL
+ * Return:
+ * Pointer to worker which is executing @work if found, %NULL
* otherwise.
*/
static struct worker *find_worker_executing_work(struct worker_pool *pool,
@@ -1155,14 +1157,16 @@ out_put:
* @flags: place to store irq state
*
* Try to grab PENDING bit of @work. This function can handle @work in any
- * stable state - idle, on timer or on worklist. Return values are
+ * stable state - idle, on timer or on worklist.
*
+ * Return:
* 1 if @work was pending and we successfully stole PENDING
* 0 if @work was idle and we claimed PENDING
* -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
* -ENOENT if someone else is canceling @work, this state may persist
* for arbitrarily long
*
+ * Note:
* On >= 0 return, the caller owns @work's PENDING bit. To avoid getting
* interrupted while holding PENDING and @work off queue, irq must be
* disabled on entry. This, combined with delayed_work->timer being
@@ -1404,10 +1408,10 @@ retry:
* @wq: workqueue to use
* @work: work to queue
*
- * Returns %false if @work was already on a queue, %true otherwise.
- *
* We queue the work to a specific CPU, the caller must ensure it
* can't go away.
+ *
+ * Return: %false if @work was already on a queue, %true otherwise.
*/
bool queue_work_on(int cpu, struct workqueue_struct *wq,
struct work_struct *work)
@@ -1477,7 +1481,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
* @dwork: work to queue
* @delay: number of jiffies to wait before queueing
*
- * Returns %false if @work was already on a queue, %true otherwise. If
+ * Return: %false if @work was already on a queue, %true otherwise. If
* @delay is zero and @dwork is idle, it will be scheduled for immediate
* execution.
*/
@@ -1513,7 +1517,7 @@ EXPORT_SYMBOL(queue_delayed_work_on);
* zero, @work is guaranteed to be scheduled immediately regardless of its
* current state.
*
- * Returns %false if @dwork was idle and queued, %true if @dwork was
+ * Return: %false if @dwork was idle and queued, %true if @dwork was
* pending and its timer was modified.
*
* This function is safe to call from any context including IRQ handler.
@@ -1628,7 +1632,7 @@ static void worker_leave_idle(struct worker *worker)
* Might sleep. Called without any lock but returns with pool->lock
* held.
*
- * RETURNS:
+ * Return:
* %true if the associated pool is online (@worker is successfully
* bound), %false if offline.
*/
@@ -1689,7 +1693,7 @@ static struct worker *alloc_worker(void)
* CONTEXT:
* Might sleep. Does GFP_KERNEL allocations.
*
- * RETURNS:
+ * Return:
* Pointer to the newly created worker.
*/
static struct worker *create_worker(struct worker_pool *pool)
@@ -1789,6 +1793,8 @@ static void start_worker(struct worker *worker)
* @pool: the target pool
*
* Grab the managership of @pool and create and start a new worker for it.
+ *
+ * Return: 0 on success. A negative error code otherwise.
*/
static int create_and_start_worker(struct worker_pool *pool)
{
@@ -1933,7 +1939,7 @@ static void pool_mayday_timeout(unsigned long __pool)
* multiple times. Does GFP_KERNEL allocations. Called only from
* manager.
*
- * RETURNS:
+ * Return:
* %false if no action was taken and pool->lock stayed locked, %true
* otherwise.
*/
@@ -1990,7 +1996,7 @@ restart:
* spin_lock_irq(pool->lock) which may be released and regrabbed
* multiple times. Called only from manager.
*
- * RETURNS:
+ * Return:
* %false if no action was taken and pool->lock stayed locked, %true
* otherwise.
*/
@@ -2033,7 +2039,7 @@ static bool maybe_destroy_workers(struct worker_pool *pool)
* spin_lock_irq(pool->lock) which may be released and regrabbed
* multiple times. Does GFP_KERNEL allocations.
*
- * RETURNS:
+ * Return:
* %false if the pool don't need management and the caller can safely start
* processing works, %true indicates that the function released pool->lock
* and reacquired it to perform some management function and that the
@@ -2259,6 +2265,8 @@ static void process_scheduled_works(struct worker *worker)
* work items regardless of their specific target workqueue. The only
* exception is work items which belong to workqueues with a rescuer which
* will be explained in rescuer_thread().
+ *
+ * Return: 0
*/
static int worker_thread(void *__worker)
{
@@ -2357,6 +2365,8 @@ sleep:
* those works so that forward progress can be guaranteed.
*
* This should happen rarely.
+ *
+ * Return: 0
*/
static int rescuer_thread(void *__rescuer)
{
@@ -2529,7 +2539,7 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
* CONTEXT:
* mutex_lock(wq->mutex).
*
- * RETURNS:
+ * Return:
* %true if @flush_color >= 0 and there's something to flush. %false
* otherwise.
*/
@@ -2850,7 +2860,7 @@ static bool __flush_work(struct work_struct *work)
* Wait until @work has finished execution. @work is guaranteed to be idle
* on return if it hasn't been requeued since flush started.
*
- * RETURNS:
+ * Return:
* %true if flush_work() waited for the work to finish execution,
* %false if it was already idle.
*/
@@ -2902,7 +2912,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
* The caller must ensure that the workqueue on which @work was last
* queued can't be destroyed before this function returns.
*
- * RETURNS:
+ * Return:
* %true if @work was pending, %false otherwise.
*/
bool cancel_work_sync(struct work_struct *work)
@@ -2919,7 +2929,7 @@ EXPORT_SYMBOL_GPL(cancel_work_sync);
* immediate execution. Like flush_work(), this function only
* considers the last queueing instance of @dwork.
*
- * RETURNS:
+ * Return:
* %true if flush_work() waited for the work to finish execution,
* %false if it was already idle.
*/
@@ -2937,11 +2947,15 @@ EXPORT_SYMBOL(flush_delayed_work);
* cancel_delayed_work - cancel a delayed work
* @dwork: delayed_work to cancel
*
- * Kill off a pending delayed_work. Returns %true if @dwork was pending
- * and canceled; %false if wasn't pending. Note that the work callback
- * function may still be running on return, unless it returns %true and the
- * work doesn't re-arm itself. Explicitly flush or use
- * cancel_delayed_work_sync() to wait on it.
+ * Kill off a pending delayed_work.
+ *
+ * Return: %true if @dwork was pending and canceled; %false if it wasn't
+ * pending.
+ *
+ * Note:
+ * The work callback function may still be running on return, unless
+ * it returns %true and the work doesn't re-arm itself. Explicitly flush or
+ * use cancel_delayed_work_sync() to wait on it.
*
* This function is safe to call from any context including IRQ handler.
*/
@@ -2970,7 +2984,7 @@ EXPORT_SYMBOL(cancel_delayed_work);
*
* This is cancel_work_sync() for delayed works.
*
- * RETURNS:
+ * Return:
* %true if @dwork was pending, %false otherwise.
*/
bool cancel_delayed_work_sync(struct delayed_work *dwork)
@@ -2987,7 +3001,7 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
* system workqueue and blocks until all CPUs have completed.
* schedule_on_each_cpu() is very slow.
*
- * RETURNS:
+ * Return:
* 0 on success, -errno on failure.
*/
int schedule_on_each_cpu(work_func_t func)
@@ -3055,7 +3069,7 @@ EXPORT_SYMBOL(flush_scheduled_work);
* Executes the function immediately if process context is available,
* otherwise schedules the function for delayed execution.
*
- * Returns: 0 - function was executed
+ * Return: 0 - function was executed
* 1 - function was scheduled for execution
*/
int execute_in_process_context(work_func_t fn, struct execute_work *ew)
@@ -3315,7 +3329,7 @@ static void wq_device_release(struct device *dev)
* apply_workqueue_attrs() may race against userland updating the
* attributes.
*
- * Returns 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
*/
int workqueue_sysfs_register(struct workqueue_struct *wq)
{
@@ -3408,7 +3422,9 @@ void free_workqueue_attrs(struct workqueue_attrs *attrs)
* @gfp_mask: allocation mask to use
*
* Allocate a new workqueue_attrs, initialize with default settings and
- * return it. Returns NULL on failure.
+ * return it.
+ *
+ * Return: The allocated new workqueue_attr on success. %NULL on failure.
*/
struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
{
@@ -3467,7 +3483,8 @@ static bool wqattrs_equal(const struct workqueue_attrs *a,
* @pool: worker_pool to initialize
*
* Initiailize a newly zalloc'd @pool. It also allocates @pool->attrs.
- * Returns 0 on success, -errno on failure. Even on failure, all fields
+ *
+ * Return: 0 on success, -errno on failure. Even on failure, all fields
* inside @pool proper are initialized and put_unbound_pool() can be called
* on @pool safely to release it.
*/
@@ -3574,9 +3591,12 @@ static void put_unbound_pool(struct worker_pool *pool)
* Obtain a worker_pool which has the same attributes as @attrs, bump the
* reference count and return it. If there already is a matching
* worker_pool, it will be used; otherwise, this function attempts to
- * create a new one. On failure, returns NULL.
+ * create a new one.
*
* Should be called with wq_pool_mutex held.
+ *
+ * Return: On success, a worker_pool with the same attributes as @attrs.
+ * On failure, %NULL.
*/
static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
{
@@ -3812,9 +3832,7 @@ static void free_unbound_pwq(struct pool_workqueue *pwq)
*
* Calculate the cpumask a workqueue with @attrs should use on @node. If
* @cpu_going_down is >= 0, that cpu is considered offline during
- * calculation. The result is stored in @cpumask. This function returns
- * %true if the resulting @cpumask is different from @attrs->cpumask,
- * %false if equal.
+ * calculation. The result is stored in @cpumask.
*
* If NUMA affinity is not enabled, @attrs->cpumask is always used. If
* enabled and @node has online CPUs requested by @attrs, the returned
@@ -3823,6 +3841,9 @@ static void free_unbound_pwq(struct pool_workqueue *pwq)
*
* The caller is responsible for ensuring that the cpumask of @node stays
* stable.
+ *
+ * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
+ * %false if equal.
*/
static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
int cpu_going_down, cpumask_t *cpumask)
@@ -3876,8 +3897,9 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
* items finish. Note that a work item which repeatedly requeues itself
* back-to-back will stay on its current pwq.
*
- * Performs GFP_KERNEL allocations. Returns 0 on success and -errno on
- * failure.
+ * Performs GFP_KERNEL allocations.
+ *
+ * Return: 0 on success and -errno on failure.
*/
int apply_workqueue_attrs(struct workqueue_struct *wq,
const struct workqueue_attrs *attrs)
@@ -4345,6 +4367,8 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active);
*
* Determine whether %current is a workqueue rescuer. Can be used from
* work functions to determine whether it's being run off the rescuer task.
+ *
+ * Return: %true if %current is a workqueue rescuer. %false otherwise.
*/
bool current_is_workqueue_rescuer(void)
{
@@ -4368,7 +4392,7 @@ bool current_is_workqueue_rescuer(void)
* workqueue being congested on one CPU doesn't mean the workqueue is also
* contested on other CPUs / NUMA nodes.
*
- * RETURNS:
+ * Return:
* %true if congested, %false otherwise.
*/
bool workqueue_congested(int cpu, struct workqueue_struct *wq)
@@ -4401,7 +4425,7 @@ EXPORT_SYMBOL_GPL(workqueue_congested);
* synchronization around this function and the test result is
* unreliable and only useful as advisory hints or for debugging.
*
- * RETURNS:
+ * Return:
* OR'd bitmask of WORK_BUSY_* bits.
*/
unsigned int work_busy(struct work_struct *work)
@@ -4779,9 +4803,10 @@ static void work_for_cpu_fn(struct work_struct *work)
* @fn: the function to run
* @arg: the function arg
*
- * This will return the value @fn returns.
* It is up to the caller to ensure that the cpu doesn't go offline.
* The caller must not hold any locks which would prevent @fn from completing.
+ *
+ * Return: The value @fn returns.
*/
long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
{
@@ -4853,7 +4878,7 @@ void freeze_workqueues_begin(void)
* CONTEXT:
* Grabs and releases wq_pool_mutex.
*
- * RETURNS:
+ * Return:
* %true if some freezable workqueues are still busy. %false if freezing
* is complete.
*/