diff options
Diffstat (limited to 'kernel')
54 files changed, 1298 insertions, 950 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 203dfead2e0..02e6167a53b 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -84,11 +84,10 @@ static void do_acct_process(struct bsd_acct_struct *acct, * the cache line to have the data after getting the lock. */ struct bsd_acct_struct { - volatile int active; - volatile int needcheck; + int active; + unsigned long needcheck; struct file *file; struct pid_namespace *ns; - struct timer_list timer; struct list_head list; }; @@ -96,15 +95,6 @@ static DEFINE_SPINLOCK(acct_lock); static LIST_HEAD(acct_list); /* - * Called whenever the timer says to check the free space. - */ -static void acct_timeout(unsigned long x) -{ - struct bsd_acct_struct *acct = (struct bsd_acct_struct *)x; - acct->needcheck = 1; -} - -/* * Check the amount of free space and suspend/resume accordingly. */ static int check_free_space(struct bsd_acct_struct *acct, struct file *file) @@ -112,12 +102,12 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file) struct kstatfs sbuf; int res; int act; - sector_t resume; - sector_t suspend; + u64 resume; + u64 suspend; spin_lock(&acct_lock); res = acct->active; - if (!file || !acct->needcheck) + if (!file || time_is_before_jiffies(acct->needcheck)) goto out; spin_unlock(&acct_lock); @@ -127,8 +117,8 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file) suspend = sbuf.f_blocks * SUSPEND; resume = sbuf.f_blocks * RESUME; - sector_div(suspend, 100); - sector_div(resume, 100); + do_div(suspend, 100); + do_div(resume, 100); if (sbuf.f_bavail <= suspend) act = -1; @@ -160,10 +150,7 @@ static int check_free_space(struct bsd_acct_struct *acct, struct file *file) } } - del_timer(&acct->timer); - acct->needcheck = 0; - acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ; - add_timer(&acct->timer); + acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; res = acct->active; out: spin_unlock(&acct_lock); @@ -185,9 +172,7 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, if (acct->file) { old_acct = acct->file; old_ns = acct->ns; - del_timer(&acct->timer); acct->active = 0; - acct->needcheck = 0; acct->file = NULL; acct->ns = NULL; list_del(&acct->list); @@ -195,13 +180,9 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file, if (file) { acct->file = file; acct->ns = ns; - acct->needcheck = 0; + acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; acct->active = 1; list_add(&acct->list, &acct_list); - /* It's been deleted if it was used before so this is safe */ - setup_timer(&acct->timer, acct_timeout, (unsigned long)acct); - acct->timer.expires = jiffies + ACCT_TIMEOUT*HZ; - add_timer(&acct->timer); } if (old_acct) { mnt_unpin(old_acct->f_path.mnt); @@ -334,7 +315,7 @@ void acct_auto_close(struct super_block *sb) spin_lock(&acct_lock); restart: list_for_each_entry(acct, &acct_list, list) - if (acct->file && acct->file->f_path.mnt->mnt_sb == sb) { + if (acct->file && acct->file->f_path.dentry->d_sb == sb) { acct_file_reopen(acct, NULL, NULL); goto restart; } @@ -348,7 +329,6 @@ void acct_exit_ns(struct pid_namespace *ns) if (acct == NULL) return; - del_timer_sync(&acct->timer); spin_lock(&acct_lock); if (acct->file != NULL) acct_file_reopen(acct, NULL, NULL); @@ -498,7 +478,7 @@ static void do_acct_process(struct bsd_acct_struct *acct, * Fill the accounting struct with the needed info as recorded * by the different kernel functions. */ - memset((caddr_t)&ac, 0, sizeof(acct_t)); + memset(&ac, 0, sizeof(acct_t)); ac.ac_version = ACCT_VERSION | ACCT_BYTEORDER; strlcpy(ac.ac_comm, current->comm, sizeof(ac.ac_comm)); diff --git a/kernel/async.c b/kernel/async.c index 80b74b88fef..bd0c168a3bb 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -78,8 +78,6 @@ static DECLARE_WAIT_QUEUE_HEAD(async_done); static atomic_t entry_count; -extern int initcall_debug; - /* * MUST be called with the lock held! diff --git a/kernel/audit.c b/kernel/audit.c index 09fae2677a4..57e3f510793 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -601,13 +601,13 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) case AUDIT_TTY_SET: case AUDIT_TRIM: case AUDIT_MAKE_EQUIV: - if (security_netlink_recv(skb, CAP_AUDIT_CONTROL)) + if (!capable(CAP_AUDIT_CONTROL)) err = -EPERM; break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG ... AUDIT_LAST_USER_MSG: case AUDIT_FIRST_USER_MSG2 ... AUDIT_LAST_USER_MSG2: - if (security_netlink_recv(skb, CAP_AUDIT_WRITE)) + if (!capable(CAP_AUDIT_WRITE)) err = -EPERM; break; default: /* bad msg */ @@ -1260,12 +1260,13 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt, avail = audit_expand(ab, max_t(unsigned, AUDIT_BUFSIZ, 1+len-avail)); if (!avail) - goto out; + goto out_va_end; len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2); } - va_end(args2); if (len > 0) skb_put(skb, len); +out_va_end: + va_end(args2); out: return; } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 47b7fc1ea89..e7fe2b0d29b 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -210,12 +210,12 @@ struct audit_context { struct { uid_t uid; gid_t gid; - mode_t mode; + umode_t mode; u32 osid; int has_perm; uid_t perm_uid; gid_t perm_gid; - mode_t perm_mode; + umode_t perm_mode; unsigned long qbytes; } ipc; struct { @@ -234,7 +234,7 @@ struct audit_context { } mq_sendrecv; struct { int oflag; - mode_t mode; + umode_t mode; struct mq_attr attr; } mq_open; struct { @@ -308,7 +308,7 @@ static int audit_match_perm(struct audit_context *ctx, int mask) static int audit_match_filetype(struct audit_context *ctx, int which) { unsigned index = which & ~S_IFMT; - mode_t mode = which & S_IFMT; + umode_t mode = which & S_IFMT; if (unlikely(!ctx)) return 0; @@ -1249,7 +1249,7 @@ static void show_special(struct audit_context *context, int *call_panic) case AUDIT_IPC: { u32 osid = context->ipc.osid; - audit_log_format(ab, "ouid=%u ogid=%u mode=%#o", + audit_log_format(ab, "ouid=%u ogid=%u mode=%#ho", context->ipc.uid, context->ipc.gid, context->ipc.mode); if (osid) { char *ctx = NULL; @@ -1267,7 +1267,7 @@ static void show_special(struct audit_context *context, int *call_panic) ab = audit_log_start(context, GFP_KERNEL, AUDIT_IPC_SET_PERM); audit_log_format(ab, - "qbytes=%lx ouid=%u ogid=%u mode=%#o", + "qbytes=%lx ouid=%u ogid=%u mode=%#ho", context->ipc.qbytes, context->ipc.perm_uid, context->ipc.perm_gid, @@ -1278,7 +1278,7 @@ static void show_special(struct audit_context *context, int *call_panic) break; } case AUDIT_MQ_OPEN: { audit_log_format(ab, - "oflag=0x%x mode=%#o mq_flags=0x%lx mq_maxmsg=%ld " + "oflag=0x%x mode=%#ho mq_flags=0x%lx mq_maxmsg=%ld " "mq_msgsize=%ld mq_curmsgs=%ld", context->mq_open.oflag, context->mq_open.mode, context->mq_open.attr.mq_flags, @@ -1502,7 +1502,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts if (n->ino != (unsigned long)-1) { audit_log_format(ab, " inode=%lu" - " dev=%02x:%02x mode=%#o" + " dev=%02x:%02x mode=%#ho" " ouid=%u ogid=%u rdev=%02x:%02x", n->ino, MAJOR(n->dev), @@ -2160,7 +2160,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid) * @attr: queue attributes * */ -void __audit_mq_open(int oflag, mode_t mode, struct mq_attr *attr) +void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr) { struct audit_context *context = current->audit_context; @@ -2260,7 +2260,7 @@ void __audit_ipc_obj(struct kern_ipc_perm *ipcp) * * Called only after audit_ipc_obj(). */ -void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode) +void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode) { struct audit_context *context = current->audit_context; diff --git a/kernel/capability.c b/kernel/capability.c index b463871a4e6..0fcf1c14a29 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -287,74 +287,84 @@ error: } /** - * has_capability - Does a task have a capability in init_user_ns + * has_ns_capability - Does a task have a capability in a specific user ns * @t: The task in question + * @ns: target user namespace * @cap: The capability to be tested for * * Return true if the specified task has the given superior capability - * currently in effect to the initial user namespace, false if not. + * currently in effect to the specified user namespace, false if not. * * Note that this does not set PF_SUPERPRIV on the task. */ -bool has_capability(struct task_struct *t, int cap) +bool has_ns_capability(struct task_struct *t, + struct user_namespace *ns, int cap) { - int ret = security_real_capable(t, &init_user_ns, cap); + int ret; + + rcu_read_lock(); + ret = security_capable(__task_cred(t), ns, cap); + rcu_read_unlock(); return (ret == 0); } /** - * has_capability - Does a task have a capability in a specific user ns + * has_capability - Does a task have a capability in init_user_ns * @t: The task in question - * @ns: target user namespace * @cap: The capability to be tested for * * Return true if the specified task has the given superior capability - * currently in effect to the specified user namespace, false if not. + * currently in effect to the initial user namespace, false if not. * * Note that this does not set PF_SUPERPRIV on the task. */ -bool has_ns_capability(struct task_struct *t, - struct user_namespace *ns, int cap) +bool has_capability(struct task_struct *t, int cap) { - int ret = security_real_capable(t, ns, cap); - - return (ret == 0); + return has_ns_capability(t, &init_user_ns, cap); } /** - * has_capability_noaudit - Does a task have a capability (unaudited) + * has_ns_capability_noaudit - Does a task have a capability (unaudited) + * in a specific user ns. * @t: The task in question + * @ns: target user namespace * @cap: The capability to be tested for * * Return true if the specified task has the given superior capability - * currently in effect to init_user_ns, false if not. Don't write an - * audit message for the check. + * currently in effect to the specified user namespace, false if not. + * Do not write an audit message for the check. * * Note that this does not set PF_SUPERPRIV on the task. */ -bool has_capability_noaudit(struct task_struct *t, int cap) +bool has_ns_capability_noaudit(struct task_struct *t, + struct user_namespace *ns, int cap) { - int ret = security_real_capable_noaudit(t, &init_user_ns, cap); + int ret; + + rcu_read_lock(); + ret = security_capable_noaudit(__task_cred(t), ns, cap); + rcu_read_unlock(); return (ret == 0); } /** - * capable - Determine if the current task has a superior capability in effect + * has_capability_noaudit - Does a task have a capability (unaudited) in the + * initial user ns + * @t: The task in question * @cap: The capability to be tested for * - * Return true if the current task has the given superior capability currently - * available for use, false if not. + * Return true if the specified task has the given superior capability + * currently in effect to init_user_ns, false if not. Don't write an + * audit message for the check. * - * This sets PF_SUPERPRIV on the task if the capability is available on the - * assumption that it's about to be used. + * Note that this does not set PF_SUPERPRIV on the task. */ -bool capable(int cap) +bool has_capability_noaudit(struct task_struct *t, int cap) { - return ns_capable(&init_user_ns, cap); + return has_ns_capability_noaudit(t, &init_user_ns, cap); } -EXPORT_SYMBOL(capable); /** * ns_capable - Determine if the current task has a superior capability in effect @@ -374,7 +384,7 @@ bool ns_capable(struct user_namespace *ns, int cap) BUG(); } - if (security_capable(ns, current_cred(), cap) == 0) { + if (has_ns_capability(current, ns, cap)) { current->flags |= PF_SUPERPRIV; return true; } @@ -383,18 +393,20 @@ bool ns_capable(struct user_namespace *ns, int cap) EXPORT_SYMBOL(ns_capable); /** - * task_ns_capable - Determine whether current task has a superior - * capability targeted at a specific task's user namespace. - * @t: The task whose user namespace is targeted. - * @cap: The capability in question. + * capable - Determine if the current task has a superior capability in effect + * @cap: The capability to be tested for + * + * Return true if the current task has the given superior capability currently + * available for use, false if not. * - * Return true if it does, false otherwise. + * This sets PF_SUPERPRIV on the task if the capability is available on the + * assumption that it's about to be used. */ -bool task_ns_capable(struct task_struct *t, int cap) +bool capable(int cap) { - return ns_capable(task_cred_xxx(t, user)->user_ns, cap); + return ns_capable(&init_user_ns, cap); } -EXPORT_SYMBOL(task_ns_capable); +EXPORT_SYMBOL(capable); /** * nsown_capable - Check superior capability to one's own user_ns diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a184470cf9b..a5d3b5325f7 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -63,7 +63,24 @@ #include <linux/atomic.h> +/* + * cgroup_mutex is the master lock. Any modification to cgroup or its + * hierarchy must be performed while holding it. + * + * cgroup_root_mutex nests inside cgroup_mutex and should be held to modify + * cgroupfs_root of any cgroup hierarchy - subsys list, flags, + * release_agent_path and so on. Modifying requires both cgroup_mutex and + * cgroup_root_mutex. Readers can acquire either of the two. This is to + * break the following locking order cycle. + * + * A. cgroup_mutex -> cred_guard_mutex -> s_type->i_mutex_key -> namespace_sem + * B. namespace_sem -> cgroup_mutex + * + * B happens only through cgroup_show_options() and using cgroup_root_mutex + * breaks it. + */ static DEFINE_MUTEX(cgroup_mutex); +static DEFINE_MUTEX(cgroup_root_mutex); /* * Generate an array of cgroup subsystem pointers. At boot time, this is @@ -760,7 +777,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock); * -> cgroup_mkdir. */ -static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); +static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); static int cgroup_populate_dir(struct cgroup *cgrp); @@ -775,7 +792,7 @@ static struct backing_dev_info cgroup_backing_dev_info = { static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent, struct cgroup *child); -static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) +static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb) { struct inode *inode = new_inode(sb); @@ -921,7 +938,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry) * * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; */ -DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); +static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) { @@ -953,6 +970,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, int i; BUG_ON(!mutex_is_locked(&cgroup_mutex)); + BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); removed_bits = root->actual_subsys_bits & ~final_bits; added_bits = final_bits & ~root->actual_subsys_bits; @@ -1038,12 +1056,12 @@ static int rebind_subsystems(struct cgroupfs_root *root, return 0; } -static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) +static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) { - struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info; + struct cgroupfs_root *root = dentry->d_sb->s_fs_info; struct cgroup_subsys *ss; - mutex_lock(&cgroup_mutex); + mutex_lock(&cgroup_root_mutex); for_each_subsys(root, ss) seq_printf(seq, ",%s", ss->name); if (test_bit(ROOT_NOPREFIX, &root->flags)) @@ -1054,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",clone_children"); if (strlen(root->name)) seq_printf(seq, ",name=%s", root->name); - mutex_unlock(&cgroup_mutex); + mutex_unlock(&cgroup_root_mutex); return 0; } @@ -1175,10 +1193,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) /* * If the 'all' option was specified select all the subsystems, - * otherwise 'all, 'none' and a subsystem name options were not - * specified, let's default to 'all' + * otherwise if 'none', 'name=' and a subsystem name options + * were not specified, let's default to 'all' */ - if (all_ss || (!all_ss && !one_ss && !opts->none)) { + if (all_ss || (!one_ss && !opts->none && !opts->name)) { for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; if (ss == NULL) @@ -1269,6 +1287,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) mutex_lock(&cgrp->dentry->d_inode->i_mutex); mutex_lock(&cgroup_mutex); + mutex_lock(&cgroup_root_mutex); /* See what subsystems are wanted */ ret = parse_cgroupfs_options(data, &opts); @@ -1297,6 +1316,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) out_unlock: kfree(opts.release_agent); kfree(opts.name); + mutex_unlock(&cgroup_root_mutex); mutex_unlock(&cgroup_mutex); mutex_unlock(&cgrp->dentry->d_inode->i_mutex); return ret; @@ -1481,6 +1501,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, int ret = 0; struct super_block *sb; struct cgroupfs_root *new_root; + struct inode *inode; /* First find the desired set of subsystems */ mutex_lock(&cgroup_mutex); @@ -1514,7 +1535,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, /* We used the new root structure, so this is a new hierarchy */ struct list_head tmp_cg_links; struct cgroup *root_cgrp = &root->top_cgroup; - struct inode *inode; struct cgroupfs_root *existing_root; const struct cred *cred; int i; @@ -1528,18 +1548,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, mutex_lock(&inode->i_mutex); mutex_lock(&cgroup_mutex); + mutex_lock(&cgroup_root_mutex); - if (strlen(root->name)) { - /* Check for name clashes with existing mounts */ - for_each_active_root(existing_root) { - if (!strcmp(existing_root->name, root->name)) { - ret = -EBUSY; - mutex_unlock(&cgroup_mutex); - mutex_unlock(&inode->i_mutex); - goto drop_new_super; - } - } - } + /* Check for name clashes with existing mounts */ + ret = -EBUSY; + if (strlen(root->name)) + for_each_active_root(existing_root) + if (!strcmp(existing_root->name, root->name)) + goto unlock_drop; /* * We're accessing css_set_count without locking @@ -1549,18 +1565,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, * have some link structures left over */ ret = allocate_cg_links(css_set_count, &tmp_cg_links); - if (ret) { - mutex_unlock(&cgroup_mutex); - mutex_unlock(&inode->i_mutex); - goto drop_new_super; - } + if (ret) + goto unlock_drop; ret = rebind_subsystems(root, root->subsys_bits); if (ret == -EBUSY) { - mutex_unlock(&cgroup_mutex); - mutex_unlock(&inode->i_mutex); free_cg_links(&tmp_cg_links); - goto drop_new_super; + goto unlock_drop; } /* * There must be no failure case after here, since rebinding @@ -1599,6 +1610,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, cred = override_creds(&init_cred); cgroup_populate_dir(root_cgrp); revert_creds(cred); + mutex_unlock(&cgroup_root_mutex); mutex_unlock(&cgroup_mutex); mutex_unlock(&inode->i_mutex); } else { @@ -1615,6 +1627,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, kfree(opts.name); return dget(sb->s_root); + unlock_drop: + mutex_unlock(&cgroup_root_mutex); + mutex_unlock(&cgroup_mutex); + mutex_unlock(&inode->i_mutex); drop_new_super: deactivate_locked_super(sb); drop_modules: @@ -1639,6 +1655,7 @@ static void cgroup_kill_sb(struct super_block *sb) { BUG_ON(!list_empty(&cgrp->sibling)); |