diff options
Diffstat (limited to 'security/commoncap.c')
| -rw-r--r-- | security/commoncap.c | 232 | 
1 files changed, 138 insertions, 94 deletions
diff --git a/security/commoncap.c b/security/commoncap.c index 04b80f9912b..b9d613e0ef1 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -27,7 +27,9 @@  #include <linux/sched.h>  #include <linux/prctl.h>  #include <linux/securebits.h> -#include <linux/syslog.h> +#include <linux/user_namespace.h> +#include <linux/binfmts.h> +#include <linux/personality.h>  /*   * If a non-root user executes a setuid-root binary in @@ -53,22 +55,13 @@ static void warn_setuid_and_fcaps_mixed(const char *fname)  int cap_netlink_send(struct sock *sk, struct sk_buff *skb)  { -	NETLINK_CB(skb).eff_cap = current_cap();  	return 0;  } -int cap_netlink_recv(struct sk_buff *skb, int cap) -{ -	if (!cap_raised(NETLINK_CB(skb).eff_cap, cap)) -		return -EPERM; -	return 0; -} -EXPORT_SYMBOL(cap_netlink_recv); -  /**   * cap_capable - Determine whether a task has a particular effective capability - * @tsk: The task to query   * @cred: The credentials to use + * @ns:  The user namespace in which we need the capability   * @cap: The capability to check for   * @audit: Whether to write an audit message or not   * @@ -80,10 +73,39 @@ EXPORT_SYMBOL(cap_netlink_recv);   * cap_has_capability() returns 0 when a task has a capability, but the   * kernel's capable() and has_capability() returns 1 for this case.   */ -int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap, -		int audit) +int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, +		int cap, int audit)  { -	return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM; +	struct user_namespace *ns = targ_ns; + +	/* See if cred has the capability in the target user namespace +	 * by examining the target user namespace and all of the target +	 * user namespace's parents. +	 */ +	for (;;) { +		/* Do we have the necessary capabilities? */ +		if (ns == cred->user_ns) +			return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM; + +		/* Have we tried all of the parent namespaces? */ +		if (ns == &init_user_ns) +			return -EPERM; + +		/*  +		 * The owner of the user namespace in the parent of the +		 * user namespace has all caps. +		 */ +		if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid)) +			return 0; + +		/* +		 * If you have a capability in a parent user ns, then you have +		 * it over all children user namespaces as well. +		 */ +		ns = ns->parent; +	} + +	/* We never get here */  }  /** @@ -94,7 +116,7 @@ int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap,   * Determine whether the current process may set the system clock and timezone   * information, returning 0 if permission granted, -ve if denied.   */ -int cap_settime(struct timespec *ts, struct timezone *tz) +int cap_settime(const struct timespec *ts, const struct timezone *tz)  {  	if (!capable(CAP_SYS_TIME))  		return -EPERM; @@ -107,18 +129,30 @@ int cap_settime(struct timespec *ts, struct timezone *tz)   * @child: The process to be accessed   * @mode: The mode of attachment.   * + * If we are in the same or an ancestor user_ns and have all the target + * task's capabilities, then ptrace access is allowed. + * If we have the ptrace capability to the target user_ns, then ptrace + * access is allowed. + * Else denied. + *   * Determine whether a process may access another, returning 0 if permission   * granted, -ve if denied.   */  int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)  {  	int ret = 0; +	const struct cred *cred, *child_cred;  	rcu_read_lock(); -	if (!cap_issubset(__task_cred(child)->cap_permitted, -			  current_cred()->cap_permitted) && -	    !capable(CAP_SYS_PTRACE)) -		ret = -EPERM; +	cred = current_cred(); +	child_cred = __task_cred(child); +	if (cred->user_ns == child_cred->user_ns && +	    cap_issubset(child_cred->cap_permitted, cred->cap_permitted)) +		goto out; +	if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE)) +		goto out; +	ret = -EPERM; +out:  	rcu_read_unlock();  	return ret;  } @@ -127,18 +161,30 @@ int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)   * cap_ptrace_traceme - Determine whether another process may trace the current   * @parent: The task proposed to be the tracer   * + * If parent is in the same or an ancestor user_ns and has all current's + * capabilities, then ptrace access is allowed. + * If parent has the ptrace capability to current's user_ns, then ptrace + * access is allowed. + * Else denied. + *   * Determine whether the nominated task is permitted to trace the current   * process, returning 0 if permission is granted, -ve if denied.   */  int cap_ptrace_traceme(struct task_struct *parent)  {  	int ret = 0; +	const struct cred *cred, *child_cred;  	rcu_read_lock(); -	if (!cap_issubset(current_cred()->cap_permitted, -			  __task_cred(parent)->cap_permitted) && -	    !has_capability(parent, CAP_SYS_PTRACE)) -		ret = -EPERM; +	cred = __task_cred(parent); +	child_cred = current_cred(); +	if (cred->user_ns == child_cred->user_ns && +	    cap_issubset(child_cred->cap_permitted, cred->cap_permitted)) +		goto out; +	if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE)) +		goto out; +	ret = -EPERM; +out:  	rcu_read_unlock();  	return ret;  } @@ -178,8 +224,8 @@ static inline int cap_inh_is_capped(void)  	/* they are so limited unless the current task has the CAP_SETPCAP  	 * capability  	 */ -	if (cap_capable(current, current_cred(), CAP_SETPCAP, -			SECURITY_CAP_AUDIT) == 0) +	if (cap_capable(current_cred(), current_cred()->user_ns, +			CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0)  		return 0;  	return 1;  } @@ -287,7 +333,8 @@ int cap_inode_killpriv(struct dentry *dentry)   */  static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,  					  struct linux_binprm *bprm, -					  bool *effective) +					  bool *effective, +					  bool *has_cap)  {  	struct cred *new = bprm->cred;  	unsigned i; @@ -296,6 +343,9 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,  	if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)  		*effective = true; +	if (caps->magic_etc & VFS_CAP_REVISION_MASK) +		*has_cap = true; +  	CAP_FOR_EACH_U32(i) {  		__u32 permitted = caps->permitted.cap[i];  		__u32 inheritable = caps->inheritable.cap[i]; @@ -379,7 +429,7 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data   * its xattrs and, if present, apply them to the proposed credentials being   * constructed by execve().   */ -static int get_file_caps(struct linux_binprm *bprm, bool *effective) +static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_cap)  {  	struct dentry *dentry;  	int rc = 0; @@ -390,7 +440,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective)  	if (!file_caps_enabled)  		return 0; -	if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) +	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)  		return 0;  	dentry = dget(bprm->file->f_dentry); @@ -405,7 +455,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective)  		goto out;  	} -	rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective); +	rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_cap);  	if (rc == -EINVAL)  		printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n",  		       __func__, rc, bprm->filename); @@ -430,21 +480,24 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)  {  	const struct cred *old = current_cred();  	struct cred *new = bprm->cred; -	bool effective; +	bool effective, has_cap = false;  	int ret; +	kuid_t root_uid;  	effective = false; -	ret = get_file_caps(bprm, &effective); +	ret = get_file_caps(bprm, &effective, &has_cap);  	if (ret < 0)  		return ret; +	root_uid = make_kuid(new->user_ns, 0); +  	if (!issecure(SECURE_NOROOT)) {  		/*  		 * If the legacy file capability is set, then don't set privs  		 * for a setuid root binary run by a non-root user.  Do set it  		 * for a root user just to cause least surprise to an admin.  		 */ -		if (effective && new->uid != 0 && new->euid == 0) { +		if (has_cap && !uid_eq(new->uid, root_uid) && uid_eq(new->euid, root_uid)) {  			warn_setuid_and_fcaps_mixed(bprm->filename);  			goto skip;  		} @@ -455,25 +508,33 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)  		 *  		 * If only the real uid is 0, we do not set the effective bit.  		 */ -		if (new->euid == 0 || new->uid == 0) { +		if (uid_eq(new->euid, root_uid) || uid_eq(new->uid, root_uid)) {  			/* pP' = (cap_bset & ~0) | (pI & ~0) */  			new->cap_permitted = cap_combine(old->cap_bset,  							 old->cap_inheritable);  		} -		if (new->euid == 0) +		if (uid_eq(new->euid, root_uid))  			effective = true;  	}  skip: +	/* if we have fs caps, clear dangerous personality flags */ +	if (!cap_issubset(new->cap_permitted, old->cap_permitted)) +		bprm->per_clear |= PER_CLEAR_ON_SETID; + +  	/* Don't let someone trace a set[ug]id/setpcap binary with the revised -	 * credentials unless they have the appropriate permit +	 * credentials unless they have the appropriate permit. +	 * +	 * In addition, if NO_NEW_PRIVS, then ensure we get no new privs.  	 */ -	if ((new->euid != old->uid || -	     new->egid != old->gid || +	if ((!uid_eq(new->euid, old->uid) || +	     !gid_eq(new->egid, old->gid) ||  	     !cap_issubset(new->cap_permitted, old->cap_permitted)) &&  	    bprm->unsafe & ~LSM_UNSAFE_PTRACE_CAP) {  		/* downgrade; they get no more than they had, and maybe less */ -		if (!capable(CAP_SETUID)) { +		if (!capable(CAP_SETUID) || +		    (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {  			new->euid = new->uid;  			new->egid = new->gid;  		} @@ -484,15 +545,10 @@ skip:  	new->suid = new->fsuid = new->euid;  	new->sgid = new->fsgid = new->egid; -	/* For init, we want to retain the capabilities set in the initial -	 * task.  Thus we skip the usual capability rules -	 */ -	if (!is_global_init(current)) { -		if (effective) -			new->cap_effective = new->cap_permitted; -		else -			cap_clear(new->cap_effective); -	} +	if (effective) +		new->cap_effective = new->cap_permitted; +	else +		cap_clear(new->cap_effective);  	bprm->cap_effective = effective;  	/* @@ -509,7 +565,7 @@ skip:  	 */  	if (!cap_isclear(new->cap_effective)) {  		if (!cap_issubset(CAP_FULL_SET, new->cap_effective) || -		    new->euid != 0 || new->uid != 0 || +		    !uid_eq(new->euid, root_uid) || !uid_eq(new->uid, root_uid) ||  		    issecure(SECURE_NOROOT)) {  			ret = audit_log_bprm_fcaps(bprm, new, old);  			if (ret < 0) @@ -534,16 +590,17 @@ skip:  int cap_bprm_secureexec(struct linux_binprm *bprm)  {  	const struct cred *cred = current_cred(); +	kuid_t root_uid = make_kuid(cred->user_ns, 0); -	if (cred->uid != 0) { +	if (!uid_eq(cred->uid, root_uid)) {  		if (bprm->cap_effective)  			return 1;  		if (!cap_isclear(cred->cap_permitted))  			return 1;  	} -	return (cred->euid != cred->uid || -		cred->egid != cred->gid); +	return (!uid_eq(cred->euid, cred->uid) || +		!gid_eq(cred->egid, cred->gid));  }  /** @@ -633,15 +690,21 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name)   */  static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)  { -	if ((old->uid == 0 || old->euid == 0 || old->suid == 0) && -	    (new->uid != 0 && new->euid != 0 && new->suid != 0) && +	kuid_t root_uid = make_kuid(old->user_ns, 0); + +	if ((uid_eq(old->uid, root_uid) || +	     uid_eq(old->euid, root_uid) || +	     uid_eq(old->suid, root_uid)) && +	    (!uid_eq(new->uid, root_uid) && +	     !uid_eq(new->euid, root_uid) && +	     !uid_eq(new->suid, root_uid)) &&  	    !issecure(SECURE_KEEP_CAPS)) {  		cap_clear(new->cap_permitted);  		cap_clear(new->cap_effective);  	} -	if (old->euid == 0 && new->euid != 0) +	if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))  		cap_clear(new->cap_effective); -	if (old->euid != 0 && new->euid == 0) +	if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))  		new->cap_effective = new->cap_permitted;  } @@ -674,11 +737,12 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)  		 *          if not, we might be a bit too harsh here.  		 */  		if (!issecure(SECURE_NO_SETUID_FIXUP)) { -			if (old->fsuid == 0 && new->fsuid != 0) +			kuid_t root_uid = make_kuid(old->user_ns, 0); +			if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))  				new->cap_effective =  					cap_drop_fs_set(new->cap_effective); -			if (old->fsuid != 0 && new->fsuid == 0) +			if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid))  				new->cap_effective =  					cap_raise_fs_set(new->cap_effective,  							 new->cap_permitted); @@ -704,16 +768,16 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)   */  static int cap_safe_nice(struct task_struct *p)  { -	int is_subset; +	int is_subset, ret = 0;  	rcu_read_lock();  	is_subset = cap_issubset(__task_cred(p)->cap_permitted,  				 current_cred()->cap_permitted); +	if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) +		ret = -EPERM;  	rcu_read_unlock(); -	if (!is_subset && !capable(CAP_SYS_NICE)) -		return -EPERM; -	return 0; +	return ret;  }  /** @@ -760,7 +824,7 @@ int cap_task_setnice(struct task_struct *p, int nice)   */  static long cap_prctl_drop(struct cred *new, unsigned long cap)  { -	if (!capable(CAP_SETPCAP)) +	if (!ns_capable(current_user_ns(), CAP_SETPCAP))  		return -EPERM;  	if (!cap_valid(cap))  		return -EINVAL; @@ -830,7 +894,8 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,  		     & (new->securebits ^ arg2))			/*[1]*/  		    || ((new->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/  		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/ -		    || (cap_capable(current, current_cred(), CAP_SETPCAP, +		    || (cap_capable(current_cred(), +				    current_cred()->user_ns, CAP_SETPCAP,  				    SECURITY_CAP_AUDIT) != 0)		/*[4]*/  			/*  			 * [1] no changing of bits that are locked @@ -884,26 +949,6 @@ error:  }  /** - * cap_syslog - Determine whether syslog function is permitted - * @type: Function requested - * @from_file: Whether this request came from an open file (i.e. /proc) - * - * Determine whether the current process is permitted to use a particular - * syslog function, returning 0 if permission is granted, -ve if not. - */ -int cap_syslog(int type, bool from_file) -{ -	if (type != SYSLOG_ACTION_OPEN && from_file) -		return 0; -	if (dmesg_restrict && !capable(CAP_SYS_ADMIN)) -		return -EPERM; -	if ((type != SYSLOG_ACTION_READ_ALL && -	     type != SYSLOG_ACTION_SIZE_BUFFER) && !capable(CAP_SYS_ADMIN)) -		return -EPERM; -	return 0; -} - -/**   * cap_vm_enough_memory - Determine whether a new virtual mapping is permitted   * @mm: The VM space in which the new mapping is to be made   * @pages: The size of the mapping @@ -915,34 +960,27 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages)  {  	int cap_sys_admin = 0; -	if (cap_capable(current, current_cred(), CAP_SYS_ADMIN, +	if (cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN,  			SECURITY_CAP_NOAUDIT) == 0)  		cap_sys_admin = 1;  	return __vm_enough_memory(mm, pages, cap_sys_admin);  }  /* - * cap_file_mmap - check if able to map given addr - * @file: unused - * @reqprot: unused - * @prot: unused - * @flags: unused + * cap_mmap_addr - check if able to map given addr   * @addr: address attempting to be mapped - * @addr_only: unused   *   * If the process is attempting to map memory below dac_mmap_min_addr they need   * CAP_SYS_RAWIO.  The other parameters to this function are unused by the   * capability security module.  Returns 0 if this mapping should be allowed   * -EPERM if not.   */ -int cap_file_mmap(struct file *file, unsigned long reqprot, -		  unsigned long prot, unsigned long flags, -		  unsigned long addr, unsigned long addr_only) +int cap_mmap_addr(unsigned long addr)  {  	int ret = 0;  	if (addr < dac_mmap_min_addr) { -		ret = cap_capable(current, current_cred(), CAP_SYS_RAWIO, +		ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,  				  SECURITY_CAP_AUDIT);  		/* set PF_SUPERPRIV if it turns out we allow the low mmap */  		if (ret == 0) @@ -950,3 +988,9 @@ int cap_file_mmap(struct file *file, unsigned long reqprot,  	}  	return ret;  } + +int cap_mmap_file(struct file *file, unsigned long reqprot, +		  unsigned long prot, unsigned long flags) +{ +	return 0; +}  | 
