diff options
Diffstat (limited to 'kernel/taskstats.c')
| -rw-r--r-- | kernel/taskstats.c | 174 | 
1 files changed, 111 insertions, 63 deletions
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index c8231fb1570..13d2f7cd65d 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -27,8 +27,9 @@  #include <linux/cgroup.h>  #include <linux/fs.h>  #include <linux/file.h> +#include <linux/pid_namespace.h>  #include <net/genetlink.h> -#include <asm/atomic.h> +#include <linux/atomic.h>  /*   * Maximum length of a cpumask that can be specified in @@ -89,8 +90,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,  		return -ENOMEM;  	if (!info) { -		int seq = get_cpu_var(taskstats_seqnum)++; -		put_cpu_var(taskstats_seqnum); +		int seq = this_cpu_inc_return(taskstats_seqnum) - 1;  		reply = genlmsg_put(skb, 0, seq, &family, 0, cmd);  	} else @@ -175,7 +175,9 @@ static void send_cpu_listeners(struct sk_buff *skb,  	up_write(&listeners->sem);  } -static void fill_stats(struct task_struct *tsk, struct taskstats *stats) +static void fill_stats(struct user_namespace *user_ns, +		       struct pid_namespace *pid_ns, +		       struct task_struct *tsk, struct taskstats *stats)  {  	memset(stats, 0, sizeof(*stats));  	/* @@ -191,7 +193,7 @@ static void fill_stats(struct task_struct *tsk, struct taskstats *stats)  	stats->version = TASKSTATS_VERSION;  	stats->nvcsw = tsk->nvcsw;  	stats->nivcsw = tsk->nivcsw; -	bacct_add_tsk(stats, tsk); +	bacct_add_tsk(user_ns, pid_ns, stats, tsk);  	/* fill in extended acct fields */  	xacct_add_tsk(stats, tsk); @@ -208,7 +210,7 @@ static int fill_stats_for_pid(pid_t pid, struct taskstats *stats)  	rcu_read_unlock();  	if (!tsk)  		return -ESRCH; -	fill_stats(tsk, stats); +	fill_stats(current_user_ns(), task_active_pid_ns(current), tsk, stats);  	put_task_struct(tsk);  	return 0;  } @@ -286,26 +288,41 @@ ret:  static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)  {  	struct listener_list *listeners; -	struct listener *s, *tmp; +	struct listener *s, *tmp, *s2;  	unsigned int cpu; +	int ret = 0;  	if (!cpumask_subset(mask, cpu_possible_mask))  		return -EINVAL; +	if (current_user_ns() != &init_user_ns) +		return -EINVAL; + +	if (task_active_pid_ns(current) != &init_pid_ns) +		return -EINVAL; +  	if (isadd == REGISTER) {  		for_each_cpu(cpu, mask) { -			s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, -					 cpu_to_node(cpu)); -			if (!s) +			s = kmalloc_node(sizeof(struct listener), +					GFP_KERNEL, cpu_to_node(cpu)); +			if (!s) { +				ret = -ENOMEM;  				goto cleanup; +			}  			s->pid = pid; -			INIT_LIST_HEAD(&s->list);  			s->valid = 1;  			listeners = &per_cpu(listener_array, cpu);  			down_write(&listeners->sem); +			list_for_each_entry(s2, &listeners->list, list) { +				if (s2->pid == pid && s2->valid) +					goto exists; +			}  			list_add(&s->list, &listeners->list); +			s = NULL; +exists:  			up_write(&listeners->sem); +			kfree(s); /* nop if NULL */  		}  		return 0;  	} @@ -324,7 +341,7 @@ cleanup:  		}  		up_write(&listeners->sem);  	} -	return 0; +	return ret;  }  static int parse(struct nlattr *na, struct cpumask *mask) @@ -349,29 +366,55 @@ static int parse(struct nlattr *na, struct cpumask *mask)  	return ret;  } +#if defined(CONFIG_64BIT) && !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) +#define TASKSTATS_NEEDS_PADDING 1 +#endif +  static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)  {  	struct nlattr *na, *ret;  	int aggr; -	/* If we don't pad, we end up with alignment on a 4 byte boundary. -	 * This causes lots of runtime warnings on systems requiring 8 byte -	 * alignment */ -	u32 pids[2] = { pid, 0 }; -	int pid_size = ALIGN(sizeof(pid), sizeof(long)); -  	aggr = (type == TASKSTATS_TYPE_PID)  			? TASKSTATS_TYPE_AGGR_PID  			: TASKSTATS_TYPE_AGGR_TGID; +	/* +	 * The taskstats structure is internally aligned on 8 byte +	 * boundaries but the layout of the aggregrate reply, with +	 * two NLA headers and the pid (each 4 bytes), actually +	 * force the entire structure to be unaligned. This causes +	 * the kernel to issue unaligned access warnings on some +	 * architectures like ia64. Unfortunately, some software out there +	 * doesn't properly unroll the NLA packet and assumes that the start +	 * of the taskstats structure will always be 20 bytes from the start +	 * of the netlink payload. Aligning the start of the taskstats +	 * structure breaks this software, which we don't want. So, for now +	 * the alignment only happens on architectures that require it +	 * and those users will have to update to fixed versions of those +	 * packages. Space is reserved in the packet only when needed. +	 * This ifdef should be removed in several years e.g. 2012 once +	 * we can be confident that fixed versions are installed on most +	 * systems. We add the padding before the aggregate since the +	 * aggregate is already a defined type. +	 */ +#ifdef TASKSTATS_NEEDS_PADDING +	if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0) +		goto err; +#endif  	na = nla_nest_start(skb, aggr);  	if (!na)  		goto err; -	if (nla_put(skb, type, pid_size, pids) < 0) + +	if (nla_put(skb, type, sizeof(pid), &pid) < 0) { +		nla_nest_cancel(skb, na);  		goto err; +	}  	ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); -	if (!ret) +	if (!ret) { +		nla_nest_cancel(skb, na);  		goto err; +	}  	nla_nest_end(skb, na);  	return nla_data(ret); @@ -387,16 +430,15 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)  	struct nlattr *na;  	size_t size;  	u32 fd; -	struct file *file; -	int fput_needed; +	struct fd f;  	na = info->attrs[CGROUPSTATS_CMD_ATTR_FD];  	if (!na)  		return -EINVAL;  	fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); -	file = fget_light(fd, &fput_needed); -	if (!file) +	f = fdget(fd); +	if (!f.file)  		return 0;  	size = nla_total_size(sizeof(struct cgroupstats)); @@ -408,10 +450,16 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)  	na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,  				sizeof(struct cgroupstats)); +	if (na == NULL) { +		nlmsg_free(rep_skb); +		rc = -EMSGSIZE; +		goto err; +	} +  	stats = nla_data(na);  	memset(stats, 0, sizeof(*stats)); -	rc = cgroupstats_build(stats, file->f_dentry); +	rc = cgroupstats_build(stats, f.file->f_dentry);  	if (rc < 0) {  		nlmsg_free(rep_skb);  		goto err; @@ -420,7 +468,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)  	rc = send_reply(rep_skb, info);  err: -	fput_light(file, fput_needed); +	fdput(f);  	return rc;  } @@ -434,7 +482,7 @@ static int cmd_attr_register_cpumask(struct genl_info *info)  	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);  	if (rc < 0)  		goto out; -	rc = add_del_listener(info->snd_pid, mask, REGISTER); +	rc = add_del_listener(info->snd_portid, mask, REGISTER);  out:  	free_cpumask_var(mask);  	return rc; @@ -450,12 +498,24 @@ static int cmd_attr_deregister_cpumask(struct genl_info *info)  	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);  	if (rc < 0)  		goto out; -	rc = add_del_listener(info->snd_pid, mask, DEREGISTER); +	rc = add_del_listener(info->snd_portid, mask, DEREGISTER);  out:  	free_cpumask_var(mask);  	return rc;  } +static size_t taskstats_packet_size(void) +{ +	size_t size; + +	size = nla_total_size(sizeof(u32)) + +		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); +#ifdef TASKSTATS_NEEDS_PADDING +	size += nla_total_size(0); /* Padding for alignment */ +#endif +	return size; +} +  static int cmd_attr_pid(struct genl_info *info)  {  	struct taskstats *stats; @@ -464,8 +524,7 @@ static int cmd_attr_pid(struct genl_info *info)  	u32 pid;  	int rc; -	size = nla_total_size(sizeof(u32)) + -		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); +	size = taskstats_packet_size();  	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);  	if (rc < 0) @@ -494,8 +553,7 @@ static int cmd_attr_tgid(struct genl_info *info)  	u32 tgid;  	int rc; -	size = nla_total_size(sizeof(u32)) + -		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); +	size = taskstats_packet_size();  	rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);  	if (rc < 0) @@ -570,8 +628,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)  	/*  	 * Size includes space for nested attributes  	 */ -	size = nla_total_size(sizeof(u32)) + -		nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); +	size = taskstats_packet_size();  	is_thread_group = !!taskstats_tgid_alloc(tsk);  	if (is_thread_group) { @@ -581,7 +638,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)  		fill_tgid_exit(tsk);  	} -	listeners = &__raw_get_cpu_var(listener_array); +	listeners = __this_cpu_ptr(&listener_array);  	if (list_empty(&listeners->list))  		return; @@ -589,11 +646,12 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)  	if (rc < 0)  		return; -	stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid); +	stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, +			 task_pid_nr_ns(tsk, &init_pid_ns));  	if (!stats)  		goto err; -	fill_stats(tsk, stats); +	fill_stats(&init_user_ns, &init_pid_ns, tsk, stats);  	/*  	 * Doesn't matter if tsk is the leader or the last group member leaving @@ -601,7 +659,8 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)  	if (!is_thread_group || !group_dead)  		goto send; -	stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid); +	stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, +			 task_tgid_nr_ns(tsk, &init_pid_ns));  	if (!stats)  		goto err; @@ -614,16 +673,18 @@ err:  	nlmsg_free(rep_skb);  } -static struct genl_ops taskstats_ops = { -	.cmd		= TASKSTATS_CMD_GET, -	.doit		= taskstats_user_cmd, -	.policy		= taskstats_cmd_get_policy, -}; - -static struct genl_ops cgroupstats_ops = { -	.cmd		= CGROUPSTATS_CMD_GET, -	.doit		= cgroupstats_user_cmd, -	.policy		= cgroupstats_cmd_get_policy, +static const struct genl_ops taskstats_ops[] = { +	{ +		.cmd		= TASKSTATS_CMD_GET, +		.doit		= taskstats_user_cmd, +		.policy		= taskstats_cmd_get_policy, +		.flags		= GENL_ADMIN_PERM, +	}, +	{ +		.cmd		= CGROUPSTATS_CMD_GET, +		.doit		= cgroupstats_user_cmd, +		.policy		= cgroupstats_cmd_get_policy, +	},  };  /* Needed early in initialization */ @@ -642,26 +703,13 @@ static int __init taskstats_init(void)  {  	int rc; -	rc = genl_register_family(&family); +	rc = genl_register_family_with_ops(&family, taskstats_ops);  	if (rc)  		return rc; -	rc = genl_register_ops(&family, &taskstats_ops); -	if (rc < 0) -		goto err; - -	rc = genl_register_ops(&family, &cgroupstats_ops); -	if (rc < 0) -		goto err_cgroup_ops; -  	family_registered = 1; -	printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); +	pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);  	return 0; -err_cgroup_ops: -	genl_unregister_ops(&family, &taskstats_ops); -err: -	genl_unregister_family(&family); -	return rc;  }  /*  | 
