diff options
Diffstat (limited to 'kernel/taskstats.c')
| -rw-r--r-- | kernel/taskstats.c | 174 |
1 files changed, 111 insertions, 63 deletions
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index c8231fb1570..13d2f7cd65d 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -27,8 +27,9 @@ #include <linux/cgroup.h> #include <linux/fs.h> #include <linux/file.h> +#include <linux/pid_namespace.h> #include <net/genetlink.h> -#include <asm/atomic.h> +#include <linux/atomic.h> /* * Maximum length of a cpumask that can be specified in @@ -89,8 +90,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, return -ENOMEM; if (!info) { - int seq = get_cpu_var(taskstats_seqnum)++; - put_cpu_var(taskstats_seqnum); + int seq = this_cpu_inc_return(taskstats_seqnum) - 1; reply = genlmsg_put(skb, 0, seq, &family, 0, cmd); } else @@ -175,7 +175,9 @@ static void send_cpu_listeners(struct sk_buff *skb, up_write(&listeners->sem); } -static void fill_stats(struct task_struct *tsk, struct taskstats *stats) +static void fill_stats(struct user_namespace *user_ns, + struct pid_namespace *pid_ns, + struct task_struct *tsk, struct taskstats *stats) { memset(stats, 0, sizeof(*stats)); /* @@ -191,7 +193,7 @@ static void fill_stats(struct task_struct *tsk, struct taskstats *stats) stats->version = TASKSTATS_VERSION; stats->nvcsw = tsk->nvcsw; stats->nivcsw = tsk->nivcsw; - bacct_add_tsk(stats, tsk); + bacct_add_tsk(user_ns, pid_ns, stats, tsk); /* fill in extended acct fields */ xacct_add_tsk(stats, tsk); @@ -208,7 +210,7 @@ static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) rcu_read_unlock(); if (!tsk) return -ESRCH; - fill_stats(tsk, stats); + fill_stats(current_user_ns(), task_active_pid_ns(current), tsk, stats); put_task_struct(tsk); return 0; } @@ -286,26 +288,41 @@ ret: static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) { struct listener_list *listeners; - struct listener *s, *tmp; + struct listener *s, *tmp, *s2; unsigned int cpu; + int ret = 0; if (!cpumask_subset(mask, cpu_possible_mask)) return -EINVAL; + if (current_user_ns() != &init_user_ns) + return -EINVAL; + + if (task_active_pid_ns(current) != &init_pid_ns) + return -EINVAL; + if (isadd == REGISTER) { for_each_cpu(cpu, mask) { - s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, - cpu_to_node(cpu)); - if (!s) + s = kmalloc_node(sizeof(struct listener), + GFP_KERNEL, cpu_to_node(cpu)); + if (!s) { + ret = -ENOMEM; goto cleanup; + } s->pid = pid; - INIT_LIST_HEAD(&s->list); s->valid = 1; listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); + list_for_each_entry(s2, &listeners->list, list) { + if (s2->pid == pid && s2->valid) + goto exists; + } list_add(&s->list, &listeners->list); + s = NULL; +exists: up_write(&listeners->sem); + kfree(s); /* nop if NULL */ } return 0; } @@ -324,7 +341,7 @@ cleanup: } up_write(&listeners->sem); } - return 0; + return ret; } static int parse(struct nlattr *na, struct cpumask *mask) @@ -349,29 +366,55 @@ static int parse(struct nlattr *na, struct cpumask *mask) return ret; } +#if defined(CONFIG_64BIT) && !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) +#define TASKSTATS_NEEDS_PADDING 1 +#endif + static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) { struct nlattr *na, *ret; int aggr; - /* If we don't pad, we end up with alignment on a 4 byte boundary. - * This causes lots of runtime warnings on systems requiring 8 byte - * alignment */ - u32 pids[2] = { pid, 0 }; - int pid_size = ALIGN(sizeof(pid), sizeof(long)); - aggr = (type == TASKSTATS_TYPE_PID) ? TASKSTATS_TYPE_AGGR_PID : TASKSTATS_TYPE_AGGR_TGID; + /* + * The taskstats structure is internally aligned on 8 byte + * boundaries but the layout of the aggregrate reply, with + * two NLA headers and the pid (each 4 bytes), actually + * force the entire structure to be unaligned. This causes + * the kernel to issue unaligned access warnings on some + * architectures like ia64. Unfortunately, some software out there + * doesn't properly unroll the NLA packet and assumes that the start + * of the taskstats structure will always be 20 bytes from the start + * of the netlink payload. Aligning the start of the taskstats + * structure breaks this software, which we don't want. So, for now + * the alignment only happens on architectures that require it + * and those users will have to update to fixed versions of those + * packages. Space is reserved in the packet only when needed. + * This ifdef should be removed in several years e.g. 2012 once + * we can be confident that fixed versions are installed on most + * systems. We add the padding before the aggregate since the + * aggregate is already a defined type. + */ +#ifdef TASKSTATS_NEEDS_PADDING + if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0) + goto err; +#endif na = nla_nest_start(skb, aggr); if (!na) goto err; - if (nla_put(skb, type, pid_size, pids) < 0) + + if (nla_put(skb, type, sizeof(pid), &pid) < 0) { + nla_nest_cancel(skb, na); goto err; + } ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); - if (!ret) + if (!ret) { + nla_nest_cancel(skb, na); goto err; + } nla_nest_end(skb, na); return nla_data(ret); @@ -387,16 +430,15 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) struct nlattr *na; size_t size; u32 fd; - struct file *file; - int fput_needed; + struct fd f; na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; if (!na) return -EINVAL; fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); - file = fget_light(fd, &fput_needed); - if (!file) + f = fdget(fd); + if (!f.file) return 0; size = nla_total_size(sizeof(struct cgroupstats)); @@ -408,10 +450,16 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, sizeof(struct cgroupstats)); + if (na == NULL) { + nlmsg_free(rep_skb); + rc = -EMSGSIZE; + goto err; + } + stats = nla_data(na); memset(stats, 0, sizeof(*stats)); - rc = cgroupstats_build(stats, file->f_dentry); + rc = cgroupstats_build(stats, f.file->f_dentry); if (rc < 0) { nlmsg_free(rep_skb); goto err; @@ -420,7 +468,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) rc = send_reply(rep_skb, info); err: - fput_light(file, fput_needed); + fdput(f); return rc; } @@ -434,7 +482,7 @@ static int cmd_attr_register_cpumask(struct genl_info *info) rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); if (rc < 0) goto out; - rc = add_del_listener(info->snd_pid, mask, REGISTER); + rc = add_del_listener(info->snd_portid, mask, REGISTER); out: free_cpumask_var(mask); return rc; @@ -450,12 +498,24 @@ static int cmd_attr_deregister_cpumask(struct genl_info *info) rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); if (rc < 0) goto out; - rc = add_del_listener(info->snd_pid, mask, DEREGISTER); + rc = add_del_listener(info->snd_portid, mask, DEREGISTER); out: free_cpumask_var(mask); return rc; } +static size_t taskstats_packet_size(void) +{ + size_t size; + + size = nla_total_size(sizeof(u32)) + + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); +#ifdef TASKSTATS_NEEDS_PADDING + size += nla_total_size(0); /* Padding for alignment */ +#endif + return size; +} + static int cmd_attr_pid(struct genl_info *info) { struct taskstats *stats; @@ -464,8 +524,7 @@ static int cmd_attr_pid(struct genl_info *info) u32 pid; int rc; - size = nla_total_size(sizeof(u32)) + - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + size = taskstats_packet_size(); rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) @@ -494,8 +553,7 @@ static int cmd_attr_tgid(struct genl_info *info) u32 tgid; int rc; - size = nla_total_size(sizeof(u32)) + - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + size = taskstats_packet_size(); rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) @@ -570,8 +628,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) /* * Size includes space for nested attributes */ - size = nla_total_size(sizeof(u32)) + - nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + size = taskstats_packet_size(); is_thread_group = !!taskstats_tgid_alloc(tsk); if (is_thread_group) { @@ -581,7 +638,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) fill_tgid_exit(tsk); } - listeners = &__raw_get_cpu_var(listener_array); + listeners = __this_cpu_ptr(&listener_array); if (list_empty(&listeners->list)) return; @@ -589,11 +646,12 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) if (rc < 0) return; - stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, tsk->pid); + stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, + task_pid_nr_ns(tsk, &init_pid_ns)); if (!stats) goto err; - fill_stats(tsk, stats); + fill_stats(&init_user_ns, &init_pid_ns, tsk, stats); /* * Doesn't matter if tsk is the leader or the last group member leaving @@ -601,7 +659,8 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) if (!is_thread_group || !group_dead) goto send; - stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tsk->tgid); + stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, + task_tgid_nr_ns(tsk, &init_pid_ns)); if (!stats) goto err; @@ -614,16 +673,18 @@ err: nlmsg_free(rep_skb); } -static struct genl_ops taskstats_ops = { - .cmd = TASKSTATS_CMD_GET, - .doit = taskstats_user_cmd, - .policy = taskstats_cmd_get_policy, -}; - -static struct genl_ops cgroupstats_ops = { - .cmd = CGROUPSTATS_CMD_GET, - .doit = cgroupstats_user_cmd, - .policy = cgroupstats_cmd_get_policy, +static const struct genl_ops taskstats_ops[] = { + { + .cmd = TASKSTATS_CMD_GET, + .doit = taskstats_user_cmd, + .policy = taskstats_cmd_get_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = CGROUPSTATS_CMD_GET, + .doit = cgroupstats_user_cmd, + .policy = cgroupstats_cmd_get_policy, + }, }; /* Needed early in initialization */ @@ -642,26 +703,13 @@ static int __init taskstats_init(void) { int rc; - rc = genl_register_family(&family); + rc = genl_register_family_with_ops(&family, taskstats_ops); if (rc) return rc; - rc = genl_register_ops(&family, &taskstats_ops); - if (rc < 0) - goto err; - - rc = genl_register_ops(&family, &cgroupstats_ops); - if (rc < 0) - goto err_cgroup_ops; - family_registered = 1; - printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); + pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); return 0; -err_cgroup_ops: - genl_unregister_ops(&family, &taskstats_ops); -err: - genl_unregister_family(&family); - return rc; } /* |
