diff options
Diffstat (limited to 'kernel/sched/cpuacct.c')
| -rw-r--r-- | kernel/sched/cpuacct.c | 296 | 
1 files changed, 296 insertions, 0 deletions
| diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c new file mode 100644 index 00000000000..dbb7e2cd95e --- /dev/null +++ b/kernel/sched/cpuacct.c @@ -0,0 +1,296 @@ +#include <linux/cgroup.h> +#include <linux/slab.h> +#include <linux/percpu.h> +#include <linux/spinlock.h> +#include <linux/cpumask.h> +#include <linux/seq_file.h> +#include <linux/rcupdate.h> +#include <linux/kernel_stat.h> +#include <linux/err.h> + +#include "sched.h" + +/* + * CPU accounting code for task groups. + * + * Based on the work by Paul Menage (menage@google.com) and Balbir Singh + * (balbir@in.ibm.com). + */ + +/* Time spent by the tasks of the cpu accounting group executing in ... */ +enum cpuacct_stat_index { +	CPUACCT_STAT_USER,	/* ... user mode */ +	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */ + +	CPUACCT_STAT_NSTATS, +}; + +/* track cpu usage of a group of tasks and its child groups */ +struct cpuacct { +	struct cgroup_subsys_state css; +	/* cpuusage holds pointer to a u64-type object on every cpu */ +	u64 __percpu *cpuusage; +	struct kernel_cpustat __percpu *cpustat; +}; + +/* return cpu accounting group corresponding to this container */ +static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp) +{ +	return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id), +			    struct cpuacct, css); +} + +/* return cpu accounting group to which this task belongs */ +static inline struct cpuacct *task_ca(struct task_struct *tsk) +{ +	return container_of(task_subsys_state(tsk, cpuacct_subsys_id), +			    struct cpuacct, css); +} + +static inline struct cpuacct *__parent_ca(struct cpuacct *ca) +{ +	return cgroup_ca(ca->css.cgroup->parent); +} + +static inline struct cpuacct *parent_ca(struct cpuacct *ca) +{ +	if (!ca->css.cgroup->parent) +		return NULL; +	return cgroup_ca(ca->css.cgroup->parent); +} + +static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); +static struct cpuacct root_cpuacct = { +	.cpustat	= &kernel_cpustat, +	.cpuusage	= &root_cpuacct_cpuusage, +}; + +/* create a new cpu accounting group */ +static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp) +{ +	struct cpuacct *ca; + +	if (!cgrp->parent) +		return &root_cpuacct.css; + +	ca = kzalloc(sizeof(*ca), GFP_KERNEL); +	if (!ca) +		goto out; + +	ca->cpuusage = alloc_percpu(u64); +	if (!ca->cpuusage) +		goto out_free_ca; + +	ca->cpustat = alloc_percpu(struct kernel_cpustat); +	if (!ca->cpustat) +		goto out_free_cpuusage; + +	return &ca->css; + +out_free_cpuusage: +	free_percpu(ca->cpuusage); +out_free_ca: +	kfree(ca); +out: +	return ERR_PTR(-ENOMEM); +} + +/* destroy an existing cpu accounting group */ +static void cpuacct_css_free(struct cgroup *cgrp) +{ +	struct cpuacct *ca = cgroup_ca(cgrp); + +	free_percpu(ca->cpustat); +	free_percpu(ca->cpuusage); +	kfree(ca); +} + +static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) +{ +	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); +	u64 data; + +#ifndef CONFIG_64BIT +	/* +	 * Take rq->lock to make 64-bit read safe on 32-bit platforms. +	 */ +	raw_spin_lock_irq(&cpu_rq(cpu)->lock); +	data = *cpuusage; +	raw_spin_unlock_irq(&cpu_rq(cpu)->lock); +#else +	data = *cpuusage; +#endif + +	return data; +} + +static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) +{ +	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); + +#ifndef CONFIG_64BIT +	/* +	 * Take rq->lock to make 64-bit write safe on 32-bit platforms. +	 */ +	raw_spin_lock_irq(&cpu_rq(cpu)->lock); +	*cpuusage = val; +	raw_spin_unlock_irq(&cpu_rq(cpu)->lock); +#else +	*cpuusage = val; +#endif +} + +/* return total cpu usage (in nanoseconds) of a group */ +static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft) +{ +	struct cpuacct *ca = cgroup_ca(cgrp); +	u64 totalcpuusage = 0; +	int i; + +	for_each_present_cpu(i) +		totalcpuusage += cpuacct_cpuusage_read(ca, i); + +	return totalcpuusage; +} + +static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype, +								u64 reset) +{ +	struct cpuacct *ca = cgroup_ca(cgrp); +	int err = 0; +	int i; + +	if (reset) { +		err = -EINVAL; +		goto out; +	} + +	for_each_present_cpu(i) +		cpuacct_cpuusage_write(ca, i, 0); + +out: +	return err; +} + +static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, +				   struct seq_file *m) +{ +	struct cpuacct *ca = cgroup_ca(cgroup); +	u64 percpu; +	int i; + +	for_each_present_cpu(i) { +		percpu = cpuacct_cpuusage_read(ca, i); +		seq_printf(m, "%llu ", (unsigned long long) percpu); +	} +	seq_printf(m, "\n"); +	return 0; +} + +static const char * const cpuacct_stat_desc[] = { +	[CPUACCT_STAT_USER] = "user", +	[CPUACCT_STAT_SYSTEM] = "system", +}; + +static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, +			      struct cgroup_map_cb *cb) +{ +	struct cpuacct *ca = cgroup_ca(cgrp); +	int cpu; +	s64 val = 0; + +	for_each_online_cpu(cpu) { +		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); +		val += kcpustat->cpustat[CPUTIME_USER]; +		val += kcpustat->cpustat[CPUTIME_NICE]; +	} +	val = cputime64_to_clock_t(val); +	cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val); + +	val = 0; +	for_each_online_cpu(cpu) { +		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); +		val += kcpustat->cpustat[CPUTIME_SYSTEM]; +		val += kcpustat->cpustat[CPUTIME_IRQ]; +		val += kcpustat->cpustat[CPUTIME_SOFTIRQ]; +	} + +	val = cputime64_to_clock_t(val); +	cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); + +	return 0; +} + +static struct cftype files[] = { +	{ +		.name = "usage", +		.read_u64 = cpuusage_read, +		.write_u64 = cpuusage_write, +	}, +	{ +		.name = "usage_percpu", +		.read_seq_string = cpuacct_percpu_seq_read, +	}, +	{ +		.name = "stat", +		.read_map = cpuacct_stats_show, +	}, +	{ }	/* terminate */ +}; + +/* + * charge this task's execution time to its accounting group. + * + * called with rq->lock held. + */ +void cpuacct_charge(struct task_struct *tsk, u64 cputime) +{ +	struct cpuacct *ca; +	int cpu; + +	cpu = task_cpu(tsk); + +	rcu_read_lock(); + +	ca = task_ca(tsk); + +	while (true) { +		u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); +		*cpuusage += cputime; + +		ca = parent_ca(ca); +		if (!ca) +			break; +	} + +	rcu_read_unlock(); +} + +/* + * Add user/system time to cpuacct. + * + * Note: it's the caller that updates the account of the root cgroup. + */ +void cpuacct_account_field(struct task_struct *p, int index, u64 val) +{ +	struct kernel_cpustat *kcpustat; +	struct cpuacct *ca; + +	rcu_read_lock(); +	ca = task_ca(p); +	while (ca != &root_cpuacct) { +		kcpustat = this_cpu_ptr(ca->cpustat); +		kcpustat->cpustat[index] += val; +		ca = __parent_ca(ca); +	} +	rcu_read_unlock(); +} + +struct cgroup_subsys cpuacct_subsys = { +	.name		= "cpuacct", +	.css_alloc	= cpuacct_css_alloc, +	.css_free	= cpuacct_css_free, +	.subsys_id	= cpuacct_subsys_id, +	.base_cftypes	= files, +	.early_init	= 1, +}; | 
