aboutsummaryrefslogtreecommitdiff
path: root/kernel/sched.c
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2009-04-16 17:35:26 -0700
committer David S. Miller <davem@davemloft.net> 2009-04-16 17:35:26 -0700
commit a54bfa40fd16aeb90bc556189221576f746f8567 (patch)
tree 176bb7a99ffab5f42f0dd4e9671f335be3f3efa0 /kernel/sched.c
parent fe957c40ec5e2763b9977c565beab3bde3aaf85b (diff)
parent 134ffb4cad92a6aa534e55a9be145bca780a32c1 (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- kernel/sched.c 160
1 files changed, 145 insertions, 15 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 6cc1fd5d507..5724508c3b6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1418,10 +1418,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct rq_iterator *iterator);
#endif
+/*
+ * Indices into a cpu accounting group's cpustat[] counter array.
+ * Declared outside CONFIG_CGROUP_CPUACCT because the no-op stub of
+ * cpuacct_update_stats() takes this type as well.
+ *
+ * Time spent by the tasks of the cpu accounting group executing in ...
+ */
+enum cpuacct_stat_index {
+ CPUACCT_STAT_USER, /* ... user mode */
+ CPUACCT_STAT_SYSTEM, /* ... kernel mode */
+
+ CPUACCT_STAT_NSTATS, /* number of stats above; sizes the cpustat[] array */
+};
+
#ifdef CONFIG_CGROUP_CPUACCT
static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
+static void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val);
#else
static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
+static inline void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val) {}
#endif
static inline void inc_cpu_load(struct rq *rq, unsigned long load)
@@ -4511,9 +4523,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
EXPORT_PER_CPU_SYMBOL(kstat);
/*
- * Return any ns on the sched_clock that have not yet been banked in
+ * Return any ns on the sched_clock that have not yet been accounted in
* @p in case that task is currently running.
+ *
+ * Called with task_rq_lock() held on @rq.
+ *
+ * Returns 0 if task_current(rq, p) is false. Otherwise the rq clock is
+ * refreshed with update_rq_clock() and the time elapsed since
+ * p->se.exec_start is returned, clamped to 0 if that difference would
+ * be negative when viewed as a signed value.
*/
+static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
+{
+ u64 ns = 0;
+
+ if (task_current(rq, p)) {
+ update_rq_clock(rq);
+ ns = rq->clock - p->se.exec_start;
+ if ((s64)ns < 0)
+ ns = 0;
+ }
+
+ return ns;
+}
+
unsigned long long task_delta_exec(struct task_struct *p)
{
unsigned long flags;
@@ -4521,16 +4549,49 @@ unsigned long long task_delta_exec(struct task_struct *p)
u64 ns = 0;
rq = task_rq_lock(p, &flags);
+ ns = do_task_delta_exec(p, rq);
+ task_rq_unlock(rq, &flags);
- if (task_current(rq, p)) {
- u64 delta_exec;
+ return ns;
+}
- update_rq_clock(rq);
- delta_exec = rq->clock - p->se.exec_start;
- if ((s64)delta_exec > 0)
- ns = delta_exec;
- }
+/*
+ * Return accounted runtime for the task.
+ * In case the task is currently running, return the runtime plus current's
+ * pending runtime that has not been accounted yet.
+ *
+ * Both the accounted sum (p->se.sum_exec_runtime) and the pending delta
+ * from do_task_delta_exec() are read inside the same task_rq_lock()
+ * section, so the two values are taken under one lock hold.
+ */
+unsigned long long task_sched_runtime(struct task_struct *p)
+{
+ unsigned long flags;
+ struct rq *rq;
+ u64 ns = 0;
+
+ rq = task_rq_lock(p, &flags);
+ ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
+ task_rq_unlock(rq, &flags);
+
+ return ns;
+}
+
+/*
+ * Return sum_exec_runtime for the thread group.
+ * In case the task is currently running, return the sum plus current's
+ * pending runtime that has not been accounted yet.
+ *
+ * Note that the thread group might have other running tasks as well,
+ * so the return value does not include the pending runtime that those
+ * other running tasks might have.
+ */
+unsigned long long thread_group_sched_runtime(struct task_struct *p)
+{
+ struct task_cputime totals;
+ unsigned long flags;
+ struct rq *rq;
+ u64 ns;
+ rq = task_rq_lock(p, &flags);
+ thread_group_cputime(p, &totals);
+ ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
task_rq_unlock(rq, &flags);
return ns;
@@ -4559,6 +4620,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
cpustat->nice = cputime64_add(cpustat->nice, tmp);
else
cpustat->user = cputime64_add(cpustat->user, tmp);
+
+ cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
/* Account for user time used */
acct_update_integrals(p);
}
@@ -4620,6 +4683,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
else
cpustat->system = cputime64_add(cpustat->system, tmp);
+ cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+
/* Account for system time used */
acct_update_integrals(p);
}
@@ -7302,7 +7367,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
cpumask_or(groupmask, groupmask, sched_group_cpus(group));
cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
- printk(KERN_CONT " %s", str);
+ printk(KERN_CONT " %s (__cpu_power = %d)", str,
+ group->__cpu_power);
group = group->next;
} while (group != sd->groups);
@@ -9925,6 +9991,7 @@ struct cpuacct {
struct cgroup_subsys_state css;
/* cpuusage holds pointer to a u64-type object on every cpu */
u64 *cpuusage;
+ struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
struct cpuacct *parent;
};
@@ -9949,20 +10016,32 @@ static struct cgroup_subsys_state *cpuacct_create(
struct cgroup_subsys *ss, struct cgroup *cgrp)
{
struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ int i;
if (!ca)
- return ERR_PTR(-ENOMEM);
+ goto out;
ca->cpuusage = alloc_percpu(u64);
- if (!ca->cpuusage) {
- kfree(ca);
- return ERR_PTR(-ENOMEM);
- }
+ if (!ca->cpuusage)
+ goto out_free_ca;
+
+ for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+ if (percpu_counter_init(&ca->cpustat[i], 0))
+ goto out_free_counters;
if (cgrp->parent)
ca->parent = cgroup_ca(cgrp->parent);
return &ca->css;
+
+out_free_counters:
+ while (--i >= 0)
+ percpu_counter_destroy(&ca->cpustat[i]);
+ free_percpu(ca->cpuusage);
+out_free_ca:
+ kfree(ca);
+out:
+ return ERR_PTR(-ENOMEM);
}
/* destroy an existing cpu accounting group */
@@ -9970,7 +10049,10 @@ static void
cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
struct cpuacct *ca = cgroup_ca(cgrp);
+ int i;
+ for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
+ percpu_counter_destroy(&ca->cpustat[i]);
free_percpu(ca->cpuusage);
kfree(ca);
}
@@ -10057,6 +10139,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
return 0;
}
+/* Key names passed to cb->fill() by cpuacct_stats_show(), indexed by enum cpuacct_stat_index. */
+static const char *cpuacct_stat_desc[] = {
+ [CPUACCT_STAT_USER] = "user",
+ [CPUACCT_STAT_SYSTEM] = "system",
+};
+
+/*
+ * read_map handler for the "stat" control file: emit one key/value pair
+ * per cpuacct_stat_index entry for the accounting group of @cgrp.
+ *
+ * Each counter is read with percpu_counter_read(), converted with
+ * cputime64_to_clock_t(), and handed to cb->fill() under the matching
+ * cpuacct_stat_desc[] name. Always returns 0.
+ *
+ * NOTE(review): percpu_counter_read() presumably returns the cheap,
+ * possibly slightly stale central count rather than an exact per-cpu
+ * sum -- confirm that this precision is acceptable for this file.
+ */
+static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
+ struct cgroup_map_cb *cb)
+{
+ struct cpuacct *ca = cgroup_ca(cgrp);
+ int i;
+
+ for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
+ s64 val = percpu_counter_read(&ca->cpustat[i]);
+ val = cputime64_to_clock_t(val);
+ cb->fill(cb, cpuacct_stat_desc[i], val);
+ }
+ return 0;
+}
+
static struct cftype files[] = {
{
.name = "usage",
@@ -10067,7 +10168,10 @@ static struct cftype files[] = {
.name = "usage_percpu",
.read_seq_string = cpuacct_percpu_seq_read,
},
-
+ {
+ .name = "stat",
+ .read_map = cpuacct_stats_show,
+ },
};
static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -10089,12 +10193,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
return;
cpu = task_cpu(tsk);
+
+ rcu_read_lock();
+
ca = task_ca(tsk);
for (; ca; ca = ca->parent) {
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
+
+ rcu_read_unlock();
+}
+
+/*
+ * Charge the system/user time to the task's accounting group.
+ *
+ * @tsk: task whose accounting group is charged
+ * @idx: which statistic (CPUACCT_STAT_USER or CPUACCT_STAT_SYSTEM) to add to
+ * @val: amount of cputime to add
+ *
+ * Does nothing while cpuacct_subsys is not active. Otherwise @val is added
+ * to the selected counter of the task's group and of every ancestor reached
+ * through ->parent, all inside one rcu_read_lock() section.
+ *
+ * NOTE(review): unlike the for-loop in cpuacct_charge(), the do/while here
+ * dereferences ca before testing it, so this relies on task_ca() not
+ * returning NULL once the subsystem is active -- confirm.
+ */
+static void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val)
+{
+ struct cpuacct *ca;
+
+ if (unlikely(!cpuacct_subsys.active))
+ return;
+
+ rcu_read_lock();
+ ca = task_ca(tsk);
+
+ do {
+ percpu_counter_add(&ca->cpustat[idx], val);
+ ca = ca->parent;
+ } while (ca);
+ rcu_read_unlock();
}
struct cgroup_subsys cpuacct_subsys = {