Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c | 180
1 file changed, 122 insertions(+), 58 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 4d46e90f59c..a9ecac398bb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -49,6 +49,7 @@
#include <linux/syscalls.h>
#include <linux/times.h>
#include <linux/acct.h>
+#include <linux/kprobes.h>
#include <asm/tlb.h>
#include <asm/unistd.h>
@@ -144,7 +145,8 @@
(v1) * (v2_max) / (v1_max)
#define DELTA(p) \
- (SCALE(TASK_NICE(p), 40, MAX_BONUS) + INTERACTIVE_DELTA)
+ (SCALE(TASK_NICE(p) + 20, 40, MAX_BONUS) - 20 * MAX_BONUS / 40 + \
+ INTERACTIVE_DELTA)
#define TASK_INTERACTIVE(p) \
((p)->prio <= (p)->static_prio - DELTA(p))
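
The reworked DELTA() keeps the SCALE() operand non-negative and subtracts the nice offset afterwards, so negative nice values no longer depend on C's truncation toward zero. A minimal userspace sketch of the arithmetic, assuming MAX_BONUS is 10 (its usual value with MAX_USER_PRIO = 40 and PRIO_BONUS_RATIO = 25) and ignoring the INTERACTIVE_DELTA term, which is the same in both forms:

	#include <stdio.h>

	#define MAX_BONUS	10
	#define SCALE(v1, v1_max, v2_max)	((v1) * (v2_max) / (v1_max))

	#define DELTA_OLD(nice)	SCALE((nice), 40, MAX_BONUS)
	#define DELTA_NEW(nice)	(SCALE((nice) + 20, 40, MAX_BONUS) - \
				 20 * MAX_BONUS / 40)

	int main(void)
	{
		int nice;

		/* C integer division truncates toward zero, so the old form
		 * rounded mildly negative nice values up toward zero; the new
		 * form stays non-negative inside SCALE() and gives a
		 * consistent floor. */
		for (nice = -20; nice <= 19; nice += 13)
			printf("nice %3d: old %3d  new %3d\n",
			       nice, DELTA_OLD(nice), DELTA_NEW(nice));
		return 0;
	}

For nice -7 the old form yields -1 while the new form yields -2; at the extremes (-20 and 19) both agree.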
@@ -237,6 +239,7 @@ struct runqueue {
task_t *migration_thread;
struct list_head migration_queue;
+ int cpu;
#endif
#ifdef CONFIG_SCHEDSTATS
@@ -707,12 +710,6 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
DEF_TIMESLICE);
} else {
/*
- * The lower the sleep avg a task has the more
- * rapidly it will rise with sleep time.
- */
- sleep_time *= (MAX_BONUS - CURRENT_BONUS(p)) ? : 1;
-
- /*
* Tasks waking from uninterruptible sleep are
* limited in their sleep_avg rise as they
* are likely to be waiting on I/O
@@ -1551,8 +1548,14 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
finish_lock_switch(rq, prev);
if (mm)
mmdrop(mm);
- if (unlikely(prev_task_flags & PF_DEAD))
+ if (unlikely(prev_task_flags & PF_DEAD)) {
+ /*
+ * Remove function-return probe instances associated with this
+ * task and put them back on the free list.
+ */
+ kprobe_flush_task(prev);
put_task_struct(prev);
+ }
}
/**
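
For context, a function-return probe allocates a kretprobe_instance when the probed function is entered and normally releases it on return; if the task dies in between, that instance would be lost without the kprobe_flush_task() call added above. A minimal sketch of such a probe follows; the probed symbol, the module boilerplate, and the use of kallsyms_lookup_name() are illustrative assumptions, not part of this patch:

	#include <linux/module.h>
	#include <linux/kallsyms.h>
	#include <linux/kprobes.h>

	static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
	{
		printk(KERN_INFO "probed function returned\n");
		return 0;	/* instance goes back on the per-probe free list */
	}

	static struct kretprobe demo_rp = {
		.handler   = ret_handler,
		.maxactive = 16,	/* pre-allocated kretprobe_instance slots */
	};

	static int __init demo_init(void)
	{
		demo_rp.kp.addr = (kprobe_opcode_t *)kallsyms_lookup_name("do_fork");
		if (!demo_rp.kp.addr)
			return -ENOENT;
		return register_kretprobe(&demo_rp);
	}

	static void __exit demo_exit(void)
	{
		unregister_kretprobe(&demo_rp);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");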
@@ -1622,7 +1625,7 @@ unsigned long nr_uninterruptible(void)
{
unsigned long i, sum = 0;
- for_each_cpu(i)
+ for_each_possible_cpu(i)
sum += cpu_rq(i)->nr_uninterruptible;
/*
@@ -1639,7 +1642,7 @@ unsigned long long nr_context_switches(void)
{
unsigned long long i, sum = 0;
- for_each_cpu(i)
+ for_each_possible_cpu(i)
sum += cpu_rq(i)->nr_switches;
return sum;
@@ -1649,7 +1652,7 @@ unsigned long nr_iowait(void)
{
unsigned long i, sum = 0;
- for_each_cpu(i)
+ for_each_possible_cpu(i)
sum += atomic_read(&cpu_rq(i)->nr_iowait);
return sum;
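
These three hunks switch from for_each_cpu() to for_each_possible_cpu(), which makes explicit that the iteration walks cpu_possible_map, so the sums also cover CPUs that are currently offline but may be brought back via hotplug. A sketch of the same pattern; the helper name total_running() is illustrative and simply mirrors the kernel's own nr_running():

	unsigned long total_running(void)
	{
		unsigned long i, sum = 0;

		/* Walk every possible CPU, not just the online ones, so the
		 * per-runqueue counters of temporarily offline CPUs still
		 * contribute to the total. */
		for_each_possible_cpu(i)
			sum += cpu_rq(i)->nr_running;

		return sum;
	}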
@@ -1660,6 +1663,9 @@ unsigned long nr_iowait(void)
/*
* double_rq_lock - safely lock two runqueues
*
+ * We must take them in cpu order to match code in
+ * dependent_sleeper and wake_dependent_sleeper.
+ *
* Note this does not disable interrupts like task_rq_lock,
* you need to do so manually before calling.
*/
@@ -1671,7 +1677,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
spin_lock(&rq1->lock);
__acquire(rq2->lock); /* Fake it out ;) */
} else {
- if (rq1 < rq2) {
+ if (rq1->cpu < rq2->cpu) {
spin_lock(&rq1->lock);
spin_lock(&rq2->lock);
} else {
@@ -1707,7 +1713,7 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
__acquires(this_rq->lock)
{
if (unlikely(!spin_trylock(&busiest->lock))) {
- if (busiest < this_rq) {
+ if (busiest->cpu < this_rq->cpu) {
spin_unlock(&this_rq->lock);
spin_lock(&busiest->lock);
spin_lock(&this_rq->lock);
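
Ordering by runqueue address is not safe here because per-CPU runqueue addresses need not follow CPU numbering, while the SMT dependent-sleeper code takes sibling runqueue locks in CPU order; the new rq->cpu field (initialized in sched_init() below) lets every double-lock site agree on one order. A minimal sketch of the rule, with an illustrative helper name:

	static void lock_rq_pair(runqueue_t *rq_a, runqueue_t *rq_b)
	{
		if (rq_a == rq_b) {
			spin_lock(&rq_a->lock);
		} else if (rq_a->cpu < rq_b->cpu) {
			/* Always take the lower-numbered CPU's lock first. */
			spin_lock(&rq_a->lock);
			spin_lock(&rq_b->lock);
		} else {
			spin_lock(&rq_b->lock);
			spin_lock(&rq_a->lock);
		}
	}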
@@ -2873,13 +2879,11 @@ asmlinkage void __sched schedule(void)
* schedule() atomically, we ignore that path for now.
* Otherwise, whine if we are scheduling when we should not be.
*/
- if (likely(!current->exit_state)) {
- if (unlikely(in_atomic())) {
- printk(KERN_ERR "scheduling while atomic: "
- "%s/0x%08x/%d\n",
- current->comm, preempt_count(), current->pid);
- dump_stack();
- }
+ if (unlikely(in_atomic() && !current->exit_state)) {
+ printk(KERN_ERR "BUG: scheduling while atomic: "
+ "%s/0x%08x/%d\n",
+ current->comm, preempt_count(), current->pid);
+ dump_stack();
}
profile_hit(SCHED_PROFILING, __builtin_return_address(0));
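
The collapsed check now warns whenever schedule() is entered with a non-zero preempt count, unless the task is already exiting (exit_state set), and the message gains a "BUG:" prefix so log scanners pick it up. A hedged illustration of the bug class it reports, assuming a preemptible kernel; the function name is made up:

	static void broken_caller(void)
	{
		preempt_disable();	/* enter atomic context */
		schedule();		/* prints "BUG: scheduling while atomic: ..."
					 * plus a stack dump, then schedules anyway */
		preempt_enable();
	}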
@@ -5570,11 +5574,31 @@ static int cpu_to_cpu_group(int cpu)
}
#endif
+#ifdef CONFIG_SCHED_MC
+static DEFINE_PER_CPU(struct sched_domain, core_domains);
+static struct sched_group sched_group_core[NR_CPUS];
+#endif
+
+#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
+static int cpu_to_core_group(int cpu)
+{
+ return first_cpu(cpu_sibling_map[cpu]);
+}
+#elif defined(CONFIG_SCHED_MC)
+static int cpu_to_core_group(int cpu)
+{
+ return cpu;
+}
+#endif
+
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
static int cpu_to_phys_group(int cpu)
{
-#ifdef CONFIG_SCHED_SMT
+#if defined(CONFIG_SCHED_MC)
+ cpumask_t mask = cpu_coregroup_map(cpu);
+ return first_cpu(mask);
+#elif defined(CONFIG_SCHED_SMT)
return first_cpu(cpu_sibling_map[cpu]);
#else
return cpu;
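
As a hedged worked example for a hypothetical box with two SMT threads per core and two cores per package: with both SCHED_SMT and SCHED_MC enabled, CPU 3's core group index is first_cpu(cpu_sibling_map[3]), i.e. 2 if its sibling set is {2,3}, and its physical group index is first_cpu(cpu_coregroup_map(3)), i.e. 0 if the package spans CPUs {0,1,2,3}; with SCHED_MC alone, each CPU is its own core group.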
@@ -5597,6 +5621,32 @@ static int cpu_to_allnodes_group(int cpu)
{
return cpu_to_node(cpu);
}
+static void init_numa_sched_groups_power(struct sched_group *group_head)
+{
+ struct sched_group *sg = group_head;
+ int j;
+
+ if (!sg)
+ return;
+next_sg:
+ for_each_cpu_mask(j, sg->cpumask) {
+ struct sched_domain *sd;
+
+ sd = &per_cpu(phys_domains, j);
+ if (j != first_cpu(sd->groups->cpumask)) {
+ /*
+ * Only add "power" once for each
+ * physical package.
+ */
+ continue;
+ }
+
+ sg->cpu_power += sd->groups->cpu_power;
+ }
+ sg = sg->next;
+ if (sg != group_head)
+ goto next_sg;
+}
#endif
/*
@@ -5672,6 +5722,17 @@ void build_sched_domains(const cpumask_t *cpu_map)
sd->parent = p;
sd->groups = &sched_group_phys[group];
+#ifdef CONFIG_SCHED_MC
+ p = sd;
+ sd = &per_cpu(core_domains, i);
+ group = cpu_to_core_group(i);
+ *sd = SD_MC_INIT;
+ sd->span = cpu_coregroup_map(i);
+ cpus_and(sd->span, sd->span, *cpu_map);
+ sd->parent = p;
+ sd->groups = &sched_group_core[group];
+#endif
+
#ifdef CONFIG_SCHED_SMT
p = sd;
sd = &per_cpu(cpu_domains, i);
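
With NUMA, SCHED_MC and SCHED_SMT all enabled, each CPU i now carries the parent chain cpu_domains(i) -> core_domains(i) -> phys_domains(i) -> node-level domains; the new MC level spans cpu_coregroup_map(i), masked by *cpu_map, and slots between the SMT and physical levels. This is also why the attach loop further down picks core_domains as the base domain when only SCHED_MC is set.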
@@ -5697,6 +5758,19 @@ void build_sched_domains(const cpumask_t *cpu_map)
}
#endif
+#ifdef CONFIG_SCHED_MC
+ /* Set up multi-core groups */
+ for_each_cpu_mask(i, *cpu_map) {
+ cpumask_t this_core_map = cpu_coregroup_map(i);
+ cpus_and(this_core_map, this_core_map, *cpu_map);
+ if (i != first_cpu(this_core_map))
+ continue;
+ init_sched_build_groups(sched_group_core, this_core_map,
+ &cpu_to_core_group);
+ }
+#endif
+
+
/* Set up physical groups */
for (i = 0; i < MAX_NUMNODES; i++) {
cpumask_t nodemask = node_to_cpumask(i);
@@ -5793,51 +5867,38 @@ void build_sched_domains(const cpumask_t *cpu_map)
power = SCHED_LOAD_SCALE;
sd->groups->cpu_power = power;
#endif
+#ifdef CONFIG_SCHED_MC
+ sd = &per_cpu(core_domains, i);
+ power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
+ * SCHED_LOAD_SCALE / 10;
+ sd->groups->cpu_power = power;
+
+ sd = &per_cpu(phys_domains, i);
+ /*
+ * This has to be < 2 * SCHED_LOAD_SCALE
+ * Lets keep it SCHED_LOAD_SCALE, so that
+ * while calculating NUMA group's cpu_power
+ * we can simply do
+ * numa_group->cpu_power += phys_group->cpu_power;
+ *
+ * See "only add power once for each physical pkg"
+ * comment below
+ */
+ sd->groups->cpu_power = SCHED_LOAD_SCALE;
+#else
sd = &per_cpu(phys_domains, i);
power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
(cpus_weight(sd->groups->cpumask)-1) / 10;
sd->groups->cpu_power = power;
-
-#ifdef CONFIG_NUMA
- sd = &per_cpu(allnodes_domains, i);
- if (sd->groups) {
- power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
- (cpus_weight(sd->groups->cpumask)-1) / 10;
- sd->groups->cpu_power = power;
- }
#endif
}
#ifdef CONFIG_NUMA
- for (i = 0; i < MAX_NUMNODES; i++) {
- struct sched_group *sg = sched_group_nodes[i];
- int j;
-
- if (sg == NULL)
- continue;
-next_sg:
- for_each_cpu_mask(j, sg->cpumask) {
- struct sched_domain *sd;
- int power;
+ for (i = 0; i < MAX_NUMNODES; i++)
+ init_numa_sched_groups_power(sched_group_nodes[i]);
- sd = &per_cpu(phys_domains, j);
- if (j != first_cpu(sd->groups->cpumask)) {
- /*
- * Only add "power" once for each
- * physical package.
- */
- continue;
- }
- power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
- (cpus_weight(sd->groups->cpumask)-1) / 10;
-
- sg->cpu_power += power;
- }
- sg = sg->next;
- if (sg != sched_group_nodes[i])
- goto next_sg;
- }
+ init_numa_sched_groups_power(sched_group_allnodes);
#endif
/* Attach the domains */
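
As a hedged worked example with SCHED_LOAD_SCALE = 128 (its usual value) and two SMT siblings per core: the MC-level group power becomes 128 + (2 - 1) * 128 / 10 = 140, while the physical-level group power is pinned to exactly 128 so that init_numa_sched_groups_power() can compute a NUMA group's power as a plain sum of one physical group per package (e.g. 4 packages give 512). Without SCHED_MC the physical group keeps the old formula, e.g. 128 + (4 - 1) * 128 / 10 = 166 for a package with four logical CPUs.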
@@ -5845,6 +5906,8 @@ next_sg:
struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
sd = &per_cpu(cpu_domains, i);
+#elif defined(CONFIG_SCHED_MC)
+ sd = &per_cpu(core_domains, i);
#else
sd = &per_cpu(phys_domains, i);
#endif
@@ -6017,7 +6080,7 @@ void __init sched_init(void)
runqueue_t *rq;
int i, j, k;
- for_each_cpu(i) {
+ for_each_possible_cpu(i) {
prio_array_t *array;
rq = cpu_rq(i);
@@ -6035,6 +6098,7 @@ void __init sched_init(void)
rq->push_cpu = 0;
rq->migration_thread = NULL;
INIT_LIST_HEAD(&rq->migration_queue);
+ rq->cpu = i;
#endif
atomic_set(&rq->nr_iowait, 0);
@@ -6075,7 +6139,7 @@ void __might_sleep(char *file, int line)
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
return;
prev_jiffy = jiffies;
- printk(KERN_ERR "Debug: sleeping function called from invalid"
+ printk(KERN_ERR "BUG: sleeping function called from invalid"
" context at %s:%d\n", file, line);
printk("in_atomic():%d, irqs_disabled():%d\n",
in_atomic(), irqs_disabled());
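
The same "BUG:" prefix is applied to the might-sleep debug check, which fires when a function that can sleep is called from atomic context (with preemption and the sleep-in-atomic debug option enabled). A hedged illustration of what trips it; the lock and function names are made up, and <linux/slab.h> plus <linux/spinlock.h> are assumed to be included:

	static DEFINE_SPINLOCK(demo_lock);

	static void bad_alloc(void)
	{
		void *p;

		spin_lock(&demo_lock);		/* atomic context */
		p = kmalloc(64, GFP_KERNEL);	/* may sleep: __might_sleep() prints
						 * "BUG: sleeping function called from
						 *  invalid context at ..." */
		kfree(p);
		spin_unlock(&demo_lock);
	}

Using GFP_ATOMIC for the allocation, or moving it outside the locked section, avoids the report.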