aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpu.c11
-rw-r--r--kernel/cpuset.c4
-rw-r--r--kernel/hrtimer.c1
-rw-r--r--kernel/irq/autoprobe.c15
-rw-r--r--kernel/irq/chip.c2
-rw-r--r--kernel/irq/handle.c48
-rw-r--r--kernel/irq/manage.c22
-rw-r--r--kernel/irq/migration.c14
-rw-r--r--kernel/irq/numa_migrate.c7
-rw-r--r--kernel/irq/proc.c29
-rw-r--r--kernel/irq/spurious.c5
-rw-r--r--kernel/profile.c4
-rw-r--r--kernel/rcuclassic.c2
-rw-r--r--kernel/sched.c970
-rw-r--r--kernel/sched_cpupri.c39
-rw-r--r--kernel/sched_cpupri.h5
-rw-r--r--kernel/sched_fair.c32
-rw-r--r--kernel/sched_rt.c73
-rw-r--r--kernel/sched_stats.h3
-rw-r--r--kernel/softirq.c20
-rw-r--r--kernel/taskstats.c2
-rw-r--r--kernel/time/clockevents.c2
-rw-r--r--kernel/time/tick-broadcast.c2
-rw-r--r--kernel/time/tick-common.c12
-rw-r--r--kernel/time/tick-sched.c10
-rw-r--r--kernel/trace/trace.c4
26 files changed, 776 insertions, 562 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8ea32e8d68b..bae131a1211 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -24,19 +24,20 @@
cpumask_t cpu_present_map __read_mostly;
EXPORT_SYMBOL(cpu_present_map);
-#ifndef CONFIG_SMP
-
/*
* Represents all cpu's that are currently online.
*/
-cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
+cpumask_t cpu_online_map __read_mostly;
EXPORT_SYMBOL(cpu_online_map);
+#ifdef CONFIG_INIT_ALL_POSSIBLE
cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
+#else
+cpumask_t cpu_possible_map __read_mostly;
+#endif
EXPORT_SYMBOL(cpu_possible_map);
-#else /* CONFIG_SMP */
-
+#ifdef CONFIG_SMP
/* Serializes the updates to cpu_online_map, cpu_present_map */
static DEFINE_MUTEX(cpu_add_remove_lock);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 96c0ba13b8c..39c1a4c1c5a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -896,7 +896,7 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
if (!*buf) {
cpus_clear(trialcs.cpus_allowed);
} else {
- retval = cpulist_parse(buf, trialcs.cpus_allowed);
+ retval = cpulist_parse(buf, &trialcs.cpus_allowed);
if (retval < 0)
return retval;
@@ -1482,7 +1482,7 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
mask = cs->cpus_allowed;
mutex_unlock(&callback_mutex);
- return cpulist_scnprintf(page, PAGE_SIZE, mask);
+ return cpulist_scnprintf(page, PAGE_SIZE, &mask);
}
static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index bda9cb92427..eb2bfefa6dc 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -32,7 +32,6 @@
*/
#include <linux/cpu.h>
-#include <linux/irq.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 650ce4102a6..cc0f7321b8c 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -40,9 +40,6 @@ unsigned long probe_irq_on(void)
* flush such a longstanding irq before considering it as spurious.
*/
for_each_irq_desc_reverse(i, desc) {
- if (!desc)
- continue;
-
spin_lock_irq(&desc->lock);
if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
/*
@@ -71,9 +68,6 @@ unsigned long probe_irq_on(void)
* happened in the previous stage, it may have masked itself)
*/
for_each_irq_desc_reverse(i, desc) {
- if (!desc)
- continue;
-
spin_lock_irq(&desc->lock);
if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -92,9 +86,6 @@ unsigned long probe_irq_on(void)
* Now filter out any obviously spurious interrupts
*/
for_each_irq_desc(i, desc) {
- if (!desc)
- continue;
-
spin_lock_irq(&desc->lock);
status = desc->status;
@@ -133,9 +124,6 @@ unsigned int probe_irq_mask(unsigned long val)
int i;
for_each_irq_desc(i, desc) {
- if (!desc)
- continue;
-
spin_lock_irq(&desc->lock);
status = desc->status;
@@ -178,9 +166,6 @@ int probe_irq_off(unsigned long val)
unsigned int status;
for_each_irq_desc(i, desc) {
- if (!desc)
- continue;
-
spin_lock_irq(&desc->lock);
status = desc->status;
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6eb3c7952b6..f63c706d25e 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -46,7 +46,7 @@ void dynamic_irq_init(unsigned int irq)
desc->irq_count = 0;
desc->irqs_unhandled = 0;
#ifdef CONFIG_SMP
- cpus_setall(desc->affinity);
+ cpumask_setall(&desc->affinity);
#endif
spin_unlock_irqrestore(&desc->lock, flags);
}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6492400cb50..c20db0be917 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -56,10 +56,6 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
int nr_irqs = NR_IRQS;
EXPORT_SYMBOL_GPL(nr_irqs);
-void __init __attribute__((weak)) arch_early_irq_init(void)
-{
-}
-
#ifdef CONFIG_SPARSE_IRQ
static struct irq_desc irq_desc_init = {
.irq = -1,
@@ -90,13 +86,11 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
desc->kstat_irqs = (unsigned int *)ptr;
}
-void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu)
-{
-}
-
static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
{
memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
+
+ spin_lock_init(&desc->lock);
desc->irq = irq;
#ifdef CONFIG_SMP
desc->cpu = cpu;
@@ -134,7 +128,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm
/* FIXME: use bootmem alloc ...*/
static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS];
-void __init early_irq_init(void)
+int __init early_irq_init(void)
{
struct irq_desc *desc;
int legacy_count;
@@ -146,6 +140,7 @@ void __init early_irq_init(void)
for (i = 0; i < legacy_count; i++) {
desc[i].irq = i;
desc[i].kstat_irqs = kstat_irqs_legacy[i];
+ lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
irq_desc_ptrs[i] = desc + i;
}
@@ -153,7 +148,7 @@ void __init early_irq_init(void)
for (i = legacy_count; i < NR_IRQS; i++)
irq_desc_ptrs[i] = NULL;
- arch_early_irq_init();
+ return arch_early_irq_init();
}
struct irq_desc *irq_to_desc(unsigned int irq)
@@ -203,7 +198,7 @@ out_unlock:
return desc;
}
-#else
+#else /* !CONFIG_SPARSE_IRQ */
struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
[0 ... NR_IRQS-1] = {
@@ -218,7 +213,31 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
}
};
-#endif
+int __init early_irq_init(void)
+{
+ struct irq_desc *desc;
+ int count;
+ int i;
+
+ desc = irq_desc;
+ count = ARRAY_SIZE(irq_desc);
+
+ for (i = 0; i < count; i++)
+ desc[i].irq = i;
+
+ return arch_early_irq_init();
+}
+
+struct irq_desc *irq_to_desc(unsigned int irq)
+{
+ return (irq < NR_IRQS) ? irq_desc + irq : NULL;
+}
+
+struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+{
+ return irq_to_desc(irq);
+}
+#endif /* !CONFIG_SPARSE_IRQ */
/*
* What should we do if we get a hw irq event on an illegal vector?
@@ -428,9 +447,6 @@ void early_init_irq_lock_class(void)
int i;
for_each_irq_desc(i, desc) {
- if (!desc)
- continue;
-
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
}
}
@@ -439,7 +455,7 @@ void early_init_irq_lock_class(void)
unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
{
struct irq_desc *desc = irq_to_desc(irq);
- return desc->kstat_irqs[cpu];
+ return desc ? desc->kstat_irqs[cpu] : 0;
}
#endif
EXPORT_SYMBOL(kstat_irqs_cpu);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 540f6c49f3f..61c4a9b6216 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -79,7 +79,7 @@ int irq_can_set_affinity(unsigned int irq)
* @cpumask: cpumask
*
*/
-int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
+int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
@@ -91,14 +91,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
- desc->affinity = cpumask;
+ cpumask_copy(&desc->affinity, cpumask);
desc->chip->set_affinity(irq, cpumask);
} else {
desc->status |= IRQ_MOVE_PENDING;
- desc->pending_mask = cpumask;
+ cpumask_copy(&desc->pending_mask, cpumask);
}
#else
- desc->affinity = cpumask;
+ cpumask_copy(&desc->affinity, cpumask);
desc->chip->set_affinity(irq, cpumask);
#endif
desc->status |= IRQ_AFFINITY_SET;
@@ -112,26 +112,24 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
*/
int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
{
- cpumask_t mask;
-
if (!irq_can_set_affinity(irq))
return 0;
- cpus_and(mask, cpu_online_map, irq_default_affinity);
-
/*
* Preserve an userspace affinity setup, but make sure that
* one of the targets is online.
*/
if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
- if (cpus_intersects(desc->affinity, cpu_online_map))
- mask = desc->affinity;
+ if (cpumask_any_and(&desc->affinity, cpu_online_mask)
+ < nr_cpu_ids)
+ goto set_affinity;
else
desc->status &= ~IRQ_AFFINITY_SET;
}
- desc->affinity = mask;
- desc->chip->set_affinity(irq, mask);
+ cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity);
+set_affinity:
+ desc->chip->set_affinity(irq, &desc->affinity);
return 0;
}
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 9db681d9581..bd72329e630 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -4,7 +4,6 @@
void move_masked_irq(int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
- cpumask_t tmp;
if (likely(!(desc->status & IRQ_MOVE_PENDING)))
return;
@@ -19,7 +18,7 @@ void move_masked_irq(int irq)
desc->status &= ~IRQ_MOVE_PENDING;
- if (unlikely(cpus_empty(desc->pending_mask)))
+ if (unlikely(cpumask_empty(&desc->pending_mask)))
return;
if (!desc->chip->set_affinity)
@@ -27,8 +26,6 @@ void move_masked_irq(int irq)
assert_spin_locked(&desc->lock);
- cpus_and(tmp, desc->pending_mask, cpu_online_map);
-
/*
* If there was a valid mask to work with, please
* do the disable, re-program, enable sequence.
@@ -41,10 +38,13 @@ void move_masked_irq(int irq)
* For correct operation this depends on the caller
* masking the irqs.
*/
- if (likely(!cpus_empty(tmp))) {
- desc->chip->set_affinity(irq,tmp);
+ if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask)
+ < nr_cpu_ids)) {
+ cpumask_and(&desc->affinity,
+ &desc->pending_mask, cpu_online_mask);
+ desc->chip->set_affinity(irq, &desc->affinity);
}
- cpus_clear(desc->pending_mask);
+ cpumask_clear(&desc->pending_mask);
}
void move_native_irq(int irq)
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 089c3746358..ecf765c6a77 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -42,6 +42,7 @@ static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
struct irq_desc *desc, int cpu)
{
memcpy(desc, old_desc, sizeof(struct irq_desc));
+ spin_lock_init(&desc->lock);
desc->cpu = cpu;
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
@@ -74,10 +75,8 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
node = cpu_to_node(cpu);
desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
- printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n",
- irq, cpu, node);
if (!desc) {
- printk(KERN_ERR "can not get new irq_desc for moving\n");
+ printk(KERN_ERR "irq %d: can not get new irq_desc for migration.\n", irq);
/* still use old one */
desc = old_desc;
goto out_unlock;
@@ -106,8 +105,6 @@ struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
return desc;
old_cpu = desc->cpu;
- printk(KERN_DEBUG
- "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu);
if (old_cpu != cpu) {
node = cpu_to_node(cpu);
old_node = cpu_to_node(old_cpu);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index f6b3440f05b..d2c0e5ee53c 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -40,33 +40,42 @@ static ssize_t irq_affinity_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *pos)
{
unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
- cpumask_t new_value;
+ cpumask_var_t new_value;
int err;
if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
irq_balancing_disabled(irq))
return -EIO;
+ if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
+ return -ENOMEM;
+
err = cpumask_parse_user(buffer, count, new_value);
if (err)
- return err;
+ goto free_cpumask;
- if (!is_affinity_mask_valid(new_value))
- return -EINVAL;
+ if (!is_affinity_mask_valid(*new_value)) {
+ err = -EINVAL;
+ goto free_cpumask;
+ }
/*
* Do not allow disabling IRQs completely - it's a too easy
* way to make the system unusable accidentally :-) At least
* one online CPU still has to be targeted.
*/
- if (!cpus_intersects(new_value, cpu_online_map))
+ if (!cpumask_intersects(new_value, cpu_online_mask)) {
/* Special case for empty set - allow the architecture
code to set default SMP affinity. */
- return irq_select_affinity_usr(irq) ? -EINVAL : count;
-
- irq_set_affinity(irq, new_value);
+ err = irq_select_affinity_usr(irq) ? -EINVAL : count;
+ } else {
+ irq_set_affinity(irq, new_value);
+ err = count;
+ }
- return count;
+free_cpumask:
+ free_cpumask_var(new_value);
+ return err;
}
static int irq_affinity_proc_open(struct inode *inode, struct file *file)
@@ -95,7 +104,7 @@ static ssize_t default_affinity_write(struct file *file,
cpumask_t new_value;
int err;
- err = cpumask_parse_user(buffer, count, new_value);
+ err = cpumask_parse_user(buffer, count, &new_value);
if (err)
return err;
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 3738107531f..dd364c11e56 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -91,9 +91,6 @@ static int misrouted_irq(int irq)
int i, ok = 0;
for_each_irq_desc(i, desc) {
- if (!desc)
- continue;
-
if (!i)
continue;
@@ -115,8 +112,6 @@ static void poll_spurious_irqs(unsigned long dummy)
for_each_irq_desc(i, desc) {
unsigned int status;
- if (!desc)
- continue;
if (!i)
continue;
diff --git a/kernel/profile.c b/kernel/profile.c
index 60adefb59b5..4cb7d68fed8 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -442,7 +442,7 @@ void profile_tick(int type)
static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
- int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
+ int len = cpumask_scnprintf(page, count, (cpumask_t *)data);
if (count - len < 2)
return -EINVAL;
len += sprintf(page + len, "\n");
@@ -456,7 +456,7 @@ static int prof_cpu_mask_write_proc(struct file *file,
unsigned long full_count = count, err;
cpumask_t new_value;
- err = cpumask_parse_user(buffer, count, new_value);
+ err = cpumask_parse_user(buffer, count, &new_value);
if (err)
return err;
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index e503a002f33..c03ca3e6191 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -393,7 +393,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
* unnecessarily.
*/
smp_mb();
- cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+ cpumask_andnot(&rcp->cpumask, cpu_online_mask, nohz_cpu_mask);
rcp->signaled = 0;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 635eaffe1e4..930bf2e6d71 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -498,18 +498,26 @@ struct rt_rq {
*/
struct root_domain {
atomic_t refcount;
- cpumask_t span;
- cpumask_t online;
+ cpumask_var_t span;
+ cpumask_var_t online;
/*
* The "RT overload" flag: it gets set if a CPU has more than
* one runnable RT task.
*/
- cpumask_t rto_mask;
+ cpumask_var_t rto_mask;
atomic_t rto_count;
#ifdef CONFIG_SMP
struct cpupri cpupri;
#endif
+#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+ /*
+ * Preferred wake up cpu nominated by sched_mc balance that will be
+ * used when most cpus are idle in the system indicating overall very
+ * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2)
+ */
+ unsigned int sched_mc_preferred_wakeup_cpu;
+#endif
};
/*
@@ -1514,7 +1522,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
struct sched_domain *sd = data;
int i;
- for_each_cpu_mask(i, sd->span) {
+ for_each_cpu(i, sched_domain_span(sd)) {
/*
* If there are currently no tasks on the cpu pretend there
* is one of average load so that when a new task gets to
@@ -1535,7 +1543,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
shares = tg->shares;
- for_each_cpu_mask(i, sd->span)
+ for_each_cpu(i, sched_domain_span(sd))
update_group_shares_cpu(tg, i, shares, rq_weight);
return 0;
@@ -2101,15 +2109,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
int i;
/* Skip over this group if it has no CPUs allowed */
- if (!cpus_intersects(group->cpumask, p->cpus_allowed))
+ if (!cpumask_intersects(sched_group_cpus(group),
+ &p->cpus_allowed))
continue;
- local_group = cpu_isset(this_cpu, group->cpumask);
+ local_group = cpumask_test_cpu(this_cpu,
+ sched_group_cpus(group));
/* Tally up the load of all CPUs in the group */
avg_load = 0;
- for_each_cpu_mask_nr(i, group->cpumask) {
+ for_each_cpu(i, sched_group_cpus(group)) {
/* Bias balancing toward cpus of our domain */
if (local_group)
load = source_load(i, load_idx);
@@ -2141,17 +2151,14 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
* find_idlest_cpu - find the idlest cpu among the cpus in group.
*/
static int
-find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu,
- cpumask_t *tmp)
+find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
{
unsigned long load, min_load = ULONG_MAX;
int idlest = -1;
int i;
/* Traverse only the allowed CPUs */
- cpus_and(*tmp, group->cpumask, p->cpus_allowed);
-
- for_each_cpu_mask_nr(i, *tmp) {
+ for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
load = weighted_cpuload(i);
if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -2193,7 +2200,6 @@ static int sched_balance_self(int cpu, int flag)
update_shares(sd);
while (sd) {
- cpumask_t span, tmpmask;
struct sched_group *group;
int new_cpu, weight;
@@ -2202,14 +2208,13 @@ static int sched_balance_self(int cpu, int flag)
continue;
}
- span = sd->span;
group = find_idlest_group(sd, t, cpu);
if (!group) {
sd = sd->child;
continue;
}
- new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask);
+ new_cpu = find_idlest_cpu(group, t, cpu);
if (new_cpu == -1 || new_cpu == cpu) {
/* Now try balancing at a lower domain level of cpu */
sd = sd->child;
@@ -2218,10 +2223,10 @@ static int sched_balance_self(int cpu, int flag)
/* Now try balancing at a lower domain level of new_cpu */
cpu = new_cpu;
+ weight = cpumask_weight(sched_domain_span(sd));
sd = NULL;
- weight = cpus_weight(span);
for_each_domain(cpu, tmp) {
- if (weight <= cpus_weight(tmp->span))
+ if (weight <= cpumask_weight(sched_domain_span(tmp)))
break;
if (tmp->flags & flag)
sd = tmp;
@@ -2266,7 +2271,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
cpu = task_cpu(p);
for_each_domain(this_cpu, sd) {
- if (cpu_isset(cpu, sd->span)) {
+ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
update_shares(sd);
break;
}
@@ -2315,7 +2320,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
else {
struct sched_domain *sd;
for_each_domain(this_cpu, sd) {
- if (cpu_isset(cpu, sd->span)) {
+ if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
schedstat_inc(sd, ttwu_wake_remote);
break;
}
@@ -2846,7 +2851,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
struct rq *rq;
rq = task_rq_lock(p, &flags);
- if (!cpu_isset(dest_cpu, p->cpus_allowed)
+ if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
|| unlikely(!cpu_active(dest_cpu)))
goto out;
@@ -2911,7 +2916,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
* 2) cannot be migrated to this CPU due to cpus_allowed, or
* 3) are cache-hot on their current CPU.
*/
- if (!cpu_isset(this_cpu, p->cpus_allowed)) {
+ if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
schedstat_inc(p, se.nr_failed_migrations_affine);
return 0;
}
@@ -3086,7 +3091,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
static struct sched_group *
find_busiest_group(struct sched_domain *sd, int this_cpu,
unsigned long *imbalance, enum cpu_idle_type idle,
- int *sd_idle, const cpumask_t *cpus, int *balance)
+ int *sd_idle, const struct cpumask *cpus, int *balance)
{
struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -3122,10 +3127,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
unsigned long sum_avg_load_per_task;
unsigned long avg_load_per_task;
- local_group = cpu_isset(this_cpu, group->cpumask);
+ local_group = cpumask_test_cpu(this_cpu,
+ sched_group_cpus(group));
if (local_group)
- balance_cpu = first_cpu(group->cpumask);
+ balance_cpu = cpumask_first(sched_group_cpus(group));
/* Tally up the load of all CPUs in the group */
sum_weighted_load = sum_nr_running = avg_load = 0;
@@ -3134,13 +3140,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
max_cpu_load = 0;
min_cpu_load = ~0UL;
- for_each_cpu_mask_nr(i, group->cpumask) {
- struct rq *rq;
-
- if (!cpu_isset(i, *cpus))
- continue;
-
- rq = cpu_rq(i);
+ for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+ struct rq *rq = cpu_rq(i);
if (*sd_idle && rq->nr_running)
*sd_idle = 0;
@@ -3251,8 +3252,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
*/
if ((sum_nr_running < min_nr_running) ||
(sum_nr_running == min_nr_running &&
- first_cpu(group->cpumask) <
- first_cpu(group_min->cpumask))) {
+ cpumask_first(sched_group_cpus(group)) >
+ cpumask_first(sched_group_cpus(group_min)))) {
group_min = group;
min_nr_running = sum_nr_running;
min_load_per_task = sum_weighted_load /
@@ -3267,8 +3268,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
if (sum_nr_running <= group_capacity - 1) {
if (sum_nr_running > leader_nr_running ||
(sum_nr_running == leader_nr_running &&
- first_cpu(group->cpumask) >
- first_cpu(group_leader->cpumask))) {
+ cpumask_first(sched_group_cpus(group)) <
+ cpumask_first(sched_group_cpus(group_leader)))) {
group_leader = group;
leader_nr_running = sum_nr_running;
}
@@ -3394,6 +3395,10 @@ out_balanced:
if (this == group_leader && group_leader != group_min) {
*imbalance = min_load_per_task;
+ if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
+ cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
+ cpumask_first(sched_group_cpus(group_leader));
+ }
return group_min;
}
#endif
@@ -3407,16 +3412,16 @@ ret:
*/
static struct rq *
find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
- unsigned long imbalance, const cpumask_t *cpus)
+ unsigned long imbalance, const struct cpumask *cpus)
{
struct rq *busiest = NULL, *rq;
unsigned long max_load = 0;
int i;
- for_each_cpu_mask_nr(i, group->cpumask) {
+ for_each_cpu(i, sched_group_cpus(group)) {
unsigned long wl;
- if (!cpu_isset(i, *cpus))
+ if (!cpumask_test_cpu(i, cpus))
continue;
rq = cpu_rq(i);
@@ -3446,7 +3451,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
*/
static int load_balance(int this_cpu, struct rq *this_rq,
struct sched_domain *sd, enum cpu_idle_type idle,
- int *balance, cpumask_t *cpus)
+ int *balance, struct cpumask *cpus)
{
int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
struct sched_group *group;
@@ -3454,7 +3459,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
struct rq *busiest;
unsigned long flags;
- cpus_setall(*cpus);
+ cpumask_setall(cpus);
/*
* When power savings policy is enabled for the parent domain, idle
@@ -3514,8 +3519,8 @@ redo:
/* All tasks on this runqueue were pinned by CPU affinity */
if (unlikely(all_pinned)) {
- cpu_clear(cpu_of(busiest), *cpus);
- if (!cpus_empty(*cpus))
+ cpumask_clear_cpu(cpu_of(busiest), cpus);
+ if (!cpumask_empty(cpus))
goto redo;
goto out_balanced;
}
@@ -3532,7 +3537,8 @@ redo:
/* don't kick the migration_thread, if the curr
* task on busiest cpu can't be moved to this_cpu
*/
- if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) {
+ if (!cpumask_test_cpu(this_cpu,
+ &busiest->curr->cpus_allowed)) {
spin_unlock_irqrestore(&busiest->lock, flags);
all_pinned = 1;
goto out_one_pinned;
@@ -3607,7 +3613,7 @@ out:
*/
static int
load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
- cpumask_t *cpus)
+ struct cpumask *cpus)
{
struct sched_group *group;
struct rq *busiest = NULL;
@@ -3616,7 +3622,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
int sd_idle = 0;
int all_pinned = 0;
- cpus_setall(*cpus);
+ cpumask_setall(cpus);
/*
* When power savings policy is enabled for the parent domain, idle
@@ -3660,17 +3666,71 @@ redo:
double_unlock_balance(this_rq, busiest);
if (unlikely(all_pinned)) {
- cpu_clear(cpu_of(busiest), *cpus);
- if (!cpus_empty(*cpus))
+ cpumask_clear_cpu(cpu_of(busiest), cpus);
+ if (!cpumask_empty(cpus))
goto redo;
}
}
if (!ld_moved) {
+ int active_balance = 0;
+
schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
!test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
return -1;
+
+ if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
+ return -1;
+
+ if (sd->nr_balance_failed++ < 2)
+ return -1;
+
+ /*
+ * The only task running in a non-idle cpu can be moved to this
+ * cpu in an attempt to completely freeup the other CPU
+ * package. The same method used to move task in load_balance()
+ * have been extended for load_balance_newidle() to speedup
+ * consolidation at sched_mc=POWERSAVINGS_BALANCE_WAKEUP (2)
+ *
+ * The package power saving logic comes from
+ * find_busiest_group(). If there are no imbalance, then
+ * f_b_g() will return NULL. However when sched_mc={1,2} then
+ * f_b_g() will select a group from which a running task may be
+ * pulled to this cpu in order to make the other package idle.
+ * If there is no opportunity to make a package idle and if
+ * there are no imbalance, then f_b_g() will return NULL and no
+ * action will be taken in load_balance_newidle().
+ *
+ * Under normal task pull operation due to imbala