diff options
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- | kernel/sched_fair.c | 150 |
1 files changed, 95 insertions, 55 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ce514afd78f..e0c0b4bc3f0 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -341,23 +341,20 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) cfs_rq->rb_leftmost = next_node; } - if (cfs_rq->next == se) - cfs_rq->next = NULL; - rb_erase(&se->run_node, &cfs_rq->tasks_timeline); } -static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq) -{ - return cfs_rq->rb_leftmost; -} - static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) { - return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node); + struct rb_node *left = cfs_rq->rb_leftmost; + + if (!left) + return NULL; + + return rb_entry(left, struct sched_entity, run_node); } -static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) +static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) { struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); @@ -389,20 +386,6 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, #endif /* - * delta *= P[w / rw] - */ -static inline unsigned long -calc_delta_weight(unsigned long delta, struct sched_entity *se) -{ - for_each_sched_entity(se) { - delta = calc_delta_mine(delta, - se->load.weight, &cfs_rq_of(se)->load); - } - - return delta; -} - -/* * delta /= w */ static inline unsigned long @@ -443,12 +426,20 @@ static u64 __sched_period(unsigned long nr_running) */ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) { - unsigned long nr_running = cfs_rq->nr_running; + u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq); + + for_each_sched_entity(se) { + struct load_weight *load = &cfs_rq->load; - if (unlikely(!se->on_rq)) - nr_running++; + if (unlikely(!se->on_rq)) { + struct load_weight lw = cfs_rq->load; - return calc_delta_weight(__sched_period(nr_running), se); + update_load_add(&lw, se->load.weight); + load = &lw; + } + slice = calc_delta_mine(slice, se->load.weight, load); + } + return slice; } /* @@ -495,6 +486,8 @@ static void update_curr(struct cfs_rq *cfs_rq) * overflow on 32 bits): */ delta_exec = (unsigned long)(now - curr->exec_start); + if (!delta_exec) + return; __update_curr(cfs_rq, curr, delta_exec); curr->exec_start = now; @@ -719,6 +712,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) __enqueue_entity(cfs_rq, se); } +static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) +{ + if (cfs_rq->last == se) + cfs_rq->last = NULL; + + if (cfs_rq->next == se) + cfs_rq->next = NULL; +} + static void dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) { @@ -741,6 +743,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) #endif } + clear_buddies(cfs_rq, se); + if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); account_entity_dequeue(cfs_rq, se); @@ -794,24 +798,15 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) static int wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); -static struct sched_entity * -pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) -{ - if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1) - return se; - - return cfs_rq->next; -} - static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) { - struct sched_entity *se = NULL; + struct sched_entity *se = __pick_next_entity(cfs_rq); - if (first_fair(cfs_rq)) { - se = __pick_next_entity(cfs_rq); - se = pick_next(cfs_rq, se); - set_next_entity(cfs_rq, se); - } + if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1) + return cfs_rq->next; + + if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1) + return cfs_rq->last; return se; } @@ -983,6 +978,8 @@ static void yield_task_fair(struct rq *rq) if (unlikely(cfs_rq->nr_running == 1)) return; + clear_buddies(cfs_rq, se); + if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { update_rq_clock(rq); /* @@ -1016,16 +1013,33 @@ static void yield_task_fair(struct rq *rq) * search starts with cpus closest then further out as needed, * so we always favor a closer, idle cpu. * Domains may include CPUs that are not usable for migration, - * hence we need to mask them out (cpu_active_map) + * hence we need to mask them out (cpu_active_mask) * * Returns the CPU we should wake onto. */ #if defined(ARCH_HAS_SCHED_WAKE_IDLE) static int wake_idle(int cpu, struct task_struct *p) { - cpumask_t tmp; struct sched_domain *sd; int i; + unsigned int chosen_wakeup_cpu; + int this_cpu; + + /* + * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu + * are idle and this is not a kernel thread and this task's affinity + * allows it to be moved to preferred cpu, then just move! + */ + + this_cpu = smp_processor_id(); + chosen_wakeup_cpu = + cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu; + + if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP && + idle_cpu(cpu) && idle_cpu(this_cpu) && + p->mm && !(p->flags & PF_KTHREAD) && + cpu_isset(chosen_wakeup_cpu, p->cpus_allowed)) + return chosen_wakeup_cpu; /* * If it is idle, then it is the best cpu to run this task. @@ -1043,10 +1057,9 @@ static int wake_idle(int cpu, struct task_struct *p) if ((sd->flags & SD_WAKE_IDLE) || ((sd->flags & SD_WAKE_IDLE_FAR) && !task_hot(p, task_rq(p)->clock, sd))) { - cpus_and(tmp, sd->span, p->cpus_allowed); - cpus_and(tmp, tmp, cpu_active_map); - for_each_cpu_mask_nr(i, tmp) { - if (idle_cpu(i)) { + for_each_cpu_and(i, sched_domain_span(sd), + &p->cpus_allowed) { + if (cpu_active(i) && idle_cpu(i)) { if (i != task_cpu(p)) { schedstat_inc(p, se.nr_wakeups_idle); @@ -1239,13 +1252,13 @@ static int select_task_rq_fair(struct task_struct *p, int sync) * this_cpu and prev_cpu are present in: */ for_each_domain(this_cpu, sd) { - if (cpu_isset(prev_cpu, sd->span)) { + if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) { this_sd = sd; break; } } - if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) + if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed))) goto out; /* @@ -1325,26 +1338,52 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) return 0; } +static void set_last_buddy(struct sched_entity *se) +{ + for_each_sched_entity(se) + cfs_rq_of(se)->last = se; +} + +static void set_next_buddy(struct sched_entity *se) +{ + for_each_sched_entity(se) + cfs_rq_of(se)->next = se; +} + /* * Preempt the current task with a newly woken task if needed: */ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) { struct task_struct *curr = rq->curr; - struct cfs_rq *cfs_rq = task_cfs_rq(curr); struct sched_entity *se = &curr->se, *pse = &p->se; + struct cfs_rq *cfs_rq = task_cfs_rq(curr); + + update_curr(cfs_rq); if (unlikely(rt_prio(p->prio))) { - update_rq_clock(rq); - update_curr(cfs_rq); resched_task(curr); return; } + if (unlikely(p->sched_class != &fair_sched_class)) + return; + if (unlikely(se == pse)) return; - cfs_rq_of(pse)->next = pse; + /* + * Only set the backward buddy when the current task is still on the + * rq. This can happen when a wakeup gets interleaved with schedule on + * the ->pre_schedule() or idle_balance() point, either of which can + * drop the rq lock. + * + * Also, during early boot the idle thread is in the fair class, for + * obvious reasons its a bad idea to schedule back to the idle thread. + */ + if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) + set_last_buddy(se); + set_next_buddy(pse); /* * We can come here with TIF_NEED_RESCHED already set from new task @@ -1396,6 +1435,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) do { se = pick_next_entity(cfs_rq); + set_next_entity(cfs_rq, se); cfs_rq = group_cfs_rq(se); } while (cfs_rq); |