Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--	kernel/sched_fair.c	234
1 files changed, 58 insertions, 176 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index fb8994c6d4b..18fd17172eb 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -409,64 +409,6 @@ static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 /*
- * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
- * that it favours >=0 over <0.
- *
- *   -20         |
- *               |
- *     0 --------+-------
- *             .'
- *    19     .'
- *
- */
-static unsigned long
-calc_delta_asym(unsigned long delta, struct sched_entity *se)
-{
-	struct load_weight lw = {
-		.weight = NICE_0_LOAD,
-		.inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
-	};
-
-	for_each_sched_entity(se) {
-		struct load_weight *se_lw = &se->load;
-		unsigned long rw = cfs_rq_of(se)->load.weight;
-
-#ifdef CONFIG_FAIR_SCHED_GROUP
-		struct cfs_rq *cfs_rq = se->my_q;
-		struct task_group *tg = NULL
-
-		if (cfs_rq)
-			tg = cfs_rq->tg;
-
-		if (tg && tg->shares < NICE_0_LOAD) {
-			/*
-			 * scale shares to what it would have been had
-			 * tg->weight been NICE_0_LOAD:
-			 *
-			 *   weight = 1024 * shares / tg->weight
-			 */
-			lw.weight *= se->load.weight;
-			lw.weight /= tg->shares;
-
-			lw.inv_weight = 0;
-
-			se_lw = &lw;
-			rw += lw.weight - se->load.weight;
-		} else
-#endif
-
-		if (se->load.weight < NICE_0_LOAD) {
-			se_lw = &lw;
-			rw += NICE_0_LOAD - se->load.weight;
-		}
-
-		delta = calc_delta_mine(delta, rw, se_lw);
-	}
-
-	return delta;
-}
-
-/*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
  */
@@ -586,11 +528,12 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_load_add(&cfs_rq->load, se->load.weight);
 	if (!parent_entity(se))
 		inc_cpu_load(rq_of(cfs_rq), se->load.weight);
-	if (entity_is_task(se))
+	if (entity_is_task(se)) {
 		add_cfs_task_weight(cfs_rq, se->load.weight);
+		list_add(&se->group_node, &cfs_rq->tasks);
+	}
 	cfs_rq->nr_running++;
 	se->on_rq = 1;
-	list_add(&se->group_node, &cfs_rq->tasks);
 }
 
 static void
@@ -599,11 +542,12 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_load_sub(&cfs_rq->load, se->load.weight);
 	if (!parent_entity(se))
 		dec_cpu_load(rq_of(cfs_rq), se->load.weight);
-	if (entity_is_task(se))
+	if (entity_is_task(se)) {
 		add_cfs_task_weight(cfs_rq, -se->load.weight);
+		list_del_init(&se->group_node);
+	}
 	cfs_rq->nr_running--;
 	se->on_rq = 0;
-	list_del_init(&se->group_node);
 }
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -1085,7 +1029,6 @@ static long effective_load(struct task_group *tg, int cpu,
 		long wl, long wg)
 {
 	struct sched_entity *se = tg->se[cpu];
-	long more_w;
 
 	if (!tg->parent)
 		return wl;
@@ -1097,18 +1040,17 @@ static long effective_load(struct task_group *tg, int cpu,
 	if (!wl && sched_feat(ASYM_EFF_LOAD))
 		return wl;
 
-	/*
-	 * Instead of using this increment, also add the difference
-	 * between when the shares were last updated and now.
-	 */
-	more_w = se->my_q->load.weight - se->my_q->rq_weight;
-	wl += more_w;
-	wg += more_w;
-
 	for_each_sched_entity(se) {
-#define D(n) (likely(n) ? (n) : 1)
-
 		long S, rw, s, a, b;
+		long more_w;
+
+		/*
+		 * Instead of using this increment, also add the difference
+		 * between when the shares were last updated and now.
+		 */
+		more_w = se->my_q->load.weight - se->my_q->rq_weight;
+		wl += more_w;
+		wg += more_w;
 
 		S = se->my_q->tg->shares;
 		s = se->my_q->shares;
@@ -1117,7 +1059,11 @@ static long effective_load(struct task_group *tg, int cpu,
 		a = S*(rw + wl);
 		b = S*rw + s*wg;
 
-		wl = s*(a-b)/D(b);
+		wl = s*(a-b);
+
+		if (likely(b))
+			wl /= b;
+
 		/*
 		 * Assume the group is already running and will
 		 * thus already be accounted for in the weight.
@@ -1126,7 +1072,6 @@ static long effective_load(struct task_group *tg, int cpu,
 		 * alter the group weight.
 		 */
 		wg = 0;
-#undef D
 	}
 
 	return wl;
@@ -1143,7 +1088,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 #endif
 
 static int
-wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
+wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	    struct task_struct *p, int prev_cpu, int this_cpu, int sync,
 	    int idx, unsigned long load, unsigned long this_load,
 	    unsigned int imbalance)
@@ -1158,6 +1103,11 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
 	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
 		return 0;
 
+	if (!sync && sched_feat(SYNC_WAKEUPS) &&
+	    curr->se.avg_overlap < sysctl_sched_migration_cost &&
+	    p->se.avg_overlap < sysctl_sched_migration_cost)
+		sync = 1;
+
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
 	 * effect of the currently running task from the load
@@ -1182,17 +1132,14 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
 	 * a reasonable amount of time then attract this newly
 	 * woken task:
 	 */
-	if (sync && balanced) {
-		if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
-		    p->se.avg_overlap < sysctl_sched_migration_cost)
-			return 1;
-	}
+	if (sync && balanced)
+		return 1;
 
 	schedstat_inc(p, se.nr_wakeups_affine_attempts);
 	tl_per_task = cpu_avg_load_per_task(this_cpu);
 
-	if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) ||
-			balanced) {
+	if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <=
+			tl_per_task)) {
 		/*
 		 * This domain has SD_WAKE_AFFINE and
 		 * p is cache cold in this domain, and
@@ -1211,16 +1158,17 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 	struct sched_domain *sd, *this_sd = NULL;
 	int prev_cpu, this_cpu, new_cpu;
 	unsigned long load, this_load;
-	struct rq *rq, *this_rq;
+	struct rq *this_rq;
 	unsigned int imbalance;
 	int idx;
 
 	prev_cpu	= task_cpu(p);
-	rq		= task_rq(p);
 	this_cpu	= smp_processor_id();
 	this_rq		= cpu_rq(this_cpu);
 	new_cpu		= prev_cpu;
 
+	if (prev_cpu == this_cpu)
+		goto out;
 	/*
 	 * 'this_sd' is the first domain that both
 	 * this_cpu and prev_cpu are present in:
@@ -1248,13 +1196,10 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 	load = source_load(prev_cpu, idx);
 	this_load = target_load(this_cpu, idx);
 
-	if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
+	if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
 				     load, this_load, imbalance))
 		return this_cpu;
 
-	if (prev_cpu == this_cpu)
-		goto out;
-
 	/*
 	 * Start passive balancing when half the imbalance_pct
 	 * limit is reached.
@@ -1281,62 +1226,20 @@ static unsigned long wakeup_gran(struct sched_entity *se)
 	 * + nice tasks.
 	 */
 	if (sched_feat(ASYM_GRAN))
-		gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se);
-	else
-		gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
+		gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
 
 	return gran;
 }
 
 /*
- * Should 'se' preempt 'curr'.
- *
- *             |s1
- *        |s2
- *   |s3
- *         g
- *      |<--->|c
- *
- *  w(c, s1) = -1
- *  w(c, s2) =  0
- *  w(c, s3) =  1
- *
- */
-static int
-wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
-{
-	s64 gran, vdiff = curr->vruntime - se->vruntime;
-
-	if (vdiff < 0)
-		return -1;
-
-	gran = wakeup_gran(curr);
-	if (vdiff > gran)
-		return 1;
-
-	return 0;
-}
-
-/* return depth at which a sched entity is present in the hierarchy */
-static inline int depth_se(struct sched_entity *se)
-{
-	int depth = 0;
-
-	for_each_sched_entity(se)
-		depth++;
-
-	return depth;
-}
-
-/*
  * Preempt the current task with a newly woken task if needed:
  */
-static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
+static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 {
 	struct task_struct *curr = rq->curr;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	struct sched_entity *se = &curr->se, *pse = &p->se;
-	int se_depth, pse_depth;
+	s64 delta_exec;
 
 	if (unlikely(rt_prio(p->prio))) {
 		update_rq_clock(rq);
@@ -1351,6 +1254,13 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 	cfs_rq_of(pse)->next = pse;
 
 	/*
+	 * We can come here with TIF_NEED_RESCHED already set from new task
+	 * wake up path.
+	 */
+	if (test_tsk_need_resched(curr))
+		return;
+
+	/*
 	 * Batch tasks do not preempt (their preemption is driven by
 	 * the tick):
 	 */
@@ -1360,33 +1270,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 	if (!sched_feat(WAKEUP_PREEMPT))
 		return;
 
-	/*
-	 * preemption test can be made between sibling entities who are in the
-	 * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
-	 * both tasks until we find their ancestors who are siblings of common
-	 * parent.
-	 */
-
-	/* First walk up until both entities are at same depth */
-	se_depth = depth_se(se);
-	pse_depth = depth_se(pse);
-
-	while (se_depth > pse_depth) {
-		se_depth--;
-		se = parent_entity(se);
-	}
-
-	while (pse_depth > se_depth) {
-		pse_depth--;
-		pse = parent_entity(pse);
-	}
-
-	while (!is_same_group(se, pse)) {
-		se = parent_entity(se);
-		pse = parent_entity(pse);
+	if (sched_feat(WAKEUP_OVERLAP) && (sync ||
+			(se->avg_overlap < sysctl_sched_migration_cost &&
+			 pse->avg_overlap < sysctl_sched_migration_cost))) {
+		resched_task(curr);
+		return;
 	}
 
-	if (wakeup_preempt_entity(se, pse) == 1)
+	delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+	if (delta_exec > wakeup_gran(pse))
 		resched_task(curr);
 }
 
@@ -1445,19 +1337,9 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next)
 	if (next == &cfs_rq->tasks)
 		return NULL;
 
-	/* Skip over entities that are not tasks */
-	do {
-		se = list_entry(next, struct sched_entity, group_node);
-		next = next->next;
-	} while (next != &cfs_rq->tasks && !entity_is_task(se));
-
-	if (next == &cfs_rq->tasks)
-		return NULL;
-
-	cfs_rq->balance_iterator = next;
-
-	if (entity_is_task(se))
-		p = task_of(se);
+	se = list_entry(next, struct sched_entity, group_node);
+	p = task_of(se);
+	cfs_rq->balance_iterator = next->next;
 
 	return p;
 }
@@ -1507,7 +1389,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	rcu_read_lock();
 	update_h_load(busiest_cpu);
 
-	list_for_each_entry(tg, &task_groups, list) {
+	list_for_each_entry_rcu(tg, &task_groups, list) {
 		struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
 		unsigned long busiest_h_load = busiest_cfs_rq->h_load;
 		unsigned long busiest_weight = busiest_cfs_rq->load.weight;
@@ -1620,10 +1502,10 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 		 * 'current' within the tree based on its new key value.
 		 */
 		swap(curr->vruntime, se->vruntime);
+		resched_task(rq->curr);
 	}
 
 	enqueue_task_fair(rq, p, 0);
-	resched_task(rq->curr);
 }
 
 /*
@@ -1642,7 +1524,7 @@ static void prio_changed_fair(struct rq *rq, struct task_struct *p,
 		if (p->prio > oldprio)
 			resched_task(rq->curr);
 	} else
-		check_preempt_curr(rq, p);
+		check_preempt_curr(rq, p, 0);
 }
 
 /*
@@ -1659,7 +1541,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p,
 	if (running)
 		resched_task(rq->curr);
 	else
-		check_preempt_curr(rq, p);
+		check_preempt_curr(rq, p, 0);
 }
 
 /* Account for a task changing its policy or group.
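The per-level arithmetic in the patched effective_load() loop can be read on its own. The C sketch below is not part of the patch; it mirrors the S, rw, s, wl and wg locals from the hunk above and shows how the explicit "divide only when b is non-zero" test replaces the removed D(n) macro. The function name and the numbers in main() are made up for illustration.

#include <stdio.h>

/*
 * Standalone sketch of one iteration of the patched effective_load() loop.
 * S  - shares of the task group
 * rw - weight of the group's runqueue on this cpu
 * s  - this cpu's portion of the group shares
 * wl - weight delta being propagated up the hierarchy
 * wg - group weight delta
 */
static long propagate_one_level(long S, long rw, long s, long wl, long wg)
{
	long a = S * (rw + wl);
	long b = S * rw + s * wg;
	long w = s * (a - b);

	/*
	 * The removed D(n) macro clamped a zero divisor to 1; the patch
	 * simply skips the division when b is zero.
	 */
	if (b)
		w /= b;

	return w;
}

int main(void)
{
	/* made-up example values, purely for illustration */
	printf("wl at next level: %ld\n",
	       propagate_one_level(1024, 2048, 512, 1024, 1024));
	return 0;
}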
