diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-02-28 10:31:01 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-02-28 10:31:01 -0800 |
commit | f66ffdedbf0fc059a92219bb08c1dbcac88f074b (patch) | |
tree | 9db4ad51764455123130e82fb7acf4f0a0be58ce /kernel | |
parent | 2531216f236cb2a1f39ffa12a4a9339541e52191 (diff) | |
parent | dd5feea14a7de4edbd9f36db1a2db785de91b88d (diff) |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (25 commits)
sched: Fix SCHED_MC regression caused by change in sched cpu_power
sched: Don't use possibly stale sched_class
kthread, sched: Remove reference to kthread_create_on_cpu
sched: cpuacct: Use bigger percpu counter batch values for stats counters
percpu_counter: Make __percpu_counter_add an inline function on UP
sched: Remove member rt_se from struct rt_rq
sched: Change usage of rt_rq->rt_se to rt_rq->tg->rt_se[cpu]
sched: Remove unused update_shares_locked()
sched: Use for_each_bit
sched: Queue a deboosted task to the head of the RT prio queue
sched: Implement head queueing for sched_rt
sched: Extend enqueue_task to allow head queueing
sched: Remove USER_SCHED
sched: Fix the place where group powers are updated
sched: Assume *balance is valid
sched: Remove load_balance_newidle()
sched: Unify load_balance{,_newidle}()
sched: Add a lock break for PREEMPT=y
sched: Remove from fwd decls
sched: Remove rq_iterator from move_one_task
...
Fix up trivial conflicts in kernel/sched.c
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/ksysfs.c | 8 | ||||
-rw-r--r-- | kernel/kthread.c | 2 | ||||
-rw-r--r-- | kernel/sched.c | 2125 | ||||
-rw-r--r-- | kernel/sched_cpupri.c | 4 | ||||
-rw-r--r-- | kernel/sched_fair.c | 1699 | ||||
-rw-r--r-- | kernel/sched_idletask.c | 23 | ||||
-rw-r--r-- | kernel/sched_rt.c | 54 | ||||
-rw-r--r-- | kernel/sys.c | 5 | ||||
-rw-r--r-- | kernel/user.c | 305 |
9 files changed, 1785 insertions, 2440 deletions
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 3feaf5a7451..6b1ccc3f020 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -197,16 +197,8 @@ static int __init ksysfs_init(void) goto group_exit; } - /* create the /sys/kernel/uids/ directory */ - error = uids_sysfs_init(); - if (error) - goto notes_exit; - return 0; -notes_exit: - if (notes_size > 0) - sysfs_remove_bin_file(kernel_kobj, ¬es_attr); group_exit: sysfs_remove_group(kernel_kobj, &kernel_attr_group); kset_exit: diff --git a/kernel/kthread.c b/kernel/kthread.c index fbb6222fe7e..82ed0ea1519 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -101,7 +101,7 @@ static void create_kthread(struct kthread_create_info *create) * * Description: This helper function creates and names a kernel * thread. The thread will be stopped: use wake_up_process() to start - * it. See also kthread_run(), kthread_create_on_cpu(). + * it. See also kthread_run(). * * When woken, the thread will run @threadfn() with @data as its * argument. @threadfn() can either call do_exit() directly if it is a diff --git a/kernel/sched.c b/kernel/sched.c index caf54e1eef6..6a212c97f52 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -233,7 +233,7 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b) */ static DEFINE_MUTEX(sched_domains_mutex); -#ifdef CONFIG_GROUP_SCHED +#ifdef CONFIG_CGROUP_SCHED #include <linux/cgroup.h> @@ -243,13 +243,7 @@ static LIST_HEAD(task_groups); /* task group related information */ struct task_group { -#ifdef CONFIG_CGROUP_SCHED struct cgroup_subsys_state css; -#endif - -#ifdef CONFIG_USER_SCHED - uid_t uid; -#endif #ifdef CONFIG_FAIR_GROUP_SCHED /* schedulable entities of this group on each cpu */ @@ -274,35 +268,7 @@ struct task_group { struct list_head children; }; -#ifdef CONFIG_USER_SCHED - -/* Helper function to pass uid information to create_sched_user() */ -void set_tg_uid(struct user_struct *user) -{ - user->tg->uid = user->uid; -} - -/* - * Root task group. - * Every UID task group (including init_task_group aka UID-0) will - * be a child to this group. - */ -struct task_group root_task_group; - -#ifdef CONFIG_FAIR_GROUP_SCHED -/* Default task group's sched entity on each cpu */ -static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); -/* Default task group's cfs_rq on each cpu */ -static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq); -#endif /* CONFIG_FAIR_GROUP_SCHED */ - -#ifdef CONFIG_RT_GROUP_SCHED -static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); -static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var); -#endif /* CONFIG_RT_GROUP_SCHED */ -#else /* !CONFIG_USER_SCHED */ #define root_task_group init_task_group -#endif /* CONFIG_USER_SCHED */ /* task_group_lock serializes add/remove of task groups and also changes to * a task group's cpu shares. @@ -318,11 +284,7 @@ static int root_task_group_empty(void) } #endif -#ifdef CONFIG_USER_SCHED -# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) -#else /* !CONFIG_USER_SCHED */ # define INIT_TASK_GROUP_LOAD NICE_0_LOAD -#endif /* CONFIG_USER_SCHED */ /* * A weight of 0 or 1 can cause arithmetics problems. @@ -348,11 +310,7 @@ static inline struct task_group *task_group(struct task_struct *p) { struct task_group *tg; -#ifdef CONFIG_USER_SCHED - rcu_read_lock(); - tg = __task_cred(p)->user->tg; - rcu_read_unlock(); -#elif defined(CONFIG_CGROUP_SCHED) +#ifdef CONFIG_CGROUP_SCHED tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), struct task_group, css); #else @@ -383,7 +341,7 @@ static inline struct task_group *task_group(struct task_struct *p) return NULL; } -#endif /* CONFIG_GROUP_SCHED */ +#endif /* CONFIG_CGROUP_SCHED */ /* CFS-related fields in a runqueue */ struct cfs_rq { @@ -478,7 +436,6 @@ struct rt_rq { struct rq *rq; struct list_head leaf_rt_rq_list; struct task_group *tg; - struct sched_rt_entity *rt_se; #endif }; @@ -1414,32 +1371,6 @@ static const u32 prio_to_wmult[40] = { /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, }; -static void activate_task(struct rq *rq, struct task_struct *p, int wakeup); - -/* - * runqueue iterator, to support SMP load-balancing between different - * scheduling classes, without having to expose their internal data - * structures to the load-balancing proper: - */ -struct rq_iterator { - void *arg; - struct task_struct *(*start)(void *); - struct task_struct *(*next)(void *); -}; - -#ifdef CONFIG_SMP -static unsigned long -balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_load_move, struct sched_domain *sd, - enum cpu_idle_type idle, int *all_pinned, - int *this_best_prio, struct rq_iterator *iterator); - -static int -iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, - struct sched_domain *sd, enum cpu_idle_type idle, - struct rq_iterator *iterator); -#endif - /* Time spent by the tasks of the cpu accounting group executing in ... */ enum cpuacct_stat_index { CPUACCT_STAT_USER, /* ... user mode */ @@ -1725,16 +1656,6 @@ static void update_shares(struct sched_domain *sd) } } -static void update_shares_locked(struct rq *rq, struct sched_domain *sd) -{ - if (root_task_group_empty()) - return; - - raw_spin_unlock(&rq->lock); - update_shares(sd); - raw_spin_lock(&rq->lock); -} - static void update_h_load(long cpu) { if (root_task_group_empty()) @@ -1749,10 +1670,6 @@ static inline void update_shares(struct sched_domain *sd) { } -static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd) -{ -} - #endif #ifdef CONFIG_PREEMPT @@ -1829,6 +1746,51 @@ static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) raw_spin_unlock(&busiest->lock); lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); } + +/* + * double_rq_lock - safely lock two runqueues + * + * Note this does not disable interrupts like task_rq_lock, + * you need to do so manually before calling. + */ +static void double_rq_lock(struct rq *rq1, struct rq *rq2) + __acquires(rq1->lock) + __acquires(rq2->lock) +{ + BUG_ON(!irqs_disabled()); + if (rq1 == rq2) { + raw_spin_lock(&rq1->lock); + __acquire(rq2->lock); /* Fake it out ;) */ + } else { + if (rq1 < rq2) { + raw_spin_lock(&rq1->lock); + raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); + } else { + raw_spin_lock(&rq2->lock); + raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); + } + } + update_rq_clock(rq1); + update_rq_clock(rq2); +} + +/* + * double_rq_unlock - safely unlock two runqueues + * + * Note this does not restore interrupts like task_rq_unlock, + * you need to do so manually after calling. + */ +static void double_rq_unlock(struct rq *rq1, struct rq *rq2) + __releases(rq1->lock) + __releases(rq2->lock) +{ + raw_spin_unlock(&rq1->lock); + if (rq1 != rq2) + raw_spin_unlock(&rq2->lock); + else + __release(rq2->lock); +} + #endif #ifdef CONFIG_FAIR_GROUP_SCHED @@ -1858,18 +1820,14 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) #endif } -#include "sched_stats.h" -#include "sched_idletask.c" -#include "sched_fair.c" -#include "sched_rt.c" -#ifdef CONFIG_SCHED_DEBUG -# include "sched_debug.c" -#endif +static const struct sched_class rt_sched_class; #define sched_class_highest (&rt_sched_class) #define for_each_class(class) \ for (class = sched_class_highest; class; class = class->next) +#include "sched_stats.h" + static void inc_nr_running(struct rq *rq) { rq->nr_running++; @@ -1907,13 +1865,14 @@ static void update_avg(u64 *avg, u64 sample) *avg += diff >> 3; } -static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) +static void +enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head) { if (wakeup) p->se.start_runtime = p->se.sum_exec_runtime; sched_info_queued(p); - p->sched_class->enqueue_task(rq, p, wakeup); + p->sched_class->enqueue_task(rq, p, wakeup, head); p->se.on_rq = 1; } @@ -1936,6 +1895,37 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep) } /* + * activate_task - move a task to the runqueue. + */ +static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) +{ + if (task_contributes_to_load(p)) + rq->nr_uninterruptible--; + + enqueue_task(rq, p, wakeup, false); + inc_nr_running(rq); +} + +/* + * deactivate_task - remove a task from the runqueue. + */ +static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) +{ + if (task_contributes_to_load(p)) + rq->nr_uninterruptible++; + + dequeue_task(rq, p, sleep); + dec_nr_running(rq); +} + +#include "sched_idletask.c" +#include "sched_fair.c" +#include "sched_rt.c" +#ifdef CONFIG_SCHED_DEBUG +# include "sched_debug.c" +#endif + +/* * __normal_prio - return the priority that is based on the static prio */ static inline int __normal_prio(struct task_struct *p) @@ -1981,30 +1971,6 @@ static int effective_prio(struct task_struct *p) return p->prio; } -/* - * activate_task - move a task to the runqueue. - */ -static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) -{ - if (task_contributes_to_load(p)) - rq->nr_uninterruptible--; - - enqueue_task(rq, p, wakeup); - inc_nr_running(rq); -} - -/* - * deactivate_task - remove a task from the runqueue. - */ -static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) -{ - if (task_contributes_to_load(p)) - rq->nr_uninterruptible++; - - dequeue_task(rq, p, sleep); - dec_nr_running(rq); -} - /** * task_curr - is this task currently executing on a CPU? * @p: the task in question. @@ -3148,50 +3114,6 @@ static void update_cpu_load(struct rq *this_rq) #ifdef CONFIG_SMP /* - * double_rq_lock - safely lock two runqueues - * - * Note this does not disable interrupts like task_rq_lock, - * you need to do so manually before calling. - */ -static void double_rq_lock(struct rq *rq1, struct rq *rq2) - __acquires(rq1->lock) - __acquires(rq2->lock) -{ - BUG_ON(!irqs_disabled()); - if (rq1 == rq2) { - raw_spin_lock(&rq1->lock); - __acquire(rq2->lock); /* Fake it out ;) */ - } else { - if (rq1 < rq2) { - raw_spin_lock(&rq1->lock); - raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); - } else { - raw_spin_lock(&rq2->lock); - raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); - } - } - update_rq_clock(rq1); - update_rq_clock(rq2); -} - -/* - * double_rq_unlock - safely unlock two runqueues - * - * Note this does not restore interrupts like task_rq_unlock, - * you need to do so manually after calling. - */ -static void double_rq_unlock(struct rq *rq1, struct rq *rq2) - __releases(rq1->lock) - __releases(rq2->lock) -{ - raw_spin_unlock(&rq1->lock); - if (rq1 != rq2) - raw_spin_unlock(&rq2->lock); - else - __release(rq2->lock); -} - -/* * sched_exec - execve() is a valuable balancing opportunity, because at * this point the task has the smallest effective memory and cache footprint. */ @@ -3239,1782 +3161,6 @@ again: task_rq_unlock(rq, &flags); } -/* - * pull_task - move a task from a remote runqueue to the local runqueue. - * Both runqueues must be locked. - */ -static void pull_task(struct rq *src_rq, struct task_struct *p, - struct rq *this_rq, int this_cpu) -{ - deactivate_task(src_rq, p, 0); - set_task_cpu(p, this_cpu); - activate_task(this_rq, p, 0); - check_preempt_curr(this_rq, p, 0); -} - -/* - * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? - */ -static -int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, - struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned) -{ - int tsk_cache_hot = 0; - /* - * We do not migrate tasks that are: - * 1) running (obviously), or - * 2) cannot be migrated to this CPU due to cpus_allowed, or - * 3) are cache-hot on their current CPU. - */ - if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) { - schedstat_inc(p, se.nr_failed_migrations_affine); - return 0; - } - *all_pinned = 0; - - if (task_running(rq, p)) { - schedstat_inc(p, se.nr_failed_migrations_running); - return 0; - } - - /* - * Aggressive migration if: - * 1) task is cache cold, or - * 2) too many balance attempts have failed. - */ - - tsk_cache_hot = task_hot(p, rq->clock, sd); - if (!tsk_cache_hot || - sd->nr_balance_failed > sd->cache_nice_tries) { -#ifdef CONFIG_SCHEDSTATS - if (tsk_cache_hot) { - schedstat_inc(sd, lb_hot_gained[idle]); - schedstat_inc(p, se.nr_forced_migrations); - } -#endif - return 1; - } - - if (tsk_cache_hot) { - schedstat_inc(p, se.nr_failed_migrations_hot); - return 0; - } - return 1; -} - -static unsigned long -balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_load_move, struct sched_domain *sd, - enum cpu_idle_type idle, int *all_pinned, - int *this_best_prio, struct rq_iterator *iterator) -{ - int loops = 0, pulled = 0, pinned = 0; - struct task_struct *p; - long rem_load_move = max_load_move; - - if (max_load_move == 0) - goto out; - - pinned = 1; - - /* - * Start the load-balancing iterator: - */ - p = iterator->start(iterator->arg); -next: - if (!p || loops++ > sysctl_sched_nr_migrate) - goto out; - - if ((p->se.load.weight >> 1) > rem_load_move || - !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { - p = iterator->next(iterator->arg); - goto next; - } - - pull_task(busiest, p, this_rq, this_cpu); - pulled++; - rem_load_move -= p->se.load.weight; - -#ifdef CONFIG_PREEMPT - /* - * NEWIDLE balancing is a source of latency, so preemptible kernels - * will stop after the first task is pulled to minimize the critical - * section. - */ - if (idle == CPU_NEWLY_IDLE) - goto out; -#endif - - /* - * We only want to steal up to the prescribed amount of weighted load. - */ - if (rem_load_move > 0) { - if (p->prio < *this_best_prio) - *this_best_prio = p->prio; - p = iterator->next(iterator->arg); - goto next; - } -out: - /* - * Right now, this is one of only two places pull_task() is called, - * so we can safely collect pull_task() stats here rather than - * inside pull_task(). - */ - schedstat_add(sd, lb_gained[idle], pulled); - - if (all_pinned) - *all_pinned = pinned; - - return max_load_move - rem_load_move; -} - -/* - * move_tasks tries to move up to max_load_move weighted load from busiest to - * this_rq, as part of a balancing operation within domain "sd". - * Returns 1 if successful and 0 otherwise. - * - * Called with both runqueues locked. - */ -static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_load_move, - struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned) -{ - const struct sched_class *class = sched_class_highest; - unsigned long total_load_moved = 0; - int this_best_prio = this_rq->curr->prio; - - do { - total_load_moved += - class->load_balance(this_rq, this_cpu, busiest, - max_load_move - total_load_moved, - sd, idle, all_pinned, &this_best_prio); - class = class->next; - -#ifdef CONFIG_PREEMPT - /* - * NEWIDLE balancing is a source of latency, so preemptible - * kernels will stop after the first task is pulled to minimize - * the critical section. - */ - if (idle == CPU_NEWLY_IDLE && this_rq->nr_running) - break; -#endif - } while (class && max_load_move > total_load_moved); - - return total_load_moved > 0; -} - -static int -iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, - struct sched_domain *sd, enum cpu_idle_type idle, - struct rq_iterator *iterator) -{ - struct task_struct *p = iterator->start(iterator->arg); - int pinned = 0; - - while (p) { - if (can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { - pull_task(busiest, p, this_rq, this_cpu); - /* - * Right now, this is only the second place pull_task() - * is called, so we can safely collect pull_task() - * stats here rather than inside pull_task(). - */ - schedstat_inc(sd, lb_gained[idle]); - - return 1; - } - p = iterator->next(iterator->arg); - } - - return 0; -} - -/* - * move_one_task tries to move exactly one task from busiest to this_rq, as - * part of active balancing operations within "domain". - * Returns 1 if successful and 0 otherwise. - * - * Called with both runqueues locked. - */ -static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, - struct sched_domain *sd, enum cpu_idle_type idle) -{ - const struct sched_class *class; - - for_each_class(class) { - if (class->move_one_task(this_rq, this_cpu, busiest, sd, idle)) - return 1; - } - - return 0; -} -/********** Helpers for find_busiest_group ************************/ -/* - * sd_lb_stats - Structure to store the statistics of a sched_domain - * during load balancing. - */ -struct sd_lb_stats { - struct sched_group *busiest; /* Busiest group in this sd */ - struct sched_group *this; /* Local group in this sd */ - unsigned long total_load; /* Total load of all groups in sd */ - unsigned long total_pwr; /* Total power of all groups in sd */ - unsigned long avg_load; /* Average load across all groups in sd */ - - /** Statistics of this group */ - unsigned long this_load; - unsigned long this_load_per_task; - unsigned long this_nr_running; - - /* Statistics of the busiest group */ - unsigned long max_load; - unsigned long busiest_load_per_task; - unsigned long busiest_nr_running; - - int group_imb; /* Is there imbalance in this sd */ -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) - int power_savings_balance; /* Is powersave balance needed for this sd */ - struct sched_group *group_min; /* Least loaded group in sd */ - struct sched_group *group_leader; /* Group which relieves group_min */ - unsigned long min_load_per_task; /* load_per_task in group_min */ - unsigned long leader_nr_running; /* Nr running of group_leader */ - unsigned long min_nr_running; /* Nr running of group_min */ -#endif -}; - -/* - * sg_lb_stats - stats of a sched_group required for load_balancing - */ -struct sg_lb_stats { - unsigned long avg_load; /*Avg load across the CPUs of the group */ - unsigned long group_load; /* Total load over the CPUs of the group */ - unsigned long sum_nr_running; /* Nr tasks running in the group */ - unsigned long sum_weighted_load; /* Weighted load of group's tasks */ - unsigned long group_capacity; - int group_imb; /* Is there an imbalance in the group ? */ -}; - -/** - * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. - * @group: The group whose first cpu is to be returned. - */ -static inline unsigned int group_first_cpu(struct sched_group *group) -{ - return cpumask_first(sched_group_cpus(group)); -} - -/** - * get_sd_load_idx - Obtain the load index for a given sched domain. - * @sd: The sched_domain whose load_idx is to be obtained. - * @idle: The Idle status of the CPU for whose sd load_icx is obtained. - */ -static inline int get_sd_load_idx(struct sched_domain *sd, - enum cpu_idle_type idle) -{ - int load_idx; - - switch (idle) { - case CPU_NOT_IDLE: - load_idx = sd->busy_idx; - break; - - case CPU_NEWLY_IDLE: - load_idx = sd->newidle_idx; - break; - default: - load_idx = sd->idle_idx; - break; - } - - return load_idx; -} - - -#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) -/** - * init_sd_power_savings_stats - Initialize power savings statistics for - * the given sched_domain, during load balancing. - * - * @sd: Sched domain whose power-savings statistics are to be initialized. - * @sds: Variable containing the statistics for sd. - * @idle: Idle status of the CPU at which we're performing load-balancing. - */ -static inline void init_sd_power_savings_stats(struct sched_domain *sd, - struct sd_lb_stats *sds, enum cpu_idle_type idle) -{ - /* - * Busy processors will not participate in power savings - * balance. - */ - if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) - sds->power_savings_balance = 0; - else { - sds->power_savings_balance = 1; - sds->min_nr_running = ULONG_MAX; - sds->leader_nr_running = 0; - } -} - -/** - * update_sd_power_savings_stats - Update the power saving stats for a - * sched_domain while performing load balancing. - * - * @group: sched_group belonging to the sched_domain under consideration. - * @sds: Variable containing the statistics of the sched_domain - * @local_group: Does group contain the CPU for which we're performing - * load balancing ? - * @sgs: Variable containing the statistics of the group. - */ -static inline void update_sd_power_savings_stats(struct sched_group *group, - struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs) -{ - - if (!sds->power_savings_balance) - return; - - /* - * If the local group is idle or completely loaded - * no need to do power savings balance at this domain - */ - if (local_group && (sds->this_nr_running >= sgs->group_capacity || - !sds->this_nr_running)) - sds->power_savings_balance = 0; - - /* - * If a group is already running at full capacity or idle, - * don't include that group in power savings calculations - */ - if (!sds->power_savings_balance || - sgs->sum_nr_running >= sgs->group_capacity || - !sgs->sum_nr_running) - return; - - /* - * Calculate the group which has the least non-idle load. - * This is the group from where we need to pick up the load - * for saving power - */ - if ((sgs->sum_nr_running < sds->min_nr_running) || - (sgs->sum_nr_running == sds->min_nr_running && - group_first_cpu(group) > group_first_cpu(sds->group_min))) { - sds->group_min = group; - sds->min_nr_running = sgs->sum_nr_running; - sds->min_load_per_task = sgs->sum_weighted_load / - sgs->sum_nr_running; - } - - /* - * Calculate the group which is almost near its - * capacity but still has some space to pick up some load - * from other group and save more power - */ - if (sgs->sum_nr_running + 1 > sgs->group_capacity) - return; - - if (sgs->sum_nr_running > sds->leader_nr_running || - (sgs->sum_nr_running == sds->leader_nr_running && - group_first_cpu(group) < group_first_cpu(sds->group_leader))) { - sds->group_leader = group; - sds->leader_nr_running = sgs->sum_nr_running; - } -} - -/** - * check_power_save_busiest_group - see if there is potential for some power-savings balance - * @sds: Variable containing the statistics of the sched_domain - * under consideration. - * @this_cpu: Cpu at which we're currently performing load-balancing. - * @imbalance: Variable to store the imbalance. - * - * Description: - * Check if we have potential to perform some power-savings balance. - * If yes, set the busiest group to be the least loaded group in the - * sched_domain, so that it's CPUs can be put to idle. - * - * Returns 1 if there is potential to perform power-savings balance. - * Else returns 0. - */ -static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, - int this_cpu, unsigned long *imbalance) -{ - if (!sds->power_savings_balance) - return 0; - - if (sds->this != sds->group_leader || - sds->group_leader == sds->group_min) - return 0; - - *imbalance = sds->min_load_per_task; - sds->busiest = sds->group_min; - - return 1; - -} -#else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ -static inline void init_sd_power_savings_stats(struct sched_domain *sd, - struct sd_lb_stats *sds, enum cpu_idle_type idle) -{ - return; -} - -static inline void update_sd_power_savings_stats(struct sched_group *group, - struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs) -{ - return; -} - -static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, - int this_cpu, unsigned long *imbalance) -{ - return 0; -} -#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ - - -unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) -{ - return SCHED_LOAD_SCALE; -} - -unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) -{ - return default_scale_freq_power(sd, cpu); -} - -unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) -{ - unsigned long weight = cpumask_weight(sched_domain_span(sd)); - unsigned long smt_gain = sd->smt_gain; - - smt_gain /= weight; - - return smt_gain; -} - -unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) -{ - return default_scale_smt_power(sd, cpu); -} - -unsigned long scale_rt_power(int cpu) -{ - struct rq *rq = cpu_rq(cpu); - u64 total, available; - - sched_avg_update(rq); - - total = sched_avg_period() + (rq->clock - rq->age_stamp); - available = total - rq->rt_avg; - - if (unlikely((s64)total < SCHED_LOAD_SCALE)) - total = SCHED_LOAD_SCALE; - - total >>= SCHED_LOAD_SHIFT; - - return div_u64(available, total); -} - -static void update_cpu_power(struct sched_domain *sd, int cpu) -{ - unsigned long weight = cpumask_weight(sched_domain_span(sd)); - unsigned long power = SCHED_LOAD_SCALE; - struct sched_group *sdg = sd->groups; - - if (sched_feat(ARCH_POWER)) - power *= arch_scale_freq_power(sd, cpu); - else - power *= default_scale_freq_power(sd, cpu); - - power >>= SCHED_LOAD_SHIFT; - - if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { - if (sched_feat(ARCH_POWER)) - power *= arch_scale_smt_power(sd, cpu); - else - power *= default_scale_smt_power(sd, cpu); - - power >>= SCHED_LOAD_SHIFT; - } - - power *= scale_rt_power(cpu); - power >>= SCHED_LOAD_SHIFT; - - if (!power) - power = 1; - - sdg->cpu_power = power; -} - -static void update_group_power(struct sched_domain *sd, int cpu) -{ - struct sched_domain *child = sd->child; - struct sched_group *group, *sdg = sd->groups; - unsigned long power; - - if (!child) { - update_cpu_power(sd, cpu); - return; - } - - power = 0; - - group = child->groups; - do { - power += group->cpu_power; - group = group->next; - } while (group != child->groups); - - sdg->cpu_power = power; -} - -/** - * update_sg_lb_stats - Update sched_group's statistics for load balancing. - * @sd: The sched_domain whose statistics are to be updated. - * @group: sched_group whose statistics are to be updated. - * @this_cpu: Cpu for which load balance is currently performed. - * @idle: Idle status of this_cpu - * @load_idx: Load index of sched_domain of this_cpu for load calc. - * @sd_idle: Idle status of the sched_domain containing group. - * @local_group: Does group contain this_cpu. - * @cpus: Set of cpus considered for load balancing. - * @balance: Should we balance. - * @sgs: variable to hold the statistics for this group. - */ -static inline void update_sg_lb_stats(struct sched_domain *sd, - struct sched_group *group, int this_cpu, - enum cpu_idle_type idle, int load_idx, int *sd_idle, - int local_group, const struct cpumask *cpus, - int *balance, struct sg_lb_stats *sgs) -{ - unsigned long load, max_cpu_load, min_cpu_load; - int i; - unsigned int balance_cpu = -1, first_idle_cpu = 0; - unsigned long sum_avg_load_per_task; - unsigned long avg_load_per_task; - - if (local_group) { - balance_cpu = group_first_cpu(group); - if (balance_cpu == this_cpu) - update_group_power(sd, this_cpu); - } - - /* Tally up the load of all CPUs in the group */ - sum_avg_load_per_task = avg_load_per_task = 0; - max_cpu_load = 0; - min_cpu_load = ~0UL; - - for_each_cpu_and(i, sched_group_cpus(group), cpus) { - struct rq *rq = cpu_rq(i); - - if (*sd_idle && rq->nr_running) - *sd_idle = 0; - - /* Bias balancing toward cpus of our domain */ - if (local_group) { - if (idle_cpu(i) && !first_idle_cpu) { - first_idle_cpu = 1; - balance_cpu = i; - } - - load = target_load(i, load_idx); - } else { - load = source_load(i, load_idx); - if (load > max_cpu_load) - max_cpu_load = load; - if (min_cpu_load > load) - min_cpu_load = load; - } - - sgs->group_load += load; - sgs->sum_nr_running += rq->nr_running; - sgs->sum_weighted_load += weighted_cpuload(i); - - sum_avg_load_per_task += cpu_avg_load_per_task(i); - } - - /* - * First idle cpu or the first cpu(busiest) in this sched group - * is eligible for doing load balancing at this and above - * domains. In the newly idle case, we will allow all the cpu's - * to do the newly idle load balance. - */ - if (idle != CPU_NEWLY_IDLE && local_group && - balance_cpu != this_cpu && balance) { - *balance = 0; - return; - } - - /* Adjust by relative CPU power of the group */ - sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power; - - - /* - * Consider the group unbalanced when the imbalance is larger - * than the average weight of two tasks. - * - * APZ: with cgroup the avg task weight can vary wildly and - * might not be a suitable number - should we keep a - * normalized nr_running number somewhere that negates - * the hierarchy? - */ - avg_load_per_task = (sum_avg_load_per_task * SCHED_LOAD_SCALE) / - group->cpu_power; - - if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task) - sgs->group_imb = 1; - - sgs->group_capacity = - DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); -} - -/** - * update_sd_lb_stats - Update sched_group's statistics for load balancing. - * @sd: sched_domain whose statistics are to be updated. - * @this_cpu: Cpu for which load balance is currently performed. - * @idle: Idle status of this_cpu - * @sd_idle: Idle status of the sched_domain containing group. - * @cpus: Set of cpus considered for load balancing. - * @balance: Should we balance. - * @sds: variable to hold the statistics for this sched_domain. - */ -static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, - enum cpu_idle_type idle, int *sd_idle, - const struct cpumask *cpus, int *balance, - struct sd_lb_stats *sds) -{ - struct sched_domain *child = sd->child; - struct sched_group *group = sd->groups; - struct sg_lb_stats sgs; - int load_idx, prefer_sibling = 0; |