diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit.c | 16 | ||||
-rw-r--r-- | kernel/delayacct.c | 2 | ||||
-rw-r--r-- | kernel/exit.c | 6 | ||||
-rw-r--r-- | kernel/fork.c | 6 | ||||
-rw-r--r-- | kernel/hrtimer.c | 24 | ||||
-rw-r--r-- | kernel/ksysfs.c | 8 | ||||
-rw-r--r-- | kernel/lockdep.c | 26 | ||||
-rw-r--r-- | kernel/lockdep_proc.c | 61 | ||||
-rw-r--r-- | kernel/mutex.c | 35 | ||||
-rw-r--r-- | kernel/nsproxy.c | 15 | ||||
-rw-r--r-- | kernel/posix-timers.c | 6 | ||||
-rw-r--r-- | kernel/rcupdate.c | 8 | ||||
-rw-r--r-- | kernel/sched.c | 1445 | ||||
-rw-r--r-- | kernel/sched_debug.c | 282 | ||||
-rw-r--r-- | kernel/sched_fair.c | 811 | ||||
-rw-r--r-- | kernel/sched_idletask.c | 8 | ||||
-rw-r--r-- | kernel/sched_rt.c | 19 | ||||
-rw-r--r-- | kernel/sched_stats.h | 28 | ||||
-rw-r--r-- | kernel/softirq.c | 4 | ||||
-rw-r--r-- | kernel/sysctl.c | 41 | ||||
-rw-r--r-- | kernel/time/Kconfig | 5 | ||||
-rw-r--r-- | kernel/time/Makefile | 2 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 3 | ||||
-rw-r--r-- | kernel/time/tick-broadcast.c | 44 | ||||
-rw-r--r-- | kernel/time/tick-common.c | 5 | ||||
-rw-r--r-- | kernel/user.c | 249 |
26 files changed, 1834 insertions, 1325 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index eb0f9165b40..2924251a654 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -847,18 +847,10 @@ static void audit_receive_skb(struct sk_buff *skb) } /* Receive messages from netlink socket. */ -static void audit_receive(struct sock *sk, int length) +static void audit_receive(struct sk_buff *skb) { - struct sk_buff *skb; - unsigned int qlen; - mutex_lock(&audit_cmd_mutex); - - for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { - skb = skb_dequeue(&sk->sk_receive_queue); - audit_receive_skb(skb); - kfree_skb(skb); - } + audit_receive_skb(skb); mutex_unlock(&audit_cmd_mutex); } @@ -876,8 +868,8 @@ static int __init audit_init(void) printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, - NULL, THIS_MODULE); + audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, 0, + audit_receive, NULL, THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); else diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 81e69782963..09e9574eeb2 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -119,7 +119,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) * No locking available for sched_info (and too expensive to add one) * Mitigate by taking snapshot of values */ - t1 = tsk->sched_info.pcnt; + t1 = tsk->sched_info.pcount; t2 = tsk->sched_info.run_delay; t3 = tsk->sched_info.cpu_time; diff --git a/kernel/exit.c b/kernel/exit.c index 993369ee94d..7f7959de4a8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -111,6 +111,7 @@ static void __exit_signal(struct task_struct *tsk) */ sig->utime = cputime_add(sig->utime, tsk->utime); sig->stime = cputime_add(sig->stime, tsk->stime); + sig->gtime = cputime_add(sig->gtime, tsk->gtime); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; sig->nvcsw += tsk->nvcsw; @@ -1242,6 +1243,11 @@ static int wait_task_zombie(struct task_struct *p, int noreap, cputime_add(p->stime, cputime_add(sig->stime, sig->cstime))); + psig->cgtime = + cputime_add(psig->cgtime, + cputime_add(p->gtime, + cputime_add(sig->gtime, + sig->cgtime))); psig->cmin_flt += p->min_flt + sig->min_flt + sig->cmin_flt; psig->cmaj_flt += diff --git a/kernel/fork.c b/kernel/fork.c index 33f12f48684..3fc3c138391 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -877,6 +877,8 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts sig->tty_old_pgrp = NULL; sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; + sig->gtime = cputime_zero; + sig->cgtime = cputime_zero; sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; @@ -1045,6 +1047,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->utime = cputime_zero; p->stime = cputime_zero; + p->gtime = cputime_zero; #ifdef CONFIG_TASK_XACCT p->rchar = 0; /* I/O counter: bytes read */ @@ -1608,7 +1611,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) err = -EINVAL; if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| - CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER)) + CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER| + CLONE_NEWNET)) goto bad_unshare_out; if ((err = unshare_thread(unshare_flags))) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index c21ca6bfaa6..dc8a4451d79 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -277,6 +277,30 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec) } EXPORT_SYMBOL_GPL(ktime_add_ns); + +/** + * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable + * @kt: minuend + * @nsec: the scalar nsec value to subtract + * + * Returns the subtraction of @nsec from @kt in ktime_t format + */ +ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec) +{ + ktime_t tmp; + + if (likely(nsec < NSEC_PER_SEC)) { + tmp.tv64 = nsec; + } else { + unsigned long rem = do_div(nsec, NSEC_PER_SEC); + + tmp = ktime_set((long)nsec, rem); + } + + return ktime_sub(kt, tmp); +} + +EXPORT_SYMBOL_GPL(ktime_sub_ns); # endif /* !CONFIG_KTIME_SCALAR */ /* diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index d0e5c48e18c..6046939d080 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/kexec.h> +#include <linux/sched.h> #define KERNEL_ATTR_RO(_name) \ static struct subsys_attribute _name##_attr = __ATTR_RO(_name) @@ -116,6 +117,13 @@ static int __init ksysfs_init(void) ¬es_attr); } + /* + * Create "/sys/kernel/uids" directory and corresponding root user's + * directory under it. + */ + if (!error) + error = uids_kobject_init(); + return error; } diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 734da579ad1..a6f1ee9c92d 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -1521,7 +1521,7 @@ cache_hit: } static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, - struct held_lock *hlock, int chain_head) + struct held_lock *hlock, int chain_head, u64 chain_key) { /* * Trylock needs to maintain the stack of held locks, but it @@ -1534,7 +1534,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, * graph_lock for us) */ if (!hlock->trylock && (hlock->check == 2) && - lookup_chain_cache(curr->curr_chain_key, hlock->class)) { + lookup_chain_cache(chain_key, hlock->class)) { /* * Check whether last held lock: * @@ -1576,7 +1576,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, #else static inline int validate_chain(struct task_struct *curr, struct lockdep_map *lock, struct held_lock *hlock, - int chain_head) + int chain_head, u64 chain_key) { return 1; } @@ -2450,11 +2450,11 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, chain_head = 1; } chain_key = iterate_chain_key(chain_key, id); - curr->curr_chain_key = chain_key; - if (!validate_chain(curr, lock, hlock, chain_head)) + if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) return 0; + curr->curr_chain_key = chain_key; curr->lockdep_depth++; check_chain_key(curr); #ifdef CONFIG_DEBUG_LOCKDEP @@ -3199,3 +3199,19 @@ void debug_show_held_locks(struct task_struct *task) } EXPORT_SYMBOL_GPL(debug_show_held_locks); + +void lockdep_sys_exit(void) +{ + struct task_struct *curr = current; + + if (unlikely(curr->lockdep_depth)) { + if (!debug_locks_off()) + return; + printk("\n================================================\n"); + printk( "[ BUG: lock held when returning to user space! ]\n"); + printk( "------------------------------------------------\n"); + printk("%s/%d is leaving the kernel with locks still held!\n", + curr->comm, curr->pid); + lockdep_print_held_locks(curr); + } +} diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index c851b2dcc68..8a135bd163c 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -25,28 +25,38 @@ static void *l_next(struct seq_file *m, void *v, loff_t *pos) { - struct lock_class *class = v; + struct lock_class *class; (*pos)++; - if (class->lock_entry.next != &all_lock_classes) - class = list_entry(class->lock_entry.next, struct lock_class, - lock_entry); - else - class = NULL; - m->private = class; + if (v == SEQ_START_TOKEN) + class = m->private; + else { + class = v; + + if (class->lock_entry.next != &all_lock_classes) + class = list_entry(class->lock_entry.next, + struct lock_class, lock_entry); + else + class = NULL; + } return class; } static void *l_start(struct seq_file *m, loff_t *pos) { - struct lock_class *class = m->private; + struct lock_class *class; + loff_t i = 0; - if (&class->lock_entry == all_lock_classes.next) - seq_printf(m, "all lock classes:\n"); + if (*pos == 0) + return SEQ_START_TOKEN; - return class; + list_for_each_entry(class, &all_lock_classes, lock_entry) { + if (++i == *pos) + return class; + } + return NULL; } static void l_stop(struct seq_file *m, void *v) @@ -101,10 +111,15 @@ static void print_name(struct seq_file *m, struct lock_class *class) static int l_show(struct seq_file *m, void *v) { unsigned long nr_forward_deps, nr_backward_deps; - struct lock_class *class = m->private; + struct lock_class *class = v; struct lock_list *entry; char c1, c2, c3, c4; + if (v == SEQ_START_TOKEN) { + seq_printf(m, "all lock classes:\n"); + return 0; + } + seq_printf(m, "%p", class->key); #ifdef CONFIG_DEBUG_LOCKDEP seq_printf(m, " OPS:%8ld", class->ops); @@ -523,10 +538,11 @@ static void *ls_start(struct seq_file *m, loff_t *pos) { struct lock_stat_seq *data = m->private; - if (data->iter == data->stats) - seq_header(m); + if (*pos == 0) + return SEQ_START_TOKEN; - if (data->iter == data->iter_end) + data->iter = data->stats + *pos; + if (data->iter >= data->iter_end) data->iter = NULL; return data->iter; @@ -538,8 +554,13 @@ static void *ls_next(struct seq_file *m, void *v, loff_t *pos) (*pos)++; - data->iter = v; - data->iter++; + if (v == SEQ_START_TOKEN) + data->iter = data->stats; + else { + data->iter = v; + data->iter++; + } + if (data->iter == data->iter_end) data->iter = NULL; @@ -552,9 +573,11 @@ static void ls_stop(struct seq_file *m, void *v) static int ls_show(struct seq_file *m, void *v) { - struct lock_stat_seq *data = m->private; + if (v == SEQ_START_TOKEN) + seq_header(m); + else + seq_stats(m, v); - seq_stats(m, data->iter); return 0; } diff --git a/kernel/mutex.c b/kernel/mutex.c index 691b86564dd..d7fe50cc556 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -51,6 +51,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) EXPORT_SYMBOL(__mutex_init); +#ifndef CONFIG_DEBUG_LOCK_ALLOC /* * We split the mutex lock/unlock logic into separate fastpath and * slowpath functions, to reduce the register pressure on the fastpath. @@ -92,6 +93,7 @@ void inline fastcall __sched mutex_lock(struct mutex *lock) } EXPORT_SYMBOL(mutex_lock); +#endif static void fastcall noinline __sched __mutex_unlock_slowpath(atomic_t *lock_count); @@ -122,7 +124,8 @@ EXPORT_SYMBOL(mutex_unlock); * Lock a mutex (possibly interruptible), slowpath: */ static inline int __sched -__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) +__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, + unsigned long ip) { struct task_struct *task = current; struct mutex_waiter waiter; @@ -132,7 +135,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) spin_lock_mutex(&lock->wait_lock, flags); debug_mutex_lock_common(lock, &waiter); - mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + mutex_acquire(&lock->dep_map, subclass, 0, ip); debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); /* add waiting tasks to the end of the waitqueue (FIFO): */ @@ -143,7 +146,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) if (old_val == 1) goto done; - lock_contended(&lock->dep_map, _RET_IP_); + lock_contended(&lock->dep_map, ip); for (;;) { /* @@ -166,7 +169,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) if (unlikely(state == TASK_INTERRUPTIBLE && signal_pending(task))) { mutex_remove_waiter(lock, &waiter, task_thread_info(task)); - mutex_release(&lock->dep_map, 1, _RET_IP_); + mutex_release(&lock->dep_map, 1, ip); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); @@ -197,20 +200,12 @@ done: return 0; } -static void fastcall noinline __sched -__mutex_lock_slowpath(atomic_t *lock_count) -{ - struct mutex *lock = container_of(lock_count, struct mutex, count); - - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0); -} - #ifdef CONFIG_DEBUG_LOCK_ALLOC void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_nested); @@ -219,7 +214,7 @@ int __sched mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass); + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); @@ -271,6 +266,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count) __mutex_unlock_common_slowpath(lock_count, 1); } +#ifndef CONFIG_DEBUG_LOCK_ALLOC /* * Here come the less common (and hence less performance-critical) APIs: * mutex_lock_interruptible() and mutex_trylock(). @@ -298,13 +294,22 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock) EXPORT_SYMBOL(mutex_lock_interruptible); +static void fastcall noinline __sched +__mutex_lock_slowpath(atomic_t *lock_count) +{ + struct mutex *lock = container_of(lock_count, struct mutex, count); + + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_); +} + static int fastcall noinline __sched __mutex_lock_interruptible_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0); + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_); } +#endif /* * Spinlock based trylock, we take the spinlock and check whether we diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index a4fb7d46971..f1decd21a53 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -20,6 +20,7 @@ #include <linux/mnt_namespace.h> #include <linux/utsname.h> #include <linux/pid_namespace.h> +#include <net/net_namespace.h> static struct kmem_cache *nsproxy_cachep; @@ -98,8 +99,17 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_user; } + new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns); + if (IS_ERR(new_nsp->net_ns)) { + err = PTR_ERR(new_nsp->net_ns); + goto out_net; + } + return new_nsp; +out_net: + if (new_nsp->user_ns) + put_user_ns(new_nsp->user_ns); out_user: if (new_nsp->pid_ns) put_pid_ns(new_nsp->pid_ns); @@ -132,7 +142,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) get_nsproxy(old_ns); - if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER))) + if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET))) return 0; if (!capable(CAP_SYS_ADMIN)) { @@ -164,6 +174,7 @@ void free_nsproxy(struct nsproxy *ns) put_pid_ns(ns->pid_ns); if (ns->user_ns) put_user_ns(ns->user_ns); + put_net(ns->net_ns); kmem_cache_free(nsproxy_cachep, ns); } @@ -177,7 +188,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, int err = 0; if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWUSER))) + CLONE_NEWUSER | CLONE_NEWNET))) return 0; if (!capable(CAP_SYS_ADMIN)) diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 7a15afb73ed..57efe0400bc 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -712,7 +712,7 @@ sys_timer_getoverrun(timer_t timer_id) { struct k_itimer *timr; int overrun; - long flags; + unsigned long flags; timr = lock_timer(timer_id, &flags); if (!timr) @@ -784,7 +784,7 @@ sys_timer_settime(timer_t timer_id, int flags, struct k_itimer *timr; struct itimerspec new_spec, old_spec; int error = 0; - long flag; + unsigned long flag; struct itimerspec *rtn = old_setting ? &old_spec : NULL; if (!new_setting) @@ -836,7 +836,7 @@ asmlinkage long sys_timer_delete(timer_t timer_id) { struct k_itimer *timer; - long flags; + unsigned long flags; retry_delete: timer = lock_timer(timer_id, &flags); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 2c2dd8410dc..130214f3d22 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -49,6 +49,14 @@ #include <linux/cpu.h> #include <linux/mutex.h> +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key rcu_lock_key; +struct lockdep_map rcu_lock_map = + STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); + +EXPORT_SYMBOL_GPL(rcu_lock_map); +#endif + /* Definition for rcupdate control block. */ static struct rcu_ctrlblk rcu_ctrlblk = { .cur = -300, diff --git a/kernel/sched.c b/kernel/sched.c index 6107a0cd632..bba57adb950 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -61,6 +61,7 @@ #include <linux/delayacct.h> #include <linux/reciprocal_div.h> #include <linux/unistd.h> +#include <linux/pagemap.h> #include <asm/tlb.h> @@ -95,7 +96,7 @@ unsigned long long __attribute__((weak)) sched_clock(void) /* * Some helpers for converting nanosecond timing to jiffy resolution */ -#define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ)) +#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (1000000000 / HZ)) #define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) #define NICE_0_LOAD SCHED_LOAD_SCALE @@ -104,11 +105,9 @@ unsigned long long __attribute__((weak)) sched_clock(void) /* * These are the 'tuning knobs' of the scheduler: * - * Minimum timeslice is 5 msecs (or 1 jiffy, whichever is larger), - * default timeslice is 100 msecs, maximum timeslice is 800 msecs. + * default timeslice is 100 msecs (used only for SCHED_RR tasks). * Timeslices get refilled after they expire. */ -#define MIN_TIMESLICE max(5 * HZ / 1000, 1) #define DEF_TIMESLICE (100 * HZ / 1000) #ifdef CONFIG_SMP @@ -132,24 +131,6 @@ static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val) } #endif -#define SCALE_PRIO(x, prio) \ - max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE) - -/* - * static_prio_timeslice() scales user-nice values [ -20 ... 0 ... 19 ] - * to time slice values: [800ms ... 100ms ... 5ms] - */ -static unsigned int static_prio_timeslice(int static_prio) -{ - if (static_prio == NICE_TO_PRIO(19)) - return 1; - - if (static_prio < NICE_TO_PRIO(0)) - return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio); - else - return SCALE_PRIO(DEF_TIMESLICE, static_prio); -} - static inline int rt_policy(int policy) { if (unlikely(policy == SCHED_FIFO) || unlikely(policy == SCHED_RR)) @@ -170,31 +151,91 @@ struct rt_prio_array { struct list_head queue[MAX_RT_PRIO]; }; -struct load_stat { - struct load_weight load; - u64 load_update_start, load_update_last; - unsigned long delta_fair, delta_exec, delta_stat; +#ifdef CONFIG_FAIR_GROUP_SCHED + +struct cfs_rq; + +/* task group related information */ +struct task_group { + /* schedulable entities of this group on each cpu */ + struct sched_entity **se; + /* runqueue "owned" by this group on each cpu */ + struct cfs_rq **cfs_rq; + unsigned long shares; + /* spinlock to serialize modification to shares */ + spinlock_t lock; +}; + +/* Default task group's sched entity on each cpu */ +static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); +/* Default task group's cfs_rq on each cpu */ +static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; + +static struct sched_entity *init_sched_entity_p[NR_CPUS]; +static struct cfs_rq *init_cfs_rq_p[NR_CPUS]; + +/* Default task group. + * Every task in system belong to this group at bootup. + */ +struct task_group init_task_group = { + .se = init_sched_entity_p, + .cfs_rq = init_cfs_rq_p, }; +#ifdef CONFIG_FAIR_USER_SCHED +# define INIT_TASK_GRP_LOAD 2*NICE_0_LOAD +#else +# define INIT_TASK_GRP_LOAD NICE_0_LOAD +#endif + +static int init_task_group_load = INIT_TASK_GRP_LOAD; + +/* return group to which a task belongs */ +static inline struct task_group *task_group(struct task_struct *p) +{ + struct task_group *tg; + +#ifdef CONFIG_FAIR_USER_SCHED + tg = p->user->tg; +#else + tg = &init_task_group; +#endif + + return tg; +} + +/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ +static inline void set_task_cfs_rq(struct task_struct *p) +{ + p->se.cfs_rq = task_group(p)->cfs_rq[task_cpu(p)]; + p->se.parent = task_group(p)->se[task_cpu(p)]; +} + +#else + +static inline void set_task_cfs_rq(struct task_struct *p) { } + +#endif /* CONFIG_FAIR_GROUP_SCHED */ + /* CFS-related fields in a runqueue */ struct cfs_rq { struct load_weight load; unsigned long nr_running; - s64 fair_clock; u64 exec_clock; - s64 wait_runtime; - u64 sleeper_bonus; - unsigned long wait_runtime_overruns, wait_runtime_underruns; + u64 min_vruntime; struct rb_root tasks_timeline; struct rb_node *rb_leftmost; struct rb_node *rb_load_balance_curr; -#ifdef CONFIG_FAIR_GROUP_SCHED /* 'curr' points to currently running entity on this cfs_rq. * It is set to NULL otherwise (i.e when none are currently running). */ struct sched_entity *curr; + + unsigned long nr_spread_over; + +#ifdef CONFIG_FAIR_GROUP_SCHED struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ /* leaf cfs_rqs are those that hold tasks (lowest schedulable entity in @@ -205,6 +246,8 @@ struct cfs_rq { * list is used during load balance. */ struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */ + struct task_group *tg; /* group that "owns" this runqueue */ + struct rcu_head rcu; #endif }; @@ -236,7 +279,7 @@ struct rq { #ifdef CONFIG_NO_HZ unsigned char in_nohz_recently; #endif - struct load_stat ls; /* capture load from *all* tasks on this cpu */ + struct load_weight load; /* capture load from *all* tasks on this cpu */ unsigned long nr_load_updates; u64 nr_switches; @@ -288,16 +331,19 @@ struct rq { unsigned long yld_exp_empty; unsigned long yld_act_empty; unsigned long yld_both_empty; - unsigned long yld_cnt; + unsigned long yld_count; /* schedule() stats */ unsigned long sched_switch; - unsigned long sched_cnt; + unsigned long sched_count; unsigned long sched_goidle; /* try_to_wake_up() stats */ - unsigned long ttwu_cnt; + unsigned long ttwu_count; unsigned long ttwu_local; + + /* BKL stats */ + unsigned long bkl_count; #endif struct lock_class_key rq_lock_key; }; @@ -382,6 +428,37 @@ static void update_rq_clock(struct rq *rq) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) /* + * Tunables that become constants when CONFIG_SCHED_DEBUG is off: + */ +#ifdef CONFIG_SCHED_DEBUG +# define const_debug __read_mostly +#else +# define const_debug static const +#endif + +/* + * Debugging: various feature bits + */ +enum { + SCHED_FEAT_NEW_FAIR_SLEEPERS = 1, + SCHED_FEAT_START_DEBIT = 2, + SCHED_FEAT_TREE_AVG = 4, + SCHED_FEAT_APPROX_AVG = 8, + SCHED_FEAT_WAKEUP_PREEMPT = 16, + SCHED_FEAT_PREEMPT_RESTRICT = 32, +}; + +const_debug unsigned int sysctl_sched_features = + SCHED_FEAT_NEW_FAIR_SLEEPERS *1 | + SCHED_FEAT_START_DEBIT *1 | + SCHED_FEAT_TREE_AVG *0 | + SCHED_FEAT_APPROX_AVG *0 | + SCHED_FEAT_WAKEUP_PREEMPT *1 | + SCHED_FEAT_PREEMPT_RESTRICT *1; + +#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x) + +/* * For kernel-internal use: high-speed (but slightly incorrect) per-cpu * clock constructed from sched_clock(): */ @@ -399,18 +476,7 @@ unsigned long long cpu_clock(int cpu) return now; } - -#ifdef CONFIG_FAIR_GROUP_SCHED -/* Change a task's ->cfs_rq if it moves across CPUs */ -static inline void set_task_cfs_rq(struct task_struct *p) -{ - p->se.cfs_rq = &task_rq(p)->cfs; -} -#else -static inline void set_task_cfs_rq(struct task_struct *p) -{ -} -#endif +EXPORT_SYMBOL_GPL(cpu_clock); #ifndef prepare_arch_switch # define prepare_arch_switch(next) do { } while (0) @@ -496,16 +562,13 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) static inline struct rq *__task_rq_lock(struct task_struct *p) __acquires(rq->lock) { - struct rq *rq; - -repeat_lock_task: - rq = task_rq(p); - spin_lock(&rq->lock); - if (unlikely(rq != task_rq(p))) { + for (;;) { + struct rq *rq = task_rq(p); + spin_lock(&rq->lock); + if (likely(rq == task_rq(p))) + return rq; spin_unlock(&rq->lock); - goto repeat_lock_task; } - return rq; } /* @@ -518,18 +581,17 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) { struct rq *rq; -repeat_lock_task: - local_irq_save(*flags); - rq = task_rq(p); - spin_lock(&rq->lock); - if (unlikely(rq != task_rq(p))) { + for (;;) { + local_irq_save(*flags); + rq = task_rq(p); + spin_lock(&rq->lock); + if (likely(rq == task_rq(p))) + return rq; spin_unlock_irqrestore(&rq->lock, *flags); - goto repeat_lock_task; } - return rq; } -static inline void __task_rq_unlock(struct rq *rq) +static void __task_rq_unlock(struct rq *rq) __releases(rq->lock) { spin_unlock(&rq->lock); @@ -544,7 +606,7 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) /* * this_rq_lock - lock this runqueue and disable interrupts. */ -static inline struct rq *this_rq_lock(void) +static struct rq *this_rq_lock(void) __acquires(rq->lock) { struct rq *rq; @@ -644,19 +706,6 @@ static inline void resched_task(struct task_struct *p) } #endif -static u64 div64_likely32(u64 divident, unsigned long divisor) -{ -#if BITS_PER_LONG == 32 - if (likely(divident <= 0xffffffffULL)) - return (u32)divident / divisor; - do_div(divident, divisor); - - return divident; -#else - return divident / divisor; -#endif -} - #if BITS_PER_LONG == 32 # define WMULT_CONST (~0UL) #else @@ -698,16 +747,14 @@ calc_delta_fair(unsigned long delta_exec, struct load_weight *lw) return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); } |