aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c16
-rw-r--r--kernel/delayacct.c2
-rw-r--r--kernel/exit.c6
-rw-r--r--kernel/fork.c6
-rw-r--r--kernel/hrtimer.c24
-rw-r--r--kernel/ksysfs.c8
-rw-r--r--kernel/lockdep.c26
-rw-r--r--kernel/lockdep_proc.c61
-rw-r--r--kernel/mutex.c35
-rw-r--r--kernel/nsproxy.c15
-rw-r--r--kernel/posix-timers.c6
-rw-r--r--kernel/rcupdate.c8
-rw-r--r--kernel/sched.c1445
-rw-r--r--kernel/sched_debug.c282
-rw-r--r--kernel/sched_fair.c811
-rw-r--r--kernel/sched_idletask.c8
-rw-r--r--kernel/sched_rt.c19
-rw-r--r--kernel/sched_stats.h28
-rw-r--r--kernel/softirq.c4
-rw-r--r--kernel/sysctl.c41
-rw-r--r--kernel/time/Kconfig5
-rw-r--r--kernel/time/Makefile2
-rw-r--r--kernel/time/clockevents.c3
-rw-r--r--kernel/time/tick-broadcast.c44
-rw-r--r--kernel/time/tick-common.c5
-rw-r--r--kernel/user.c249
26 files changed, 1834 insertions, 1325 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index eb0f9165b40..2924251a654 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -847,18 +847,10 @@ static void audit_receive_skb(struct sk_buff *skb)
}
/* Receive messages from netlink socket. */
-static void audit_receive(struct sock *sk, int length)
+static void audit_receive(struct sk_buff *skb)
{
- struct sk_buff *skb;
- unsigned int qlen;
-
mutex_lock(&audit_cmd_mutex);
-
- for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
- skb = skb_dequeue(&sk->sk_receive_queue);
- audit_receive_skb(skb);
- kfree_skb(skb);
- }
+ audit_receive_skb(skb);
mutex_unlock(&audit_cmd_mutex);
}
@@ -876,8 +868,8 @@ static int __init audit_init(void)
printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
audit_default ? "enabled" : "disabled");
- audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
- NULL, THIS_MODULE);
+ audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, 0,
+ audit_receive, NULL, THIS_MODULE);
if (!audit_sock)
audit_panic("cannot initialize netlink socket");
else
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 81e69782963..09e9574eeb2 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -119,7 +119,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
* No locking available for sched_info (and too expensive to add one)
* Mitigate by taking snapshot of values
*/
- t1 = tsk->sched_info.pcnt;
+ t1 = tsk->sched_info.pcount;
t2 = tsk->sched_info.run_delay;
t3 = tsk->sched_info.cpu_time;
diff --git a/kernel/exit.c b/kernel/exit.c
index 993369ee94d..7f7959de4a8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -111,6 +111,7 @@ static void __exit_signal(struct task_struct *tsk)
*/
sig->utime = cputime_add(sig->utime, tsk->utime);
sig->stime = cputime_add(sig->stime, tsk->stime);
+ sig->gtime = cputime_add(sig->gtime, tsk->gtime);
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
@@ -1242,6 +1243,11 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
cputime_add(p->stime,
cputime_add(sig->stime,
sig->cstime)));
+ psig->cgtime =
+ cputime_add(psig->cgtime,
+ cputime_add(p->gtime,
+ cputime_add(sig->gtime,
+ sig->cgtime)));
psig->cmin_flt +=
p->min_flt + sig->min_flt + sig->cmin_flt;
psig->cmaj_flt +=
diff --git a/kernel/fork.c b/kernel/fork.c
index 33f12f48684..3fc3c138391 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -877,6 +877,8 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
sig->tty_old_pgrp = NULL;
sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+ sig->gtime = cputime_zero;
+ sig->cgtime = cputime_zero;
sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
@@ -1045,6 +1047,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->utime = cputime_zero;
p->stime = cputime_zero;
+ p->gtime = cputime_zero;
#ifdef CONFIG_TASK_XACCT
p->rchar = 0; /* I/O counter: bytes read */
@@ -1608,7 +1611,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
err = -EINVAL;
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
- CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER))
+ CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|
+ CLONE_NEWNET))
goto bad_unshare_out;
if ((err = unshare_thread(unshare_flags)))
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c21ca6bfaa6..dc8a4451d79 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -277,6 +277,30 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
}
EXPORT_SYMBOL_GPL(ktime_add_ns);
+
+/**
+ * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable
+ * @kt: minuend
+ * @nsec: the scalar nsec value to subtract
+ *
+ * Returns the subtraction of @nsec from @kt in ktime_t format
+ */
+ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec)
+{
+ ktime_t tmp;
+
+ if (likely(nsec < NSEC_PER_SEC)) {
+ tmp.tv64 = nsec;
+ } else {
+ unsigned long rem = do_div(nsec, NSEC_PER_SEC);
+
+ tmp = ktime_set((long)nsec, rem);
+ }
+
+ return ktime_sub(kt, tmp);
+}
+
+EXPORT_SYMBOL_GPL(ktime_sub_ns);
# endif /* !CONFIG_KTIME_SCALAR */
/*
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index d0e5c48e18c..6046939d080 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kexec.h>
+#include <linux/sched.h>
#define KERNEL_ATTR_RO(_name) \
static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
@@ -116,6 +117,13 @@ static int __init ksysfs_init(void)
&notes_attr);
}
+ /*
+ * Create "/sys/kernel/uids" directory and corresponding root user's
+ * directory under it.
+ */
+ if (!error)
+ error = uids_kobject_init();
+
return error;
}
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 734da579ad1..a6f1ee9c92d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -1521,7 +1521,7 @@ cache_hit:
}
static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
- struct held_lock *hlock, int chain_head)
+ struct held_lock *hlock, int chain_head, u64 chain_key)
{
/*
* Trylock needs to maintain the stack of held locks, but it
@@ -1534,7 +1534,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
* graph_lock for us)
*/
if (!hlock->trylock && (hlock->check == 2) &&
- lookup_chain_cache(curr->curr_chain_key, hlock->class)) {
+ lookup_chain_cache(chain_key, hlock->class)) {
/*
* Check whether last held lock:
*
@@ -1576,7 +1576,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
#else
static inline int validate_chain(struct task_struct *curr,
struct lockdep_map *lock, struct held_lock *hlock,
- int chain_head)
+ int chain_head, u64 chain_key)
{
return 1;
}
@@ -2450,11 +2450,11 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
chain_head = 1;
}
chain_key = iterate_chain_key(chain_key, id);
- curr->curr_chain_key = chain_key;
- if (!validate_chain(curr, lock, hlock, chain_head))
+ if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
return 0;
+ curr->curr_chain_key = chain_key;
curr->lockdep_depth++;
check_chain_key(curr);
#ifdef CONFIG_DEBUG_LOCKDEP
@@ -3199,3 +3199,19 @@ void debug_show_held_locks(struct task_struct *task)
}
EXPORT_SYMBOL_GPL(debug_show_held_locks);
+
+void lockdep_sys_exit(void)
+{
+ struct task_struct *curr = current;
+
+ if (unlikely(curr->lockdep_depth)) {
+ if (!debug_locks_off())
+ return;
+ printk("\n================================================\n");
+ printk( "[ BUG: lock held when returning to user space! ]\n");
+ printk( "------------------------------------------------\n");
+ printk("%s/%d is leaving the kernel with locks still held!\n",
+ curr->comm, curr->pid);
+ lockdep_print_held_locks(curr);
+ }
+}
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index c851b2dcc68..8a135bd163c 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -25,28 +25,38 @@
static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct lock_class *class = v;
+ struct lock_class *class;
(*pos)++;
- if (class->lock_entry.next != &all_lock_classes)
- class = list_entry(class->lock_entry.next, struct lock_class,
- lock_entry);
- else
- class = NULL;
- m->private = class;
+ if (v == SEQ_START_TOKEN)
+ class = m->private;
+ else {
+ class = v;
+
+ if (class->lock_entry.next != &all_lock_classes)
+ class = list_entry(class->lock_entry.next,
+ struct lock_class, lock_entry);
+ else
+ class = NULL;
+ }
return class;
}
static void *l_start(struct seq_file *m, loff_t *pos)
{
- struct lock_class *class = m->private;
+ struct lock_class *class;
+ loff_t i = 0;
- if (&class->lock_entry == all_lock_classes.next)
- seq_printf(m, "all lock classes:\n");
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
- return class;
+ list_for_each_entry(class, &all_lock_classes, lock_entry) {
+ if (++i == *pos)
+ return class;
+ }
+ return NULL;
}
static void l_stop(struct seq_file *m, void *v)
@@ -101,10 +111,15 @@ static void print_name(struct seq_file *m, struct lock_class *class)
static int l_show(struct seq_file *m, void *v)
{
unsigned long nr_forward_deps, nr_backward_deps;
- struct lock_class *class = m->private;
+ struct lock_class *class = v;
struct lock_list *entry;
char c1, c2, c3, c4;
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(m, "all lock classes:\n");
+ return 0;
+ }
+
seq_printf(m, "%p", class->key);
#ifdef CONFIG_DEBUG_LOCKDEP
seq_printf(m, " OPS:%8ld", class->ops);
@@ -523,10 +538,11 @@ static void *ls_start(struct seq_file *m, loff_t *pos)
{
struct lock_stat_seq *data = m->private;
- if (data->iter == data->stats)
- seq_header(m);
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
- if (data->iter == data->iter_end)
+ data->iter = data->stats + *pos;
+ if (data->iter >= data->iter_end)
data->iter = NULL;
return data->iter;
@@ -538,8 +554,13 @@ static void *ls_next(struct seq_file *m, void *v, loff_t *pos)
(*pos)++;
- data->iter = v;
- data->iter++;
+ if (v == SEQ_START_TOKEN)
+ data->iter = data->stats;
+ else {
+ data->iter = v;
+ data->iter++;
+ }
+
if (data->iter == data->iter_end)
data->iter = NULL;
@@ -552,9 +573,11 @@ static void ls_stop(struct seq_file *m, void *v)
static int ls_show(struct seq_file *m, void *v)
{
- struct lock_stat_seq *data = m->private;
+ if (v == SEQ_START_TOKEN)
+ seq_header(m);
+ else
+ seq_stats(m, v);
- seq_stats(m, data->iter);
return 0;
}
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 691b86564dd..d7fe50cc556 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -51,6 +51,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
EXPORT_SYMBOL(__mutex_init);
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
/*
* We split the mutex lock/unlock logic into separate fastpath and
* slowpath functions, to reduce the register pressure on the fastpath.
@@ -92,6 +93,7 @@ void inline fastcall __sched mutex_lock(struct mutex *lock)
}
EXPORT_SYMBOL(mutex_lock);
+#endif
static void fastcall noinline __sched
__mutex_unlock_slowpath(atomic_t *lock_count);
@@ -122,7 +124,8 @@ EXPORT_SYMBOL(mutex_unlock);
* Lock a mutex (possibly interruptible), slowpath:
*/
static inline int __sched
-__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
+__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
+ unsigned long ip)
{
struct task_struct *task = current;
struct mutex_waiter waiter;
@@ -132,7 +135,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
spin_lock_mutex(&lock->wait_lock, flags);
debug_mutex_lock_common(lock, &waiter);
- mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ mutex_acquire(&lock->dep_map, subclass, 0, ip);
debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
/* add waiting tasks to the end of the waitqueue (FIFO): */
@@ -143,7 +146,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
if (old_val == 1)
goto done;
- lock_contended(&lock->dep_map, _RET_IP_);
+ lock_contended(&lock->dep_map, ip);
for (;;) {
/*
@@ -166,7 +169,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
if (unlikely(state == TASK_INTERRUPTIBLE &&
signal_pending(task))) {
mutex_remove_waiter(lock, &waiter, task_thread_info(task));
- mutex_release(&lock->dep_map, 1, _RET_IP_);
+ mutex_release(&lock->dep_map, 1, ip);
spin_unlock_mutex(&lock->wait_lock, flags);
debug_mutex_free_waiter(&waiter);
@@ -197,20 +200,12 @@ done:
return 0;
}
-static void fastcall noinline __sched
-__mutex_lock_slowpath(atomic_t *lock_count)
-{
- struct mutex *lock = container_of(lock_count, struct mutex, count);
-
- __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
-}
-
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched
mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
- __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_nested);
@@ -219,7 +214,7 @@ int __sched
mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
{
might_sleep();
- return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass);
+ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -271,6 +266,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count)
__mutex_unlock_common_slowpath(lock_count, 1);
}
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
/*
* Here come the less common (and hence less performance-critical) APIs:
* mutex_lock_interruptible() and mutex_trylock().
@@ -298,13 +294,22 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock)
EXPORT_SYMBOL(mutex_lock_interruptible);
+static void fastcall noinline __sched
+__mutex_lock_slowpath(atomic_t *lock_count)
+{
+ struct mutex *lock = container_of(lock_count, struct mutex, count);
+
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_);
+}
+
static int fastcall noinline __sched
__mutex_lock_interruptible_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
- return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
+ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_);
}
+#endif
/*
* Spinlock based trylock, we take the spinlock and check whether we
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index a4fb7d46971..f1decd21a53 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -20,6 +20,7 @@
#include <linux/mnt_namespace.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
+#include <net/net_namespace.h>
static struct kmem_cache *nsproxy_cachep;
@@ -98,8 +99,17 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
goto out_user;
}
+ new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns);
+ if (IS_ERR(new_nsp->net_ns)) {
+ err = PTR_ERR(new_nsp->net_ns);
+ goto out_net;
+ }
+
return new_nsp;
+out_net:
+ if (new_nsp->user_ns)
+ put_user_ns(new_nsp->user_ns);
out_user:
if (new_nsp->pid_ns)
put_pid_ns(new_nsp->pid_ns);
@@ -132,7 +142,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
get_nsproxy(old_ns);
- if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER)))
+ if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET)))
return 0;
if (!capable(CAP_SYS_ADMIN)) {
@@ -164,6 +174,7 @@ void free_nsproxy(struct nsproxy *ns)
put_pid_ns(ns->pid_ns);
if (ns->user_ns)
put_user_ns(ns->user_ns);
+ put_net(ns->net_ns);
kmem_cache_free(nsproxy_cachep, ns);
}
@@ -177,7 +188,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
int err = 0;
if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWUSER)))
+ CLONE_NEWUSER | CLONE_NEWNET)))
return 0;
if (!capable(CAP_SYS_ADMIN))
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 7a15afb73ed..57efe0400bc 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -712,7 +712,7 @@ sys_timer_getoverrun(timer_t timer_id)
{
struct k_itimer *timr;
int overrun;
- long flags;
+ unsigned long flags;
timr = lock_timer(timer_id, &flags);
if (!timr)
@@ -784,7 +784,7 @@ sys_timer_settime(timer_t timer_id, int flags,
struct k_itimer *timr;
struct itimerspec new_spec, old_spec;
int error = 0;
- long flag;
+ unsigned long flag;
struct itimerspec *rtn = old_setting ? &old_spec : NULL;
if (!new_setting)
@@ -836,7 +836,7 @@ asmlinkage long
sys_timer_delete(timer_t timer_id)
{
struct k_itimer *timer;
- long flags;
+ unsigned long flags;
retry_delete:
timer = lock_timer(timer_id, &flags);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 2c2dd8410dc..130214f3d22 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -49,6 +49,14 @@
#include <linux/cpu.h>
#include <linux/mutex.h>
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key rcu_lock_key;
+struct lockdep_map rcu_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
+
+EXPORT_SYMBOL_GPL(rcu_lock_map);
+#endif
+
/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_ctrlblk = {
.cur = -300,
diff --git a/kernel/sched.c b/kernel/sched.c
index 6107a0cd632..bba57adb950 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -61,6 +61,7 @@
#include <linux/delayacct.h>
#include <linux/reciprocal_div.h>
#include <linux/unistd.h>
+#include <linux/pagemap.h>
#include <asm/tlb.h>
@@ -95,7 +96,7 @@ unsigned long long __attribute__((weak)) sched_clock(void)
/*
* Some helpers for converting nanosecond timing to jiffy resolution
*/
-#define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ))
+#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (1000000000 / HZ))
#define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ))
#define NICE_0_LOAD SCHED_LOAD_SCALE
@@ -104,11 +105,9 @@ unsigned long long __attribute__((weak)) sched_clock(void)
/*
* These are the 'tuning knobs' of the scheduler:
*
- * Minimum timeslice is 5 msecs (or 1 jiffy, whichever is larger),
- * default timeslice is 100 msecs, maximum timeslice is 800 msecs.
+ * default timeslice is 100 msecs (used only for SCHED_RR tasks).
* Timeslices get refilled after they expire.
*/
-#define MIN_TIMESLICE max(5 * HZ / 1000, 1)
#define DEF_TIMESLICE (100 * HZ / 1000)
#ifdef CONFIG_SMP
@@ -132,24 +131,6 @@ static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val)
}
#endif
-#define SCALE_PRIO(x, prio) \
- max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_TIMESLICE)
-
-/*
- * static_prio_timeslice() scales user-nice values [ -20 ... 0 ... 19 ]
- * to time slice values: [800ms ... 100ms ... 5ms]
- */
-static unsigned int static_prio_timeslice(int static_prio)
-{
- if (static_prio == NICE_TO_PRIO(19))
- return 1;
-
- if (static_prio < NICE_TO_PRIO(0))
- return SCALE_PRIO(DEF_TIMESLICE * 4, static_prio);
- else
- return SCALE_PRIO(DEF_TIMESLICE, static_prio);
-}
-
static inline int rt_policy(int policy)
{
if (unlikely(policy == SCHED_FIFO) || unlikely(policy == SCHED_RR))
@@ -170,31 +151,91 @@ struct rt_prio_array {
struct list_head queue[MAX_RT_PRIO];
};
-struct load_stat {
- struct load_weight load;
- u64 load_update_start, load_update_last;
- unsigned long delta_fair, delta_exec, delta_stat;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+struct cfs_rq;
+
+/* task group related information */
+struct task_group {
+ /* schedulable entities of this group on each cpu */
+ struct sched_entity **se;
+ /* runqueue "owned" by this group on each cpu */
+ struct cfs_rq **cfs_rq;
+ unsigned long shares;
+ /* spinlock to serialize modification to shares */
+ spinlock_t lock;
+};
+
+/* Default task group's sched entity on each cpu */
+static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
+/* Default task group's cfs_rq on each cpu */
+static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
+
+static struct sched_entity *init_sched_entity_p[NR_CPUS];
+static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
+
+/* Default task group.
+ * Every task in system belong to this group at bootup.
+ */
+struct task_group init_task_group = {
+ .se = init_sched_entity_p,
+ .cfs_rq = init_cfs_rq_p,
};
+#ifdef CONFIG_FAIR_USER_SCHED
+# define INIT_TASK_GRP_LOAD 2*NICE_0_LOAD
+#else
+# define INIT_TASK_GRP_LOAD NICE_0_LOAD
+#endif
+
+static int init_task_group_load = INIT_TASK_GRP_LOAD;
+
+/* return group to which a task belongs */
+static inline struct task_group *task_group(struct task_struct *p)
+{
+ struct task_group *tg;
+
+#ifdef CONFIG_FAIR_USER_SCHED
+ tg = p->user->tg;
+#else
+ tg = &init_task_group;
+#endif
+
+ return tg;
+}
+
+/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
+static inline void set_task_cfs_rq(struct task_struct *p)
+{
+ p->se.cfs_rq = task_group(p)->cfs_rq[task_cpu(p)];
+ p->se.parent = task_group(p)->se[task_cpu(p)];
+}
+
+#else
+
+static inline void set_task_cfs_rq(struct task_struct *p) { }
+
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
/* CFS-related fields in a runqueue */
struct cfs_rq {
struct load_weight load;
unsigned long nr_running;
- s64 fair_clock;
u64 exec_clock;
- s64 wait_runtime;
- u64 sleeper_bonus;
- unsigned long wait_runtime_overruns, wait_runtime_underruns;
+ u64 min_vruntime;
struct rb_root tasks_timeline;
struct rb_node *rb_leftmost;
struct rb_node *rb_load_balance_curr;
-#ifdef CONFIG_FAIR_GROUP_SCHED
/* 'curr' points to currently running entity on this cfs_rq.
* It is set to NULL otherwise (i.e when none are currently running).
*/
struct sched_entity *curr;
+
+ unsigned long nr_spread_over;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
/* leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
@@ -205,6 +246,8 @@ struct cfs_rq {
* list is used during load balance.
*/
struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */
+ struct task_group *tg; /* group that "owns" this runqueue */
+ struct rcu_head rcu;
#endif
};
@@ -236,7 +279,7 @@ struct rq {
#ifdef CONFIG_NO_HZ
unsigned char in_nohz_recently;
#endif
- struct load_stat ls; /* capture load from *all* tasks on this cpu */
+ struct load_weight load; /* capture load from *all* tasks on this cpu */
unsigned long nr_load_updates;
u64 nr_switches;
@@ -288,16 +331,19 @@ struct rq {
unsigned long yld_exp_empty;
unsigned long yld_act_empty;
unsigned long yld_both_empty;
- unsigned long yld_cnt;
+ unsigned long yld_count;
/* schedule() stats */
unsigned long sched_switch;
- unsigned long sched_cnt;
+ unsigned long sched_count;
unsigned long sched_goidle;
/* try_to_wake_up() stats */
- unsigned long ttwu_cnt;
+ unsigned long ttwu_count;
unsigned long ttwu_local;
+
+ /* BKL stats */
+ unsigned long bkl_count;
#endif
struct lock_class_key rq_lock_key;
};
@@ -382,6 +428,37 @@ static void update_rq_clock(struct rq *rq)
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
/*
+ * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
+ */
+#ifdef CONFIG_SCHED_DEBUG
+# define const_debug __read_mostly
+#else
+# define const_debug static const
+#endif
+
+/*
+ * Debugging: various feature bits
+ */
+enum {
+ SCHED_FEAT_NEW_FAIR_SLEEPERS = 1,
+ SCHED_FEAT_START_DEBIT = 2,
+ SCHED_FEAT_TREE_AVG = 4,
+ SCHED_FEAT_APPROX_AVG = 8,
+ SCHED_FEAT_WAKEUP_PREEMPT = 16,
+ SCHED_FEAT_PREEMPT_RESTRICT = 32,
+};
+
+const_debug unsigned int sysctl_sched_features =
+ SCHED_FEAT_NEW_FAIR_SLEEPERS *1 |
+ SCHED_FEAT_START_DEBIT *1 |
+ SCHED_FEAT_TREE_AVG *0 |
+ SCHED_FEAT_APPROX_AVG *0 |
+ SCHED_FEAT_WAKEUP_PREEMPT *1 |
+ SCHED_FEAT_PREEMPT_RESTRICT *1;
+
+#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
+
+/*
* For kernel-internal use: high-speed (but slightly incorrect) per-cpu
* clock constructed from sched_clock():
*/
@@ -399,18 +476,7 @@ unsigned long long cpu_clock(int cpu)
return now;
}
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-/* Change a task's ->cfs_rq if it moves across CPUs */
-static inline void set_task_cfs_rq(struct task_struct *p)
-{
- p->se.cfs_rq = &task_rq(p)->cfs;
-}
-#else
-static inline void set_task_cfs_rq(struct task_struct *p)
-{
-}
-#endif
+EXPORT_SYMBOL_GPL(cpu_clock);
#ifndef prepare_arch_switch
# define prepare_arch_switch(next) do { } while (0)
@@ -496,16 +562,13 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
static inline struct rq *__task_rq_lock(struct task_struct *p)
__acquires(rq->lock)
{
- struct rq *rq;
-
-repeat_lock_task:
- rq = task_rq(p);
- spin_lock(&rq->lock);
- if (unlikely(rq != task_rq(p))) {
+ for (;;) {
+ struct rq *rq = task_rq(p);
+ spin_lock(&rq->lock);
+ if (likely(rq == task_rq(p)))
+ return rq;
spin_unlock(&rq->lock);
- goto repeat_lock_task;
}
- return rq;
}
/*
@@ -518,18 +581,17 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
{
struct rq *rq;
-repeat_lock_task:
- local_irq_save(*flags);
- rq = task_rq(p);
- spin_lock(&rq->lock);
- if (unlikely(rq != task_rq(p))) {
+ for (;;) {
+ local_irq_save(*flags);
+ rq = task_rq(p);
+ spin_lock(&rq->lock);
+ if (likely(rq == task_rq(p)))
+ return rq;
spin_unlock_irqrestore(&rq->lock, *flags);
- goto repeat_lock_task;
}
- return rq;
}
-static inline void __task_rq_unlock(struct rq *rq)
+static void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{
spin_unlock(&rq->lock);
@@ -544,7 +606,7 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
/*
* this_rq_lock - lock this runqueue and disable interrupts.
*/
-static inline struct rq *this_rq_lock(void)
+static struct rq *this_rq_lock(void)
__acquires(rq->lock)
{
struct rq *rq;
@@ -644,19 +706,6 @@ static inline void resched_task(struct task_struct *p)
}
#endif
-static u64 div64_likely32(u64 divident, unsigned long divisor)
-{
-#if BITS_PER_LONG == 32
- if (likely(divident <= 0xffffffffULL))
- return (u32)divident / divisor;
- do_div(divident, divisor);
-
- return divident;
-#else
- return divident / divisor;
-#endif
-}
-
#if BITS_PER_LONG == 32
# define WMULT_CONST (~0UL)
#else
@@ -698,16 +747,14 @@ calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
return calc_delta_mine(delta_exec, NICE_0_LOAD, lw);
}