aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile4
-rw-r--r--kernel/cgroup.c1
-rw-r--r--kernel/cgroup_freezer.c51
-rw-r--r--kernel/cpu.c3
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/freezer.c20
-rw-r--r--kernel/hrtimer.c17
-rw-r--r--kernel/irq/proc.c2
-rw-r--r--kernel/lockdep.c17
-rw-r--r--kernel/power/Kconfig2
-rw-r--r--kernel/printk.c39
-rw-r--r--kernel/profile.c2
-rw-r--r--kernel/resource.c8
-rw-r--r--kernel/sched.c26
-rw-r--r--kernel/sched_debug.c43
-rw-r--r--kernel/sched_fair.c248
-rw-r--r--kernel/sched_features.h1
-rw-r--r--kernel/sched_idletask.c5
-rw-r--r--kernel/sched_rt.c5
-rw-r--r--kernel/signal.c3
-rw-r--r--kernel/smp.c18
-rw-r--r--kernel/softirq.c7
-rw-r--r--kernel/sysctl.c2
-rw-r--r--kernel/time/tick-sched.c4
-rw-r--r--kernel/timer.c129
-rw-r--r--kernel/trace/Kconfig31
-rw-r--r--kernel/trace/Makefile6
-rw-r--r--kernel/trace/ftrace.c608
-rw-r--r--kernel/trace/ring_buffer.c64
-rw-r--r--kernel/trace/trace.c82
-rw-r--r--kernel/trace/trace.h22
-rw-r--r--kernel/trace/trace_functions.c2
-rw-r--r--kernel/trace/trace_irqsoff.c4
-rw-r--r--kernel/trace/trace_sched_wakeup.c4
-rw-r--r--kernel/trace/trace_selftest.c18
-rw-r--r--kernel/trace/trace_stack.c4
-rw-r--r--kernel/tracepoint.c8
-rw-r--r--kernel/workqueue.c45
38 files changed, 724 insertions, 836 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 305f11dbef2..9a3ec66a9d8 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -13,7 +13,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
CFLAGS_REMOVE_sched.o = -mno-spe
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
CFLAGS_REMOVE_lockdep.o = -pg
CFLAGS_REMOVE_lockdep_proc.o = -pg
@@ -88,7 +88,7 @@ obj-$(CONFIG_MARKERS) += marker.o
obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
-obj-$(CONFIG_FTRACE) += trace/
+obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 35eebd5510c..358e77564e6 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2497,7 +2497,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
list_del(&cgrp->sibling);
spin_lock(&cgrp->dentry->d_lock);
d = dget(cgrp->dentry);
- cgrp->dentry = NULL;
spin_unlock(&d->d_lock);
cgroup_d_remove_dir(d);
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e9505695449..7fa476f01d0 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -162,9 +162,13 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
struct task_struct *task)
{
struct freezer *freezer;
- int retval;
- /* Anything frozen can't move or be moved to/from */
+ /*
+ * Anything frozen can't move or be moved to/from.
+ *
+ * Since orig_freezer->state == FROZEN means that @task has been
+ * frozen, so it's sufficient to check the latter condition.
+ */
if (is_task_frozen_enough(task))
return -EBUSY;
@@ -173,13 +177,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
if (freezer->state == CGROUP_FROZEN)
return -EBUSY;
- retval = 0;
- task_lock(task);
- freezer = task_freezer(task);
- if (freezer->state == CGROUP_FROZEN)
- retval = -EBUSY;
- task_unlock(task);
- return retval;
+ return 0;
}
static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
@@ -190,8 +188,9 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
freezer = task_freezer(task);
task_unlock(task);
- BUG_ON(freezer->state == CGROUP_FROZEN);
spin_lock_irq(&freezer->lock);
+ BUG_ON(freezer->state == CGROUP_FROZEN);
+
/* Locking avoids race with FREEZING -> THAWED transitions. */
if (freezer->state == CGROUP_FREEZING)
freeze_task(task, true);
@@ -276,25 +275,18 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
return num_cant_freeze_now ? -EBUSY : 0;
}
-static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
{
struct cgroup_iter it;
struct task_struct *task;
cgroup_iter_start(cgroup, &it);
while ((task = cgroup_iter_next(cgroup, &it))) {
- int do_wake;
-
- task_lock(task);
- do_wake = __thaw_process(task);
- task_unlock(task);
- if (do_wake)
- wake_up_process(task);
+ thaw_process(task);
}
cgroup_iter_end(cgroup, &it);
- freezer->state = CGROUP_THAWED;
- return 0;
+ freezer->state = CGROUP_THAWED;
}
static int freezer_change_state(struct cgroup *cgroup,
@@ -304,27 +296,22 @@ static int freezer_change_state(struct cgroup *cgroup,
int retval = 0;
freezer = cgroup_freezer(cgroup);
+
spin_lock_irq(&freezer->lock);
+
update_freezer_state(cgroup, freezer);
if (goal_state == freezer->state)
goto out;
- switch (freezer->state) {
+
+ switch (goal_state) {
case CGROUP_THAWED:
- retval = try_to_freeze_cgroup(cgroup, freezer);
+ unfreeze_cgroup(cgroup, freezer);
break;
- case CGROUP_FREEZING:
- if (goal_state == CGROUP_FROZEN) {
- /* Userspace is retrying after
- * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
- retval = try_to_freeze_cgroup(cgroup, freezer);
- break;
- }
- /* state == FREEZING and goal_state == THAWED, so unfreeze */
case CGROUP_FROZEN:
- retval = unfreeze_cgroup(cgroup, freezer);
+ retval = try_to_freeze_cgroup(cgroup, freezer);
break;
default:
- break;
+ BUG();
}
out:
spin_unlock_irq(&freezer->lock);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 86d49045dae..5a732c5ef08 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -499,3 +499,6 @@ const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
+
+const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
+EXPORT_SYMBOL(cpu_all_bits);
diff --git a/kernel/exit.c b/kernel/exit.c
index 80137a5d946..ae2b92be5fa 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -141,6 +141,11 @@ static void __exit_signal(struct task_struct *tsk)
if (sig) {
flush_sigqueue(&sig->shared_pending);
taskstats_tgid_free(sig);
+ /*
+ * Make sure ->signal can't go away under rq->lock,
+ * see account_group_exec_runtime().
+ */
+ task_rq_unlock_wait(tsk);
__cleanup_signal(sig);
}
}
diff --git a/kernel/freezer.c b/kernel/freezer.c
index ba6248b323e..2f4936cf708 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -121,16 +121,7 @@ void cancel_freezing(struct task_struct *p)
}
}
-/*
- * Wake up a frozen process
- *
- * task_lock() is needed to prevent the race with refrigerator() which may
- * occur if the freezing of tasks fails. Namely, without the lock, if the
- * freezing of tasks failed, thaw_tasks() might have run before a task in
- * refrigerator() could call frozen_process(), in which case the task would be
- * frozen and no one would thaw it.
- */
-int __thaw_process(struct task_struct *p)
+static int __thaw_process(struct task_struct *p)
{
if (frozen(p)) {
p->flags &= ~PF_FROZEN;
@@ -140,6 +131,15 @@ int __thaw_process(struct task_struct *p)
return 0;
}
+/*
+ * Wake up a frozen process
+ *
+ * task_lock() is needed to prevent the race with refrigerator() which may
+ * occur if the freezing of tasks fails. Namely, without the lock, if the
+ * freezing of tasks failed, thaw_tasks() might have run before a task in
+ * refrigerator() could call frozen_process(), in which case the task would be
+ * frozen and no one would thaw it.
+ */
int thaw_process(struct task_struct *p)
{
task_lock(p);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2b465dfde42..95d3949f2ae 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1209,6 +1209,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
enum hrtimer_restart (*fn)(struct hrtimer *);
struct hrtimer *timer;
int restart;
+ int emulate_hardirq_ctx = 0;
timer = list_entry(cpu_base->cb_pending.next,
struct hrtimer, cb_entry);
@@ -1217,10 +1218,24 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
timer_stats_account_hrtimer(timer);
fn = timer->function;
+ /*
+ * A timer might have been added to the cb_pending list
+ * when it was migrated during a cpu-offline operation.
+ * Emulate hardirq context for such timers.
+ */
+ if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
+ timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED)
+ emulate_hardirq_ctx = 1;
+
__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
spin_unlock_irq(&cpu_base->lock);
- restart = fn(timer);
+ if (unlikely(emulate_hardirq_ctx)) {
+ local_irq_disable();
+ restart = fn(timer);
+ local_irq_enable();
+ } else
+ restart = fn(timer);
spin_lock_irq(&cpu_base->lock);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index fac014a81b2..4d161c70ba5 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -220,7 +220,7 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
}
}
-void register_default_affinity_proc(void)
+static void register_default_affinity_proc(void)
{
#ifdef CONFIG_SMP
proc_create("irq/default_smp_affinity", 0600, NULL,
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 234a9dccb4b..a4285830323 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2173,12 +2173,11 @@ void early_boot_irqs_on(void)
/*
* Hardirqs will be enabled:
*/
-void trace_hardirqs_on_caller(unsigned long a0)
+void trace_hardirqs_on_caller(unsigned long ip)
{
struct task_struct *curr = current;
- unsigned long ip;
- time_hardirqs_on(CALLER_ADDR0, a0);
+ time_hardirqs_on(CALLER_ADDR0, ip);
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -2192,7 +2191,6 @@ void trace_hardirqs_on_caller(unsigned long a0)
}
/* we'll do an OFF -> ON transition: */
curr->hardirqs_enabled = 1;
- ip = (unsigned long) __builtin_return_address(0);
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
@@ -2228,11 +2226,11 @@ EXPORT_SYMBOL(trace_hardirqs_on);
/*
* Hardirqs were disabled:
*/
-void trace_hardirqs_off_caller(unsigned long a0)
+void trace_hardirqs_off_caller(unsigned long ip)
{
struct task_struct *curr = current;
- time_hardirqs_off(CALLER_ADDR0, a0);
+ time_hardirqs_off(CALLER_ADDR0, ip);
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -2245,7 +2243,7 @@ void trace_hardirqs_off_caller(unsigned long a0)
* We have done an ON -> OFF transition:
*/
curr->hardirqs_enabled = 0;
- curr->hardirq_disable_ip = _RET_IP_;
+ curr->hardirq_disable_ip = ip;
curr->hardirq_disable_event = ++curr->irq_events;
debug_atomic_inc(&hardirqs_off_events);
} else
@@ -3426,9 +3424,10 @@ retry:
}
printk(" ignoring it.\n");
unlock = 0;
+ } else {
+ if (count != 10)
+ printk(KERN_CONT " locked it.\n");
}
- if (count != 10)
- printk(" locked it.\n");
do_each_thread(g, p) {
/*
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index dcd165f92a8..23bd4daeb96 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -96,7 +96,7 @@ config SUSPEND
config PM_TEST_SUSPEND
bool "Test suspend/resume and wakealarm during bootup"
- depends on SUSPEND && PM_DEBUG && RTC_LIB=y
+ depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
---help---
This option will let you suspend your machine during bootup, and
make it wake up a few seconds later using an RTC wakeup alarm.
diff --git a/kernel/printk.c b/kernel/printk.c
index 6341af77eb6..f492f1583d7 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -233,45 +233,6 @@ static inline void boot_delay_msec(void)
#endif
/*
- * Return the number of unread characters in the log buffer.
- */
-static int log_buf_get_len(void)
-{
- return logged_chars;
-}
-
-/*
- * Copy a range of characters from the log buffer.
- */
-int log_buf_copy(char *dest, int idx, int len)
-{
- int ret, max;
- bool took_lock = false;
-
- if (!oops_in_progress) {
- spin_lock_irq(&logbuf_lock);
- took_lock = true;
- }
-
- max = log_buf_get_len();
- if (idx < 0 || idx >= max) {
- ret = -1;
- } else {
- if (len > max)
- len = max;
- ret = len;
- idx += (log_end - max);
- while (len-- > 0)
- dest[len] = LOG_BUF(idx + len);
- }
-
- if (took_lock)
- spin_unlock_irq(&logbuf_lock);
-
- return ret;
-}
-
-/*
* Commands to do_syslog:
*
* 0 -- Close the log. Currently a NOP.
diff --git a/kernel/profile.c b/kernel/profile.c
index a9e422df6bf..9830a037d8d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -102,7 +102,7 @@ int profile_setup(char *str)
__setup("profile=", profile_setup);
-int profile_init(void)
+int __ref profile_init(void)
{
int buffer_bytes;
if (!prof_on)
diff --git a/kernel/resource.c b/kernel/resource.c
index 4089d12af6e..4337063663e 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -17,6 +17,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/device.h>
+#include <linux/pfn.h>
#include <asm/io.h>
@@ -522,7 +523,7 @@ static void __init __reserve_region_with_split(struct resource *root,
{
struct resource *parent = root;
struct resource *conflict;
- struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
+ struct resource *res = kzalloc(sizeof(*res), GFP_ATOMIC);
if (!res)
return;
@@ -571,7 +572,7 @@ static void __init __reserve_region_with_split(struct resource *root,
}
-void reserve_region_with_split(struct resource *root,
+void __init reserve_region_with_split(struct resource *root,
resource_size_t start, resource_size_t end,
const char *name)
{
@@ -849,7 +850,8 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
continue;
if (p->end < addr)
continue;
- if (p->start <= addr && (p->end >= addr + size - 1))
+ if (PFN_DOWN(p->start) <= PFN_DOWN(addr) &&
+ PFN_DOWN(p->end) >= PFN_DOWN(addr + size - 1))
continue;
printk(KERN_WARNING "resource map sanity check conflict: "
"0x%llx 0x%llx 0x%llx 0x%llx %s\n",
diff --git a/kernel/sched.c b/kernel/sched.c
index 0a4dc3b1300..2a106b6b78b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -386,7 +386,6 @@ struct cfs_rq {
u64 exec_clock;
u64 min_vruntime;
- u64 pair_start;
struct rb_root tasks_timeline;
struct rb_node *rb_leftmost;
@@ -398,9 +397,9 @@ struct cfs_rq {
* 'curr' points to currently running entity on this cfs_rq.
* It is set to NULL otherwise (i.e when none are currently running).
*/
- struct sched_entity *curr, *next;
+ struct sched_entity *curr, *next, *last;
- unsigned long nr_spread_over;
+ unsigned int nr_spread_over;
#ifdef CONFIG_FAIR_GROUP_SCHED
struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
@@ -970,6 +969,14 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
}
}
+void task_rq_unlock_wait(struct task_struct *p)
+{
+ struct rq *rq = task_rq(p);
+
+ smp_mb(); /* spin-unlock-wait is not a full memory barrier */
+ spin_unlock_wait(&rq->lock);
+}
+
static void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{
@@ -1806,7 +1813,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
/*
* Buddy candidates are cache hot:
*/
- if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next))
+ if (sched_feat(CACHE_HOT_BUDDY) &&
+ (&p->se == cfs_rq_of(&p->se)->next ||
+ &p->se == cfs_rq_of(&p->se)->last))
return 1;
if (p->sched_class != &fair_sched_class)
@@ -3344,7 +3353,7 @@ small_imbalance:
} else
this_load_per_task = cpu_avg_load_per_task(this_cpu);
- if (max_load - this_load + 2*busiest_load_per_task >=
+ if (max_load - this_load + busiest_load_per_task >=
busiest_load_per_task * imbn) {
*imbalance = busiest_load_per_task;
return busiest;
@@ -6876,15 +6885,17 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
struct sched_domain *tmp;
/* Remove the sched domains which do not contribute to scheduling. */
- for (tmp = sd; tmp; tmp = tmp->parent) {
+ for (tmp = sd; tmp; ) {
struct sched_domain *parent = tmp->parent;
if (!parent)
break;
+
if (sd_parent_degenerate(tmp, parent)) {
tmp->parent = parent->parent;
if (parent->parent)
parent->parent->child = tmp;
- }
+ } else
+ tmp = tmp->parent;
}
if (sd && sd_degenerate(sd)) {
@@ -7673,6 +7684,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
error:
free_sched_groups(cpu_map, tmpmask);
SCHED_CPUMASK_FREE((void *)allmasks);
+ kfree(rd);
return -ENOMEM;
#endif
}
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index ad958c1ec70..48ecc51e770 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -144,7 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
last = __pick_last_entity(cfs_rq);
if (last)
max_vruntime = last->vruntime;
- min_vruntime = rq->cfs.min_vruntime;
+ min_vruntime = cfs_rq->min_vruntime;
rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime;
spin_unlock_irqrestore(&rq->lock, flags);
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
@@ -161,26 +161,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
SPLIT_NS(spread0));
SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
-#ifdef CONFIG_SCHEDSTATS
-#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
-
- P(yld_exp_empty);
- P(yld_act_empty);
- P(yld_both_empty);
- P(yld_count);
- P(sched_switch);
- P(sched_count);
- P(sched_goidle);
-
- P(ttwu_count);
- P(ttwu_local);
-
- P(bkl_count);
-
-#undef P
-#endif
- SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over",
+ SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
cfs_rq->nr_spread_over);
#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP
@@ -260,6 +242,25 @@ static void print_cpu(struct seq_file *m, int cpu)
#undef P
#undef PN
+#ifdef CONFIG_SCHEDSTATS
+#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
+
+ P(yld_exp_empty);
+ P(yld_act_empty);
+ P(yld_both_empty);
+ P(yld_count);
+
+ P(sched_switch);
+ P(sched_count);
+ P(sched_goidle);
+
+ P(ttwu_count);
+ P(ttwu_local);
+
+ P(bkl_count);
+
+#undef P
+#endif
print_cfs_stats(m, cpu);
print_rt_stats(m, cpu);
@@ -319,7 +320,7 @@ static int __init init_sched_debug_procfs(void)
{
struct proc_dir_entry *pe;
- pe = proc_create("sched_debug", 0644, NULL, &sched_debug_fops);
+ pe = proc_create("sched_debug", 0444, NULL, &sched_debug_fops);
if (!pe)
return -ENOMEM;
return 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9573c33688b..98345e45b05 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
return se->parent;
}
+/* return depth at which a sched entity is present in the hierarchy */
+static inline int depth_se(struct sched_entity *se)
+{
+ int depth = 0;
+
+ for_each_sched_entity(se)
+ depth++;
+
+ return depth;
+}
+
+static void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+ int se_depth, pse_depth;
+
+ /*
+ * preemption test can be made between sibling entities who are in the
+ * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
+ * both tasks until we find their ancestors who are siblings of common
+ * parent.
+ */
+
+ /* First walk up until both entities are at same depth */
+ se_depth = depth_se(*se);
+ pse_depth = depth_se(*pse);
+
+ while (se_depth > pse_depth) {
+ se_depth--;
+ *se = parent_entity(*se);
+ }
+
+ while (pse_depth > se_depth) {
+ pse_depth--;
+ *pse = parent_entity(*pse);
+ }
+
+ while (!is_same_group(*se, *pse)) {
+ *se = parent_entity(*se);
+ *pse = parent_entity(*pse);
+ }
+}
+
#else /* CONFIG_FAIR_GROUP_SCHED */
static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
@@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
return NULL;
}
+static inline void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+}
+
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
return se->vruntime - cfs_rq->min_vruntime;
}
+static void update_min_vruntime(struct cfs_rq *cfs_rq)
+{
+ u64 vruntime = cfs_rq->min_vruntime;
+
+ if (cfs_rq->curr)
+ vruntime = cfs_rq->curr->vruntime;
+
+ if (cfs_rq->rb_leftmost) {
+ struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
+ struct sched_entity,
+ run_node);
+
+ if (vruntime == cfs_rq->min_vruntime)
+ vruntime = se->vruntime;
+ else
+ vruntime = min_vruntime(vruntime, se->vruntime);
+ }
+
+ cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+}
+
/*
* Enqueue an entity into the rb-tree:
*/
@@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
* Maintain a cache of leftmost tree entries (it is frequently
* used):
*/
- if (leftmost) {
+ if (leftmost)
cfs_rq->rb_leftmost = &se->run_node;
- /*
- * maintain cfs_rq->min_vruntime to be a monotonic increasing
- * value tracking the leftmost vruntime in the tree.
- */
- cfs_rq->min_vruntime =
- max_vruntime(cfs_rq->min_vruntime, se->vruntime);
- }
rb_link_node(&se->run_node, parent, link);
rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -274,37 +336,25 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
if (cfs_rq->rb_leftmost == &se->run_node) {
struct rb_node *next_node;
- struct sched_entity *next;
next_node = rb_next(&se->run_node);
cfs_rq->rb_leftmost = next_node;
-
- if (next_node) {
- next = rb_entry(next_node,
- struct sched_entity, run_node);
- cfs_rq->min_vruntime =
- max_vruntime(cfs_rq->min_vruntime,
- next->vruntime);
- }
}
- if (cfs_rq->next == se)
- cfs_rq->next = NULL;
-
rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
}
-static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
-{
- return cfs_rq->rb_leftmost;
-}
-
static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
{
- return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node);
+ struct rb_node *left = cfs_rq->rb_leftmost;
+
+ if (!left)
+ return NULL;
+
+ return rb_entry(left, struct sched_entity, run_node);
}
-static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
@@ -424,6 +474,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
schedstat_add(cfs_rq, exec_clock, delta_exec);
delta_exec_weighted = calc_delta_fair(delta_exec, curr);
curr->vruntime += delta_exec_weighted;
+ update_min_vruntime(cfs_rq);
}
static void update_curr(struct cfs_rq *cfs_rq)
@@ -613,13 +664,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
- u64 vruntime;
-
- if (first_fair(cfs_rq)) {
- vruntime = min_vruntime(cfs_rq->min_vruntime,
- __pick_next_entity(cfs_rq)->vruntime);
- } else
- vruntime = cfs_rq->min_vruntime;
+ u64 vruntime = cfs_rq->min_vruntime;
/*
* The 'current' period is already promised to the current tasks,
@@ -671,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
__enqueue_entity(cfs_rq, se);
}
+static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ if (cfs_rq->last == se)
+ cfs_rq->last = NULL;
+
+ if (cfs_rq->next == se)
+ cfs_rq->next = NULL;
+}
+
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
{
@@ -693,9 +747,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
#endif
}
+ clear_buddies(cfs_rq, se);
+
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
account_entity_dequeue(cfs_rq, se);
+ update_min_vruntime(cfs_rq);
}
/*
@@ -742,29 +799,18 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
se->prev_sum_exec_runtime = se->sum_exec_runtime;
}
-static struct sched_entity *
-pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
- struct rq *rq = rq_of(cfs_rq);
- u64 pair_slice = rq->clock - cfs_rq->pair_start;
-
- if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
- cfs_rq->pair_start = rq->clock;
- return se;
- }
-
- return cfs_rq->next;
-}
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
- struct sched_entity *se = NULL;
+ struct sched_entity *se = __pick_next_entity(cfs_rq);
- if (first_fair(