diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/fork.c | 3 | ||||
-rw-r--r-- | kernel/sched/fair.c | 18 | ||||
-rw-r--r-- | kernel/sched/features.h | 4 |
3 files changed, 24 insertions, 1 deletion
diff --git a/kernel/fork.c b/kernel/fork.c index 8b20ab7d3aa..296ea308096 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -821,6 +821,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk) #ifdef CONFIG_TRANSPARENT_HUGEPAGE mm->pmd_huge_pte = NULL; #endif +#ifdef CONFIG_NUMA_BALANCING + mm->first_nid = NUMA_PTE_SCAN_INIT; +#endif if (!mm_init(mm, tsk)) goto fail_nomem; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7a02a2082e9..3e18f611a5a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -861,6 +861,24 @@ void task_numa_work(struct callback_head *work) return; /* + * We do not care about task placement until a task runs on a node + * other than the first one used by the address space. This is + * largely because migrations are driven by what CPU the task + * is running on. If it's never scheduled on another node, it'll + * not migrate so why bother trapping the fault. + */ + if (mm->first_nid == NUMA_PTE_SCAN_INIT) + mm->first_nid = numa_node_id(); + if (mm->first_nid != NUMA_PTE_SCAN_ACTIVE) { + /* Are we running on a new node yet? */ + if (numa_node_id() == mm->first_nid && + !sched_feat_numa(NUMA_FORCE)) + return; + + mm->first_nid = NUMA_PTE_SCAN_ACTIVE; + } + + /* * Reset the scan period if enough time has gone by. Objective is that * scanning will be reduced if pages are properly placed. As tasks * can enter different phases this needs to be re-examined. Lacking diff --git a/kernel/sched/features.h b/kernel/sched/features.h index d2373a3e325..e7c25fff1e9 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -65,8 +65,10 @@ SCHED_FEAT(LB_MIN, false) /* * Apply the automatic NUMA scheduling policy. Enabled automatically * at runtime if running on a NUMA machine. Can be controlled via - * numa_balancing= + * numa_balancing=. Allow PTE scanning to be forced on UMA machines + * for debugging the core machinery. */ #ifdef CONFIG_NUMA_BALANCING SCHED_FEAT(NUMA, false) +SCHED_FEAT(NUMA_FORCE, false) #endif |