aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/pgtable.h110
-rw-r--r--include/linux/huge_mm.h16
-rw-r--r--include/linux/hugetlb.h8
-rw-r--r--include/linux/mempolicy.h8
-rw-r--r--include/linux/migrate.h46
-rw-r--r--include/linux/mm.h39
-rw-r--r--include/linux/mm_types.h31
-rw-r--r--include/linux/mmzone.h13
-rw-r--r--include/linux/rmap.h33
-rw-r--r--include/linux/sched.h27
-rw-r--r--include/linux/vm_event_item.h12
-rw-r--r--include/linux/vmstat.h8
-rw-r--r--include/trace/events/migrate.h51
-rw-r--r--include/uapi/linux/mempolicy.h15
14 files changed, 396 insertions, 21 deletions
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 284e80831d2..701beab27aa 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -219,6 +219,10 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
#define move_pte(pte, prot, old_addr, new_addr) (pte)
#endif
+#ifndef pte_accessible
+# define pte_accessible(pte) ((void)(pte),1)
+#endif
+
#ifndef flush_tlb_fix_spurious_fault
#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
#endif
@@ -580,6 +584,112 @@ static inline int pmd_trans_unstable(pmd_t *pmd)
#endif
}
+#ifdef CONFIG_NUMA_BALANCING
+#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+/*
+ * _PAGE_NUMA works identical to _PAGE_PROTNONE (it's actually the
+ * same bit too). It's set only when _PAGE_PRESET is not set and it's
+ * never set if _PAGE_PRESENT is set.
+ *
+ * pte/pmd_present() returns true if pte/pmd_numa returns true. Page
+ * fault triggers on those regions if pte/pmd_numa returns true
+ * (because _PAGE_PRESENT is not set).
+ */
+#ifndef pte_numa
+static inline int pte_numa(pte_t pte)
+{
+ return (pte_flags(pte) &
+ (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
+}
+#endif
+
+#ifndef pmd_numa
+static inline int pmd_numa(pmd_t pmd)
+{
+ return (pmd_flags(pmd) &
+ (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
+}
+#endif
+
+/*
+ * pte/pmd_mknuma sets the _PAGE_ACCESSED bitflag automatically
+ * because they're called by the NUMA hinting minor page fault. If we
+ * wouldn't set the _PAGE_ACCESSED bitflag here, the TLB miss handler
+ * would be forced to set it later while filling the TLB after we
+ * return to userland. That would trigger a second write to memory
+ * that we optimize away by setting _PAGE_ACCESSED here.
+ */
+#ifndef pte_mknonnuma
+static inline pte_t pte_mknonnuma(pte_t pte)
+{
+ pte = pte_clear_flags(pte, _PAGE_NUMA);
+ return pte_set_flags(pte, _PAGE_PRESENT|_PAGE_ACCESSED);
+}
+#endif
+
+#ifndef pmd_mknonnuma
+static inline pmd_t pmd_mknonnuma(pmd_t pmd)
+{
+ pmd = pmd_clear_flags(pmd, _PAGE_NUMA);
+ return pmd_set_flags(pmd, _PAGE_PRESENT|_PAGE_ACCESSED);
+}
+#endif
+
+#ifndef pte_mknuma
+static inline pte_t pte_mknuma(pte_t pte)
+{
+ pte = pte_set_flags(pte, _PAGE_NUMA);
+ return pte_clear_flags(pte, _PAGE_PRESENT);
+}
+#endif
+
+#ifndef pmd_mknuma
+static inline pmd_t pmd_mknuma(pmd_t pmd)
+{
+ pmd = pmd_set_flags(pmd, _PAGE_NUMA);
+ return pmd_clear_flags(pmd, _PAGE_PRESENT);
+}
+#endif
+#else
+extern int pte_numa(pte_t pte);
+extern int pmd_numa(pmd_t pmd);
+extern pte_t pte_mknonnuma(pte_t pte);
+extern pmd_t pmd_mknonnuma(pmd_t pmd);
+extern pte_t pte_mknuma(pte_t pte);
+extern pmd_t pmd_mknuma(pmd_t pmd);
+#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
+#else
+static inline int pmd_numa(pmd_t pmd)
+{
+ return 0;
+}
+
+static inline int pte_numa(pte_t pte)
+{
+ return 0;
+}
+
+static inline pte_t pte_mknonnuma(pte_t pte)
+{
+ return pte;
+}
+
+static inline pmd_t pmd_mknonnuma(pmd_t pmd)
+{
+ return pmd;
+}
+
+static inline pte_t pte_mknuma(pte_t pte)
+{
+ return pte;
+}
+
+static inline pmd_t pmd_mknuma(pmd_t pmd)
+{
+ return pmd;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
#endif /* CONFIG_MMU */
#endif /* !__ASSEMBLY__ */
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 092dc5305a3..1d76f8ca90f 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -31,7 +31,8 @@ extern int move_huge_pmd(struct vm_area_struct *vma,
unsigned long new_addr, unsigned long old_end,
pmd_t *old_pmd, pmd_t *new_pmd);
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, pgprot_t newprot);
+ unsigned long addr, pgprot_t newprot,
+ int prot_numa);
enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_FLAG,
@@ -111,7 +112,7 @@ extern void __split_huge_page_pmd(struct vm_area_struct *vma,
#define wait_split_huge_page(__anon_vma, __pmd) \
do { \
pmd_t *____pmd = (__pmd); \
- anon_vma_lock(__anon_vma); \
+ anon_vma_lock_write(__anon_vma); \
anon_vma_unlock(__anon_vma); \
BUG_ON(pmd_trans_splitting(*____pmd) || \
pmd_trans_huge(*____pmd)); \
@@ -171,6 +172,10 @@ static inline struct page *compound_trans_head(struct page *page)
}
return page;
}
+
+extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, pmd_t pmd, pmd_t *pmdp);
+
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -209,6 +214,13 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd,
{
return 0;
}
+
+static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, pmd_t pmd, pmd_t *pmdp)
+{
+ return 0;
+}
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_HUGE_MM_H */
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3e7fa1acf09..0c80d3f57a5 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -87,7 +87,7 @@ struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int write);
int pmd_huge(pmd_t pmd);
int pud_huge(pud_t pmd);
-void hugetlb_change_protection(struct vm_area_struct *vma,
+unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end, pgprot_t newprot);
#else /* !CONFIG_HUGETLB_PAGE */
@@ -132,7 +132,11 @@ static inline void copy_huge_page(struct page *dst, struct page *src)
{
}
-#define hugetlb_change_protection(vma, address, end, newprot)
+static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
+ unsigned long address, unsigned long end, pgprot_t newprot)
+{
+ return 0;
+}
static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start,
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index dbd212723b7..9adc270de7e 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -188,6 +188,8 @@ static inline int vma_migratable(struct vm_area_struct *vma)
return 1;
}
+extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
+
#else
struct mempolicy {};
@@ -307,5 +309,11 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
return 0;
}
+static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
+ unsigned long address)
+{
+ return -1; /* no node preference */
+}
+
#endif /* CONFIG_NUMA */
#endif
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 0b5865c61ef..1e9f627967a 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -23,6 +23,15 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **);
#define MIGRATEPAGE_BALLOON_SUCCESS 1 /* special ret code for balloon page
* sucessful migration case.
*/
+enum migrate_reason {
+ MR_COMPACTION,
+ MR_MEMORY_FAILURE,
+ MR_MEMORY_HOTPLUG,
+ MR_SYSCALL, /* also applies to cpusets */
+ MR_MEMPOLICY_MBIND,
+ MR_NUMA_MISPLACED,
+ MR_CMA
+};
#ifdef CONFIG_MIGRATION
@@ -32,7 +41,7 @@ extern int migrate_page(struct address_space *,
struct page *, struct page *, enum migrate_mode);
extern int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, bool offlining,
- enum migrate_mode mode);
+ enum migrate_mode mode, int reason);
extern int migrate_huge_page(struct page *, new_page_t x,
unsigned long private, bool offlining,
enum migrate_mode mode);
@@ -54,7 +63,7 @@ static inline void putback_lru_pages(struct list_head *l) {}
static inline void putback_movable_pages(struct list_head *l) {}
static inline int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, bool offlining,
- enum migrate_mode mode) { return -ENOSYS; }
+ enum migrate_mode mode, int reason) { return -ENOSYS; }
static inline int migrate_huge_page(struct page *page, new_page_t x,
unsigned long private, bool offlining,
enum migrate_mode mode) { return -ENOSYS; }
@@ -83,4 +92,37 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
#define fail_migrate_page NULL
#endif /* CONFIG_MIGRATION */
+
+#ifdef CONFIG_NUMA_BALANCING
+extern int migrate_misplaced_page(struct page *page, int node);
+extern int migrate_misplaced_page(struct page *page, int node);
+extern bool migrate_ratelimited(int node);
+#else
+static inline int migrate_misplaced_page(struct page *page, int node)
+{
+ return -EAGAIN; /* can't migrate now */
+}
+static inline bool migrate_ratelimited(int node)
+{
+ return false;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+#if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+extern int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ pmd_t *pmd, pmd_t entry,
+ unsigned long address,
+ struct page *page, int node);
+#else
+static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ pmd_t *pmd, pmd_t entry,
+ unsigned long address,
+ struct page *page, int node)
+{
+ return -EAGAIN;
+}
+#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE*/
+
#endif /* _LINUX_MIGRATE_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4af4f0b1be4..7f4f906190b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -693,6 +693,36 @@ static inline int page_to_nid(const struct page *page)
}
#endif
+#ifdef CONFIG_NUMA_BALANCING
+static inline int page_xchg_last_nid(struct page *page, int nid)
+{
+ return xchg(&page->_last_nid, nid);
+}
+
+static inline int page_last_nid(struct page *page)
+{
+ return page->_last_nid;
+}
+static inline void reset_page_last_nid(struct page *page)
+{
+ page->_last_nid = -1;
+}
+#else
+static inline int page_xchg_last_nid(struct page *page, int nid)
+{
+ return page_to_nid(page);
+}
+
+static inline int page_last_nid(struct page *page)
+{
+ return page_to_nid(page);
+}
+
+static inline void reset_page_last_nid(struct page *page)
+{
+}
+#endif
+
static inline struct zone *page_zone(const struct page *page)
{
return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
@@ -1078,6 +1108,9 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
extern unsigned long do_mremap(unsigned long addr,
unsigned long old_len, unsigned long new_len,
unsigned long flags, unsigned long new_addr);
+extern unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, pgprot_t newprot,
+ int dirty_accountable, int prot_numa);
extern int mprotect_fixup(struct vm_area_struct *vma,
struct vm_area_struct **pprev, unsigned long start,
unsigned long end, unsigned long newflags);
@@ -1579,6 +1612,11 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
}
#endif
+#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
+unsigned long change_prot_numa(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
+#endif
+
struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t);
@@ -1600,6 +1638,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
#define FOLL_MLOCK 0x40 /* mark page as mlocked */
#define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */
#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
+#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7ade2731b5d..7d9ebb7cc98 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -175,6 +175,10 @@ struct page {
*/
void *shadow;
#endif
+
+#ifdef CONFIG_NUMA_BALANCING
+ int _last_nid;
+#endif
}
/*
* The struct page can be forced to be double word aligned so that atomic ops
@@ -411,9 +415,36 @@ struct mm_struct {
#ifdef CONFIG_CPUMASK_OFFSTACK
struct cpumask cpumask_allocation;
#endif
+#ifdef CONFIG_NUMA_BALANCING
+ /*
+ * numa_next_scan is the next time when the PTEs will me marked
+ * pte_numa to gather statistics and migrate pages to new nodes
+ * if necessary
+ */
+ unsigned long numa_next_scan;
+
+ /* numa_next_reset is when the PTE scanner period will be reset */
+ unsigned long numa_next_reset;
+
+ /* Restart point for scanning and setting pte_numa */
+ unsigned long numa_scan_offset;
+
+ /* numa_scan_seq prevents two threads setting pte_numa */
+ int numa_scan_seq;
+
+ /*
+ * The first node a task was scheduled on. If a task runs on
+ * a different node than Make PTE Scan Go Now.
+ */
+ int first_nid;
+#endif
struct uprobes_state uprobes_state;
};
+/* first nid will either be a valid NID or one of these values */
+#define NUMA_PTE_SCAN_INIT -1
+#define NUMA_PTE_SCAN_ACTIVE -2
+
static inline void mm_init_cpumask(struct mm_struct *mm)
{
#ifdef CONFIG_CPUMASK_OFFSTACK
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index cd55dad56aa..4bec5be82ca 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -735,6 +735,19 @@ typedef struct pglist_data {
struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */
int kswapd_max_order;
enum zone_type classzone_idx;
+#ifdef CONFIG_NUMA_BALANCING
+ /*
+ * Lock serializing the per destination node AutoNUMA memory
+ * migration rate limiting data.
+ */
+ spinlock_t numabalancing_migrate_lock;
+
+ /* Rate limiting time interval */
+ unsigned long numabalancing_migrate_next_window;
+
+ /* Number of pages migrated during the rate limiting time interval */
+ unsigned long numabalancing_migrate_nr_pages;
+#endif
} pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bfe1f478064..c20635c527a 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -7,7 +7,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
#include <linux/memcontrol.h>
/*
@@ -25,8 +25,8 @@
* pointing to this anon_vma once its vma list is empty.
*/
struct anon_vma {
- struct anon_vma *root; /* Root of this anon_vma tree */
- struct mutex mutex; /* Serialize access to vma list */
+ struct anon_vma *root; /* Root of this anon_vma tree */
+ struct rw_semaphore rwsem; /* W: modification, R: walking the list */
/*
* The refcount is taken on an anon_vma when there is no
* guarantee that the vma of page tables will exist for
@@ -64,7 +64,7 @@ struct anon_vma_chain {
struct vm_area_struct *vma;
struct anon_vma *anon_vma;
struct list_head same_vma; /* locked by mmap_sem & page_table_lock */
- struct rb_node rb; /* locked by anon_vma->mutex */
+ struct rb_node rb; /* locked by anon_vma->rwsem */
unsigned long rb_subtree_last;
#ifdef CONFIG_DEBUG_VM_RB
unsigned long cached_vma_start, cached_vma_last;
@@ -108,26 +108,37 @@ static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
if (anon_vma)
- mutex_lock(&anon_vma->root->mutex);
+ down_write(&anon_vma->root->rwsem);
}
static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
if (anon_vma)
- mutex_unlock(&anon_vma->root->mutex);
+ up_write(&anon_vma->root->rwsem);
}
-static inline void anon_vma_lock(struct anon_vma *anon_vma)
+static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
{
- mutex_lock(&anon_vma->root->mutex);
+ down_write(&anon_vma->root->rwsem);
}
static inline void anon_vma_unlock(struct anon_vma *anon_vma)
{
- mutex_unlock(&anon_vma->root->mutex);
+ up_write(&anon_vma->root->rwsem);
}
+static inline void anon_vma_lock_read(struct anon_vma *anon_vma)
+{
+ down_read(&anon_vma->root->rwsem);
+}
+
+static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
+{
+ up_read(&anon_vma->root->rwsem);
+}
+
+
/*
* anon_vma helper functions.
*/
@@ -220,8 +231,8 @@ int try_to_munlock(struct page *);
/*
* Called by memory-failure.c to kill processes.
*/
-struct anon_vma *page_lock_anon_vma(struct page *page);
-void page_unlock_anon_vma(struct anon_vma *anon_vma);
+struct anon_vma *page_lock_anon_vma_read(struct page *page);
+void page_unlock_anon_vma_read(struct anon_vma *anon_vma);
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c2f3072bee..b089c92c609 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1527,6 +1527,14 @@ struct task_struct {
short il_next;
short pref_node_fork;
#endif
+#ifdef CONFIG_NUMA_BALANCING
+ int numa_scan_seq;
+ int numa_migrate_seq;
+ unsigned int numa_scan_period;
+ u64 node_stamp; /* migration stamp */
+ struct callback_head numa_work;
+#endif /* CONFIG_NUMA_BALANCING */
+
struct rcu_head rcu;
/*
@@ -1601,6 +1609,18 @@ struct task_struct {
/* Future-safe accessor for struct task_struct's cpus_allowed. */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
+#ifdef CONFIG_NUMA_BALANCING
+extern void task_numa_fault(int node, int pages, bool migrated);
+extern void set_numabalancing_state(bool enabled);
+#else
+static inline void task_numa_fault(int node, int pages, bool migrated)
+{
+}
+static inline void set_numabalancing_state(bool enabled)
+{
+}
+#endif
+
/*
* Priority of a process goes from 0..MAX_PRIO-1, valid RT
* priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
@@ -2030,6 +2050,13 @@ enum sched_tunable_scaling {
};
extern enum sched_tunable_scaling sysctl_sched_tunable_scaling;
+extern unsigned int sysctl_numa_balancing_scan_delay;
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_period_reset;
+extern unsigned int sysctl_numa_balancing_scan_size;
+extern unsigned int sysctl_numa_balancing_settle_count;
+
#ifdef CONFIG_SCHED_DEBUG
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index fe786f07d2b..fce0a2799d4 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -38,8 +38,18 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
KSWAPD_SKIP_CONGESTION_WAIT,
PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+#ifdef CONFIG_NUMA_BALANCING
+ NUMA_PTE_UPDATES,
+ NUMA_HINT_FAULTS,
+ NUMA_HINT_FAULTS_LOCAL,
+ NUMA_PAGE_MIGRATE,
+#endif
+#ifdef CONFIG_MIGRATION
+ PGMIGRATE_SUCCESS, PGMIGRATE_FAIL,
+#endif
#ifdef CONFIG_COMPACTION
- COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED,
+ COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
+ COMPACTISOLATED,
COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
#endif
#ifdef CONFIG_HUGETLB_PAGE
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 92a86b2cce3..a13291f7da8 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -80,6 +80,14 @@ static inline void vm_events_fold_cpu(int cpu)
#endif /* CONFIG_VM_EVENT_COUNTERS */
+#ifdef CONFIG_NUMA_BALANCING
+#define count_vm_numa_event(x) count_vm_event(x)
+#define count_vm_numa_events(x, y) count_vm_events(x, y)
+#else
+#define count_vm_numa_event(x) do {} while (0)
+#define count_vm_numa_events(x, y) do {} while (0)
+#endif /* CONFIG_NUMA_BALANCING */
+
#define __count_zone_vm_events(item, zone, delta) \
__count_vm_events(item##_NORMAL - ZONE_NORMAL + \
zone_idx(zone), delta)
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
new file mode 100644
index 00000000000..ec2a6ccfd7e
--- /dev/null
+++ b/include/trace/events/migrate.h
@@ -0,0 +1,51 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM migrate
+
+#if !defined(_TRACE_MIGRATE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MIGRATE_H
+
+#define MIGRATE_MODE \
+ {MIGRATE_ASYNC, "MIGRATE_ASYNC"}, \
+ {MIGRATE_SYNC_LIGHT, "MIGRATE_SYNC_LIGHT"}, \
+ {MIGRATE_SYNC, "MIGRATE_SYNC"}
+
+#define MIGRATE_REASON \
+ {MR_COMPACTION, "compaction"}, \
+ {MR_MEMORY_FAILURE, "memory_failure"}, \
+ {MR_MEMORY_HOTPLUG, "memory_hotplug"}, \
+ {MR_SYSCALL, "syscall_or_cpuset"}, \
+ {MR_MEMPOLICY_MBIND, "mempolicy_mbind"}, \
+ {MR_CMA, "cma"}
+
+TRACE_EVENT(mm_migrate_pages,
+
+ TP_PROTO(unsigned long succeeded, unsigned long failed,
+ enum migrate_mode mode, int reason),
+
+ TP_ARGS(succeeded, failed, mode, reason),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, succeeded)
+ __field( unsigned long, failed)
+ __field( enum migrate_mode, mode)
+ __field( int, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->succeeded = succeeded;
+ __entry->failed = failed;
+ __entry->mode = mode;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("nr_succeeded=%lu nr_failed=%lu mode=%s reason=%s",
+ __entry->succeeded,
+ __entry->failed,
+ __print_symbolic(__entry->mode, MIGRATE_MODE),
+ __print_symbolic(__entry->reason, MIGRATE_REASON))
+);
+
+#endif /* _TRACE_MIGRATE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 23e62e0537e..0d11c3dcd3a 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -20,6 +20,7 @@ enum {
MPOL_PREFERRED,
MPOL_BIND,
MPOL_INTERLEAVE,
+ MPOL_LOCAL,
MPOL_MAX, /* always last member of enum */
};
@@ -47,9 +48,15 @@ enum mpol_rebind_step {
/* Flags for mbind */
#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
-#define MPOL_MF_INTERNAL (1<<3) /* Internal flags start here */
+#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform
+ to policy */
+#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to policy */
+#define MPOL_MF_LAZY (1<<3) /* Modifies '_MOVE: lazy migrate on fault */
+#define MPOL_MF_INTERNAL (1<<4) /* Internal flags start here */
+
+#define MPOL_MF_VALID (MPOL_MF_STRICT | \
+ MPOL_MF_MOVE | \
+ MPOL_MF_MOVE_ALL)
/*
* Internal flags that share the struct mempolicy flags word with
@@ -59,6 +66,8 @@ enum mpol_rebind_step {
#define MPOL_F_SHARED (1 << 0) /* identify shared policies */
#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */
#define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */
+#define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */
+#define MPOL_F_MORON (1 << 4) /* Migrate On pte_numa Reference On Node */
#endif /* _UAPI_LINUX_MEMPOLICY_H */