From 588f9ce6ca61ecb4663ee6ef2f75d2d96c73151e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Dec 2009 12:19:57 +0100 Subject: HWPOISON: Be more aggressive at freeing non LRU caches shake_page handles more types of page caches than lru_drain_all() - per cpu page allocator pages - per CPU LRU Stops early when the page became free. Used in followon patches. Signed-off-by: Andi Kleen --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9d65ae4ba0e..68c84bb2ad3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1335,6 +1335,7 @@ extern void memory_failure(unsigned long pfn, int trapno); extern int __memory_failure(unsigned long pfn, int trapno, int ref); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; +extern void shake_page(struct page *p); extern atomic_long_t mce_bad_pages; #endif /* __KERNEL__ */ -- cgit v1.2.3-18-g5258 From 82ba011b9041dd31c15e4f63797b08aa0a288e61 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Dec 2009 12:19:57 +0100 Subject: HWPOISON: Turn ref argument into flags argument Now that "ref" is just a boolean turn it into a flags argument. First step is only a single flag that makes the code's intention more clear, but more may follow. 
Signed-off-by: Andi Kleen --- include/linux/mm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 68c84bb2ad3..135e19198cd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1331,8 +1331,11 @@ extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, size_t size); extern void refund_locked_memory(struct mm_struct *mm, size_t size); +enum mf_flags { + MF_COUNT_INCREASED = 1 << 0, +}; extern void memory_failure(unsigned long pfn, int trapno); -extern int __memory_failure(unsigned long pfn, int trapno, int ref); +extern int __memory_failure(unsigned long pfn, int trapno, int flags); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p); -- cgit v1.2.3-18-g5258 From 847ce401df392b0704369fd3f75df614ac1414b4 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 16 Dec 2009 12:19:58 +0100 Subject: HWPOISON: Add unpoisoning support The unpoisoning interface is useful for stress testing tools to reclaim poisoned pages (to prevent OOM). There is no hardware level unpoisoning, so this cannot be used for real memory errors, only for software injected errors. Note that it may leak pages silently - those that have been removed from LRU cache, but not isolated from page cache/swap cache at hwpoison time. In particular, stress tests of dirty swap cache pages should reboot the system before exhausting memory. 
AK: Fix comments, add documentation, add printks, rename symbol Signed-off-by: Wu Fengguang Signed-off-by: Andi Kleen --- include/linux/mm.h | 1 + include/linux/page-flags.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 135e19198cd..8cdb941fc7b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1336,6 +1336,7 @@ enum mf_flags { }; extern void memory_failure(unsigned long pfn, int trapno); extern int __memory_failure(unsigned long pfn, int trapno, int flags); +extern int unpoison_memory(unsigned long pfn); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 49e907bd067..f9df6308af9 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -275,7 +275,7 @@ PAGEFLAG_FALSE(Uncached) #ifdef CONFIG_MEMORY_FAILURE PAGEFLAG(HWPoison, hwpoison) -TESTSETFLAG(HWPoison, hwpoison) +TESTSCFLAG(HWPoison, hwpoison) #define __PG_HWPOISON (1UL << PG_hwpoison) #else PAGEFLAG_FALSE(HWPoison) -- cgit v1.2.3-18-g5258 From 1a9b5b7fe0c5dad8a635288882d36785dea742f9 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 16 Dec 2009 12:19:59 +0100 Subject: mm: export stable page flags Rename get_uflags() to stable_page_flags() and make it a global function for use in the hwpoison page flags filter, which need to compare user page flags with the value provided by user space. Also move KPF_* to kernel-page-flags.h for use by user space tools. 
Acked-by: Matt Mackall Signed-off-by: Andi Kleen CC: Nick Piggin CC: Christoph Lameter Signed-off-by: Wu Fengguang Signed-off-by: Andi Kleen --- include/linux/kernel-page-flags.h | 46 +++++++++++++++++++++++++++++++++++++++ include/linux/page-flags.h | 2 ++ 2 files changed, 48 insertions(+) create mode 100644 include/linux/kernel-page-flags.h (limited to 'include') diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h new file mode 100644 index 00000000000..bd92a89f4b0 --- /dev/null +++ b/include/linux/kernel-page-flags.h @@ -0,0 +1,46 @@ +#ifndef LINUX_KERNEL_PAGE_FLAGS_H +#define LINUX_KERNEL_PAGE_FLAGS_H + +/* + * Stable page flag bits exported to user space + */ + +#define KPF_LOCKED 0 +#define KPF_ERROR 1 +#define KPF_REFERENCED 2 +#define KPF_UPTODATE 3 +#define KPF_DIRTY 4 +#define KPF_LRU 5 +#define KPF_ACTIVE 6 +#define KPF_SLAB 7 +#define KPF_WRITEBACK 8 +#define KPF_RECLAIM 9 +#define KPF_BUDDY 10 + +/* 11-20: new additions in 2.6.31 */ +#define KPF_MMAP 11 +#define KPF_ANON 12 +#define KPF_SWAPCACHE 13 +#define KPF_SWAPBACKED 14 +#define KPF_COMPOUND_HEAD 15 +#define KPF_COMPOUND_TAIL 16 +#define KPF_HUGE 17 +#define KPF_UNEVICTABLE 18 +#define KPF_HWPOISON 19 +#define KPF_NOPAGE 20 + +#define KPF_KSM 21 + +/* kernel hacking assistances + * WARNING: subject to change, never rely on them! 
+ */ +#define KPF_RESERVED 32 +#define KPF_MLOCKED 33 +#define KPF_MAPPEDTODISK 34 +#define KPF_PRIVATE 35 +#define KPF_PRIVATE_2 36 +#define KPF_OWNER_PRIVATE 37 +#define KPF_ARCH 38 +#define KPF_UNCACHED 39 + +#endif /* LINUX_KERNEL_PAGE_FLAGS_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f9df6308af9..feee2ba8d06 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -282,6 +282,8 @@ PAGEFLAG_FALSE(HWPoison) #define __PG_HWPOISON 0 #endif +u64 stable_page_flags(struct page *page); + static inline int PageUptodate(struct page *page) { int ret = test_bit(PG_uptodate, &(page)->flags); -- cgit v1.2.3-18-g5258 From e42d9d5d47961fb5db0be65b56dd52fe7b2421f1 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 16 Dec 2009 12:19:59 +0100 Subject: memcg: rename and export try_get_mem_cgroup_from_page() So that the hwpoison injector can get mem_cgroup for arbitrary page and thus know whether it is owned by some mem_cgroup task(s). [AK: Merged with latest git tree] CC: KOSAKI Motohiro CC: Hugh Dickins CC: Daisuke Nishimura CC: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Wu Fengguang Signed-off-by: Andi Kleen --- include/linux/memcontrol.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bf9213b2db8..fc9bae82ac4 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -68,6 +68,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); +extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); static inline @@ -189,6 +190,11 @@ mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to) { } +static inline struct 
mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) +{ + return NULL; +} + static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) { return 1; -- cgit v1.2.3-18-g5258 From d324236b3333e87c8825b35f2104184734020d35 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 16 Dec 2009 12:19:59 +0100 Subject: memcg: add accessor to mem_cgroup.css So that an outside user can free the reference count grabbed by try_get_mem_cgroup_from_page(). CC: KOSAKI Motohiro CC: Hugh Dickins CC: Daisuke Nishimura CC: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Wu Fengguang Signed-off-by: Andi Kleen --- include/linux/memcontrol.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fc9bae82ac4..2c30a1116d8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -81,6 +81,8 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) return cgroup == mem; } +extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem); + extern int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr); extern void mem_cgroup_end_migration(struct mem_cgroup *mem, @@ -206,6 +208,11 @@ static inline int task_in_mem_cgroup(struct task_struct *task, return 1; } +static inline struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem) +{ + return NULL; +} + static inline int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) { -- cgit v1.2.3-18-g5258 From facb6011f3993947283fa15d039dacb4ad140230 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Dec 2009 12:20:00 +0100 Subject: HWPOISON: Add soft page offline support This is a simpler, gentler variant of memory_failure() for soft page offlining controlled from user space. It doesn't kill anything, just tries to invalidate and if that doesn't work migrate the page away. 
This is useful for predictive failure analysis, where a page has a high rate of corrected errors, but hasn't gone bad yet. Instead it can be offlined early and avoided. The offlining is controlled from sysfs, including a new generic entry point for hard page offlining for symmetry too. We use the page isolate facility to prevent re-allocation races. Normally this is only used by memory hotplug. To avoid races with memory allocation I am using lock_system_sleep(). This avoids the situation where memory hotplug is about to isolate a page range and then hwpoison undoes that work. This is a big hammer, but it is the simplest solution currently. When the page is not free or LRU we try to free pages from slab and other caches. The slab freeing is currently quite dumb and does not try to focus on the specific slab cache which might own the page. This could potentially be improved later. Thanks to Fengguang Wu and Haicheng Li for some fixes. [Added fix from Andrew Morton to adapt to new migrate_pages prototype] Signed-off-by: Andi Kleen --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8cdb941fc7b..849b4a61bd8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1339,8 +1339,9 @@ extern int __memory_failure(unsigned long pfn, int trapno, int flags); extern int unpoison_memory(unsigned long pfn); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; -extern void shake_page(struct page *p); +extern void shake_page(struct page *p, int access); extern atomic_long_t mce_bad_pages; +extern int soft_offline_page(struct page *page, int flags); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ -- cgit v1.2.3-18-g5258 From afcf938ee0aac4ef95b1a23bac704c6fbeb26de6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 16 Dec 2009 12:20:00 +0100 Subject: HWPOISON: Add a madvise() injector for soft page offlining Process based injection 
is much easier to handle for test programs, which can first bring a page into a specific state and then test it. So add a new MADV_SOFT_OFFLINE to soft offline a page, similar to the existing hard offline injector. Signed-off-by: Andi Kleen --- include/asm-generic/mman-common.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-generic/mman-common.h b/include/asm-generic/mman-common.h index 20111265afd..3da9e2742fa 100644 --- a/include/asm-generic/mman-common.h +++ b/include/asm-generic/mman-common.h @@ -40,6 +40,7 @@ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ #define MADV_HWPOISON 100 /* poison a page for testing */ +#define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */ #define MADV_MERGEABLE 12 /* KSM may merge identical pages */ #define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ -- cgit v1.2.3-18-g5258