kswapd: avoid unnecessary rebalance after an unsuccessful balancing

commit d2ebd0f6b89567eb93ead4e2ca0cbe03021f344b upstream. Stable note: Fixes https://bugzilla.redhat.com/show_bug.cgi?id=712019. This patch reduces kswapd CPU usage. In commit 215ddd66 ("mm: vmscan: only read new_classzone_idx from pgdat when reclaiming successfully") , Mel Gorman said kswapd is better to sleep after a unsuccessful balancing if there is tighter reclaim request pending in the balancing. But in the following scenario, kswapd do something that is not matched our expectation. The patch fixes this issue. 1, Read pgdat request A (classzone_idx, order = 3) 2, balance_pgdat() 3, During pgdat, a new pgdat request B (classzone_idx, order = 5) is placed 4, balance_pgdat() returns but failed since returned order = 0 5, pgdat of request A assigned to balance_pgdat(), and do balancing again. While the expectation behavior of kswapd should try to sleep. Signed-off-by: Alex Shi <alex.shi@intel.com> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com> Acked-by: Mel Gorman <mgorman@suse.de> Tested-by: Pádraig Brady <P@draigBrady.com> Cc: Rik van Riel <riel@redhat.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Mel Gorman <mgorman@suse.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Alex Shi <alex.shi@intel.com> 2011-10-31 17:08:39 -0700
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2012-08-01 12:27:17 -0700
commit: 5d62e5ca429b85ecadaa5042bdb1d8b88d4bfe80 (patch)
tree: a16a226a0b108d0f0a069c38d03e424ee3debc5f /mm/vmscan.c
parent: a7e32d7a2a801b7838b4159e9d73ea86f68ae002 (diff)
1 files changed, 11 insertions, 3 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 45c40d66b74..5cc0f92eda3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2844,7 +2844,9 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 static int kswapd(void *p)
 {
 	unsigned long order, new_order;
+	unsigned balanced_order;
 	int classzone_idx, new_classzone_idx;
+	int balanced_classzone_idx;
 	pg_data_t *pgdat = (pg_data_t*)p;
 	struct task_struct *tsk = current;
 
@@ -2875,7 +2877,9 @@ static int kswapd(void *p)
 	set_freezable();
 
 	order = new_order = 0;
+	balanced_order = 0;
 	classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
+	balanced_classzone_idx = classzone_idx;
 	for ( ; ; ) {
 		int ret;
 
@@ -2884,7 +2888,8 @@ static int kswapd(void *p)
 		 * new request of a similar or harder type will succeed soon
 		 * so consider going to sleep on the basis we reclaimed at
 		 */
-		if (classzone_idx >= new_classzone_idx && order == new_order) {
+		if (balanced_classzone_idx >= new_classzone_idx &&
+					balanced_order == new_order) {
 			new_order = pgdat->kswapd_max_order;
 			new_classzone_idx = pgdat->classzone_idx;
 			pgdat->kswapd_max_order =  0;
@@ -2899,7 +2904,8 @@ static int kswapd(void *p)
 			order = new_order;
 			classzone_idx = new_classzone_idx;
 		} else {
-			kswapd_try_to_sleep(pgdat, order, classzone_idx);
+			kswapd_try_to_sleep(pgdat, balanced_order,
+						balanced_classzone_idx);
 			order = pgdat->kswapd_max_order;
 			classzone_idx = pgdat->classzone_idx;
 			pgdat->kswapd_max_order = 0;
@@ -2916,7 +2922,9 @@ static int kswapd(void *p)
 		 */
 		if (!ret) {
 			trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
-			order = balance_pgdat(pgdat, order, &classzone_idx);
+			balanced_classzone_idx = classzone_idx;
+			balanced_order = balance_pgdat(pgdat, order,
+						&balanced_classzone_idx);
 		}
 	}
 	return 0;
author	Alex Shi <alex.shi@intel.com>	2011-10-31 17:08:39 -0700
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2012-08-01 12:27:17 -0700
commit	5d62e5ca429b85ecadaa5042bdb1d8b88d4bfe80 (patch)
tree	a16a226a0b108d0f0a069c38d03e424ee3debc5f /mm/vmscan.c
parent	a7e32d7a2a801b7838b4159e9d73ea86f68ae002 (diff)