From e709ffd6169ccd259eb5874e853303e91e94e829 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 29 May 2012 15:06:18 -0700 Subject: mm: remove swap token code The swap token code no longer fits in with the current VM model. It does not play well with cgroups or the better NUMA placement code in development, since we have only one swap token globally. It also has the potential to mess with scalability of the system, by increasing the number of non-reclaimable pages on the active and inactive anon LRU lists. Last but not least, the swap token code has been broken for a year without complaints, as reported by Konstantin Khlebnikov. This suggests we no longer have much use for it. The days of sub-1G memory systems with heavy use of swap are over. If we ever need thrashing reducing code in the future, we will have to implement something that does scale. Signed-off-by: Rik van Riel Cc: Konstantin Khlebnikov Acked-by: Johannes Weiner Cc: Mel Gorman Cc: Hugh Dickins Acked-by: Bob Picco Acked-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 33dc256033b..ca46080bb07 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2352,8 +2352,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, for (priority = DEF_PRIORITY; priority >= 0; priority--) { sc->nr_scanned = 0; - if (!priority) - disable_swap_token(sc->target_mem_cgroup); aborted_reclaim = shrink_zones(priority, zonelist, sc); /* @@ -2704,10 +2702,6 @@ loop_again: unsigned long lru_pages = 0; int has_under_min_watermark_zone = 0; - /* The swap token gets in the way of swapout... */ - if (!priority) - disable_swap_token(NULL); - all_zones_ok = 1; balanced = 0; -- cgit v1.2.3-70-g09d2 From c53919adc045bf803252e912f23028a68525753d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 29 May 2012 15:06:19 -0700 Subject: mm: vmscan: remove lumpy reclaim This series removes lumpy reclaim and some stalling logic that was unintentionally being used by memory compaction. The end result is that stalling on dirty pages during page reclaim now depends on wait_iff_congested(). Four kernels were compared 3.3.0 vanilla 3.4.0-rc2 vanilla 3.4.0-rc2 lumpyremove-v2 is patch one from this series 3.4.0-rc2 nosync-v2r3 is the full series Removing lumpy reclaim saves almost 900 bytes of text whereas the full series removes 1200 bytes. text data bss dec hex filename 6740375 1927944 2260992 10929311 a6c49f vmlinux-3.4.0-rc2-vanilla 6739479 1927944 2260992 10928415 a6c11f vmlinux-3.4.0-rc2-lumpyremove-v2 6739159 1927944 2260992 10928095 a6bfdf vmlinux-3.4.0-rc2-nosync-v2 There are behaviour changes in the series and so tests were run with monitoring of ftrace events. This disrupts results so the performance results are distorted but the new behaviour should be clearer. fs-mark running in a threaded configuration showed little of interest as it did not push reclaim aggressively FS-Mark Multi Threaded 3.3.0-vanilla rc2-vanilla lumpyremove-v2r3 nosync-v2r3 Files/s min 3.20 ( 0.00%) 3.20 ( 0.00%) 3.20 ( 0.00%) 3.20 ( 0.00%) Files/s mean 3.20 ( 0.00%) 3.20 ( 0.00%) 3.20 ( 0.00%) 3.20 ( 0.00%) Files/s stddev 0.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) 0.00 ( 0.00%) Files/s max 3.20 ( 0.00%) 3.20 ( 0.00%) 3.20 ( 0.00%) 3.20 ( 0.00%) Overhead min 508667.00 ( 0.00%) 521350.00 (-2.49%) 544292.00 (-7.00%) 547168.00 (-7.57%) Overhead mean 551185.00 ( 0.00%) 652690.73 (-18.42%) 991208.40 (-79.83%) 570130.53 (-3.44%) Overhead stddev 18200.69 ( 0.00%) 331958.29 (-1723.88%) 1579579.43 (-8578.68%) 9576.81 (47.38%) Overhead max 576775.00 ( 0.00%) 1846634.00 (-220.17%) 6901055.00 (-1096.49%) 585675.00 (-1.54%) MMTests Statistics: duration Sys Time Running Test (seconds) 309.90 300.95 307.33 298.95 User+Sys Time Running Test (seconds) 319.32 309.67 315.69 307.51 Total Elapsed Time (seconds) 1187.85 1193.09 1191.98 1193.73 MMTests Statistics: vmstat Page Ins 80532 82212 81420 79480 Page Outs 111434984 111456240 111437376 111582628 Swap Ins 0 0 0 0 Swap Outs 0 0 0 0 Direct pages scanned 44881 27889 27453 34843 Kswapd pages scanned 25841428 25860774 25861233 25843212 Kswapd pages reclaimed 25841393 25860741 25861199 25843179 Direct pages reclaimed 44881 27889 27453 34843 Kswapd efficiency 99% 99% 99% 99% Kswapd velocity 21754.791 21675.460 21696.029 21649.127 Direct efficiency 100% 100% 100% 100% Direct velocity 37.783 23.375 23.031 29.188 Percentage direct scans 0% 0% 0% 0% ftrace showed that there was no stalling on writeback or pages submitted for IO from reclaim context. postmark was similar and while it was more interesting, it also did not push reclaim heavily. POSTMARK 3.3.0-vanilla rc2-vanilla lumpyremove-v2r3 nosync-v2r3 Transactions per second: 16.00 ( 0.00%) 20.00 (25.00%) 18.00 (12.50%) 17.00 ( 6.25%) Data megabytes read per second: 18.80 ( 0.00%) 24.27 (29.10%) 22.26 (18.40%) 20.54 ( 9.26%) Data megabytes written per second: 35.83 ( 0.00%) 46.25 (29.08%) 42.42 (18.39%) 39.14 ( 9.24%) Files created alone per second: 28.00 ( 0.00%) 38.00 (35.71%) 34.00 (21.43%) 30.00 ( 7.14%) Files create/transact per second: 8.00 ( 0.00%) 10.00 (25.00%) 9.00 (12.50%) 8.00 ( 0.00%) Files deleted alone per second: 556.00 ( 0.00%) 1224.00 (120.14%) 3062.00 (450.72%) 6124.00 (1001.44%) Files delete/transact per second: 8.00 ( 0.00%) 10.00 (25.00%) 9.00 (12.50%) 8.00 ( 0.00%) MMTests Statistics: duration Sys Time Running Test (seconds) 113.34 107.99 109.73 108.72 User+Sys Time Running Test (seconds) 145.51 139.81 143.32 143.55 Total Elapsed Time (seconds) 1159.16 899.23 980.17 1062.27 MMTests Statistics: vmstat Page Ins 13710192 13729032 13727944 13760136 Page Outs 43071140 42987228 42733684 42931624 Swap Ins 0 0 0 0 Swap Outs 0 0 0 0 Direct pages scanned 0 0 0 0 Kswapd pages scanned 9941613 9937443 9939085 9929154 Kswapd pages reclaimed 9940926 9936751 9938397 9928465 Direct pages reclaimed 0 0 0 0 Kswapd efficiency 99% 99% 99% 99% Kswapd velocity 8576.567 11051.058 10140.164 9347.109 Direct efficiency 100% 100% 100% 100% Direct velocity 0.000 0.000 0.000 0.000 It looks like here that the full series regresses performance but as ftrace showed no usage of wait_iff_congested() or sync reclaim I am assuming it's a disruption due to monitoring. Other data such as memory usage, page IO, swap IO all looked similar. Running a benchmark with a plain DD showed nothing very interesting. The full series stalled in wait_iff_congested() slightly less but stall times on vanilla kernels were marginal. Running a benchmark that hammered on file-backed mappings showed stalls due to congestion but not in sync writebacks MICRO 3.3.0-vanilla rc2-vanilla lumpyremove-v2r3 nosync-v2r3 MMTests Statistics: duration Sys Time Running Test (seconds) 308.13 294.50 298.75 299.53 User+Sys Time Running Test (seconds) 330.45 316.28 318.93 320.79 Total Elapsed Time (seconds) 1814.90 1833.88 1821.14 1832.91 MMTests Statistics: vmstat Page Ins 108712 120708 97224 110344 Page Outs 155514576 156017404 155813676 156193256 Swap Ins 0 0 0 0 Swap Outs 0 0 0 0 Direct pages scanned 2599253 1550480 2512822 2414760 Kswapd pages scanned 69742364 71150694 68839041 69692533 Kswapd pages reclaimed 34824488 34773341 34796602 34799396 Direct pages reclaimed 53693 94750 61792 75205 Kswapd efficiency 49% 48% 50% 49% Kswapd velocity 38427.662 38797.901 37799.972 38022.889 Direct efficiency 2% 6% 2% 3% Direct velocity 1432.174 845.464 1379.807 1317.446 Percentage direct scans 3% 2% 3% 3% Page writes by reclaim 0 0 0 0 Page writes file 0 0 0 0 Page writes anon 0 0 0 0 Page reclaim immediate 0 0 0 1218 Page rescued immediate 0 0 0 0 Slabs scanned 15360 16384 13312 16384 Direct inode steals 0 0 0 0 Kswapd inode steals 4340 4327 1630 4323 FTrace Reclaim Statistics: congestion_wait Direct number congest waited 0 0 0 0 Direct time congest waited 0ms 0ms 0ms 0ms Direct full congest waited 0 0 0 0 Direct number conditional waited 900 870 754 789 Direct time conditional waited 0ms 0ms 0ms 20ms Direct full conditional waited 0 0 0 0 KSwapd number congest waited 2106 2308 2116 1915 KSwapd time congest waited 139924ms 157832ms 125652ms 132516ms KSwapd full congest waited 1346 1530 1202 1278 KSwapd number conditional waited 12922 16320 10943 14670 KSwapd time conditional waited 0ms 0ms 0ms 0ms KSwapd full conditional waited 0 0 0 0 Reclaim statistics are not radically changed. The stall times in kswapd are massive but it is clear that it is due to calls to congestion_wait() and that is almost certainly the call in balance_pgdat(). Otherwise stalls due to dirty pages are non-existant. I ran a benchmark that stressed high-order allocation. This is very artifical load but was used in the past to evaluate lumpy reclaim and compaction. Generally I look at allocation success rates and latency figures. STRESS-HIGHALLOC 3.3.0-vanilla rc2-vanilla lumpyremove-v2r3 nosync-v2r3 Pass 1 81.00 ( 0.00%) 28.00 (-53.00%) 24.00 (-57.00%) 28.00 (-53.00%) Pass 2 82.00 ( 0.00%) 39.00 (-43.00%) 38.00 (-44.00%) 43.00 (-39.00%) while Rested 88.00 ( 0.00%) 87.00 (-1.00%) 88.00 ( 0.00%) 88.00 ( 0.00%) MMTests Statistics: duration Sys Time Running Test (seconds) 740.93 681.42 685.14 684.87 User+Sys Time Running Test (seconds) 2922.65 3269.52 3281.35 3279.44 Total Elapsed Time (seconds) 1161.73 1152.49 1159.55 1161.44 MMTests Statistics: vmstat Page Ins 4486020 2807256 2855944 2876244 Page Outs 7261600 7973688 7975320 7986120 Swap Ins 31694 0 0 0 Swap Outs 98179 0 0 0 Direct pages scanned 53494 57731 34406 113015 Kswapd pages scanned 6271173 1287481 1278174 1219095 Kswapd pages reclaimed 2029240 1281025 1260708 1201583 Direct pages reclaimed 1468 14564 16649 92456 Kswapd efficiency 32% 99% 98% 98% Kswapd velocity 5398.133 1117.130 1102.302 1049.641 Direct efficiency 2% 25% 48% 81% Direct velocity 46.047 50.092 29.672 97.306 Percentage direct scans 0% 4% 2% 8% Page writes by reclaim 1616049 0 0 0 Page writes file 1517870 0 0 0 Page writes anon 98179 0 0 0 Page reclaim immediate 103778 27339 9796 17831 Page rescued immediate 0 0 0 0 Slabs scanned 1096704 986112 980992 998400 Direct inode steals 223 215040 216736 247881 Kswapd inode steals 175331 61548 68444 63066 Kswapd skipped wait 21991 0 1 0 THP fault alloc 1 135 125 134 THP collapse alloc 393 311 228 236 THP splits 25 13 7 8 THP fault fallback 0 0 0 0 THP collapse fail 3 5 7 7 Compaction stalls 865 1270 1422 1518 Compaction success 370 401 353 383 Compaction failures 495 869 1069 1135 Compaction pages moved 870155 3828868 4036106 4423626 Compaction move failure 26429 23865 29742 27514 Success rates are completely hosed for 3.4-rc2 which is almost certainly due to commit fe2c2a106663 ("vmscan: reclaim at order 0 when compaction is enabled"). I expected this would happen for kswapd and impair allocation success rates (https://lkml.org/lkml/2012/1/25/166) but I did not anticipate this much a difference: 80% less scanning, 37% less reclaim by kswapd In comparison, reclaim/compaction is not aggressive and gives up easily which is the intended behaviour. hugetlbfs uses __GFP_REPEAT and would be much more aggressive about reclaim/compaction than THP allocations are. The stress test above is allocating like neither THP or hugetlbfs but is much closer to THP. Mainline is now impaired in terms of high order allocation under heavy load although I do not know to what degree as I did not test with __GFP_REPEAT. Keep this in mind for bugs related to hugepage pool resizing, THP allocation and high order atomic allocation failures from network devices. In terms of congestion throttling, I see the following for this test FTrace Reclaim Statistics: congestion_wait Direct number congest waited 3 0 0 0 Direct time congest waited 0ms 0ms 0ms 0ms Direct full congest waited 0 0 0 0 Direct number conditional waited 957 512 1081 1075 Direct time conditional waited 0ms 0ms 0ms 0ms Direct full conditional waited 0 0 0 0 KSwapd number congest waited 36 4 3 5 KSwapd time congest waited 3148ms 400ms 300ms 500ms KSwapd full congest waited 30 4 3 5 KSwapd number conditional waited 88514 197 332 542 KSwapd time conditional waited 4980ms 0ms 0ms 0ms KSwapd full conditional waited 49 0 0 0 The "conditional waited" times are the most interesting as this is directly impacted by the number of dirty pages encountered during scan. As lumpy reclaim is no longer scanning contiguous ranges, it is finding fewer dirty pages. This brings wait times from about 5 seconds to 0. kswapd itself is still calling congestion_wait() so it'll still stall but it's a lot less. In terms of the type of IO we were doing, I see this FTrace Reclaim Statistics: mm_vmscan_writepage Direct writes anon sync 0 0 0 0 Direct writes anon async 0 0 0 0 Direct writes file sync 0 0 0 0 Direct writes file async 0 0 0 0 Direct writes mixed sync 0 0 0 0 Direct writes mixed async 0 0 0 0 KSwapd writes anon sync 0 0 0 0 KSwapd writes anon async 91682 0 0 0 KSwapd writes file sync 0 0 0 0 KSwapd writes file async 822629 0 0 0 KSwapd writes mixed sync 0 0 0 0 KSwapd writes mixed async 0 0 0 0 In 3.2, kswapd was doing a bunch of async writes of pages but reclaim/compaction was never reaching a point where it was doing sync IO. This does not guarantee that reclaim/compaction was not calling wait_on_page_writeback() but I would consider it unlikely. It indicates that merging patches 2 and 3 to stop reclaim/compaction calling wait_on_page_writeback() should be safe. This patch: Lumpy reclaim had a purpose but in the mind of some, it was to kick the system so hard it trashed. For others the purpose was to complicate vmscan.c. Over time it was giving softer shoes and a nicer attitude but memory compaction needs to step up and replace it so this patch sends lumpy reclaim to the farm. The tracepoint format changes for isolating LRU pages with this patch applied. Furthermore reclaim/compaction can no longer queue dirty pages in pageout() if the underlying BDI is congested. Lumpy reclaim used this logic and reclaim/compaction was using it in error. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Acked-by: KOSAKI Motohiro Cc: Konstantin Khlebnikov Cc: Hugh Dickins Cc: Ying Han Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/vmscan.h | 26 ++------ mm/vmscan.c | 144 +++++------------------------------------- 2 files changed, 19 insertions(+), 151 deletions(-) (limited to 'mm/vmscan.c') diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 572195459d5..bdaf32f8a87 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -263,22 +263,16 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, unsigned long nr_requested, unsigned long nr_scanned, unsigned long nr_taken, - unsigned long nr_lumpy_taken, - unsigned long nr_lumpy_dirty, - unsigned long nr_lumpy_failed, isolate_mode_t isolate_mode, int file), - TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file), + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file), TP_STRUCT__entry( __field(int, order) __field(unsigned long, nr_requested) __field(unsigned long, nr_scanned) __field(unsigned long, nr_taken) - __field(unsigned long, nr_lumpy_taken) - __field(unsigned long, nr_lumpy_dirty) - __field(unsigned long, nr_lumpy_failed) __field(isolate_mode_t, isolate_mode) __field(int, file) ), @@ -288,22 +282,16 @@ DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, __entry->nr_requested = nr_requested; __entry->nr_scanned = nr_scanned; __entry->nr_taken = nr_taken; - __entry->nr_lumpy_taken = nr_lumpy_taken; - __entry->nr_lumpy_dirty = nr_lumpy_dirty; - __entry->nr_lumpy_failed = nr_lumpy_failed; __entry->isolate_mode = isolate_mode; __entry->file = file; ), - TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu contig_taken=%lu contig_dirty=%lu contig_failed=%lu file=%d", + TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu file=%d", __entry->isolate_mode, __entry->order, __entry->nr_requested, __entry->nr_scanned, __entry->nr_taken, - __entry->nr_lumpy_taken, - __entry->nr_lumpy_dirty, - __entry->nr_lumpy_failed, __entry->file) ); @@ -313,13 +301,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate, unsigned long nr_requested, unsigned long nr_scanned, unsigned long nr_taken, - unsigned long nr_lumpy_taken, - unsigned long nr_lumpy_dirty, - unsigned long nr_lumpy_failed, isolate_mode_t isolate_mode, int file), - TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file) + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file) ); @@ -329,13 +314,10 @@ DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_memcg_isolate, unsigned long nr_requested, unsigned long nr_scanned, unsigned long nr_taken, - unsigned long nr_lumpy_taken, - unsigned long nr_lumpy_dirty, - unsigned long nr_lumpy_failed, isolate_mode_t isolate_mode, int file), - TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file) + TP_ARGS(order, nr_requested, nr_scanned, nr_taken, isolate_mode, file) ); diff --git a/mm/vmscan.c b/mm/vmscan.c index ca46080bb07..546d02ce90e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -58,9 +58,6 @@ * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages * RECLAIM_MODE_ASYNC: Do not block * RECLAIM_MODE_SYNC: Allow blocking e.g. call wait_on_page_writeback - * RECLAIM_MODE_LUMPYRECLAIM: For high-order allocations, take a reference - * page from the LRU and reclaim all pages within a - * naturally aligned range * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of * order-0 pages and then compact the zone */ @@ -68,7 +65,6 @@ typedef unsigned __bitwise__ reclaim_mode_t; #define RECLAIM_MODE_SINGLE ((__force reclaim_mode_t)0x01u) #define RECLAIM_MODE_ASYNC ((__force reclaim_mode_t)0x02u) #define RECLAIM_MODE_SYNC ((__force reclaim_mode_t)0x04u) -#define RECLAIM_MODE_LUMPYRECLAIM ((__force reclaim_mode_t)0x08u) #define RECLAIM_MODE_COMPACTION ((__force reclaim_mode_t)0x10u) struct scan_control { @@ -367,27 +363,17 @@ out: static void set_reclaim_mode(int priority, struct scan_control *sc, bool sync) { + /* Sync reclaim used only for compaction */ reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC; /* - * Initially assume we are entering either lumpy reclaim or - * reclaim/compaction.Depending on the order, we will either set the - * sync mode or just reclaim order-0 pages later. - */ - if (COMPACTION_BUILD) - sc->reclaim_mode = RECLAIM_MODE_COMPACTION; - else - sc->reclaim_mode = RECLAIM_MODE_LUMPYRECLAIM; - - /* - * Avoid using lumpy reclaim or reclaim/compaction if possible by - * restricting when its set to either costly allocations or when + * Restrict reclaim/compaction to costly allocations or when * under memory pressure */ - if (sc->order > PAGE_ALLOC_COSTLY_ORDER) - sc->reclaim_mode |= syncmode; - else if (sc->order && priority < DEF_PRIORITY - 2) - sc->reclaim_mode |= syncmode; + if (COMPACTION_BUILD && sc->order && + (sc->order > PAGE_ALLOC_COSTLY_ORDER || + priority < DEF_PRIORITY - 2)) + sc->reclaim_mode = RECLAIM_MODE_COMPACTION | syncmode; else sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC; } @@ -416,10 +402,6 @@ static int may_write_to_queue(struct backing_dev_info *bdi, return 1; if (bdi == current->backing_dev_info) return 1; - - /* lumpy reclaim for hugepage often need a lot of write */ - if (sc->order > PAGE_ALLOC_COSTLY_ORDER) - return 1; return 0; } @@ -710,10 +692,6 @@ static enum page_references page_check_references(struct page *page, referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags); referenced_page = TestClearPageReferenced(page); - /* Lumpy reclaim - ignore references */ - if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM) - return PAGEREF_RECLAIM; - /* * Mlock lost the isolation race with us. Let try_to_unmap() * move the page to the unevictable list. @@ -824,7 +802,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, wait_on_page_writeback(page); else { unlock_page(page); - goto keep_lumpy; + goto keep_reclaim_mode; } } @@ -908,7 +886,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto activate_locked; case PAGE_SUCCESS: if (PageWriteback(page)) - goto keep_lumpy; + goto keep_reclaim_mode; if (PageDirty(page)) goto keep; @@ -1008,7 +986,7 @@ keep_locked: unlock_page(page); keep: reset_reclaim_mode(sc); -keep_lumpy: +keep_reclaim_mode: list_add(&page->lru, &ret_pages); VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); } @@ -1064,11 +1042,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) if (!all_lru_mode && !!page_is_file_cache(page) != file) return ret; - /* - * When this function is being called for lumpy reclaim, we - * initially look into all LRU pages, active, inactive and - * unevictable; only give shrink_page_list evictable pages. - */ + /* Do not give back unevictable pages for compaction */ if (PageUnevictable(page)) return ret; @@ -1153,9 +1127,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, struct lruvec *lruvec; struct list_head *src; unsigned long nr_taken = 0; - unsigned long nr_lumpy_taken = 0; - unsigned long nr_lumpy_dirty = 0; - unsigned long nr_lumpy_failed = 0; unsigned long scan; int lru = LRU_BASE; @@ -1168,10 +1139,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { struct page *page; - unsigned long pfn; - unsigned long end_pfn; - unsigned long page_pfn; - int zone_id; page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); @@ -1193,84 +1160,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, default: BUG(); } - - if (!sc->order || !(sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)) - continue; - - /* - * Attempt to take all pages in the order aligned region - * surrounding the tag page. Only take those pages of - * the same active state as that tag page. We may safely - * round the target page pfn down to the requested order - * as the mem_map is guaranteed valid out to MAX_ORDER, - * where that page is in a different zone we will detect - * it from its zone id and abort this block scan. - */ - zone_id = page_zone_id(page); - page_pfn = page_to_pfn(page); - pfn = page_pfn & ~((1 << sc->order) - 1); - end_pfn = pfn + (1 << sc->order); - for (; pfn < end_pfn; pfn++) { - struct page *cursor_page; - - /* The target page is in the block, ignore it. */ - if (unlikely(pfn == page_pfn)) - continue; - - /* Avoid holes within the zone. */ - if (unlikely(!pfn_valid_within(pfn))) - break; - - cursor_page = pfn_to_page(pfn); - - /* Check that we have not crossed a zone boundary. */ - if (unlikely(page_zone_id(cursor_page) != zone_id)) - break; - - /* - * If we don't have enough swap space, reclaiming of - * anon page which don't already have a swap slot is - * pointless. - */ - if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) && - !PageSwapCache(cursor_page)) - break; - - if (__isolate_lru_page(cursor_page, mode, file) == 0) { - unsigned int isolated_pages; - - mem_cgroup_lru_del(cursor_page); - list_move(&cursor_page->lru, dst); - isolated_pages = hpage_nr_pages(cursor_page); - nr_taken += isolated_pages; - nr_lumpy_taken += isolated_pages; - if (PageDirty(cursor_page)) - nr_lumpy_dirty += isolated_pages; - scan++; - pfn += isolated_pages - 1; - } else { - /* - * Check if the page is freed already. - * - * We can't use page_count() as that - * requires compound_head and we don't - * have a pin on the page here. If a - * page is tail, we may or may not - * have isolated the head, so assume - * it's not free, it'd be tricky to - * track the head status without a - * page pin. - */ - if (!PageTail(cursor_page) && - !atomic_read(&cursor_page->_count)) - continue; - break; - } - } - - /* If we break out of the loop above, lumpy reclaim failed */ - if (pfn < end_pfn) - nr_lumpy_failed++; } *nr_scanned = scan; @@ -1278,7 +1167,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan, nr_taken, - nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, mode, file); return nr_taken; } @@ -1466,13 +1354,13 @@ static inline bool should_reclaim_stall(unsigned long nr_taken, int priority, struct scan_control *sc) { - int lumpy_stall_priority; + int stall_priority; /* kswapd should not stall on sync IO */ if (current_is_kswapd()) return false; - /* Only stall on lumpy reclaim */ + /* Only stall for memory compaction */ if (sc->reclaim_mode & RECLAIM_MODE_SINGLE) return false; @@ -1487,11 +1375,11 @@ static inline bool should_reclaim_stall(unsigned long nr_taken, * priority to be much higher before stalling. */ if (sc->order > PAGE_ALLOC_COSTLY_ORDER) - lumpy_stall_priority = DEF_PRIORITY; + stall_priority = DEF_PRIORITY; else - lumpy_stall_priority = DEF_PRIORITY / 3; + stall_priority = DEF_PRIORITY / 3; - return priority <= lumpy_stall_priority; + return priority <= stall_priority; } /* @@ -1523,8 +1411,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, } set_reclaim_mode(priority, sc, false); - if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM) - isolate_mode |= ISOLATE_ACTIVE; lru_add_drain(); -- cgit v1.2.3-70-g09d2 From 41ac1999c3e3563e1810b14878a869c79c9368bb Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 29 May 2012 15:06:19 -0700 Subject: mm: vmscan: do not stall on writeback during memory compaction This patch stops reclaim/compaction entering sync reclaim as this was only intended for lumpy reclaim and an oversight. Page migration has its own logic for stalling on writeback pages if necessary and memory compaction is already using it. Waiting on page writeback is bad for a number of reasons but the primary one is that waiting on writeback to a slow device like USB can take a considerable length of time. Page reclaim instead uses wait_iff_congested() to throttle if too many dirty pages are being scanned. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Acked-by: KOSAKI Motohiro Cc: Konstantin Khlebnikov Cc: Hugh Dickins Cc: Ying Han Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/vmscan.h | 12 +++--- mm/vmscan.c | 85 ++++--------------------------------------- 2 files changed, 14 insertions(+), 83 deletions(-) (limited to 'mm/vmscan.c') diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index bdaf32f8a87..82f693395ac 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -13,7 +13,7 @@ #define RECLAIM_WB_ANON 0x0001u #define RECLAIM_WB_FILE 0x0002u #define RECLAIM_WB_MIXED 0x0010u -#define RECLAIM_WB_SYNC 0x0004u +#define RECLAIM_WB_SYNC 0x0004u /* Unused, all reclaim async */ #define RECLAIM_WB_ASYNC 0x0008u #define show_reclaim_flags(flags) \ @@ -27,13 +27,13 @@ #define trace_reclaim_flags(page, sync) ( \ (page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ - (sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ + (RECLAIM_WB_ASYNC) \ ) -#define trace_shrink_flags(file, sync) ( \ - (sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_MIXED : \ - (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON)) | \ - (sync & RECLAIM_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ +#define trace_shrink_flags(file, sync) \ + ( \ + (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ + (RECLAIM_WB_ASYNC) \ ) TRACE_EVENT(mm_vmscan_kswapd_sleep, diff --git a/mm/vmscan.c b/mm/vmscan.c index 546d02ce90e..e27f27d4cc1 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -56,15 +56,11 @@ /* * reclaim_mode determines how the inactive list is shrunk * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages - * RECLAIM_MODE_ASYNC: Do not block - * RECLAIM_MODE_SYNC: Allow blocking e.g. call wait_on_page_writeback * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of * order-0 pages and then compact the zone */ typedef unsigned __bitwise__ reclaim_mode_t; #define RECLAIM_MODE_SINGLE ((__force reclaim_mode_t)0x01u) -#define RECLAIM_MODE_ASYNC ((__force reclaim_mode_t)0x02u) -#define RECLAIM_MODE_SYNC ((__force reclaim_mode_t)0x04u) #define RECLAIM_MODE_COMPACTION ((__force reclaim_mode_t)0x10u) struct scan_control { @@ -360,12 +356,8 @@ out: return ret; } -static void set_reclaim_mode(int priority, struct scan_control *sc, - bool sync) +static void set_reclaim_mode(int priority, struct scan_control *sc) { - /* Sync reclaim used only for compaction */ - reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC; - /* * Restrict reclaim/compaction to costly allocations or when * under memory pressure @@ -373,14 +365,14 @@ static void set_reclaim_mode(int priority, struct scan_control *sc, if (COMPACTION_BUILD && sc->order && (sc->order > PAGE_ALLOC_COSTLY_ORDER || priority < DEF_PRIORITY - 2)) - sc->reclaim_mode = RECLAIM_MODE_COMPACTION | syncmode; + sc->reclaim_mode = RECLAIM_MODE_COMPACTION; else - sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC; + sc->reclaim_mode = RECLAIM_MODE_SINGLE; } static void reset_reclaim_mode(struct scan_control *sc) { - sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC; + sc->reclaim_mode = RECLAIM_MODE_SINGLE; } static inline int is_page_cache_freeable(struct page *page) @@ -791,19 +783,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (PageWriteback(page)) { nr_writeback++; - /* - * Synchronous reclaim cannot queue pages for - * writeback due to the possibility of stack overflow - * but if it encounters a page under writeback, wait - * for the IO to complete. - */ - if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) && - may_enter_fs) - wait_on_page_writeback(page); - else { - unlock_page(page); - goto keep_reclaim_mode; - } + unlock_page(page); + goto keep; } references = page_check_references(page, mz, sc); @@ -886,7 +867,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto activate_locked; case PAGE_SUCCESS: if (PageWriteback(page)) - goto keep_reclaim_mode; + goto keep; if (PageDirty(page)) goto keep; @@ -985,8 +966,6 @@ activate_locked: keep_locked: unlock_page(page); keep: - reset_reclaim_mode(sc); -keep_reclaim_mode: list_add(&page->lru, &ret_pages); VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); } @@ -1341,47 +1320,6 @@ update_isolated_counts(struct mem_cgroup_zone *mz, preempt_enable(); } -/* - * Returns true if a direct reclaim should wait on pages under writeback. - * - * If we are direct reclaiming for contiguous pages and we do not reclaim - * everything in the list, try again and wait for writeback IO to complete. - * This will stall high-order allocations noticeably. Only do that when really - * need to free the pages under high memory pressure. - */ -static inline bool should_reclaim_stall(unsigned long nr_taken, - unsigned long nr_freed, - int priority, - struct scan_control *sc) -{ - int stall_priority; - - /* kswapd should not stall on sync IO */ - if (current_is_kswapd()) - return false; - - /* Only stall for memory compaction */ - if (sc->reclaim_mode & RECLAIM_MODE_SINGLE) - return false; - - /* If we have reclaimed everything on the isolated list, no stall */ - if (nr_freed == nr_taken) - return false; - - /* - * For high-order allocations, there are two stall thresholds. - * High-cost allocations stall immediately where as lower - * order allocations such as stacks require the scanning - * priority to be much higher before stalling. - */ - if (sc->order > PAGE_ALLOC_COSTLY_ORDER) - stall_priority = DEF_PRIORITY; - else - stall_priority = DEF_PRIORITY / 3; - - return priority <= stall_priority; -} - /* * shrink_inactive_list() is a helper for shrink_zone(). It returns the number * of reclaimed pages @@ -1410,7 +1348,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, return SWAP_CLUSTER_MAX; } - set_reclaim_mode(priority, sc, false); + set_reclaim_mode(priority, sc); lru_add_drain(); @@ -1442,13 +1380,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority, &nr_dirty, &nr_writeback); - /* Check if we should syncronously wait for writeback */ - if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) { - set_reclaim_mode(priority, sc, true); - nr_reclaimed += shrink_page_list(&page_list, mz, sc, - priority, &nr_dirty, &nr_writeback); - } - spin_lock_irq(&zone->lru_lock); reclaim_stat->recent_scanned[0] += nr_anon; -- cgit v1.2.3-70-g09d2 From 23b9da55c5b0feb484bd5e8615f4eb1ce4169453 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 29 May 2012 15:06:20 -0700 Subject: mm: vmscan: remove reclaim_mode_t There is little motiviation for reclaim_mode_t once RECLAIM_MODE_[A]SYNC and lumpy reclaim have been removed. This patch gets rid of reclaim_mode_t as well and improves the documentation about what reclaim/compaction is and when it is triggered. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Acked-by: KOSAKI Motohiro Cc: Konstantin Khlebnikov Cc: Hugh Dickins Cc: Ying Han Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/vmscan.h | 4 +-- mm/vmscan.c | 72 +++++++++++++------------------------------ 2 files changed, 24 insertions(+), 52 deletions(-) (limited to 'mm/vmscan.c') diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 82f693395ac..bab3b87e406 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -25,12 +25,12 @@ {RECLAIM_WB_ASYNC, "RECLAIM_WB_ASYNC"} \ ) : "RECLAIM_WB_NONE" -#define trace_reclaim_flags(page, sync) ( \ +#define trace_reclaim_flags(page) ( \ (page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ (RECLAIM_WB_ASYNC) \ ) -#define trace_shrink_flags(file, sync) \ +#define trace_shrink_flags(file) \ ( \ (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ (RECLAIM_WB_ASYNC) \ diff --git a/mm/vmscan.c b/mm/vmscan.c index e27f27d4cc1..68e5819d0f1 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -53,16 +53,6 @@ #define CREATE_TRACE_POINTS #include -/* - * reclaim_mode determines how the inactive list is shrunk - * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages - * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of - * order-0 pages and then compact the zone - */ -typedef unsigned __bitwise__ reclaim_mode_t; -#define RECLAIM_MODE_SINGLE ((__force reclaim_mode_t)0x01u) -#define RECLAIM_MODE_COMPACTION ((__force reclaim_mode_t)0x10u) - struct scan_control { /* Incremented by the number of inactive pages that were scanned */ unsigned long nr_scanned; @@ -88,12 +78,6 @@ struct scan_control { int order; - /* - * Intend to reclaim enough continuous memory rather than reclaim - * enough amount of memory. i.e, mode for high order allocation. - */ - reclaim_mode_t reclaim_mode; - /* * The memory cgroup that hit its limit and as a result is the * primary target of this reclaim invocation. @@ -356,25 +340,6 @@ out: return ret; } -static void set_reclaim_mode(int priority, struct scan_control *sc) -{ - /* - * Restrict reclaim/compaction to costly allocations or when - * under memory pressure - */ - if (COMPACTION_BUILD && sc->order && - (sc->order > PAGE_ALLOC_COSTLY_ORDER || - priority < DEF_PRIORITY - 2)) - sc->reclaim_mode = RECLAIM_MODE_COMPACTION; - else - sc->reclaim_mode = RECLAIM_MODE_SINGLE; -} - -static void reset_reclaim_mode(struct scan_control *sc) -{ - sc->reclaim_mode = RECLAIM_MODE_SINGLE; -} - static inline int is_page_cache_freeable(struct page *page) { /* @@ -497,8 +462,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, /* synchronous write or broken a_ops? */ ClearPageReclaim(page); } - trace_mm_vmscan_writepage(page, - trace_reclaim_flags(page, sc->reclaim_mode)); + trace_mm_vmscan_writepage(page, trace_reclaim_flags(page)); inc_zone_page_state(page, NR_VMSCAN_WRITE); return PAGE_SUCCESS; } @@ -953,7 +917,6 @@ cull_mlocked: try_to_free_swap(page); unlock_page(page); putback_lru_page(page); - reset_reclaim_mode(sc); continue; activate_locked: @@ -1348,8 +1311,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, return SWAP_CLUSTER_MAX; } - set_reclaim_mode(priority, sc); - lru_add_drain(); if (!sc->may_unmap) @@ -1433,7 +1394,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, zone_idx(zone), nr_scanned, nr_reclaimed, priority, - trace_shrink_flags(file, sc->reclaim_mode)); + trace_shrink_flags(file)); return nr_reclaimed; } @@ -1512,8 +1473,6 @@ static void shrink_active_list(unsigned long nr_to_scan, lru_add_drain(); - reset_reclaim_mode(sc); - if (!sc->may_unmap) isolate_mode |= ISOLATE_UNMAPPED; if (!sc->may_writepage) @@ -1826,23 +1785,35 @@ out: } } +/* Use reclaim/compaction for costly allocs or under memory pressure */ +static bool in_reclaim_compaction(int priority, struct scan_control *sc) +{ + if (COMPACTION_BUILD && sc->order && + (sc->order > PAGE_ALLOC_COSTLY_ORDER || + priority < DEF_PRIORITY - 2)) + return true; + + return false; +} + /* - * Reclaim/compaction depends on a number of pages being freed. To avoid - * disruption to the system, a small number of order-0 pages continue to be - * rotated and reclaimed in the normal fashion. However, by the time we get - * back to the allocator and call try_to_compact_zone(), we ensure that - * there are enough free pages for it to be likely successful + * Reclaim/compaction is used for high-order allocation requests. It reclaims + * order-0 pages before compacting the zone. should_continue_reclaim() returns + * true if more pages should be reclaimed such that when the page allocator + * calls try_to_compact_zone() that it will have enough free pages to succeed. + * It will give up earlier than that if there is difficulty reclaiming pages. */ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, unsigned long nr_reclaimed, unsigned long nr_scanned, + int priority, struct scan_control *sc) { unsigned long pages_for_compaction; unsigned long inactive_lru_pages; /* If not in reclaim/compaction mode, stop */ - if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION)) + if (!in_reclaim_compaction(priority, sc)) return false; /* Consider stopping depending on scan and reclaim activity */ @@ -1944,7 +1915,8 @@ restart: /* reclaim/compaction might need reclaim to continue */ if (should_continue_reclaim(mz, nr_reclaimed, - sc->nr_scanned - nr_scanned, sc)) + sc->nr_scanned - nr_scanned, + priority, sc)) goto restart; throttle_vm_writeout(sc->gfp_mask); -- cgit v1.2.3-70-g09d2 From c3ac9a8ade65ccbfd145fbff895ae8d8d62d09b0 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 29 May 2012 15:06:25 -0700 Subject: mm: memcg: count pte references from every member of the reclaimed hierarchy The rmap walker checking page table references has historically ignored references from VMAs that were not part of the memcg that was being reclaimed during memcg hard limit reclaim. When transitioning global reclaim to memcg hierarchy reclaim, I missed that bit and now references from outside a memcg are ignored even during global reclaim. Reverting back to traditional behaviour - count all references during global reclaim and only mind references of the memcg being reclaimed during limit reclaim would be one option. However, the more generic idea is to ignore references exactly then when they are outside the hierarchy that is currently under reclaim; because only then will their reclamation be of any use to help the pressure situation. It makes no sense to ignore references from a sibling memcg and then evict a page that will be immediately refaulted by that sibling which contributes to the same usage of the common ancestor under reclaim. The solution: make the rmap walker ignore references from VMAs that are not part of the hierarchy that is being reclaimed. Flat limit reclaim will stay the same, hierarchical limit reclaim will mind the references only to pages that the hierarchy owns. Global reclaim, since it reclaims from all memcgs, will be fixed to regard all references. [akpm@linux-foundation.org: name the args in the declaration] Signed-off-by: Johannes Weiner Reported-by: Konstantin Khlebnikov Acked-by: Konstantin Khlebnikov Cc: KAMEZAWA Hiroyuki Acked-by: Michal Hocko Cc: Li Zefan Cc: Li Zefan Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 7 ++++++- mm/memcontrol.c | 16 +++++++++++----- mm/vmscan.c | 6 ++++-- 3 files changed, 21 insertions(+), 8 deletions(-) (limited to 'mm/vmscan.c') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f94efd2f6c2..18ea0b7baf3 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -79,6 +79,8 @@ extern void mem_cgroup_uncharge_cache_page(struct page *page); extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, int order); +bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, + struct mem_cgroup *memcg); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg); extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); @@ -92,10 +94,13 @@ static inline int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) { struct mem_cgroup *memcg; + int match; + rcu_read_lock(); memcg = mem_cgroup_from_task(rcu_dereference((mm)->owner)); + match = __mem_cgroup_same_or_subtree(cgroup, memcg); rcu_read_unlock(); - return cgroup == memcg; + return match; } extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index faad98e6d17..4f71219cc53 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1149,17 +1149,23 @@ struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone, * Checks whether given mem is same or in the root_mem_cgroup's * hierarchy subtree */ -static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, - struct mem_cgroup *memcg) +bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, + struct mem_cgroup *memcg) { - bool ret; - if (root_memcg == memcg) return true; if (!root_memcg->use_hierarchy) return false; + return css_is_ancestor(&memcg->css, &root_memcg->css); +} + +static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, + struct mem_cgroup *memcg) +{ + bool ret; + rcu_read_lock(); - ret = css_is_ancestor(&memcg->css, &root_memcg->css); + ret = __mem_cgroup_same_or_subtree(root_memcg, memcg); rcu_read_unlock(); return ret; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 68e5819d0f1..8fffc65a84d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -645,7 +645,8 @@ static enum page_references page_check_references(struct page *page, int referenced_ptes, referenced_page; unsigned long vm_flags; - referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags); + referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup, + &vm_flags); referenced_page = TestClearPageReferenced(page); /* @@ -1513,7 +1514,8 @@ static void shrink_active_list(unsigned long nr_to_scan, } } - if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) { + if (page_referenced(page, 0, sc->target_mem_cgroup, + &vm_flags)) { nr_rotated += hpage_nr_pages(page); /* * Identify referenced, file-backed active pages and -- cgit v1.2.3-70-g09d2 From 096a7cf44712ab531101bb4689f75f7fcd9b9f18 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Tue, 29 May 2012 15:06:25 -0700 Subject: mm: rename is_mlocked_vma() to mlocked_vma_newpage() Andrew pointed out that the is_mlocked_vma() is misnamed. A function with name like that would expect bool return and no side-effects. Since it is called on the fault path for new page, rename it in this patch. Signed-off-by: Ying Han Reviewed-by: Rik van Riel Acked-by: KOSAKI Motohiro Acked-by: KAMEZAWA Hiroyuki Reviewed-by: Minchan Kim [akpm@linux-foundation.org: s/mlock_vma_newpage/mlock_vma_newpage/, per Minchan] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/internal.h | 5 +++-- mm/vmscan.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/internal.h b/mm/internal.h index aee4761cf9a..8b0fc8da802 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -164,7 +164,8 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma) * to determine if it's being mapped into a LOCKED vma. * If so, mark page as mlocked. */ -static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page) +static inline int mlocked_vma_newpage(struct vm_area_struct *vma, + struct page *page) { VM_BUG_ON(PageLRU(page)); @@ -222,7 +223,7 @@ extern unsigned long vma_address(struct page *page, struct vm_area_struct *vma); #endif #else /* !CONFIG_MMU */ -static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p) +static inline int mlocked_vma_newpage(struct vm_area_struct *v, struct page *p) { return 0; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 8fffc65a84d..44f04364a30 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3321,7 +3321,7 @@ int page_evictable(struct page *page, struct vm_area_struct *vma) if (mapping_unevictable(page_mapping(page))) return 0; - if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page))) + if (PageMlocked(page) || (vma && mlocked_vma_newpage(vma, page))) return 0; return 1; -- cgit v1.2.3-70-g09d2 From e48982734ea0500d1eba4f9d96195acc5406cad6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 29 May 2012 15:06:45 -0700 Subject: mm: consider all swapped back pages in used-once logic Commit 645747462435 ("vmscan: detect mapped file pages used only once") made mapped pages have another round in inactive list because they might be just short lived and so we could consider them again next time. This heuristic helps to reduce pressure on the active list with a streaming IO worklods. This patch fixes a regression introduced by this commit for heavy shmem based workloads because unlike Anon pages, which are excluded from this heuristic because they are usually long lived, shmem pages are handled as a regular page cache. This doesn't work quite well, unfortunately, if the workload is mostly backed by shmem (in memory database sitting on 80% of memory) with a streaming IO in the background (backup - up to 20% of memory). Anon inactive list is full of (dirty) shmem pages when watermarks are hit. Shmem pages are kept in the inactive list (they are referenced) in the first round and it is hard to reclaim anything else so we reach lower scanning priorities very quickly which leads to an excessive swap out. Let's fix this by excluding all swap backed pages (they tend to be long lived wrt. the regular page cache anyway) from used-once heuristic and rather activate them if they are referenced. The customer's workload is shmem backed database (80% of RAM) and they are measuring transactions/s with an IO in the background (20%). Transactions touch more or less random rows in the table. The transaction rate fell by a factor of 3 (in the worst case) because of commit 64574746. This patch restores the previous numbers. Signed-off-by: Michal Hocko Acked-by: Johannes Weiner Cc: Mel Gorman Cc: Minchan Kim Cc: KAMEZAWA Hiroyuki Reviewed-by: Rik van Riel Cc: [2.6.34+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 44f04364a30..67a4fd4792d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -657,7 +657,7 @@ static enum page_references page_check_references(struct page *page, return PAGEREF_RECLAIM; if (referenced_ptes) { - if (PageAnon(page)) + if (PageSwapBacked(page)) return PAGEREF_ACTIVATE; /* * All mapped pages start out with page table -- cgit v1.2.3-70-g09d2 From fe35004fbf9eaf67482b074a2e032abb9c89b1dd Mon Sep 17 00:00:00 2001 From: Satoru Moriya Date: Tue, 29 May 2012 15:06:47 -0700 Subject: mm: avoid swapping out with swappiness==0 Sometimes we'd like to avoid swapping out anonymous memory. In particular, avoid swapping out pages of important process or process groups while there is a reasonable amount of pagecache on RAM so that we can satisfy our customers' requirements. OTOH, we can control how aggressive the kernel will swap memory pages with /proc/sys/vm/swappiness for global and /sys/fs/cgroup/memory/memory.swappiness for each memcg. But with current reclaim implementation, the kernel may swap out even if we set swappiness=0 and there is pagecache in RAM. This patch changes the behavior with swappiness==0. If we set swappiness==0, the kernel does not swap out completely (for global reclaim until the amount of free pages and filebacked pages in a zone has been reduced to something very very small (nr_free + nr_filebacked < high watermark)). Signed-off-by: Satoru Moriya Acked-by: Minchan Kim Reviewed-by: Rik van Riel Acked-by: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 67a4fd4792d..ee975302877 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1761,10 +1761,10 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * proportional to the fraction of recently scanned pages on * each list that were recently referenced and in active use. */ - ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1); + ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1); ap /= reclaim_stat->recent_rotated[0] + 1; - fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1); + fp = file_prio * (reclaim_stat->recent_scanned[1] + 1); fp /= reclaim_stat->recent_rotated[1] + 1; spin_unlock_irq(&mz->zone->lru_lock); @@ -1777,7 +1777,7 @@ out: unsigned long scan; scan = zone_nr_lru_pages(mz, lru); - if (priority || noswap) { + if (priority || noswap || !vmscan_swappiness(mz, sc)) { scan >>= priority; if (!scan && force_scan) scan = SWAP_CLUSTER_MAX; -- cgit v1.2.3-70-g09d2 From c3c787e8c38557ccf44c670d73aebe630a2b1479 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 29 May 2012 15:06:52 -0700 Subject: mm/memcg: scanning_global_lru means mem_cgroup_disabled Although one has to admire the skill with which it has been concealed, scanning_global_lru(mz) is actually just an interesting way to test mem_cgroup_disabled(). Too many developer hours have been wasted on confusing it with global_reclaim(): just use mem_cgroup_disabled(). Signed-off-by: Hugh Dickins Signed-off-by: Konstantin Khlebnikov Acked-by: KAMEZAWA Hiroyuki Acked-by: Glauber Costa Cc: Michal Hocko Cc: Johannes Weiner Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index ee975302877..52fac58b446 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -140,26 +140,16 @@ static bool global_reclaim(struct scan_control *sc) { return !sc->target_mem_cgroup; } - -static bool scanning_global_lru(struct mem_cgroup_zone *mz) -{ - return !mz->mem_cgroup; -} #else static bool global_reclaim(struct scan_control *sc) { return true; } - -static bool scanning_global_lru(struct mem_cgroup_zone *mz) -{ - return true; -} #endif static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz) { - if (!scanning_global_lru(mz)) + if (!mem_cgroup_disabled()) return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone); return &mz->zone->reclaim_stat; @@ -168,7 +158,7 @@ static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz) static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz, enum lru_list lru) { - if (!scanning_global_lru(mz)) + if (!mem_cgroup_disabled()) return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup, zone_to_nid(mz->zone), zone_idx(mz->zone), @@ -1589,7 +1579,7 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz) if (!total_swap_pages) return 0; - if (!scanning_global_lru(mz)) + if (!mem_cgroup_disabled()) return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup, mz->zone); @@ -1628,7 +1618,7 @@ static int inactive_file_is_low_global(struct zone *zone) */ static int inactive_file_is_low(struct mem_cgroup_zone *mz) { - if (!scanning_global_lru(mz)) + if (!mem_cgroup_disabled()) return mem_cgroup_inactive_file_is_low(mz->mem_cgroup, mz->zone); -- cgit v1.2.3-70-g09d2 From 89abfab133ef1f5902abafb744df72793213ac19 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 29 May 2012 15:06:53 -0700 Subject: mm/memcg: move reclaim_stat into lruvec With mem_cgroup_disabled() now explicit, it becomes clear that the zone_reclaim_stat structure actually belongs in lruvec, per-zone when memcg is disabled but per-memcg per-zone when it's enabled. We can delete mem_cgroup_get_reclaim_stat(), and change update_page_reclaim_stat() to update just the one set of stats, the one which get_scan_count() will actually use. Signed-off-by: Hugh Dickins Signed-off-by: Konstantin Khlebnikov Acked-by: KAMEZAWA Hiroyuki Acked-by: Michal Hocko Reviewed-by: Minchan Kim Reviewed-by: Michal Hocko Cc: Glauber Costa Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 9 --------- include/linux/mmzone.h | 29 ++++++++++++++--------------- mm/memcontrol.c | 27 +++++++-------------------- mm/page_alloc.c | 8 ++++---- mm/swap.c | 14 ++++---------- mm/vmscan.c | 5 +---- 6 files changed, 30 insertions(+), 62 deletions(-) (limited to 'mm/vmscan.c') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 18ea0b7baf3..cfe9050ad8d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -126,8 +126,6 @@ int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, unsigned int lrumask); -struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, - struct zone *zone); struct zone_reclaim_stat* mem_cgroup_get_reclaim_stat_from_page(struct page *page); extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, @@ -356,13 +354,6 @@ mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, return 0; } - -static inline struct zone_reclaim_stat* -mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone *zone) -{ - return NULL; -} - static inline struct zone_reclaim_stat* mem_cgroup_get_reclaim_stat_from_page(struct page *page) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4871e31ae27..1b89861eedc 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -185,8 +185,22 @@ static inline int is_unevictable_lru(enum lru_list lru) return (lru == LRU_UNEVICTABLE); } +struct zone_reclaim_stat { + /* + * The pageout code in vmscan.c keeps track of how many of the + * mem/swap backed and file backed pages are refeferenced. + * The higher the rotated/scanned ratio, the more valuable + * that cache is. + * + * The anon LRU stats live in [0], file LRU stats in [1] + */ + unsigned long recent_rotated[2]; + unsigned long recent_scanned[2]; +}; + struct lruvec { struct list_head lists[NR_LRU_LISTS]; + struct zone_reclaim_stat reclaim_stat; }; /* Mask used at gathering information at once (see memcontrol.c) */ @@ -313,19 +327,6 @@ enum zone_type { #error ZONES_SHIFT -- too many zones configured adjust calculation #endif -struct zone_reclaim_stat { - /* - * The pageout code in vmscan.c keeps track of how many of the - * mem/swap backed and file backed pages are refeferenced. - * The higher the rotated/scanned ratio, the more valuable - * that cache is. - * - * The anon LRU stats live in [0], file LRU stats in [1] - */ - unsigned long recent_rotated[2]; - unsigned long recent_scanned[2]; -}; - struct zone { /* Fields commonly accessed by the page allocator */ @@ -407,8 +408,6 @@ struct zone { spinlock_t lru_lock; struct lruvec lruvec; - struct zone_reclaim_stat reclaim_stat; - unsigned long pages_scanned; /* since last reclaim */ unsigned long flags; /* zone flags, see below */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 30f938c8645..00c8898dbb8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -138,7 +138,6 @@ struct mem_cgroup_per_zone { struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; - struct zone_reclaim_stat reclaim_stat; struct rb_node tree_node; /* RB tree node */ unsigned long long usage_in_excess;/* Set to the value by which */ /* the soft limit is exceeded*/ @@ -1243,16 +1242,6 @@ int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone) return (active > inactive); } -struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, - struct zone *zone) -{ - int nid = zone_to_nid(zone); - int zid = zone_idx(zone); - struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid); - - return &mz->reclaim_stat; -} - struct zone_reclaim_stat * mem_cgroup_get_reclaim_stat_from_page(struct page *page) { @@ -1268,7 +1257,7 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page) /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ smp_rmb(); mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); - return &mz->reclaim_stat; + return &mz->lruvec.reclaim_stat; } #define mem_cgroup_from_res_counter(counter, member) \ @@ -4216,21 +4205,19 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, { int nid, zid; struct mem_cgroup_per_zone *mz; + struct zone_reclaim_stat *rstat; unsigned long recent_rotated[2] = {0, 0}; unsigned long recent_scanned[2] = {0, 0}; for_each_online_node(nid) for (zid = 0; zid < MAX_NR_ZONES; zid++) { mz = mem_cgroup_zoneinfo(memcg, nid, zid); + rstat = &mz->lruvec.reclaim_stat; - recent_rotated[0] += - mz->reclaim_stat.recent_rotated[0]; - recent_rotated[1] += - mz->reclaim_stat.recent_rotated[1]; - recent_scanned[0] += - mz->reclaim_stat.recent_scanned[0]; - recent_scanned[1] += - mz->reclaim_stat.recent_scanned[1]; + recent_rotated[0] += rstat->recent_rotated[0]; + recent_rotated[1] += rstat->recent_rotated[1]; + recent_scanned[0] += rstat->recent_scanned[0]; + recent_scanned[1] += rstat->recent_scanned[1]; } cb->fill(cb, "recent_rotated_anon", recent_rotated[0]); cb->fill(cb, "recent_rotated_file", recent_rotated[1]); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4fc462b5fcf..8cbfc38e68a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4410,10 +4410,10 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, zone_pcp_init(zone); for_each_lru(lru) INIT_LIST_HEAD(&zone->lruvec.lists[lru]); - zone->reclaim_stat.recent_rotated[0] = 0; - zone->reclaim_stat.recent_rotated[1] = 0; - zone->reclaim_stat.recent_scanned[0] = 0; - zone->reclaim_stat.recent_scanned[1] = 0; + zone->lruvec.reclaim_stat.recent_rotated[0] = 0; + zone->lruvec.reclaim_stat.recent_rotated[1] = 0; + zone->lruvec.reclaim_stat.recent_scanned[0] = 0; + zone->lruvec.reclaim_stat.recent_scanned[1] = 0; zap_zone_vm_stats(zone); zone->flags = 0; if (!size) diff --git a/mm/swap.c b/mm/swap.c index 6fdd72ec15b..0503ad705e7 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -312,21 +312,15 @@ void rotate_reclaimable_page(struct page *page) static void update_page_reclaim_stat(struct zone *zone, struct page *page, int file, int rotated) { - struct zone_reclaim_stat *reclaim_stat = &zone->reclaim_stat; - struct zone_reclaim_stat *memcg_reclaim_stat; + struct zone_reclaim_stat *reclaim_stat; - memcg_reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page); + reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page); + if (!reclaim_stat) + reclaim_stat = &zone->lruvec.reclaim_stat; reclaim_stat->recent_scanned[file]++; if (rotated) reclaim_stat->recent_rotated[file]++; - - if (!memcg_reclaim_stat) - return; - - memcg_reclaim_stat->recent_scanned[file]++; - if (rotated) - memcg_reclaim_stat->recent_rotated[file]++; } static void __activate_page(struct page *page, void *arg) diff --git a/mm/vmscan.c b/mm/vmscan.c index 52fac58b446..e234ada1874 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -149,10 +149,7 @@ static bool global_reclaim(struct scan_control *sc) static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz) { - if (!mem_cgroup_disabled()) - return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone); - - return &mz->zone->reclaim_stat; + return &mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup)->reclaim_stat; } static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz, -- cgit v1.2.3-70-g09d2 From 3cb9945179bd04e9282f31a1173ac11b1300c462 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:06:53 -0700 Subject: mm: push lru index into shrink_[in]active_list() Let's toss lru index through call stack to isolate_lru_pages(), this is better than its reconstructing from individual bits. [akpm@linux-foundation.org: fix kerneldoc, per Minchan] Signed-off-by: Konstantin Khlebnikov Acked-by: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Cc: Glauber Costa Cc: Michal Hocko Cc: Johannes Weiner Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index e234ada1874..987be819fad 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1044,27 +1044,22 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) * @nr_scanned: The number of pages that were scanned. * @sc: The scan_control struct for this reclaim session * @mode: One of the LRU isolation modes - * @active: True [1] if isolating active pages - * @file: True [1] if isolating file [!anon] pages + * @lru: LRU list id for isolating * * returns how many pages were moved onto *@dst. */ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, struct list_head *dst, unsigned long *nr_scanned, struct scan_control *sc, - isolate_mode_t mode, int active, int file) + isolate_mode_t mode, enum lru_list lru) { struct lruvec *lruvec; struct list_head *src; unsigned long nr_taken = 0; unsigned long scan; - int lru = LRU_BASE; + int file = is_file_lru(lru); lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup); - if (active) - lru += LRU_ACTIVE; - if (file) - lru += LRU_FILE; src = &lruvec->lists[lru]; for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { @@ -1277,7 +1272,7 @@ update_isolated_counts(struct mem_cgroup_zone *mz, */ static noinline_for_stack unsigned long shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, - struct scan_control *sc, int priority, int file) + struct scan_control *sc, int priority, enum lru_list lru) { LIST_HEAD(page_list); unsigned long nr_scanned; @@ -1288,6 +1283,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, unsigned long nr_dirty = 0; unsigned long nr_writeback = 0; isolate_mode_t isolate_mode = ISOLATE_INACTIVE; + int file = is_file_lru(lru); struct zone *zone = mz->zone; struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); @@ -1309,7 +1305,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, spin_lock_irq(&zone->lru_lock); nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, &nr_scanned, - sc, isolate_mode, 0, file); + sc, isolate_mode, lru); if (global_reclaim(sc)) { zone->pages_scanned += nr_scanned; if (current_is_kswapd()) @@ -1445,7 +1441,7 @@ static void move_active_pages_to_lru(struct zone *zone, static void shrink_active_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, struct scan_control *sc, - int priority, int file) + int priority, enum lru_list lru) { unsigned long nr_taken; unsigned long nr_scanned; @@ -1457,6 +1453,7 @@ static void shrink_active_list(unsigned long nr_to_scan, struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); unsigned long nr_rotated = 0; isolate_mode_t isolate_mode = ISOLATE_ACTIVE; + int file = is_file_lru(lru); struct zone *zone = mz->zone; lru_add_drain(); @@ -1469,17 +1466,14 @@ static void shrink_active_list(unsigned long nr_to_scan, spin_lock_irq(&zone->lru_lock); nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold, &nr_scanned, sc, - isolate_mode, 1, file); + isolate_mode, lru); if (global_reclaim(sc)) zone->pages_scanned += nr_scanned; reclaim_stat->recent_scanned[file] += nr_taken; __count_zone_vm_events(PGREFILL, zone, nr_scanned); - if (file) - __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken); - else - __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken); + __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken); __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); spin_unlock_irq(&zone->lru_lock); @@ -1535,10 +1529,8 @@ static void shrink_active_list(unsigned long nr_to_scan, */ reclaim_stat->recent_rotated[file] += nr_rotated; - move_active_pages_to_lru(zone, &l_active, &l_hold, - LRU_ACTIVE + file * LRU_FILE); - move_active_pages_to_lru(zone, &l_inactive, &l_hold, - LRU_BASE + file * LRU_FILE); + move_active_pages_to_lru(zone, &l_active, &l_hold, lru); + move_active_pages_to_lru(zone, &l_inactive, &l_hold, lru - LRU_ACTIVE); __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); @@ -1638,11 +1630,11 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, if (is_active_lru(lru)) { if (inactive_list_is_low(mz, file)) - shrink_active_list(nr_to_scan, mz, sc, priority, file); + shrink_active_list(nr_to_scan, mz, sc, priority, lru); return 0; } - return shrink_inactive_list(nr_to_scan, mz, sc, priority, file); + return shrink_inactive_list(nr_to_scan, mz, sc, priority, lru); } static int vmscan_swappiness(struct mem_cgroup_zone *mz, @@ -1900,7 +1892,8 @@ restart: * rebalance the anon lru active/inactive ratio. */ if (inactive_anon_is_low(mz)) - shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0); + shrink_active_list(SWAP_CLUSTER_MAX, mz, + sc, priority, LRU_ACTIVE_ANON); /* reclaim/compaction might need reclaim to continue */ if (should_continue_reclaim(mz, nr_reclaimed, @@ -2339,7 +2332,7 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc, if (inactive_anon_is_low(&mz)) shrink_active_list(SWAP_CLUSTER_MAX, &mz, - sc, priority, 0); + sc, priority, LRU_ACTIVE_ANON); memcg = mem_cgroup_iter(NULL, memcg, NULL); } while (memcg); -- cgit v1.2.3-70-g09d2 From f3fd4a61928a5edf5b033a417e761b488b43e203 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:06:54 -0700 Subject: mm: remove lru type checks from __isolate_lru_page() After patch "mm: forbid lumpy-reclaim in shrink_active_list()" we can completely remove anon/file and active/inactive lru type filters from __isolate_lru_page(), because isolation for 0-order reclaim always isolates pages from right lru list. And pages-isolation for lumpy shrink_inactive_list() or memory-compaction anyway allowed to isolate pages from all evictable lru lists. Signed-off-by: Konstantin Khlebnikov Acked-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Acked-by: Michal Hocko Cc: Glauber Costa Cc: Johannes Weiner Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 10 +++------- include/linux/swap.h | 2 +- mm/compaction.c | 4 ++-- mm/vmscan.c | 23 ++++------------------- 4 files changed, 10 insertions(+), 29 deletions(-) (limited to 'mm/vmscan.c') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1b89861eedc..5c4880bc027 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -209,16 +209,12 @@ struct lruvec { #define LRU_ALL_EVICTABLE (LRU_ALL_FILE | LRU_ALL_ANON) #define LRU_ALL ((1 << NR_LRU_LISTS) - 1) -/* Isolate inactive pages */ -#define ISOLATE_INACTIVE ((__force isolate_mode_t)0x1) -/* Isolate active pages */ -#define ISOLATE_ACTIVE ((__force isolate_mode_t)0x2) /* Isolate clean file */ -#define ISOLATE_CLEAN ((__force isolate_mode_t)0x4) +#define ISOLATE_CLEAN ((__force isolate_mode_t)0x1) /* Isolate unmapped file */ -#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x8) +#define ISOLATE_UNMAPPED ((__force isolate_mode_t)0x2) /* Isolate for asynchronous migration */ -#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x10) +#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4) /* LRU Isolation modes. */ typedef unsigned __bitwise__ isolate_mode_t; diff --git a/include/linux/swap.h b/include/linux/swap.h index 49c0fa9ef5c..ff38eb7c0ec 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -251,7 +251,7 @@ static inline void lru_cache_add_file(struct page *page) /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); -extern int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file); +extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap); extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, diff --git a/mm/compaction.c b/mm/compaction.c index 840ee288e29..74e1b380383 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -226,7 +226,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, unsigned long last_pageblock_nr = 0, pageblock_nr; unsigned long nr_scanned = 0, nr_isolated = 0; struct list_head *migratelist = &cc->migratepages; - isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; + isolate_mode_t mode = 0; /* * Ensure that there are not too many pages isolated from the LRU @@ -329,7 +329,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, mode |= ISOLATE_ASYNC_MIGRATE; /* Try isolate the page */ - if (__isolate_lru_page(page, mode, 0) != 0) + if (__isolate_lru_page(page, mode) != 0) continue; VM_BUG_ON(PageTransCompound(page)); diff --git a/mm/vmscan.c b/mm/vmscan.c index 987be819fad..27ef5769b9e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -949,29 +949,14 @@ keep: * * returns 0 on success, -ve errno on failure. */ -int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) +int __isolate_lru_page(struct page *page, isolate_mode_t mode) { - bool all_lru_mode; int ret = -EINVAL; /* Only take pages on the LRU. */ if (!PageLRU(page)) return ret; - all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) == - (ISOLATE_ACTIVE|ISOLATE_INACTIVE); - - /* - * When checking the active state, we need to be sure we are - * dealing with comparible boolean values. Take the logical not - * of each. - */ - if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE)) - return ret; - - if (!all_lru_mode && !!page_is_file_cache(page) != file) - return ret; - /* Do not give back unevictable pages for compaction */ if (PageUnevictable(page)) return ret; @@ -1070,7 +1055,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, VM_BUG_ON(!PageLRU(page)); - switch (__isolate_lru_page(page, mode, file)) { + switch (__isolate_lru_page(page, mode)) { case 0: mem_cgroup_lru_del(page); list_move(&page->lru, dst); @@ -1282,7 +1267,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, unsigned long nr_file; unsigned long nr_dirty = 0; unsigned long nr_writeback = 0; - isolate_mode_t isolate_mode = ISOLATE_INACTIVE; + isolate_mode_t isolate_mode = 0; int file = is_file_lru(lru); struct zone *zone = mz->zone; struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); @@ -1452,7 +1437,7 @@ static void shrink_active_list(unsigned long nr_to_scan, struct page *page; struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); unsigned long nr_rotated = 0; - isolate_mode_t isolate_mode = ISOLATE_ACTIVE; + isolate_mode_t isolate_mode = 0; int file = is_file_lru(lru); struct zone *zone = mz->zone; -- cgit v1.2.3-70-g09d2 From bbf808ed7de68fdf626fd4f9718d88cf03ce13a9 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:06:54 -0700 Subject: mm/memcg: kill mem_cgroup_lru_del() This patch kills mem_cgroup_lru_del(), we can use mem_cgroup_lru_del_list() instead. On 0-order isolation we already have right lru list id. Signed-off-by: Konstantin Khlebnikov Cc: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Glauber Costa Cc: Michal Hocko Cc: Johannes Weiner Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 ----- mm/memcontrol.c | 5 ----- mm/vmscan.c | 2 +- 3 files changed, 1 insertion(+), 11 deletions(-) (limited to 'mm/vmscan.c') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index cfe9050ad8d..e3fc200cd68 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -66,7 +66,6 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); struct lruvec *mem_cgroup_lru_add_list(struct zone *, struct page *, enum lru_list); void mem_cgroup_lru_del_list(struct page *, enum lru_list); -void mem_cgroup_lru_del(struct page *); struct lruvec *mem_cgroup_lru_move_lists(struct zone *, struct page *, enum lru_list, enum lru_list); @@ -265,10 +264,6 @@ static inline void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru) { } -static inline void mem_cgroup_lru_del(struct page *page) -{ -} - static inline struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone, struct page *page, enum lru_list from, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 00c8898dbb8..75f23e98db4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1115,11 +1115,6 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru) mz->lru_size[lru] -= 1 << compound_order(page); } -void mem_cgroup_lru_del(struct page *page) -{ - mem_cgroup_lru_del_list(page, page_lru(page)); -} - /** * mem_cgroup_lru_move_lists - account for moving a page between lrus * @zone: zone of the page diff --git a/mm/vmscan.c b/mm/vmscan.c index 27ef5769b9e..c94d17d75d7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1057,7 +1057,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, switch (__isolate_lru_page(page, mode)) { case 0: - mem_cgroup_lru_del(page); + mem_cgroup_lru_del_list(page, lru); list_move(&page->lru, dst); nr_taken += hpage_nr_pages(page); break; -- cgit v1.2.3-70-g09d2 From 3d58ab5c97fa2d145050242137ac39ca7d3bc2fc Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:06:57 -0700 Subject: mm/memcg: use vm_swappiness from target memory cgroup Use vm_swappiness from memory cgroup which is triggered this memory reclaim. This is more reasonable and allows to kill one argument. [akpm@linux-foundation.org: fix build (patch skew)] Signed-off-by: Konstantin Khlebnikov Acked-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Glauber Costa Cc: Michal Hocko Cc: Johannes Weiner Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index c94d17d75d7..0e2131deb2d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1622,12 +1622,11 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, return shrink_inactive_list(nr_to_scan, mz, sc, priority, lru); } -static int vmscan_swappiness(struct mem_cgroup_zone *mz, - struct scan_control *sc) +static int vmscan_swappiness(struct scan_control *sc) { if (global_reclaim(sc)) return vm_swappiness; - return mem_cgroup_swappiness(mz->mem_cgroup); + return mem_cgroup_swappiness(sc->target_mem_cgroup); } /* @@ -1695,8 +1694,8 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * With swappiness at 100, anonymous and file have the same priority. * This scanning priority is essentially the inverse of IO cost. */ - anon_prio = vmscan_swappiness(mz, sc); - file_prio = 200 - vmscan_swappiness(mz, sc); + anon_prio = vmscan_swappiness(sc); + file_prio = 200 - vmscan_swappiness(sc); /* * OK, so we have swap space and a fair amount of page cache @@ -1741,7 +1740,7 @@ out: unsigned long scan; scan = zone_nr_lru_pages(mz, lru); - if (priority || noswap || !vmscan_swappiness(mz, sc)) { + if (priority || noswap || !vmscan_swappiness(sc)) { scan >>= priority; if (!scan && force_scan) scan = SWAP_CLUSTER_MAX; -- cgit v1.2.3-70-g09d2 From 9e3b2f8cd340e13353a44c9a34caef2848131ed7 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:06:57 -0700 Subject: mm/vmscan: store "priority" in struct scan_control In memory reclaim some function have too many arguments - "priority" is one of them. It can be stored in struct scan_control - we construct them on the same level. Instead of an open coded loop we set the initial sc.priority, and do_try_to_free_pages() decreases it down to zero. Signed-off-by: Konstantin Khlebnikov Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 117 +++++++++++++++++++++++++++++++----------------------------- 1 file changed, 61 insertions(+), 56 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 0e2131deb2d..77905eb3d8a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -78,6 +78,9 @@ struct scan_control { int order; + /* Scan (total_size >> priority) pages at once */ + int priority; + /* * The memory cgroup that hit its limit and as a result is the * primary target of this reclaim invocation. @@ -687,7 +690,6 @@ static enum page_references page_check_references(struct page *page, static unsigned long shrink_page_list(struct list_head *page_list, struct mem_cgroup_zone *mz, struct scan_control *sc, - int priority, unsigned long *ret_nr_dirty, unsigned long *ret_nr_writeback) { @@ -790,7 +792,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, * unless under significant pressure. */ if (page_is_file_cache(page) && - (!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) { + (!current_is_kswapd() || + sc->priority >= DEF_PRIORITY - 2)) { /* * Immediately reclaim when written back. * Similar in principal to deactivate_page() @@ -1257,7 +1260,7 @@ update_isolated_counts(struct mem_cgroup_zone *mz, */ static noinline_for_stack unsigned long shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, - struct scan_control *sc, int priority, enum lru_list lru) + struct scan_control *sc, enum lru_list lru) { LIST_HEAD(page_list); unsigned long nr_scanned; @@ -1307,7 +1310,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, update_isolated_counts(mz, &page_list, &nr_anon, &nr_file); - nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority, + nr_reclaimed = shrink_page_list(&page_list, mz, sc, &nr_dirty, &nr_writeback); spin_lock_irq(&zone->lru_lock); @@ -1356,13 +1359,14 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any * isolated page is PageWriteback */ - if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority))) + if (nr_writeback && nr_writeback >= + (nr_taken >> (DEF_PRIORITY - sc->priority))) wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, zone_idx(zone), nr_scanned, nr_reclaimed, - priority, + sc->priority, trace_shrink_flags(file)); return nr_reclaimed; } @@ -1426,7 +1430,7 @@ static void move_active_pages_to_lru(struct zone *zone, static void shrink_active_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, struct scan_control *sc, - int priority, enum lru_list lru) + enum lru_list lru) { unsigned long nr_taken; unsigned long nr_scanned; @@ -1609,17 +1613,17 @@ static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file) static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, struct mem_cgroup_zone *mz, - struct scan_control *sc, int priority) + struct scan_control *sc) { int file = is_file_lru(lru); if (is_active_lru(lru)) { if (inactive_list_is_low(mz, file)) - shrink_active_list(nr_to_scan, mz, sc, priority, lru); + shrink_active_list(nr_to_scan, mz, sc, lru); return 0; } - return shrink_inactive_list(nr_to_scan, mz, sc, priority, lru); + return shrink_inactive_list(nr_to_scan, mz, sc, lru); } static int vmscan_swappiness(struct scan_control *sc) @@ -1638,7 +1642,7 @@ static int vmscan_swappiness(struct scan_control *sc) * nr[0] = anon pages to scan; nr[1] = file pages to scan */ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, - unsigned long *nr, int priority) + unsigned long *nr) { unsigned long anon, file, free; unsigned long anon_prio, file_prio; @@ -1740,8 +1744,8 @@ out: unsigned long scan; scan = zone_nr_lru_pages(mz, lru); - if (priority || noswap || !vmscan_swappiness(sc)) { - scan >>= priority; + if (sc->priority || noswap || !vmscan_swappiness(sc)) { + scan >>= sc->priority; if (!scan && force_scan) scan = SWAP_CLUSTER_MAX; scan = div64_u64(scan * fraction[file], denominator); @@ -1751,11 +1755,11 @@ out: } /* Use reclaim/compaction for costly allocs or under memory pressure */ -static bool in_reclaim_compaction(int priority, struct scan_control *sc) +static bool in_reclaim_compaction(struct scan_control *sc) { if (COMPACTION_BUILD && sc->order && (sc->order > PAGE_ALLOC_COSTLY_ORDER || - priority < DEF_PRIORITY - 2)) + sc->priority < DEF_PRIORITY - 2)) return true; return false; @@ -1771,14 +1775,13 @@ static bool in_reclaim_compaction(int priority, struct scan_control *sc) static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, unsigned long nr_reclaimed, unsigned long nr_scanned, - int priority, struct scan_control *sc) { unsigned long pages_for_compaction; unsigned long inactive_lru_pages; /* If not in reclaim/compaction mode, stop */ - if (!in_reclaim_compaction(priority, sc)) + if (!in_reclaim_compaction(sc)) return false; /* Consider stopping depending on scan and reclaim activity */ @@ -1829,7 +1832,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, /* * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. */ -static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz, +static void shrink_mem_cgroup_zone(struct mem_cgroup_zone *mz, struct scan_control *sc) { unsigned long nr[NR_LRU_LISTS]; @@ -1842,7 +1845,7 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz, restart: nr_reclaimed = 0; nr_scanned = sc->nr_scanned; - get_scan_count(mz, sc, nr, priority); + get_scan_count(mz, sc, nr); blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || @@ -1854,7 +1857,7 @@ restart: nr[lru] -= nr_to_scan; nr_reclaimed += shrink_list(lru, nr_to_scan, - mz, sc, priority); + mz, sc); } } /* @@ -1865,7 +1868,8 @@ restart: * with multiple processes reclaiming pages, the total * freeing target can get unreasonably large. */ - if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY) + if (nr_reclaimed >= nr_to_reclaim && + sc->priority < DEF_PRIORITY) break; } blk_finish_plug(&plug); @@ -1877,24 +1881,22 @@ restart: */ if (inactive_anon_is_low(mz)) shrink_active_list(SWAP_CLUSTER_MAX, mz, - sc, priority, LRU_ACTIVE_ANON); + sc, LRU_ACTIVE_ANON); /* reclaim/compaction might need reclaim to continue */ if (should_continue_reclaim(mz, nr_reclaimed, - sc->nr_scanned - nr_scanned, - priority, sc)) + sc->nr_scanned - nr_scanned, sc)) goto restart; throttle_vm_writeout(sc->gfp_mask); } -static void shrink_zone(int priority, struct zone *zone, - struct scan_control *sc) +static void shrink_zone(struct zone *zone, struct scan_control *sc) { struct mem_cgroup *root = sc->target_mem_cgroup; struct mem_cgroup_reclaim_cookie reclaim = { .zone = zone, - .priority = priority, + .priority = sc->priority, }; struct mem_cgroup *memcg; @@ -1905,7 +1907,7 @@ static void shrink_zone(int priority, struct zone *zone, .zone = zone, }; - shrink_mem_cgroup_zone(priority, &mz, sc); + shrink_mem_cgroup_zone(&mz, sc); /* * Limit reclaim has historically picked one memcg and * scanned it with decreasing priority levels until @@ -1981,8 +1983,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) * the caller that it should consider retrying the allocation instead of * further reclaim. */ -static bool shrink_zones(int priority, struct zonelist *zonelist, - struct scan_control *sc) +static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; @@ -2009,7 +2010,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, if (global_reclaim(sc)) { if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc->priority != DEF_PRIORITY) continue; /* Let kswapd poll it */ if (COMPACTION_BUILD) { /* @@ -2041,7 +2043,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, /* need some check for avoid more shrink_zone() */ } - shrink_zone(priority, zone, sc); + shrink_zone(zone, sc); } return aborted_reclaim; @@ -2092,7 +2094,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc, struct shrink_control *shrink) { - int priority; unsigned long total_scanned = 0; struct reclaim_state *reclaim_state = current->reclaim_state; struct zoneref *z; @@ -2105,9 +2106,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, if (global_reclaim(sc)) count_vm_event(ALLOCSTALL); - for (priority = DEF_PRIORITY; priority >= 0; priority--) { + do { sc->nr_scanned = 0; - aborted_reclaim = shrink_zones(priority, zonelist, sc); + aborted_reclaim = shrink_zones(zonelist, sc); /* * Don't shrink slabs when reclaiming memory from @@ -2149,7 +2150,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, /* Take a nap, wait for some writeback to complete */ if (!sc->hibernation_mode && sc->nr_scanned && - priority < DEF_PRIORITY - 2) { + sc->priority < DEF_PRIORITY - 2) { struct zone *preferred_zone; first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask), @@ -2157,7 +2158,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, &preferred_zone); wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10); } - } + } while (--sc->priority >= 0); out: delayacct_freepages_end(); @@ -2195,6 +2196,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, .may_unmap = 1, .may_swap = 1, .order = order, + .priority = DEF_PRIORITY, .target_mem_cgroup = NULL, .nodemask = nodemask, }; @@ -2227,6 +2229,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, .may_unmap = 1, .may_swap = !noswap, .order = 0, + .priority = 0, .target_mem_cgroup = memcg, }; struct mem_cgroup_zone mz = { @@ -2237,7 +2240,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); - trace_mm_vmscan_memcg_softlimit_reclaim_begin(0, + trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order, sc.may_writepage, sc.gfp_mask); @@ -2248,7 +2251,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, * will pick up pages from other mem cgroup's as well. We hack * the priority and make it zero. */ - shrink_mem_cgroup_zone(0, &mz, &sc); + shrink_mem_cgroup_zone(&mz, &sc); trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); @@ -2269,6 +2272,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, .may_swap = !noswap, .nr_to_reclaim = SWAP_CLUSTER_MAX, .order = 0, + .priority = DEF_PRIORITY, .target_mem_cgroup = memcg, .nodemask = NULL, /* we don't care the placement */ .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | @@ -2299,8 +2303,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, } #endif -static void age_active_anon(struct zone *zone, struct scan_control *sc, - int priority) +static void age_active_anon(struct zone *zone, struct scan_control *sc) { struct mem_cgroup *memcg; @@ -2316,7 +2319,7 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc, if (inactive_anon_is_low(&mz)) shrink_active_list(SWAP_CLUSTER_MAX, &mz, - sc, priority, LRU_ACTIVE_ANON); + sc, LRU_ACTIVE_ANON); memcg = mem_cgroup_iter(NULL, memcg, NULL); } while (memcg); @@ -2425,7 +2428,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, { int all_zones_ok; unsigned long balanced; - int priority; int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ unsigned long total_scanned; @@ -2449,11 +2451,12 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, }; loop_again: total_scanned = 0; + sc.priority = DEF_PRIORITY; sc.nr_reclaimed = 0; sc.may_writepage = !laptop_mode; count_vm_event(PAGEOUTRUN); - for (priority = DEF_PRIORITY; priority >= 0; priority--) { + do { unsigned long lru_pages = 0; int has_under_min_watermark_zone = 0; @@ -2470,14 +2473,15 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) continue; /* * Do some background aging of the anon list, to give * pages a chance to be referenced before reclaiming. */ - age_active_anon(zone, &sc, priority); + age_active_anon(zone, &sc); /* * If the number of buffer_heads in the machine @@ -2525,7 +2529,8 @@ loop_again: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) continue; sc.nr_scanned = 0; @@ -2569,7 +2574,7 @@ loop_again: !zone_watermark_ok_safe(zone, testorder, high_wmark_pages(zone) + balance_gap, end_zone, 0)) { - shrink_zone(priority, zone, &sc); + shrink_zone(zone, &sc); reclaim_state->reclaimed_slab = 0; nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); @@ -2626,7 +2631,7 @@ loop_again: * OK, kswapd is getting into trouble. Take a nap, then take * another pass across the zones. */ - if (total_scanned && (priority < DEF_PRIORITY - 2)) { + if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) { if (has_under_min_watermark_zone) count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); else @@ -2641,7 +2646,7 @@ loop_again: */ if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) break; - } + } while (--sc.priority >= 0); out: /* @@ -2691,7 +2696,8 @@ out: if (!populated_zone(zone)) continue; - if (zone->all_unreclaimable && priority != DEF_PRIORITY) + if (zone->all_unreclaimable && + sc.priority != DEF_PRIORITY) continue; /* Would compaction fail due to lack of free memory? */ @@ -2958,6 +2964,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) .nr_to_reclaim = nr_to_reclaim, .hibernation_mode = 1, .order = 0, + .priority = DEF_PRIORITY, }; struct shrink_control shrink = { .gfp_mask = sc.gfp_mask, @@ -3135,7 +3142,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) const unsigned long nr_pages = 1 << order; struct task_struct *p = current; struct reclaim_state reclaim_state; - int priority; struct scan_control sc = { .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), @@ -3144,6 +3150,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) SWAP_CLUSTER_MAX), .gfp_mask = gfp_mask, .order = order, + .priority = ZONE_RECLAIM_PRIORITY, }; struct shrink_control shrink = { .gfp_mask = sc.gfp_mask, @@ -3166,11 +3173,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * Free memory by calling shrink zone with increasing * priorities until we have enough memory freed. */ - priority = ZONE_RECLAIM_PRIORITY; do { - shrink_zone(priority, zone, &sc); - priority--; - } while (priority >= 0 && sc.nr_reclaimed < nr_pages); + shrink_zone(zone, &sc); + } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); } nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); -- cgit v1.2.3-70-g09d2 From 5dc35979e444b50d5551bdeb7a7abc5c69c875d0 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:06:58 -0700 Subject: mm/vmscan: push lruvec pointer into isolate_lru_pages() Move the mem_cgroup_zone_lruvec() call from isolate_lru_pages() into shrink_[in]active_list(). Further patches push it to shrink_zone() step by step. Signed-off-by: Konstantin Khlebnikov Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 77905eb3d8a..b7d03d7b8f8 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1027,7 +1027,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) * Appropriate locks must be held before calling this function. * * @nr_to_scan: The number of pages to look through on the list. - * @mz: The mem_cgroup_zone to pull pages from. + * @lruvec: The LRU vector to pull pages from. * @dst: The temp list to put pages on to. * @nr_scanned: The number of pages that were scanned. * @sc: The scan_control struct for this reclaim session @@ -1037,17 +1037,15 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) * returns how many pages were moved onto *@dst. */ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, - struct mem_cgroup_zone *mz, struct list_head *dst, + struct lruvec *lruvec, struct list_head *dst, unsigned long *nr_scanned, struct scan_control *sc, isolate_mode_t mode, enum lru_list lru) { - struct lruvec *lruvec; struct list_head *src; unsigned long nr_taken = 0; unsigned long scan; int file = is_file_lru(lru); - lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup); src = &lruvec->lists[lru]; for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { @@ -1274,6 +1272,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, int file = is_file_lru(lru); struct zone *zone = mz->zone; struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, mz->mem_cgroup); while (unlikely(too_many_isolated(zone, file, sc))) { congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -1292,8 +1291,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, spin_lock_irq(&zone->lru_lock); - nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, &nr_scanned, - sc, isolate_mode, lru); + nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list, + &nr_scanned, sc, isolate_mode, lru); if (global_reclaim(sc)) { zone->pages_scanned += nr_scanned; if (current_is_kswapd()) @@ -1444,6 +1443,7 @@ static void shrink_active_list(unsigned long nr_to_scan, isolate_mode_t isolate_mode = 0; int file = is_file_lru(lru); struct zone *zone = mz->zone; + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, mz->mem_cgroup); lru_add_drain(); @@ -1454,8 +1454,8 @@ static void shrink_active_list(unsigned long nr_to_scan, spin_lock_irq(&zone->lru_lock); - nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold, &nr_scanned, sc, - isolate_mode, lru); + nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, + &nr_scanned, sc, isolate_mode, lru); if (global_reclaim(sc)) zone->pages_scanned += nr_scanned; -- cgit v1.2.3-70-g09d2 From 6a18adb35c27848195c938b0779ce882d63d3ed1 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:06:59 -0700 Subject: mm/vmscan: push zone pointer into shrink_page_list() It doesn't need a pointer to the cgroup - pointer to the zone is enough. This patch also kills the "mz" argument of page_check_references() - it is unused after "mm: memcg: count pte references from every member of the reclaimed hierarch" Signed-off-by: Konstantin Khlebnikov Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index b7d03d7b8f8..eaa154bd1f8 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -629,7 +629,6 @@ enum page_references { }; static enum page_references page_check_references(struct page *page, - struct mem_cgroup_zone *mz, struct scan_control *sc) { int referenced_ptes, referenced_page; @@ -688,7 +687,7 @@ static enum page_references page_check_references(struct page *page, * shrink_page_list() returns the number of reclaimed pages */ static unsigned long shrink_page_list(struct list_head *page_list, - struct mem_cgroup_zone *mz, + struct zone *zone, struct scan_control *sc, unsigned long *ret_nr_dirty, unsigned long *ret_nr_writeback) @@ -718,7 +717,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto keep; VM_BUG_ON(PageActive(page)); - VM_BUG_ON(page_zone(page) != mz->zone); + VM_BUG_ON(page_zone(page) != zone); sc->nr_scanned++; @@ -741,7 +740,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, goto keep; } - references = page_check_references(page, mz, sc); + references = page_check_references(page, sc); switch (references) { case PAGEREF_ACTIVATE: goto activate_locked; @@ -931,7 +930,7 @@ keep: * will encounter the same problem */ if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc)) - zone_set_flag(mz->zone, ZONE_CONGESTED); + zone_set_flag(zone, ZONE_CONGESTED); free_hot_cold_page_list(&free_pages, 1); @@ -1309,7 +1308,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, update_isolated_counts(mz, &page_list, &nr_anon, &nr_file); - nr_reclaimed = shrink_page_list(&page_list, mz, sc, + nr_reclaimed = shrink_page_list(&page_list, zone, sc, &nr_dirty, &nr_writeback); spin_lock_irq(&zone->lru_lock); -- cgit v1.2.3-70-g09d2 From 95d918fc009072c2f88ce2e8b5db2e5abfad7c3e Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:06:59 -0700 Subject: mm/vmscan: remove update_isolated_counts() update_isolated_counts() is no longer required, because lumpy-reclaim was removed. Insanity is over, now there is only one kind of inactive page. Signed-off-by: Konstantin Khlebnikov Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 60 ++++++------------------------------------------------------ 1 file changed, 6 insertions(+), 54 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index eaa154bd1f8..53fa8671eab 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1205,52 +1205,6 @@ putback_inactive_pages(struct mem_cgroup_zone *mz, list_splice(&pages_to_free, page_list); } -static noinline_for_stack void -update_isolated_counts(struct mem_cgroup_zone *mz, - struct list_head *page_list, - unsigned long *nr_anon, - unsigned long *nr_file) -{ - struct zone *zone = mz->zone; - unsigned int count[NR_LRU_LISTS] = { 0, }; - unsigned long nr_active = 0; - struct page *page; - int lru; - - /* - * Count pages and clear active flags - */ - list_for_each_entry(page, page_list, lru) { - int numpages = hpage_nr_pages(page); - lru = page_lru_base_type(page); - if (PageActive(page)) { - lru += LRU_ACTIVE; - ClearPageActive(page); - nr_active += numpages; - } - count[lru] += numpages; - } - - preempt_disable(); - __count_vm_events(PGDEACTIVATE, nr_active); - - __mod_zone_page_state(zone, NR_ACTIVE_FILE, - -count[LRU_ACTIVE_FILE]); - __mod_zone_page_state(zone, NR_INACTIVE_FILE, - -count[LRU_INACTIVE_FILE]); - __mod_zone_page_state(zone, NR_ACTIVE_ANON, - -count[LRU_ACTIVE_ANON]); - __mod_zone_page_state(zone, NR_INACTIVE_ANON, - -count[LRU_INACTIVE_ANON]); - - *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON]; - *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE]; - - __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file); - preempt_enable(); -} - /* * shrink_inactive_list() is a helper for shrink_zone(). It returns the number * of reclaimed pages @@ -1263,8 +1217,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, unsigned long nr_scanned; unsigned long nr_reclaimed = 0; unsigned long nr_taken; - unsigned long nr_anon; - unsigned long nr_file; unsigned long nr_dirty = 0; unsigned long nr_writeback = 0; isolate_mode_t isolate_mode = 0; @@ -1292,6 +1244,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list, &nr_scanned, sc, isolate_mode, lru); + + __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken); + __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); + if (global_reclaim(sc)) { zone->pages_scanned += nr_scanned; if (current_is_kswapd()) @@ -1306,15 +1262,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, if (nr_taken == 0) return 0; - update_isolated_counts(mz, &page_list, &nr_anon, &nr_file); - nr_reclaimed = shrink_page_list(&page_list, zone, sc, &nr_dirty, &nr_writeback); spin_lock_irq(&zone->lru_lock); - reclaim_stat->recent_scanned[0] += nr_anon; - reclaim_stat->recent_scanned[1] += nr_file; + reclaim_stat->recent_scanned[file] += nr_taken; if (global_reclaim(sc)) { if (current_is_kswapd()) @@ -1327,8 +1280,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, putback_inactive_pages(mz, &page_list); - __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon); - __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file); + __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); -- cgit v1.2.3-70-g09d2 From 27ac81d85e5cfcc755dd5fa3f04dc883ab5d821b Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:07:00 -0700 Subject: mm/vmscan: push lruvec pointer into putback_inactive_pages() As zone_reclaim_stat is now located in the lruvec, we can reach it directly. Signed-off-by: Konstantin Khlebnikov Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 53fa8671eab..76d786eb84a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1155,11 +1155,11 @@ static int too_many_isolated(struct zone *zone, int file, } static noinline_for_stack void -putback_inactive_pages(struct mem_cgroup_zone *mz, +putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) { - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); - struct zone *zone = mz->zone; + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; + struct zone *zone = lruvec_zone(lruvec); LIST_HEAD(pages_to_free); /* @@ -1278,7 +1278,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, nr_reclaimed); } - putback_inactive_pages(mz, &page_list); + putback_inactive_pages(lruvec, &page_list); __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); -- cgit v1.2.3-70-g09d2 From 074291fea8bcedeabf295360e2ddd9bbb5830b4a Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:07:00 -0700 Subject: mm/vmscan: replace zone_nr_lru_pages() with get_lruvec_size() If memory cgroup is enabled we always use lruvecs which are embedded into struct mem_cgroup_per_zone, so we can reach lru_size counters via container_of(). Signed-off-by: Konstantin Khlebnikov Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++---- mm/memcontrol.c | 9 +++++++++ mm/vmscan.c | 31 ++++++++++++++++--------------- 3 files changed, 27 insertions(+), 19 deletions(-) (limited to 'mm/vmscan.c') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e3fc200cd68..ccb3e3c65dd 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -123,8 +123,7 @@ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); -unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, - int nid, int zid, unsigned int lrumask); +unsigned long mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list); struct zone_reclaim_stat* mem_cgroup_get_reclaim_stat_from_page(struct page *page); extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, @@ -343,8 +342,7 @@ mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone) } static inline unsigned long -mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, - unsigned int lru_mask) +mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list lru) { return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 39b2c14f150..a68db296ada 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -723,6 +723,15 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, } unsigned long +mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list lru) +{ + struct mem_cgroup_per_zone *mz; + + mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); + return mz->lru_size[lru]; +} + +static unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, unsigned int lru_mask) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 76d786eb84a..5318faa6a25 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -155,19 +155,14 @@ static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz) return &mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup)->reclaim_stat; } -static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz, - enum lru_list lru) +static unsigned long get_lruvec_size(struct lruvec *lruvec, enum lru_list lru) { if (!mem_cgroup_disabled()) - return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup, - zone_to_nid(mz->zone), - zone_idx(mz->zone), - BIT(lru)); + return mem_cgroup_get_lruvec_size(lruvec, lru); - return zone_page_state(mz->zone, NR_LRU_BASE + lru); + return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru); } - /* * Add a shrinker callback to be called from the vm */ @@ -1603,6 +1598,9 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, enum lru_list lru; int noswap = 0; bool force_scan = false; + struct lruvec *lruvec; + + lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup); /* * If the zone or memcg is small, nr[l] can be 0. This @@ -1628,10 +1626,10 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, goto out; } - anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) + - zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); - file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) + - zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); + anon = get_lruvec_size(lruvec, LRU_ACTIVE_ANON) + + get_lruvec_size(lruvec, LRU_INACTIVE_ANON); + file = get_lruvec_size(lruvec, LRU_ACTIVE_FILE) + + get_lruvec_size(lruvec, LRU_INACTIVE_FILE); if (global_reclaim(sc)) { free = zone_page_state(mz->zone, NR_FREE_PAGES); @@ -1694,7 +1692,7 @@ out: int file = is_file_lru(lru); unsigned long scan; - scan = zone_nr_lru_pages(mz, lru); + scan = get_lruvec_size(lruvec, lru); if (sc->priority || noswap || !vmscan_swappiness(sc)) { scan >>= sc->priority; if (!scan && force_scan) @@ -1730,6 +1728,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, { unsigned long pages_for_compaction; unsigned long inactive_lru_pages; + struct lruvec *lruvec; /* If not in reclaim/compaction mode, stop */ if (!in_reclaim_compaction(sc)) @@ -1762,10 +1761,12 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, * If we have not reclaimed enough pages for compaction and the * inactive lists are large enough, continue reclaiming */ + lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup); pages_for_compaction = (2UL << sc->order); - inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); + inactive_lru_pages = get_lruvec_size(lruvec, LRU_INACTIVE_FILE); if (nr_swap_pages > 0) - inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); + inactive_lru_pages += get_lruvec_size(lruvec, + LRU_INACTIVE_ANON); if (sc->nr_reclaimed < pages_for_compaction && inactive_lru_pages > pages_for_compaction) return true; -- cgit v1.2.3-70-g09d2 From c56d5c7dfeb5cc754e17fa3d423086a3c551c219 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:07:00 -0700 Subject: mm/vmscan: push lruvec pointer into inactive_list_is_low() Switch mem_cgroup_inactive_anon_is_low() to lruvec pointers, mem_cgroup_get_lruvec_size() is more effective than mem_cgroup_zone_nr_lru_pages() Signed-off-by: Konstantin Khlebnikov Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ++++------ mm/memcontrol.c | 20 ++++++-------------- mm/vmscan.c | 40 ++++++++++++++++++++++------------------ 3 files changed, 32 insertions(+), 38 deletions(-) (limited to 'mm/vmscan.c') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ccb3e3c65dd..fc81dc24430 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -118,10 +118,8 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); /* * For memory reclaim. */ -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, - struct zone *zone); -int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, - struct zone *zone); +int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec); +int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); unsigned long mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list); struct zone_reclaim_stat* @@ -330,13 +328,13 @@ static inline bool mem_cgroup_disabled(void) } static inline int -mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) { return 1; } static inline int -mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone) +mem_cgroup_inactive_file_is_low(struct lruvec *lruvec) { return 1; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a68db296ada..9d1764630df 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1208,19 +1208,15 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg) return ret; } -int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) { unsigned long inactive_ratio; - int nid = zone_to_nid(zone); - int zid = zone_idx(zone); unsigned long inactive; unsigned long active; unsigned long gb; - inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, - BIT(LRU_INACTIVE_ANON)); - active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, - BIT(LRU_ACTIVE_ANON)); + inactive = mem_cgroup_get_lruvec_size(lruvec, LRU_INACTIVE_ANON); + active = mem_cgroup_get_lruvec_size(lruvec, LRU_ACTIVE_ANON); gb = (inactive + active) >> (30 - PAGE_SHIFT); if (gb) @@ -1231,17 +1227,13 @@ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) return inactive * inactive_ratio < active; } -int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone) +int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec) { unsigned long active; unsigned long inactive; - int zid = zone_idx(zone); - int nid = zone_to_nid(zone); - inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, - BIT(LRU_INACTIVE_FILE)); - active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, - BIT(LRU_ACTIVE_FILE)); + inactive = mem_cgroup_get_lruvec_size(lruvec, LRU_INACTIVE_FILE); + active = mem_cgroup_get_lruvec_size(lruvec, LRU_ACTIVE_FILE); return (active > inactive); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 5318faa6a25..79e2ead21c5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1488,13 +1488,12 @@ static int inactive_anon_is_low_global(struct zone *zone) /** * inactive_anon_is_low - check if anonymous pages need to be deactivated - * @zone: zone to check - * @sc: scan control of this context + * @lruvec: LRU vector to check * * Returns true if the zone does not have enough inactive anon pages, * meaning some active anon pages need to be deactivated. */ -static int inactive_anon_is_low(struct mem_cgroup_zone *mz) +static int inactive_anon_is_low(struct lruvec *lruvec) { /* * If we don't have swap space, anonymous page deactivation @@ -1504,13 +1503,12 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz) return 0; if (!mem_cgroup_disabled()) - return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup, - mz->zone); + return mem_cgroup_inactive_anon_is_low(lruvec); - return inactive_anon_is_low_global(mz->zone); + return inactive_anon_is_low_global(lruvec_zone(lruvec)); } #else -static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz) +static inline int inactive_anon_is_low(struct lruvec *lruvec) { return 0; } @@ -1528,7 +1526,7 @@ static int inactive_file_is_low_global(struct zone *zone) /** * inactive_file_is_low - check if file pages need to be deactivated - * @mz: memory cgroup and zone to check + * @lruvec: LRU vector to check * * When the system is doing streaming IO, memory pressure here * ensures that active file pages get deactivated, until more @@ -1540,21 +1538,20 @@ static int inactive_file_is_low_global(struct zone *zone) * This uses a different ratio than the anonymous pages, because * the page cache uses a use-once replacement algorithm. */ -static int inactive_file_is_low(struct mem_cgroup_zone *mz) +static int inactive_file_is_low(struct lruvec *lruvec) { if (!mem_cgroup_disabled()) - return mem_cgroup_inactive_file_is_low(mz->mem_cgroup, - mz->zone); + return mem_cgroup_inactive_file_is_low(lruvec); - return inactive_file_is_low_global(mz->zone); + return inactive_file_is_low_global(lruvec_zone(lruvec)); } -static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file) +static int inactive_list_is_low(struct lruvec *lruvec, int file) { if (file) - return inactive_file_is_low(mz); + return inactive_file_is_low(lruvec); else - return inactive_anon_is_low(mz); + return inactive_anon_is_low(lruvec); } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, @@ -1564,7 +1561,10 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, int file = is_file_lru(lru); if (is_active_lru(lru)) { - if (inactive_list_is_low(mz, file)) + struct lruvec *lruvec = mem_cgroup_zone_lruvec(mz->zone, + mz->mem_cgroup); + + if (inactive_list_is_low(lruvec, file)) shrink_active_list(nr_to_scan, mz, sc, lru); return 0; } @@ -1793,6 +1793,9 @@ static void shrink_mem_cgroup_zone(struct mem_cgroup_zone *mz, unsigned long nr_reclaimed, nr_scanned; unsigned long nr_to_reclaim = sc->nr_to_reclaim; struct blk_plug plug; + struct lruvec *lruvec; + + lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup); restart: nr_reclaimed = 0; @@ -1831,7 +1834,7 @@ restart: * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (inactive_anon_is_low(mz)) + if (inactive_anon_is_low(lruvec)) shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, LRU_ACTIVE_ANON); @@ -2264,12 +2267,13 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc) memcg = mem_cgroup_iter(NULL, NULL, NULL); do { + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); struct mem_cgroup_zone mz = { .mem_cgroup = memcg, .zone = zone, }; - if (inactive_anon_is_low(&mz)) + if (inactive_anon_is_low(lruvec)) shrink_active_list(SWAP_CLUSTER_MAX, &mz, sc, LRU_ACTIVE_ANON); -- cgit v1.2.3-70-g09d2 From 1a93be0e7a6fc7f3d19101402665c7a958beb568 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:07:01 -0700 Subject: mm/vmscan: push lruvec pointer into shrink_list() Signed-off-by: Konstantin Khlebnikov Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 79e2ead21c5..6dbf2c2082e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1205,7 +1205,7 @@ putback_inactive_pages(struct lruvec *lruvec, * of reclaimed pages */ static noinline_for_stack unsigned long -shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, +shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, struct scan_control *sc, enum lru_list lru) { LIST_HEAD(page_list); @@ -1216,9 +1216,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, unsigned long nr_writeback = 0; isolate_mode_t isolate_mode = 0; int file = is_file_lru(lru); - struct zone *zone = mz->zone; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); - struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, mz->mem_cgroup); + struct zone *zone = lruvec_zone(lruvec); + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; while (unlikely(too_many_isolated(zone, file, sc))) { congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -1373,7 +1372,7 @@ static void move_active_pages_to_lru(struct zone *zone, } static void shrink_active_list(unsigned long nr_to_scan, - struct mem_cgroup_zone *mz, + struct lruvec *lruvec, struct scan_control *sc, enum lru_list lru) { @@ -1384,12 +1383,11 @@ static void shrink_active_list(unsigned long nr_to_scan, LIST_HEAD(l_active); LIST_HEAD(l_inactive); struct page *page; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; unsigned long nr_rotated = 0; isolate_mode_t isolate_mode = 0; int file = is_file_lru(lru); - struct zone *zone = mz->zone; - struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, mz->mem_cgroup); + struct zone *zone = lruvec_zone(lruvec); lru_add_drain(); @@ -1555,21 +1553,17 @@ static int inactive_list_is_low(struct lruvec *lruvec, int file) } static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, - struct mem_cgroup_zone *mz, - struct scan_control *sc) + struct lruvec *lruvec, struct scan_control *sc) { int file = is_file_lru(lru); if (is_active_lru(lru)) { - struct lruvec *lruvec = mem_cgroup_zone_lruvec(mz->zone, - mz->mem_cgroup); - if (inactive_list_is_low(lruvec, file)) - shrink_active_list(nr_to_scan, mz, sc, lru); + shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } - return shrink_inactive_list(nr_to_scan, mz, sc, lru); + return shrink_inactive_list(nr_to_scan, lruvec, sc, lru); } static int vmscan_swappiness(struct scan_control *sc) @@ -1812,7 +1806,7 @@ restart: nr[lru] -= nr_to_scan; nr_reclaimed += shrink_list(lru, nr_to_scan, - mz, sc); + lruvec, sc); } } /* @@ -1835,7 +1829,7 @@ restart: * rebalance the anon lru active/inactive ratio. */ if (inactive_anon_is_low(lruvec)) - shrink_active_list(SWAP_CLUSTER_MAX, mz, + shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); /* reclaim/compaction might need reclaim to continue */ @@ -2268,13 +2262,9 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc) memcg = mem_cgroup_iter(NULL, NULL, NULL); do { struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); - struct mem_cgroup_zone mz = { - .mem_cgroup = memcg, - .zone = zone, - }; if (inactive_anon_is_low(lruvec)) - shrink_active_list(SWAP_CLUSTER_MAX, &mz, + shrink_active_list(SWAP_CLUSTER_MAX, lruvec, sc, LRU_ACTIVE_ANON); memcg = mem_cgroup_iter(NULL, memcg, NULL); -- cgit v1.2.3-70-g09d2 From 90126375d89ab8e0bde30ff22139b6097d56ed8a Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:07:01 -0700 Subject: mm/vmscan: push lruvec pointer into get_scan_count() Signed-off-by: Konstantin Khlebnikov Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 6dbf2c2082e..b139ad7f396 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -150,11 +150,6 @@ static bool global_reclaim(struct scan_control *sc) } #endif -static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz) -{ - return &mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup)->reclaim_stat; -} - static unsigned long get_lruvec_size(struct lruvec *lruvec, enum lru_list lru) { if (!mem_cgroup_disabled()) @@ -1581,20 +1576,18 @@ static int vmscan_swappiness(struct scan_control *sc) * * nr[0] = anon pages to scan; nr[1] = file pages to scan */ -static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, +static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, unsigned long *nr) { unsigned long anon, file, free; unsigned long anon_prio, file_prio; unsigned long ap, fp; - struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; u64 fraction[2], denominator; enum lru_list lru; int noswap = 0; bool force_scan = false; - struct lruvec *lruvec; - - lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup); + struct zone *zone = lruvec_zone(lruvec); /* * If the zone or memcg is small, nr[l] can be 0. This @@ -1606,7 +1599,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * latencies, so it's better to scan a minimum amount there as * well. */ - if (current_is_kswapd() && mz->zone->all_unreclaimable) + if (current_is_kswapd() && zone->all_unreclaimable) force_scan = true; if (!global_reclaim(sc)) force_scan = true; @@ -1626,10 +1619,10 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, get_lruvec_size(lruvec, LRU_INACTIVE_FILE); if (global_reclaim(sc)) { - free = zone_page_state(mz->zone, NR_FREE_PAGES); + free = zone_page_state(zone, NR_FREE_PAGES); /* If we have very few page cache pages, force-scan anon pages. */ - if (unlikely(file + free <= high_wmark_pages(mz->zone))) { + if (unlikely(file + free <= high_wmark_pages(zone))) { fraction[0] = 1; fraction[1] = 0; denominator = 1; @@ -1655,7 +1648,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, * * anon in [0], file in [1] */ - spin_lock_irq(&mz->zone->lru_lock); + spin_lock_irq(&zone->lru_lock); if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { reclaim_stat->recent_scanned[0] /= 2; reclaim_stat->recent_rotated[0] /= 2; @@ -1676,7 +1669,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, fp = file_prio * (reclaim_stat->recent_scanned[1] + 1); fp /= reclaim_stat->recent_rotated[1] + 1; - spin_unlock_irq(&mz->zone->lru_lock); + spin_unlock_irq(&zone->lru_lock); fraction[0] = ap; fraction[1] = fp; @@ -1794,7 +1787,7 @@ static void shrink_mem_cgroup_zone(struct mem_cgroup_zone *mz, restart: nr_reclaimed = 0; nr_scanned = sc->nr_scanned; - get_scan_count(mz, sc, nr); + get_scan_count(lruvec, sc, nr); blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || -- cgit v1.2.3-70-g09d2 From 90bdcfafdc660b359018262f0f8630d100e28760 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:07:02 -0700 Subject: mm/vmscan: push lruvec pointer into should_continue_reclaim() Signed-off-by: Konstantin Khlebnikov Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index b139ad7f396..1d251b5b0a0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1708,14 +1708,13 @@ static bool in_reclaim_compaction(struct scan_control *sc) * calls try_to_compact_zone() that it will have enough free pages to succeed. * It will give up earlier than that if there is difficulty reclaiming pages. */ -static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, +static inline bool should_continue_reclaim(struct lruvec *lruvec, unsigned long nr_reclaimed, unsigned long nr_scanned, struct scan_control *sc) { unsigned long pages_for_compaction; unsigned long inactive_lru_pages; - struct lruvec *lruvec; /* If not in reclaim/compaction mode, stop */ if (!in_reclaim_compaction(sc)) @@ -1748,7 +1747,6 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, * If we have not reclaimed enough pages for compaction and the * inactive lists are large enough, continue reclaiming */ - lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup); pages_for_compaction = (2UL << sc->order); inactive_lru_pages = get_lruvec_size(lruvec, LRU_INACTIVE_FILE); if (nr_swap_pages > 0) @@ -1759,7 +1757,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, return true; /* If compaction would go ahead or the allocation would succeed, stop */ - switch (compaction_suitable(mz->zone, sc->order)) { + switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) { case COMPACT_PARTIAL: case COMPACT_CONTINUE: return false; @@ -1826,7 +1824,7 @@ restart: sc, LRU_ACTIVE_ANON); /* reclaim/compaction might need reclaim to continue */ - if (should_continue_reclaim(mz, nr_reclaimed, + if (should_continue_reclaim(lruvec, nr_reclaimed, sc->nr_scanned - nr_scanned, sc)) goto restart; -- cgit v1.2.3-70-g09d2 From f9be23d6da035241b7687b25e64401171986dcef Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 29 May 2012 15:07:02 -0700 Subject: mm/vmscan: kill struct mem_cgroup_zone Kill struct mem_cgroup_zone and rename shrink_mem_cgroup_zone() to shrink_lruvec(), it always shrinks one lruvec which it takes as an argument. Signed-off-by: Konstantin Khlebnikov Cc: Mel Gorman Cc: KAMEZAWA Hiroyuki Acked-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 1d251b5b0a0..4c5453f8427 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -94,11 +94,6 @@ struct scan_control { nodemask_t *nodemask; }; -struct mem_cgroup_zone { - struct mem_cgroup *mem_cgroup; - struct zone *zone; -}; - #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) #ifdef ARCH_HAS_PREFETCH @@ -1769,8 +1764,7 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec, /* * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. */ -static void shrink_mem_cgroup_zone(struct mem_cgroup_zone *mz, - struct scan_control *sc) +static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { unsigned long nr[NR_LRU_LISTS]; unsigned long nr_to_scan; @@ -1778,9 +1772,6 @@ static void shrink_mem_cgroup_zone(struct mem_cgroup_zone *mz, unsigned long nr_reclaimed, nr_scanned; unsigned long nr_to_reclaim = sc->nr_to_reclaim; struct blk_plug plug; - struct lruvec *lruvec; - - lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup); restart: nr_reclaimed = 0; @@ -1842,12 +1833,10 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc) memcg = mem_cgroup_iter(root, NULL, &reclaim); do { - struct mem_cgroup_zone mz = { - .mem_cgroup = memcg, - .zone = zone, - }; + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); + + shrink_lruvec(lruvec, sc); - shrink_mem_cgroup_zone(&mz, sc); /* * Limit reclaim has historically picked one memcg and * scanned it with decreasing priority levels until @@ -2172,10 +2161,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, .priority = 0, .target_mem_cgroup = memcg, }; - struct mem_cgroup_zone mz = { - .mem_cgroup = memcg, - .zone = zone, - }; + struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); @@ -2191,7 +2177,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, * will pick up pages from other mem cgroup's as well. We hack * the priority and make it zero. */ - shrink_mem_cgroup_zone(&mz, &sc); + shrink_lruvec(lruvec, &sc); trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); -- cgit v1.2.3-70-g09d2 From 4d7dcca213921fbaf08ee05359d28e4aaf2245f1 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 29 May 2012 15:07:08 -0700 Subject: mm/memcg: get_lru_size not get_lruvec_size Konstantin just introduced mem_cgroup_get_lruvec_size() and get_lruvec_size(), I'm about to add mem_cgroup_update_lru_size(): but we're dealing with the same thing, lru_size[lru]. We ought to agree on the naming, and I do think lru_size is the more correct: so rename his ones to get_lru_size(). Signed-off-by: Hugh Dickins Acked-by: KAMEZAWA Hiroyuki Acked-by: Konstantin Khlebnikov Acked-by: Michal Hocko Cc: KOSAKI Motohiro Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- mm/memcontrol.c | 10 +++++----- mm/vmscan.c | 19 +++++++++---------- 3 files changed, 16 insertions(+), 17 deletions(-) (limited to 'mm/vmscan.c') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fc81dc24430..609ef7c28c6 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -121,7 +121,7 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec); int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); -unsigned long mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list); +unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list); struct zone_reclaim_stat* mem_cgroup_get_reclaim_stat_from_page(struct page *page); extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, @@ -340,7 +340,7 @@ mem_cgroup_inactive_file_is_low(struct lruvec *lruvec) } static inline unsigned long -mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list lru) +mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 35e008e2542..75198dac3fe 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -737,7 +737,7 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, } unsigned long -mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list lru) +mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { struct mem_cgroup_per_zone *mz; @@ -1229,8 +1229,8 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) unsigned long active; unsigned long gb; - inactive = mem_cgroup_get_lruvec_size(lruvec, LRU_INACTIVE_ANON); - active = mem_cgroup_get_lruvec_size(lruvec, LRU_ACTIVE_ANON); + inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON); + active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON); gb = (inactive + active) >> (30 - PAGE_SHIFT); if (gb) @@ -1246,8 +1246,8 @@ int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec) unsigned long active; unsigned long inactive; - inactive = mem_cgroup_get_lruvec_size(lruvec, LRU_INACTIVE_FILE); - active = mem_cgroup_get_lruvec_size(lruvec, LRU_ACTIVE_FILE); + inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_FILE); + active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_FILE); return (active > inactive); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 4c5453f8427..8b941f303ce 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -145,10 +145,10 @@ static bool global_reclaim(struct scan_control *sc) } #endif -static unsigned long get_lruvec_size(struct lruvec *lruvec, enum lru_list lru) +static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) { if (!mem_cgroup_disabled()) - return mem_cgroup_get_lruvec_size(lruvec, lru); + return mem_cgroup_get_lru_size(lruvec, lru); return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru); } @@ -1608,10 +1608,10 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, goto out; } - anon = get_lruvec_size(lruvec, LRU_ACTIVE_ANON) + - get_lruvec_size(lruvec, LRU_INACTIVE_ANON); - file = get_lruvec_size(lruvec, LRU_ACTIVE_FILE) + - get_lruvec_size(lruvec, LRU_INACTIVE_FILE); + anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + + get_lru_size(lruvec, LRU_INACTIVE_ANON); + file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + + get_lru_size(lruvec, LRU_INACTIVE_FILE); if (global_reclaim(sc)) { free = zone_page_state(zone, NR_FREE_PAGES); @@ -1674,7 +1674,7 @@ out: int file = is_file_lru(lru); unsigned long scan; - scan = get_lruvec_size(lruvec, lru); + scan = get_lru_size(lruvec, lru); if (sc->priority || noswap || !vmscan_swappiness(sc)) { scan >>= sc->priority; if (!scan && force_scan) @@ -1743,10 +1743,9 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec, * inactive lists are large enough, continue reclaiming */ pages_for_compaction = (2UL << sc->order); - inactive_lru_pages = get_lruvec_size(lruvec, LRU_INACTIVE_FILE); + inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE); if (nr_swap_pages > 0) - inactive_lru_pages += get_lruvec_size(lruvec, - LRU_INACTIVE_ANON); + inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON); if (sc->nr_reclaimed < pages_for_compaction && inactive_lru_pages > pages_for_compaction) return true; -- cgit v1.2.3-70-g09d2 From 75b00af77ed5b5a3d55549f9e0c33f3969b9330c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 29 May 2012 15:07:09 -0700 Subject: mm: trivial cleanups in vmscan.c Utter trivia in mm/vmscan.c, mostly just reducing the linecount slightly; most exciting change being get_scan_count() calling vmscan_swappiness() once instead of twice. Signed-off-by: Hugh Dickins Acked-by: KOSAKI Motohiro Acked-by: KAMEZAWA Hiroyuki Acked-by: Konstantin Khlebnikov Reviewed-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) (limited to 'mm/vmscan.c') diff --git a/mm/vmscan.c b/mm/vmscan.c index 8b941f303ce..05d439dc1af 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1025,12 +1025,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, unsigned long *nr_scanned, struct scan_control *sc, isolate_mode_t mode, enum lru_list lru) { - struct list_head *src; + struct list_head *src = &lruvec->lists[lru]; unsigned long nr_taken = 0; unsigned long scan; - int file = is_file_lru(lru); - - src = &lruvec->lists[lru]; for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { struct page *page; @@ -1058,11 +1055,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, } *nr_scanned = scan; - - trace_mm_vmscan_lru_isolate(sc->order, - nr_to_scan, scan, - nr_taken, - mode, file); + trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan, + nr_taken, mode, is_file_lru(lru)); return nr_taken; } @@ -1140,8 +1134,7 @@ static int too_many_isolated(struct zone *zone, int file, } static noinline_for_stack void -putback_inactive_pages(struct lruvec *lruvec, - struct list_head *page_list) +putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) { struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; struct zone *zone = lruvec_zone(lruvec); @@ -1235,11 +1228,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, if (global_reclaim(sc)) { zone->pages_scanned += nr_scanned; if (current_is_kswapd()) - __count_zone_vm_events(PGSCAN_KSWAPD, zone, - nr_scanned); + __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); else - __count_zone_vm_events(PGSCAN_DIRECT, zone, - nr_scanned); + __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned); } spin_unlock_irq(&zone->lru_lock); @@ -1534,9 +1525,9 @@ static int inactive_file_is_low(struct lruvec *lruvec) return inactive_file_is_low_global(lruvec_zone(lruvec)); } -static int inactive_list_is_low(struct lruvec *lruvec, int file) +static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru) { - if (file) + if (is_file_lru(lru)) return inactive_file_is_low(lruvec); else return inactive_anon_is_low(lruvec); @@ -1545,10 +1536,8 @@ static int inactive_list_is_low(struct lruvec *lruvec, int file) static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, struct lruvec *lruvec, struct scan_control *sc) { - int file = is_file_lru(lru); - if (is_active_lru(lru)) { - if (inactive_list_is_low(lruvec, file)) + if (inactive_list_is_low(lruvec, lru)) shrink_active_list(nr_to_scan, lruvec, sc, lru); return 0; } @@ -1630,7 +1619,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, * This scanning priority is essentially the inverse of IO cost. */ anon_prio = vmscan_swappiness(sc); - file_prio = 200 - vmscan_swappiness(sc); + file_prio = 200 - anon_prio; /* * OK, so we have swap space and a fair amount of page cache -- cgit v1.2.3-70-g09d2 From fa9add641b1b1c564db916accac1db346e7a2759 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 29 May 2012 15:07:09 -0700 Subject: mm/memcg: apply add/del_page to lruvec Take lruvec further: pass it instead of zone to add_page_to_lru_list() and del_page_from_lru_list(); and pagevec_lru_move_fn() pass lruvec down to its target functions. This cleanup eliminates a swathe of cruft in memcontrol.c, including mem_cgroup_lru_add_list(), mem_cgroup_lru_del_list() and mem_cgroup_lru_move_lists() - which never actually touched the lists. In their place, mem_cgroup_page_lruvec() to decide the lruvec, previously a side-effect of add, and mem_cgroup_update_lru_size() to maintain the lru_size stats. Whilst these are simplifications in their own right, the goal is to bring the evaluation of lruvec next to the spin_locking of the lrus, in preparation for a future patch. Signed-off-by: Hugh Dickins Cc: KOSAKI Motohiro Acked-by: KAMEZAWA Hiroyuki Acked-by: Michal Hocko Acked-by: Konstantin Khlebnikov Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 32 ++++---------- include/linux/mm_inline.h | 20 ++++----- include/linux/swap.h | 4 +- mm/compaction.c | 5 ++- mm/huge_memory.c | 8 ++-- mm/memcontrol.c | 101 +++++++++++---------------------------------- mm/swap.c | 86 +++++++++++++++++++------------------- mm/vmscan.c | 47 ++++++++++++--------- 8 files changed, 122 insertions(+), 181 deletions(-) (limited to 'mm/vmscan.c') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 609ef7c28c6..83e7ba90d6e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -63,11 +63,7 @@ extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); -struct lruvec *mem_cgroup_lru_add_list(struct zone *, struct page *, - enum lru_list); -void mem_cgroup_lru_del_list(struct page *, enum lru_list); -struct lruvec *mem_cgroup_lru_move_lists(struct zone *, struct page *, - enum lru_list, enum lru_list); +struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); /* For coalescing uncharge for reducing memcg' overhead*/ extern void mem_cgroup_uncharge_start(void); @@ -122,8 +118,7 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec); int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list); -struct zone_reclaim_stat* -mem_cgroup_get_reclaim_stat_from_page(struct page *page); +void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int); extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p); extern void mem_cgroup_replace_page_cache(struct page *oldpage, @@ -250,21 +245,8 @@ static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, return &zone->lruvec; } -static inline struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, - struct page *page, - enum lru_list lru) -{ - return &zone->lruvec; -} - -static inline void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru) -{ -} - -static inline struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone, - struct page *page, - enum lru_list from, - enum lru_list to) +static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, + struct zone *zone) { return &zone->lruvec; } @@ -345,10 +327,10 @@ mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) return 0; } -static inline struct zone_reclaim_stat* -mem_cgroup_get_reclaim_stat_from_page(struct page *page) +static inline void +mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, + int increment) { - return NULL; } static inline void diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 16d45d9c31a..1397ccf81e9 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -21,22 +21,22 @@ static inline int page_is_file_cache(struct page *page) return !PageSwapBacked(page); } -static __always_inline void -add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list lru) +static __always_inline void add_page_to_lru_list(struct page *page, + struct lruvec *lruvec, enum lru_list lru) { - struct lruvec *lruvec; - - lruvec = mem_cgroup_lru_add_list(zone, page, lru); + int nr_pages = hpage_nr_pages(page); + mem_cgroup_update_lru_size(lruvec, lru, nr_pages); list_add(&page->lru, &lruvec->lists[lru]); - __mod_zone_page_state(zone, NR_LRU_BASE + lru, hpage_nr_pages(page)); + __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages); } -static __always_inline void -del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list lru) +static __always_inline void del_page_from_lru_list(struct page *page, + struct lruvec *lruvec, enum lru_list lru) { - mem_cgroup_lru_del_list(page, lru); + int nr_pages = hpage_nr_pages(page); + mem_cgroup_update_lru_size(lruvec, lru, -nr_pages); list_del(&page->lru); - __mod_zone_page_state(zone, NR_LRU_BASE + lru, -hpage_nr_pages(page)); + __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, -nr_pages); } /** diff --git a/include/linux/swap.h b/include/linux/swap.h index ff38eb7c0ec..b6661933e25 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -221,8 +221,8 @@ extern unsigned int nr_free_pagecache_pages(void); /* linux/mm/swap.c */ extern void __lru_cache_add(struct page *, enum lru_list lru); extern void lru_cache_add_lru(struct page *, enum lru_list lru); -extern void lru_add_page_tail(struct zone* zone, - struct page *page, struct page *page_tail); +extern void lru_add_page_tail(struct page *page, struct page *page_tail, + struct lruvec *lruvec); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); diff --git a/mm/compaction.c b/mm/compaction.c index 74e1b380383..4ac338af512 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -227,6 +227,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, unsigned long nr_scanned = 0, nr_isolated = 0; struct list_head *migratelist = &cc->migratepages; isolate_mode_t mode = 0; + struct lruvec *lruvec; /* * Ensure that there are not too many pages isolated from the LRU @@ -328,6 +329,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, if (cc->mode != COMPACT_SYNC) mode |= ISOLATE_ASYNC_MIGRATE; + lruvec = mem_cgroup_page_lruvec(page, zone); + /* Try isolate the page */ if (__isolate_lru_page(page, mode) != 0) continue; @@ -335,7 +338,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, VM_BUG_ON(PageTransCompound(page)); /* Successfully isolated */ - del_page_from_lru_list(zone, page, page_lru(page)); + del_page_from_lru_list(page, lruvec, page_lru(page)); list_add(&page->lru, migratelist); cc->nr_migratepages++; nr_isolated++; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d0def42c121..57c4b930901 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1231,10 +1231,13 @@ static void __split_huge_page_refcount(struct page *page) { int i; struct zone *zone = page_zone(page); + struct lruvec *lruvec; int tail_count = 0; /* prevent PageLRU to go away from under us, and freeze lru stats */ spin_lock_irq(&zone->lru_lock); + lruvec = mem_cgroup_page_lruvec(page, zone); + compound_lock(page); /* complete memcg works before add pages to LRU */ mem_cgroup_split_huge_fixup(page); @@ -1309,13 +1312,12 @@ static void __split_huge_page_refcount(struct page *page) BUG_ON(!PageDirty(page_tail)); BUG_ON(!PageSwapBacked(page_tail)); - - lru_add_page_tail(zone, page, page_tail); + lru_add_page_tail(page, page_tail, lruvec); } atomic_sub(tail_count, &page->_count); BUG_ON(atomic_read(&page->_count) <= 0); - __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); + __mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1); __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); ClearPageCompound(page); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 75198dac3fe..bb8d7d3cf30 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1035,7 +1035,7 @@ EXPORT_SYMBOL(mem_cgroup_count_vm_event); /** * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg * @zone: zone of the wanted lruvec - * @mem: memcg of the wanted lruvec + * @memcg: memcg of the wanted lruvec * * Returns the lru list vector holding pages for the given @zone and * @mem. This can be the global zone lruvec, if the memory controller @@ -1068,19 +1068,11 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, */ /** - * mem_cgroup_lru_add_list - account for adding an lru page and return lruvec - * @zone: zone of the page + * mem_cgroup_page_lruvec - return lruvec for adding an lru page * @page: the page - * @lru: current lru - * - * This function accounts for @page being added to @lru, and returns - * the lruvec for the given @zone and the memcg @page is charged to. - * - * The callsite is then responsible for physically linking the page to - * the returned lruvec->lists[@lru]. + * @zone: zone of the page */ -struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page, - enum lru_list lru) +struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone) { struct mem_cgroup_per_zone *mz; struct mem_cgroup *memcg; @@ -1093,7 +1085,7 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page, memcg = pc->mem_cgroup; /* - * Surreptitiously switch any uncharged page to root: + * Surreptitiously switch any uncharged offlist page to root: * an uncharged page off lru does nothing to secure * its former mem_cgroup from sudden removal. * @@ -1101,65 +1093,35 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page, * under page_cgroup lock: between them, they make all uses * of pc->mem_cgroup safe. */ - if (!PageCgroupUsed(pc) && memcg != root_mem_cgroup) + if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup) pc->mem_cgroup = memcg = root_mem_cgroup; mz = page_cgroup_zoneinfo(memcg, page); - /* compound_order() is stabilized through lru_lock */ - mz->lru_size[lru] += 1 << compound_order(page); return &mz->lruvec; } /** - * mem_cgroup_lru_del_list - account for removing an lru page - * @page: the page - * @lru: target lru + * mem_cgroup_update_lru_size - account for adding or removing an lru page + * @lruvec: mem_cgroup per zone lru vector + * @lru: index of lru list the page is sitting on + * @nr_pages: positive when adding or negative when removing * - * This function accounts for @page being removed from @lru. - * - * The callsite is then responsible for physically unlinking - * @page->lru. + * This function must be called when a page is added to or removed from an + * lru list. */ -void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru) +void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, + int nr_pages) { struct mem_cgroup_per_zone *mz; - struct mem_cgroup *memcg; - struct page_cgroup *pc; + unsigned long *lru_size; if (mem_cgroup_disabled()) return; - pc = lookup_page_cgroup(page); - memcg = pc->mem_cgroup; - VM_BUG_ON(!memcg); - mz = page_cgroup_zoneinfo(memcg, page); - /* huge page split is done under lru_lock. so, we have no races. */ - VM_BUG_ON(mz->lru_size[lru] < (1 << compound_order(page))); - mz->lru_size[lru] -= 1 << compound_order(page); -} - -/** - * mem_cgroup_lru_move_lists - account for moving a page between lrus - * @zone: zone of the page - * @page: the page - * @from: current lru - * @to: target lru - * - * This function accounts for @page being moved between the lrus @from - * and @to, and returns the lruvec for the given @zone and the memcg - * @page is charged to. - * - * The callsite is then responsible for physically relinking - * @page->lru to the returned lruvec->lists[@to]. - */ -struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone, - struct page *page, - enum lru_list from, - enum lru_list to) -{ - /* XXX: Optimize this, especially for @from == @to */ - mem_cgroup_lru_del_list(page, from); - return mem_cgroup_lru_add_list(zone, page, to); + mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); + lru_size = mz->lru_size + lru; + *lru_size += nr_pages; + VM_BUG_ON((long)(*lru_size) < 0); } /* @@ -1252,24 +1214,6 @@ int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec) return (active > inactive); } -struct zone_reclaim_stat * -mem_cgroup_get_reclaim_stat_from_page(struct page *page) -{ - struct page_cgroup *pc; - struct mem_cgroup_per_zone *mz; - - if (mem_cgroup_disabled()) - return NULL; - - pc = lookup_page_cgroup(page); - if (!PageCgroupUsed(pc)) - return NULL; - /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ - smp_rmb(); - mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); - return &mz->lruvec.reclaim_stat; -} - #define mem_cgroup_from_res_counter(counter, member) \ container_of(counter, struct mem_cgroup, member) @@ -2509,6 +2453,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, { struct page_cgroup *pc = lookup_page_cgroup(page); struct zone *uninitialized_var(zone); + struct lruvec *lruvec; bool was_on_lru = false; bool anon; @@ -2531,8 +2476,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, zone = page_zone(page); spin_lock_irq(&zone->lru_lock); if (PageLRU(page)) { + lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); ClearPageLRU(page); - del_page_from_lru_list(zone, page, page_lru(page)); + del_page_from_lru_list(page, lruvec, page_lru(page)); was_on_lru = true; } } @@ -2550,9 +2496,10 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, if (lrucare) { if (was_on_lru) { + lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - add_page_to_lru_list(zone, page, page_lru(page)); + add_page_to_lru_list(page, lruvec, page_lru(page)); } spin_unlock_irq(&zone->lru_lock); } diff --git a/mm/swap.c b/mm/swap.c index 0503ad705e7..4e7e2ec6707 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -47,13 +47,15 @@ static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); static void __page_cache_release(struct page *page) { if (PageLRU(page)) { - unsigned long flags; struct zone *zone = page_zone(page); + struct lruvec *lruvec; + unsigned long flags; spin_lock_irqsave(&zone->lru_lock, flags); + lruvec = mem_cgroup_page_lruvec(page, zone); VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); - del_page_from_lru_list(zone, page, page_off_lru(page)); + del_page_from_lru_list(page, lruvec, page_off_lru(page)); spin_unlock_irqrestore(&zone->lru_lock, flags); } } @@ -235,11 +237,12 @@ void put_pages_list(struct list_head *pages) EXPORT_SYMBOL(put_pages_list); static void pagevec_lru_move_fn(struct pagevec *pvec, - void (*move_fn)(struct page *page, void *arg), - void *arg) + void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg), + void *arg) { int i; struct zone *zone = NULL; + struct lruvec *lruvec; unsigned long flags = 0; for (i = 0; i < pagevec_count(pvec); i++) { @@ -253,7 +256,8 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, spin_lock_irqsave(&zone->lru_lock, flags); } - (*move_fn)(page, arg); + lruvec = mem_cgroup_page_lruvec(page, zone); + (*move_fn)(page, lruvec, arg); } if (zone) spin_unlock_irqrestore(&zone->lru_lock, flags); @@ -261,16 +265,13 @@ static void pagevec_lru_move_fn(struct pagevec *pvec, pagevec_reinit(pvec); } -static void pagevec_move_tail_fn(struct page *page, void *arg) +static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec, + void *arg) { int *pgmoved = arg; if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { enum lru_list lru = page_lru_base_type(page); - struct lruvec *lruvec; - - lruvec = mem_cgroup_lru_move_lists(page_zone(page), - page, lru, lru); list_move_tail(&page->lru, &lruvec->lists[lru]); (*pgmoved)++; } @@ -309,35 +310,30 @@ void rotate_reclaimable_page(struct page *page) } } -static void update_page_reclaim_stat(struct zone *zone, struct page *page, +static void update_page_reclaim_stat(struct lruvec *lruvec, int file, int rotated) { - struct zone_reclaim_stat *reclaim_stat; - - reclaim_stat = mem_cgroup_get_reclaim_stat_from_page(page); - if (!reclaim_stat) - reclaim_stat = &zone->lruvec.reclaim_stat; + struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; reclaim_stat->recent_scanned[file]++; if (rotated) reclaim_stat->recent_rotated[file]++; } -static void __activate_page(struct page *page, void *arg) +static void __activate_page(struct page *page, struct lruvec *lruvec, + void *arg) { - struct zone *zone = page_zone(page); - if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { int file = page_is_file_cache(page); int lru = page_lru_base_type(page); - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); SetPageActive(page); lru += LRU_ACTIVE; - add_page_to_lru_list(zone, page, lru); - __count_vm_event(PGACTIVATE); + add_page_to_lru_list(page, lruvec, lru); - update_page_reclaim_stat(zone, page, file, 1); + __count_vm_event(PGACTIVATE); + update_page_reclaim_stat(lruvec, file, 1); } } @@ -374,7 +370,7 @@ void activate_page(struct page *page) struct zone *zone = page_zone(page); spin_lock_irq(&zone->lru_lock); - __activate_page(page, NULL); + __activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL); spin_unlock_irq(&zone->lru_lock); } #endif @@ -441,11 +437,13 @@ void lru_cache_add_lru(struct page *page, enum lru_list lru) void add_page_to_unevictable_list(struct page *page) { struct zone *zone = page_zone(page); + struct lruvec *lruvec; spin_lock_irq(&zone->lru_lock); + lruvec = mem_cgroup_page_lruvec(page, zone); SetPageUnevictable(page); SetPageLRU(page); - add_page_to_lru_list(zone, page, LRU_UNEVICTABLE); + add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE); spin_unlock_irq(&zone->lru_lock); } @@ -470,11 +468,11 @@ void add_page_to_unevictable_list(struct page *page) * be write it out by flusher threads as this is much more effective * than the single-page writeout from reclaim. */ -static void lru_deactivate_fn(struct page *page, void *arg) +static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, + void *arg) { int lru, file; bool active; - struct zone *zone = page_zone(page); if (!PageLRU(page)) return; @@ -487,13 +485,13 @@ static void lru_deactivate_fn(struct page *page, void *arg) return; active = PageActive(page); - file = page_is_file_cache(page); lru = page_lru_base_type(page); - del_page_from_lru_list(zone, page, lru + active); + + del_page_from_lru_list(page, lruvec, lru + active); ClearPageActive(page); ClearPageReferenced(page); - add_page_to_lru_list(zone, page, lru); + add_page_to_lru_list(page, lruvec, lru); if (PageWriteback(page) || PageDirty(page)) { /* @@ -503,19 +501,17 @@ static void lru_deactivate_fn(struct page *page, void *arg) */ SetPageReclaim(page); } else { - struct lruvec *lruvec; /* * The page's writeback ends up during pagevec * We moves tha page into tail of inactive. */ - lruvec = mem_cgroup_lru_move_lists(zone, page, lru, lru); list_move_tail(&page->lru, &lruvec->lists[lru]); __count_vm_event(PGROTATED); } if (active) __count_vm_event(PGDEACTIVATE); - update_page_reclaim_stat(zone, page, file, 0); + update_page_reclaim_stat(lruvec, file, 0); } /* @@ -615,6 +611,7 @@ void release_pages(struct page **pages, int nr, int cold) int i; LIST_HEAD(pages_to_free); struct zone *zone = NULL; + struct lruvec *lruvec; unsigned long uninitialized_var(flags); for (i = 0; i < nr; i++) { @@ -642,9 +639,11 @@ void release_pages(struct page **pages, int nr, int cold) zone = pagezone; spin_lock_irqsave(&zone->lru_lock, flags); } + + lruvec = mem_cgroup_page_lruvec(page, zone); VM_BUG_ON(!PageLRU(page)); __ClearPageLRU(page); - del_page_from_lru_list(zone, page, page_off_lru(page)); + del_page_from_lru_list(page, lruvec, page_off_lru(page)); } list_add(&page->lru, &pages_to_free); @@ -676,8 +675,8 @@ EXPORT_SYMBOL(__pagevec_release); #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* used by __split_huge_page_refcount() */ -void lru_add_page_tail(struct zone* zone, - struct page *page, struct page *page_tail) +void lru_add_page_tail(struct page *page, struct page *page_tail, + struct lruvec *lruvec) { int uninitialized_var(active); enum lru_list lru; @@ -686,7 +685,8 @@ void lru_add_page_tail(struct zone* zone, VM_BUG_ON(!PageHead(page)); VM_BUG_ON(PageCompound(page_tail)); VM_BUG_ON(PageLRU(page_tail)); - VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&zone->lru_lock)); + VM_BUG_ON(NR_CPUS != 1 && + !spin_is_locked(&lruvec_zone(lruvec)->lru_lock)); SetPageLRU(page_tail); @@ -715,20 +715,20 @@ void lru_add_page_tail(struct zone* zone, * Use the standard add function to put page_tail on the list, * but then correct its position so they all end up in order. */ - add_page_to_lru_list(zone, page_tail, lru); + add_page_to_lru_list(page_tail, lruvec, lru); list_head = page_tail->lru.prev; list_move_tail(&page_tail->lru, list_head); } if (!PageUnevictable(page)) - update_page_reclaim_stat(zone, page_tail, file, active); + update_page_reclaim_stat(lruvec, file, active); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -static void __pagevec_lru_add_fn(struct page *page, void *arg) +static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, + void *arg) { enum lru_list lru = (enum lru_list)arg; - struct zone *zone = page_zone(page); int file = is_file_lru(lru); int active = is_active_lru(lru); @@ -739,8 +739,8 @@ static void __pagevec_lru_add_fn(struct page *page, void *arg) SetPageLRU(page); if (active) SetPageActive(page); - add_page_to_lru_list(zone, page, lru); - update_page_reclaim_stat(zone, page, file, active); + add_page_to_lru_list(page, lruvec, lru); + update_page_reclaim_stat(lruvec, file, active); } /* diff --git a/mm/vmscan.c b/mm/vmscan.c index 05d439dc1af..eeb3bc9d1d3 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1031,6 +1031,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { struct page *page; + int nr_pages; page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); @@ -1039,9 +1040,10 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, switch (__isolate_lru_page(page, mode)) { case 0: - mem_cgroup_lru_del_list(page, lru); + nr_pages = hpage_nr_pages(page); + mem_cgroup_update_lru_size(lruvec, lru, -nr_pages); list_move(&page->lru, dst); - nr_taken += hpage_nr_pages(page); + nr_taken += nr_pages; break; case -EBUSY: @@ -1093,15 +1095,16 @@ int isolate_lru_page(struct page *page) if (PageLRU(page)) { struct zone *zone = page_zone(page); + struct lruvec *lruvec; spin_lock_irq(&zone->lru_lock); + lruvec = mem_cgroup_page_lruvec(page, zone); if (PageLRU(page)) { int lru = page_lru(page); - ret = 0; get_page(page); ClearPageLRU(page); - - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); + ret = 0; } spin_unlock_irq(&zone->lru_lock); } @@ -1155,9 +1158,13 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) spin_lock_irq(&zone->lru_lock); continue; } + + lruvec = mem_cgroup_page_lruvec(page, zone); + SetPageLRU(page); lru = page_lru(page); - add_page_to_lru_list(zone, page, lru); + add_page_to_lru_list(page, lruvec, lru); + if (is_active_lru(lru)) { int file = is_file_lru(lru); int numpages = hpage_nr_pages(page); @@ -1166,7 +1173,7 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) if (put_page_testzero(page)) { __ClearPageLRU(page); __ClearPageActive(page); - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); @@ -1314,30 +1321,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * But we had to alter page->flags anyway. */ -static void move_active_pages_to_lru(struct zone *zone, +static void move_active_pages_to_lru(struct lruvec *lruvec, struct list_head *list, struct list_head *pages_to_free, enum lru_list lru) { + struct zone *zone = lruvec_zone(lruvec); unsigned long pgmoved = 0; struct page *page; + int nr_pages; while (!list_empty(list)) { - struct lruvec *lruvec; - page = lru_to_page(list); + lruvec = mem_cgroup_page_lruvec(page, zone); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); - lruvec = mem_cgroup_lru_add_list(zone, page, lru); + nr_pages = hpage_nr_pages(page); + mem_cgroup_update_lru_size(lruvec, lru, nr_pages); list_move(&page->lru, &lruvec->lists[lru]); - pgmoved += hpage_nr_pages(page); + pgmoved += nr_pages; if (put_page_testzero(page)) { __ClearPageLRU(page); __ClearPageActive(page); - del_page_from_lru_list(zone, page, lru); + del_page_from_lru_list(page, lruvec, lru); if (unlikely(PageCompound(page))) { spin_unlock_irq(&zone->lru_lock); @@ -1443,8 +1452,8 @@ static void shrink_active_list(unsigned long nr_to_scan, */ reclaim_stat->recent_rotated[file] += nr_rotated; - move_active_pages_to_lru(zone, &l_active, &l_hold, lru); - move_active_pages_to_lru(zone, &l_inactive, &l_hold, lru - LRU_ACTIVE); + move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru); + move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE); __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); spin_unlock_irq(&zone->lru_lock); @@ -3237,6 +3246,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) zone = pagezone; spin_lock_irq(&zone->lru_lock); } + lruvec = mem_cgroup_page_lruvec(page, zone); if (!PageLRU(page) || !PageUnevictable(page)) continue; @@ -3246,11 +3256,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) VM_BUG_ON(PageActive(page)); ClearPageUnevictable(page); - __dec_zone_state(zone, NR_UNEVICTABLE); - lruvec = mem_cgroup_lru_move_lists(zone, page, - LRU_UNEVICTABLE, lru); - list_move(&page->lru, &lruvec->lists[lru]); - __inc_zone_state(zone, NR_INACTIVE_ANON + lru); + del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE); + add_page_to_lru_list(page, lruvec, lru); pgrescued++; } } -- cgit v1.2.3-70-g09d2