aboutsummaryrefslogtreecommitdiff
path: root/mm/vmscan.c
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2012-02-03 23:12:42 +0100
committerJiri Kosina <jkosina@suse.cz>2012-02-03 23:13:05 +0100
commit972c5ae961d6e5103e2b33d935cfa4145fd47140 (patch)
tree350b2a76b979ba8766c09838617df67ff330eca0 /mm/vmscan.c
parent5196d20305d5e30d871111d3a876cf067dd94255 (diff)
parent7c7ed8ec337bf5f62cc5287a6eb6b2f1b7504c2f (diff)
Merge branch 'master' into for-next
Sync with Linus' tree to be able to apply patch to a newer code (namely drivers/gpu/drm/gma500/psb_intel_lvds.c)
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c826
1 files changed, 468 insertions, 358 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 11adc890ce3..c52b2355265 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -26,7 +26,6 @@
#include <linux/buffer_head.h> /* for try_to_release_page(),
buffer_heads_over_limit */
#include <linux/mm_inline.h>
-#include <linux/pagevec.h>
#include <linux/backing-dev.h>
#include <linux/rmap.h>
#include <linux/topology.h>
@@ -103,8 +102,11 @@ struct scan_control {
*/
reclaim_mode_t reclaim_mode;
- /* Which cgroup do we reclaim from */
- struct mem_cgroup *mem_cgroup;
+ /*
+ * The memory cgroup that hit its limit and as a result is the
+ * primary target of this reclaim invocation.
+ */
+ struct mem_cgroup *target_mem_cgroup;
/*
* Nodemask of nodes allowed by the caller. If NULL, all nodes
@@ -113,6 +115,11 @@ struct scan_control {
nodemask_t *nodemask;
};
+struct mem_cgroup_zone {
+ struct mem_cgroup *mem_cgroup;
+ struct zone *zone;
+};
+
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
#ifdef ARCH_HAS_PREFETCH
@@ -153,28 +160,45 @@ static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-#define scanning_global_lru(sc) (!(sc)->mem_cgroup)
+static bool global_reclaim(struct scan_control *sc)
+{
+ return !sc->target_mem_cgroup;
+}
+
+static bool scanning_global_lru(struct mem_cgroup_zone *mz)
+{
+ return !mz->mem_cgroup;
+}
#else
-#define scanning_global_lru(sc) (1)
+static bool global_reclaim(struct scan_control *sc)
+{
+ return true;
+}
+
+static bool scanning_global_lru(struct mem_cgroup_zone *mz)
+{
+ return true;
+}
#endif
-static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
- struct scan_control *sc)
+static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
{
- if (!scanning_global_lru(sc))
- return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
+ if (!scanning_global_lru(mz))
+ return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);
- return &zone->reclaim_stat;
+ return &mz->zone->reclaim_stat;
}
-static unsigned long zone_nr_lru_pages(struct zone *zone,
- struct scan_control *sc, enum lru_list lru)
+static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
+ enum lru_list lru)
{
- if (!scanning_global_lru(sc))
- return mem_cgroup_zone_nr_lru_pages(sc->mem_cgroup,
- zone_to_nid(zone), zone_idx(zone), BIT(lru));
+ if (!scanning_global_lru(mz))
+ return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
+ zone_to_nid(mz->zone),
+ zone_idx(mz->zone),
+ BIT(lru));
- return zone_page_state(zone, NR_LRU_BASE + lru);
+ return zone_page_state(mz->zone, NR_LRU_BASE + lru);
}
@@ -636,7 +660,7 @@ redo:
* When racing with an mlock or AS_UNEVICTABLE clearing
* (page is unlocked) make sure that if the other thread
* does not observe our setting of PG_lru and fails
- * isolation/check_move_unevictable_page,
+ * isolation/check_move_unevictable_pages,
* we see PG_mlocked/AS_UNEVICTABLE cleared below and move
* the page back to the evictable list.
*
@@ -677,12 +701,13 @@ enum page_references {
};
static enum page_references page_check_references(struct page *page,
+ struct mem_cgroup_zone *mz,
struct scan_control *sc)
{
int referenced_ptes, referenced_page;
unsigned long vm_flags;
- referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);
+ referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
referenced_page = TestClearPageReferenced(page);
/* Lumpy reclaim - ignore references */
@@ -715,7 +740,13 @@ static enum page_references page_check_references(struct page *page,
*/
SetPageReferenced(page);
- if (referenced_page)
+ if (referenced_page || referenced_ptes > 1)
+ return PAGEREF_ACTIVATE;
+
+ /*
+ * Activate file-backed executable pages after first usage.
+ */
+ if (vm_flags & VM_EXEC)
return PAGEREF_ACTIVATE;
return PAGEREF_KEEP;
@@ -728,29 +759,11 @@ static enum page_references page_check_references(struct page *page,
return PAGEREF_RECLAIM;
}
-static noinline_for_stack void free_page_list(struct list_head *free_pages)
-{
- struct pagevec freed_pvec;
- struct page *page, *tmp;
-
- pagevec_init(&freed_pvec, 1);
-
- list_for_each_entry_safe(page, tmp, free_pages, lru) {
- list_del(&page->lru);
- if (!pagevec_add(&freed_pvec, page)) {
- __pagevec_free(&freed_pvec);
- pagevec_reinit(&freed_pvec);
- }
- }
-
- pagevec_free(&freed_pvec);
-}
-
/*
* shrink_page_list() returns the number of reclaimed pages
*/
static unsigned long shrink_page_list(struct list_head *page_list,
- struct zone *zone,
+ struct mem_cgroup_zone *mz,
struct scan_control *sc,
int priority,
unsigned long *ret_nr_dirty,
@@ -781,7 +794,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
goto keep;
VM_BUG_ON(PageActive(page));
- VM_BUG_ON(page_zone(page) != zone);
+ VM_BUG_ON(page_zone(page) != mz->zone);
sc->nr_scanned++;
@@ -815,7 +828,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
}
}
- references = page_check_references(page, sc);
+ references = page_check_references(page, mz, sc);
switch (references) {
case PAGEREF_ACTIVATE:
goto activate_locked;
@@ -1006,10 +1019,10 @@ keep_lumpy:
* back off and wait for congestion to clear because further reclaim
* will encounter the same problem
*/
- if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc))
- zone_set_flag(zone, ZONE_CONGESTED);
+ if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))
+ zone_set_flag(mz->zone, ZONE_CONGESTED);
- free_page_list(&free_pages);
+ free_hot_cold_page_list(&free_pages, 1);
list_splice(&ret_pages, page_list);
count_vm_events(PGACTIVATE, pgactivate);
@@ -1061,8 +1074,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
ret = -EBUSY;
- if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page)))
- return ret;
+ /*
+ * To minimise LRU disruption, the caller can indicate that it only
+ * wants to isolate pages it will be able to operate on without
+ * blocking - clean pages for the most part.
+ *
+ * ISOLATE_CLEAN means that only clean pages should be isolated. This
+ * is used by reclaim when it cannot write to backing storage
+ *
+ * ISOLATE_ASYNC_MIGRATE is used to indicate that the caller only wants
+ * pages that it is possible to migrate without blocking
+ */
+ if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
+ /* All the caller can do on PageWriteback is block */
+ if (PageWriteback(page))
+ return ret;
+
+ if (PageDirty(page)) {
+ struct address_space *mapping;
+
+ /* ISOLATE_CLEAN means only clean pages */
+ if (mode & ISOLATE_CLEAN)
+ return ret;
+
+ /*
+ * Only pages without mappings or that have a
+ * ->migratepage callback are possible to migrate
+ * without blocking
+ */
+ mapping = page_mapping(page);
+ if (mapping && !mapping->a_ops->migratepage)
+ return ret;
+ }
+ }
if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
return ret;
@@ -1091,25 +1135,36 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
* Appropriate locks must be held before calling this function.
*
* @nr_to_scan: The number of pages to look through on the list.
- * @src: The LRU list to pull pages off.
+ * @mz: The mem_cgroup_zone to pull pages from.
* @dst: The temp list to put pages on to.
- * @scanned: The number of pages that were scanned.
+ * @nr_scanned: The number of pages that were scanned.
* @order: The caller's attempted allocation order
* @mode: One of the LRU isolation modes
+ * @active: True [1] if isolating active pages
* @file: True [1] if isolating file [!anon] pages
*
* returns how many pages were moved onto *@dst.
*/
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
- struct list_head *src, struct list_head *dst,
- unsigned long *scanned, int order, isolate_mode_t mode,
- int file)
+ struct mem_cgroup_zone *mz, struct list_head *dst,
+ unsigned long *nr_scanned, int order, isolate_mode_t mode,
+ int active, int file)
{
+ struct lruvec *lruvec;
+ struct list_head *src;
unsigned long nr_taken = 0;
unsigned long nr_lumpy_taken = 0;
unsigned long nr_lumpy_dirty = 0;
unsigned long nr_lumpy_failed = 0;
unsigned long scan;
+ int lru = LRU_BASE;
+
+ lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
+ if (active)
+ lru += LRU_ACTIVE;
+ if (file)
+ lru += LRU_FILE;
+ src = &lruvec->lists[lru];
for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
struct page *page;
@@ -1125,15 +1180,14 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
switch (__isolate_lru_page(page, mode, file)) {
case 0:
+ mem_cgroup_lru_del(page);
list_move(&page->lru, dst);
- mem_cgroup_del_lru(page);
nr_taken += hpage_nr_pages(page);
break;
case -EBUSY:
/* else it is being freed elsewhere */
list_move(&page->lru, src);
- mem_cgroup_rotate_lru_list(page, page_lru(page));
continue;
default:
@@ -1178,18 +1232,22 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
* anon page which don't already have a swap slot is
* pointless.
*/
- if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
+ if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) &&
!PageSwapCache(cursor_page))
break;
if (__isolate_lru_page(cursor_page, mode, file) == 0) {
+ unsigned int isolated_pages;
+
+ mem_cgroup_lru_del(cursor_page);
list_move(&cursor_page->lru, dst);
- mem_cgroup_del_lru(cursor_page);
- nr_taken += hpage_nr_pages(page);
- nr_lumpy_taken++;
+ isolated_pages = hpage_nr_pages(cursor_page);
+ nr_taken += isolated_pages;
+ nr_lumpy_taken += isolated_pages;
if (PageDirty(cursor_page))
- nr_lumpy_dirty++;
+ nr_lumpy_dirty += isolated_pages;
scan++;
+ pfn += isolated_pages - 1;
} else {
/*
* Check if the page is freed already.
@@ -1215,57 +1273,16 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
nr_lumpy_failed++;
}
- *scanned = scan;
+ *nr_scanned = scan;
trace_mm_vmscan_lru_isolate(order,
nr_to_scan, scan,
nr_taken,
nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
- mode);
+ mode, file);
return nr_taken;
}
-static unsigned long isolate_pages_global(unsigned long nr,
- struct list_head *dst,
- unsigned long *scanned, int order,
- isolate_mode_t mode,
- struct zone *z, int active, int file)
-{
- int lru = LRU_BASE;
- if (active)
- lru += LRU_ACTIVE;
- if (file)
- lru += LRU_FILE;
- return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
- mode, file);
-}
-
-/*
- * clear_active_flags() is a helper for shrink_active_list(), clearing
- * any active bits from the pages in the list.
- */
-static unsigned long clear_active_flags(struct list_head *page_list,
- unsigned int *count)
-{
- int nr_active = 0;
- int lru;
- struct page *page;
-
- list_for_each_entry(page, page_list, lru) {
- int numpages = hpage_nr_pages(page);
- lru = page_lru_base_type(page);
- if (PageActive(page)) {
- lru += LRU_ACTIVE;
- ClearPageActive(page);
- nr_active += numpages;
- }
- if (count)
- count[lru] += numpages;
- }
-
- return nr_active;
-}
-
/**
* isolate_lru_page - tries to isolate a page from its LRU list
* @page: page to isolate from its LRU list
@@ -1325,7 +1342,7 @@ static int too_many_isolated(struct zone *zone, int file,
if (current_is_kswapd())
return 0;
- if (!scanning_global_lru(sc))
+ if (!global_reclaim(sc))
return 0;
if (file) {
@@ -1339,27 +1356,21 @@ static int too_many_isolated(struct zone *zone, int file,
return isolated > inactive;
}
-/*
- * TODO: Try merging with migrations version of putback_lru_pages
- */
static noinline_for_stack void
-putback_lru_pages(struct zone *zone, struct scan_control *sc,
- unsigned long nr_anon, unsigned long nr_file,
- struct list_head *page_list)
+putback_inactive_pages(struct mem_cgroup_zone *mz,
+ struct list_head *page_list)
{
- struct page *page;
- struct pagevec pvec;
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
-
- pagevec_init(&pvec, 1);
+ struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+ struct zone *zone = mz->zone;
+ LIST_HEAD(pages_to_free);
/*
* Put back any unfreeable pages.
*/
- spin_lock(&zone->lru_lock);
while (!list_empty(page_list)) {
+ struct page *page = lru_to_page(page_list);
int lru;
- page = lru_to_page(page_list);
+
VM_BUG_ON(PageLRU(page));
list_del(&page->lru);
if (unlikely(!page_evictable(page, NULL))) {
@@ -1376,30 +1387,53 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
int numpages = hpage_nr_pages(page);
reclaim_stat->recent_rotated[file] += numpages;
}
- if (!pagevec_add(&pvec, page)) {
- spin_unlock_irq(&zone->lru_lock);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
+ if (put_page_testzero(page)) {
+ __ClearPageLRU(page);
+ __ClearPageActive(page);
+ del_page_from_lru_list(zone, page, lru);
+
+ if (unlikely(PageCompound(page))) {
+ spin_unlock_irq(&zone->lru_lock);
+ (*get_compound_page_dtor(page))(page);
+ spin_lock_irq(&zone->lru_lock);
+ } else
+ list_add(&page->lru, &pages_to_free);
}
}
- __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
- __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
- spin_unlock_irq(&zone->lru_lock);
- pagevec_release(&pvec);
+ /*
+ * To save our caller's stack, now use input list for pages to free.
+ */
+ list_splice(&pages_to_free, page_list);
}
-static noinline_for_stack void update_isolated_counts(struct zone *zone,
- struct scan_control *sc,
- unsigned long *nr_anon,
- unsigned long *nr_file,
- struct list_head *isolated_list)
+static noinline_for_stack void
+update_isolated_counts(struct mem_cgroup_zone *mz,
+ struct list_head *page_list,
+ unsigned long *nr_anon,
+ unsigned long *nr_file)
{
- unsigned long nr_active;
+ struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+ struct zone *zone = mz->zone;
unsigned int count[NR_LRU_LISTS] = { 0, };
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+ unsigned long nr_active = 0;
+ struct page *page;
+ int lru;
+
+ /*
+ * Count pages and clear active flags
+ */
+ list_for_each_entry(page, page_list, lru) {
+ int numpages = hpage_nr_pages(page);
+ lru = page_lru_base_type(page);
+ if (PageActive(page)) {
+ lru += LRU_ACTIVE;
+ ClearPageActive(page);
+ nr_active += numpages;
+ }
+ count[lru] += numpages;
+ }
- nr_active = clear_active_flags(isolated_list, count);
__count_vm_events(PGDEACTIVATE, nr_active);
__mod_zone_page_state(zone, NR_ACTIVE_FILE,
@@ -1413,8 +1447,6 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone,
*nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
*nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
- __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
- __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
reclaim_stat->recent_scanned[0] += *nr_anon;
reclaim_stat->recent_scanned[1] += *nr_file;
@@ -1466,8 +1498,8 @@ static inline bool should_reclaim_stall(unsigned long nr_taken,
* of reclaimed pages
*/
static noinline_for_stack unsigned long
-shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
- struct scan_control *sc, int priority, int file)
+shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
+ struct scan_control *sc, int priority, int file)
{
LIST_HEAD(page_list);
unsigned long nr_scanned;
@@ -1478,6 +1510,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
unsigned long nr_dirty = 0;
unsigned long nr_writeback = 0;
isolate_mode_t reclaim_mode = ISOLATE_INACTIVE;
+ struct zone *zone = mz->zone;
while (unlikely(too_many_isolated(zone, file, sc))) {
congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1500,9 +1533,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
spin_lock_irq(&zone->lru_lock);
- if (scanning_global_lru(sc)) {
- nr_taken = isolate_pages_global(nr_to_scan, &page_list,
- &nr_scanned, sc->order, reclaim_mode, zone, 0, file);
+ nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list,
+ &nr_scanned, sc->order,
+ reclaim_mode, 0, file);
+ if (global_reclaim(sc)) {
zone->pages_scanned += nr_scanned;
if (current_is_kswapd())
__count_zone_vm_events(PGSCAN_KSWAPD, zone,
@@ -1510,14 +1544,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
else
__count_zone_vm_events(PGSCAN_DIRECT, zone,
nr_scanned);
- } else {
- nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list,
- &nr_scanned, sc->order, reclaim_mode, zone,
- sc->mem_cgroup, 0, file);
- /*
- * mem_cgroup_isolate_pages() keeps track of
- * scanned pages on its own.
- */
}
if (nr_taken == 0) {
@@ -1525,26 +1551,37 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
return 0;
}
- update_isolated_counts(zone, sc, &nr_anon, &nr_file, &page_list);
+ update_isolated_counts(mz, &page_list, &nr_anon, &nr_file);
+
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
+ __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
spin_unlock_irq(&zone->lru_lock);
- nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority,
+ nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
&nr_dirty, &nr_writeback);
/* Check if we should syncronously wait for writeback */
if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
set_reclaim_mode(priority, sc, true);
- nr_reclaimed += shrink_page_list(&page_list, zone, sc,
+ nr_reclaimed += shrink_page_list(&page_list, mz, sc,
priority, &nr_dirty, &nr_writeback);
}
- local_irq_disable();
+ spin_lock_irq(&zone->lru_lock);
+
if (current_is_kswapd())
__count_vm_events(KSWAPD_STEAL, nr_reclaimed);
__count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
- putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
+ putback_inactive_pages(mz, &page_list);
+
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
+ __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+
+ spin_unlock_irq(&zone->lru_lock);
+
+ free_hot_cold_page_list(&page_list, 1);
/*
* If reclaim is isolating dirty pages under writeback, it implies
@@ -1600,30 +1637,47 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
static void move_active_pages_to_lru(struct zone *zone,
struct list_head *list,
+ struct list_head *pages_to_free,
enum lru_list lru)
{
unsigned long pgmoved = 0;
- struct pagevec pvec;
struct page *page;
- pagevec_init(&pvec, 1);
+ if (buffer_heads_over_limit) {
+ spin_unlock_irq(&zone->lru_lock);
+ list_for_each_entry(page, list, lru) {
+ if (page_has_private(page) && trylock_page(page)) {
+ if (page_has_private(page))
+ try_to_release_page(page, 0);
+ unlock_page(page);
+ }
+ }
+ spin_lock_irq(&zone->lru_lock);
+ }
while (!list_empty(list)) {
+ struct lruvec *lruvec;
+
page = lru_to_page(list);
VM_BUG_ON(PageLRU(page));
SetPageLRU(page);
- list_move(&page->lru, &zone->lru[lru].list);
- mem_cgroup_add_lru_list(page, lru);
+ lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+ list_move(&page->lru, &lruvec->lists[lru]);
pgmoved += hpage_nr_pages(page);
- if (!pagevec_add(&pvec, page) || list_empty(list)) {
- spin_unlock_irq(&zone->lru_lock);
- if (buffer_heads_over_limit)
- pagevec_strip(&pvec);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
+ if (put_page_testzero(page)) {
+ __ClearPageLRU(page);
+ __ClearPageActive(page);
+ del_page_from_lru_list(zone, page, lru);
+
+ if (unlikely(PageCompound(page))) {
+ spin_unlock_irq(&zone->lru_lock);
+ (*get_compound_page_dtor(page))(page);
+ spin_lock_irq(&zone->lru_lock);
+ } else
+ list_add(&page->lru, pages_to_free);
}
}
__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
@@ -1631,19 +1685,22 @@ static void move_active_pages_to_lru(struct zone *zone,
__count_vm_events(PGDEACTIVATE, pgmoved);
}
-static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
- struct scan_control *sc, int priority, int file)
+static void shrink_active_list(unsigned long nr_to_scan,
+ struct mem_cgroup_zone *mz,
+ struct scan_control *sc,
+ int priority, int file)
{
unsigned long nr_taken;
- unsigned long pgscanned;
+ unsigned long nr_scanned;
unsigned long vm_flags;
LIST_HEAD(l_hold); /* The pages which were snipped off */
LIST_HEAD(l_active);
LIST_HEAD(l_inactive);
struct page *page;
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+ struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
unsigned long nr_rotated = 0;
isolate_mode_t reclaim_mode = ISOLATE_ACTIVE;
+ struct zone *zone = mz->zone;
lru_add_drain();
@@ -1653,26 +1710,16 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
reclaim_mode |= ISOLATE_CLEAN;
spin_lock_irq(&zone->lru_lock);
- if (scanning_global_lru(sc)) {
- nr_taken = isolate_pages_global(nr_pages, &l_hold,
- &pgscanned, sc->order,
- reclaim_mode, zone,
- 1, file);
- zone->pages_scanned += pgscanned;
- } else {
- nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
- &pgscanned, sc->order,
- reclaim_mode, zone,
- sc->mem_cgroup, 1, file);
- /*
- * mem_cgroup_isolate_pages() keeps track of
- * scanned pages on its own.
- */
- }
+
+ nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold,
+ &nr_scanned, sc->order,
+ reclaim_mode, 1, file);
+ if (global_reclaim(sc))
+ zone->pages_scanned += nr_scanned;
reclaim_stat->recent_scanned[file] += nr_taken;
- __count_zone_vm_events(PGREFILL, zone, pgscanned);
+ __count_zone_vm_events(PGREFILL, zone, nr_scanned);
if (file)
__mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
else
@@ -1690,7 +1737,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
continue;
}
- if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
+ if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) {
nr_rotated += hpage_nr_pages(page);
/*
* Identify referenced, file-backed active pages and
@@ -1723,12 +1770,14 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
*/
reclaim_stat->recent_rotated[file] += nr_rotated;
- move_active_pages_to_lru(zone, &l_active,
+ move_active_pages_to_lru(zone, &l_active, &l_hold,
LRU_ACTIVE + file * LRU_FILE);
- move_active_pages_to_lru(zone, &l_inactive,
+ move_active_pages_to_lru(zone, &l_inactive, &l_hold,
LRU_BASE + file * LRU_FILE);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
+
+ free_hot_cold_page_list(&l_hold, 1);
}
#ifdef CONFIG_SWAP
@@ -1753,10 +1802,8 @@ static int inactive_anon_is_low_global(struct zone *zone)
* Returns true if the zone does not have enough inactive anon pages,
* meaning some active anon pages need to be deactivated.
*/
-static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
+static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
{
- int low;
-
/*
* If we don't have swap space, anonymous page deactivation
* is pointless.
@@ -1764,15 +1811,14 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
if (!total_swap_pages)
return 0;
- if (scanning_global_lru(sc))
- low = inactive_anon_is_low_global(zone);
- else
- low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone);
- return low;
+ if (!scanning_global_lru(mz))
+ return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
+ mz->zone);
+
+ return inactive_anon_is_low_global(mz->zone);
}
#else
-static inline int inactive_anon_is_low(struct zone *zone,
- struct scan_control *sc)
+static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
{
return 0;
}
@@ -1790,8 +1836,7 @@ static int inactive_file_is_low_global(struct zone *zone)
/**
* inactive_file_is_low - check if file pages need to be deactivated
- * @zone: zone to check
- * @sc: scan control of this context
+ * @mz: memory cgroup and zone to check
*
* When the system is doing streaming IO, memory pressure here
* ensures that active file pages get deactivated, until more
@@ -1803,45 +1848,44 @@ static int inactive_file_is_low_global(struct zone *zone)
* This uses a different ratio than the anonymous pages, because
* the page cache uses a use-once replacement algorithm.
*/
-static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
+static int inactive_file_is_low(struct mem_cgroup_zone *mz)
{
- int low;
+ if (!scanning_global_lru(mz))
+ return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
+ mz->zone);
- if (scanning_global_lru(sc))
- low = inactive_file_is_low_global(zone);
- else
- low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone);
- return low;
+ return inactive_file_is_low_global(mz->zone);
}
-static int inactive_list_is_low(struct zone *zone, struct scan_control *sc,
- int file)
+static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
{
if (file)
- return inactive_file_is_low(zone, sc);
+ return inactive_file_is_low(mz);
else
- return inactive_anon_is_low(zone, sc);
+ return inactive_anon_is_low(mz);
}
static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
- struct zone *zone, struct scan_control *sc, int priority)
+ struct mem_cgroup_zone *mz,
+ struct scan_control *sc, int priority)
{
int file = is_file_lru(lru);
if (is_active_lru(lru)) {
- if (inactive_list_is_low(zone, sc, file))
- shrink_active_list(nr_to_scan, zone, sc, priority, file);
+ if (inactive_list_is_low(mz, file))
+ shrink_active_list(nr_to_scan, mz, sc, priority, file);
return 0;
}
- return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
+ return shrink_inactive_list(nr_to_scan, mz, sc, priority, file);
}
-static int vmscan_swappiness(struct scan_control *sc)
+static int vmscan_swappiness(struct mem_cgroup_zone *mz,
+ struct scan_control *sc)
{
- if (scanning_global_lru(sc))
+ if (global_reclaim(sc))
return vm_swappiness;
- return mem_cgroup_swappiness(sc->mem_cgroup);
+ return mem_cgroup_swappiness(mz->mem_cgroup);
}
/*
@@ -1852,15 +1896,15 @@ static int vmscan_swappiness(struct scan_control *sc)
*
* nr[0] = anon pages to scan; nr[1] = file pages to scan
*/
-static void get_scan_count(struct zone *zone, struct scan_control *sc,
- unsigned long *nr, int priority)
+static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
+ unsigned long *nr, int priority)
{
unsigned long anon, file, free;
unsigned long anon_prio, file_prio;
unsigned long ap, fp;
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+ struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
u64 fraction[2], denominator;
- enum lru_list l;
+ enum lru_list lru;
int noswap = 0;
bool force_scan = false;
@@ -1874,9 +1918,9 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
* latencies, so it's better to scan a minimum amount there as
* well.
*/
- if (scanning_global_lru(sc) && current_is_kswapd())
+ if (current_is_kswapd() && mz->zone->all_unreclaimable)
force_scan = true;
- if (!scanning_global_lru(sc))
+ if (!global_reclaim(sc))
force_scan = true;
/* If we have no swap space, do not bother scanning anon pages. */
@@ -1888,16 +1932,16 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
goto out;
}
- anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
- file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+ anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) +
+ zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
+ file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) +
+ zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
- if (scanning_global_lru(sc)) {
- free = zone_page_state(zone, NR_FREE_PAGES);
+ if (global_reclaim(sc)) {
+ free = zone_page_state(mz->zone, NR_FREE_PAGES);
/* If we have very few page cache pages,
force-scan anon pages. */
- if (unlikely(file + free <= high_wmark_pages(zone))) {
+ if (unlikely(file + free <= high_wmark_pages(mz->zone))) {
fraction[0] = 1;
fraction[1] = 0;
denominator = 1;
@@ -1909,8 +1953,8 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
* With swappiness at 100, anonymous and file have the same priority.
* This scanning priority is essentially the inverse of IO cost.
*/
- anon_prio = vmscan_swappiness(sc);
- file_prio = 200 - vmscan_swappiness(sc);
+ anon_prio = vmscan_swappiness(mz, sc);
+ file_prio = 200 - vmscan_swappiness(mz, sc);
/*
* OK, so we have swap space and a fair amount of page cache
@@ -1923,7 +1967,7 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
*
* anon in [0], file in [1]
*/
- spin_lock_irq(&zone->lru_lock);
+ spin_lock_irq(&mz->zone->lru_lock);
if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
reclaim_stat->recent_scanned[0] /= 2;
reclaim_stat->recent_rotated[0] /= 2;
@@ -1944,24 +1988,24 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
fp /= reclaim_stat->recent_rotated[1] + 1;
- spin_unlock_irq(&zone->lru_lock);
+ spin_unlock_irq(&mz->zone->lru_lock);
fraction[0] = ap;
fraction[1] = fp;
denominator = ap + fp + 1;
out:
- for_each_evictable_lru(l) {
- int file = is_file_lru(l);
+ for_each_evictable_lru(lru) {
+ int file = is_file_lru(lru);
unsigned long scan;
- scan = zone_nr_lru_pages(zone, sc, l);
+ scan = zone_nr_lru_pages(mz, lru);
if (priority || noswap) {
scan >>= priority;
if (!scan && force_scan)
scan = SWAP_CLUSTER_MAX;
scan = div64_u64(scan * fraction[file], denominator);
}
- nr[l] = scan;
+ nr[lru] = scan;
}
}
@@ -1972,7 +2016,7 @@ out:
* back to the allocator and call try_to_compact_zone(), we ensure that
* there are enough free pages for it to be likely successful
*/
-static inline bool should_continue_reclaim(struct zone *zone,
+static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
unsigned long nr_reclaimed,
unsigned long nr_scanned,
struct scan_control *sc)
@@ -2012,14 +2056,15 @@ static inline bool should_continue_reclaim(struct zone *zone,
* inactive lists are large enough, continue reclaiming
*/
pages_for_compaction = (2UL << sc->order);
- inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON) +
- zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+ inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
+ if (nr_swap_pages > 0)
+ inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
if (sc->nr_reclaimed < pages_for_compaction &&
inactive_lru_pages > pages_for_compaction)
return true;
/* If compaction would go ahead or the allocation would succeed, stop */
- switch (compaction_suitable(zone, sc->order)) {
+ switch (compaction_suitable(mz->zone, sc->order)) {
case COMPACT_PARTIAL:
case COMPACT_CONTINUE:
return false;
@@ -2031,12 +2076,12 @@ static inline bool should_continue_reclaim(struct zone *zone,
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
-static void shrink_zone(int priority, struct zone *zone,
- struct scan_control *sc)
+static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
+ struct scan_control *sc)
{
unsigned long nr[NR_LRU_LISTS];
unsigned long nr_to_scan;
- enum lru_list l;
+ enum lru_list lru;
unsigned long nr_reclaimed, nr_scanned;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
struct blk_plug plug;
@@ -2044,19 +2089,19 @@ static void shrink_zone(int priority, struct zone *zone,
restart:
nr_reclaimed = 0;
nr_scanned = sc->nr_scanned;
- get_scan_count(zone, sc, nr, priority);
+ get_scan_count(mz, sc, nr, priority);
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
nr[LRU_INACTIVE_FILE]) {
- for_each_evictable_lru(l) {
- if (nr[l]) {
+ for_each_evictable_lru(lru) {
+ if (nr[lru]) {
nr_to_scan = min_t(unsigned long,
- nr[l], SWAP_CLUSTER_MAX);
- nr[l] -= nr_to_scan;
+ nr[lru], SWAP_CLUSTER_MAX);
+ nr[lru] -= nr_to_scan;
- nr_reclaimed += shrink_list(l, nr_to_scan,
- zone, sc, priority);
+ nr_reclaimed += shrink_list(lru, nr_to_scan,
+ mz, sc, priority);
}
}
/*
@@ -2077,17 +2122,89 @@ restart:
* Even if we did not try to evict anon pages at all, we want to
* rebalance the anon lru active/inactive ratio.
*/
- if (inactive_anon_is_low(zone, sc))
- shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+ if (inactive_anon_is_low(mz))
+ shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0);
/* reclaim/compaction might need reclaim to continue */
- if (should_continue_reclaim(zone, nr_reclaimed,
+ if (should_continue_reclaim(mz, nr_reclaimed,
sc->nr_scanned - nr_scanned, sc))
goto restart;
throttle_vm_writeout(sc->gfp_mask);
}
+static void shrink_zone(int priority, struct zone *zone,
+ struct scan_control *sc)
+{
+ struct mem_cgroup *root = sc->target_mem_cgroup;
+ struct mem_cgroup_reclaim_cookie reclaim = {
+ .zone = zone,
+ .priority = priority,
+ };
+ struct mem_cgroup *memcg;
+
+ memcg = mem_cgroup_iter(root, NULL, &reclaim);
+ do {
+ struct mem_cgroup_zone mz = {
+ .mem_cgroup = memcg,
+ .zone = zone,