Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	213
1 file changed, 151 insertions(+), 62 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ba8228e0a80..613e89f471d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -148,8 +148,8 @@ static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
 	return &zone->reclaim_stat;
 }
 
-static unsigned long zone_nr_pages(struct zone *zone, struct scan_control *sc,
-				   enum lru_list lru)
+static unsigned long zone_nr_lru_pages(struct zone *zone,
+				struct scan_control *sc, enum lru_list lru)
 {
 	if (!scanning_global_lru(sc))
 		return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);
@@ -286,7 +286,12 @@ static inline int page_mapping_inuse(struct page *page)
 
 static inline int is_page_cache_freeable(struct page *page)
 {
-	return page_count(page) - !!page_has_private(page) == 2;
+	/*
+	 * A freeable page cache page is referenced only by the caller
+	 * that isolated the page, the page cache radix tree and
+	 * optional buffer heads at page->private.
+	 */
+	return page_count(page) - page_has_private(page) == 2;
 }
 
 static int may_write_to_queue(struct backing_dev_info *bdi)
@@ -361,7 +366,6 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 	 * block, for some throttling. This happens by accident, because
 	 * swap_backing_dev_info is bust: it doesn't reflect the
 	 * congestion state of the swapdevs.  Easy to fix, if needed.
-	 * See swapfile.c:page_queue_congested().
 	 */
 	if (!is_page_cache_freeable(page))
 		return PAGE_KEEP;
@@ -531,7 +535,7 @@ redo:
 		 * unevictable page on [in]active list.
 		 * We know how to handle that.
 		 */
-		lru = active + page_is_file_cache(page);
+		lru = active + page_lru_base_type(page);
 		lru_cache_add_lru(page, lru);
 	} else {
 		/*
@@ -821,7 +825,7 @@ int __isolate_lru_page(struct page *page, int mode, int file)
 	if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode))
 		return ret;
 
-	if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file))
+	if (mode != ISOLATE_BOTH && page_is_file_cache(page) != file)
 		return ret;
 
 	/*
@@ -935,6 +939,16 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			/* Check that we have not crossed a zone boundary. */
 			if (unlikely(page_zone_id(cursor_page) != zone_id))
 				continue;
+
+			/*
+			 * If we don't have enough swap space, reclaiming of
+			 * anon page which don't already have a swap slot is
+			 * pointless.
+			 */
+			if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
+					!PageSwapCache(cursor_page))
+				continue;
+
 			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
 				list_move(&cursor_page->lru, dst);
 				mem_cgroup_del_lru(cursor_page);
@@ -961,7 +975,7 @@ static unsigned long isolate_pages_global(unsigned long nr,
 	if (file)
 		lru += LRU_FILE;
 	return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
-								mode, !!file);
+								mode, file);
 }
 
 /*
@@ -976,7 +990,7 @@ static unsigned long clear_active_flags(struct list_head *page_list,
 	struct page *page;
 
 	list_for_each_entry(page, page_list, lru) {
-		lru = page_is_file_cache(page);
+		lru = page_lru_base_type(page);
 		if (PageActive(page)) {
 			lru += LRU_ACTIVE;
 			ClearPageActive(page);
@@ -1034,6 +1048,31 @@ int isolate_lru_page(struct page *page)
 }
 
 /*
+ * Are there way too many processes in the direct reclaim path already?
+ */
+static int too_many_isolated(struct zone *zone, int file,
+		struct scan_control *sc)
+{
+	unsigned long inactive, isolated;
+
+	if (current_is_kswapd())
+		return 0;
+
+	if (!scanning_global_lru(sc))
+		return 0;
+
+	if (file) {
+		inactive = zone_page_state(zone, NR_INACTIVE_FILE);
+		isolated = zone_page_state(zone, NR_ISOLATED_FILE);
+	} else {
+		inactive = zone_page_state(zone, NR_INACTIVE_ANON);
+		isolated = zone_page_state(zone, NR_ISOLATED_ANON);
+	}
+
+	return isolated > inactive;
+}
+
+/*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
  */
@@ -1048,6 +1087,14 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 	int lumpy_reclaim = 0;
 
+	while (unlikely(too_many_isolated(zone, file, sc))) {
+		congestion_wait(WRITE, HZ/10);
+
+		/* We are about to die and free our memory. Return now. */
+		if (fatal_signal_pending(current))
+			return SWAP_CLUSTER_MAX;
+	}
+
 	/*
 	 * If we need a large contiguous chunk of memory, or have
 	 * trouble getting a small set of contiguous pages, we
@@ -1072,10 +1119,26 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_active;
 		unsigned int count[NR_LRU_LISTS] = { 0, };
 		int mode = lumpy_reclaim ? ISOLATE_BOTH : ISOLATE_INACTIVE;
+		unsigned long nr_anon;
+		unsigned long nr_file;
 
 		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
 			     &page_list, &nr_scan, sc->order, mode,
 				zone, sc->mem_cgroup, 0, file);
+
+		if (scanning_global_lru(sc)) {
+			zone->pages_scanned += nr_scan;
+			if (current_is_kswapd())
+				__count_zone_vm_events(PGSCAN_KSWAPD, zone,
+						       nr_scan);
+			else
+				__count_zone_vm_events(PGSCAN_DIRECT, zone,
+						       nr_scan);
+		}
+
+		if (nr_taken == 0)
+			goto done;
+
 		nr_active = clear_active_flags(&page_list, count);
 		__count_vm_events(PGDEACTIVATE, nr_active);
 
@@ -1088,8 +1151,10 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		__mod_zone_page_state(zone, NR_INACTIVE_ANON,
 						-count[LRU_INACTIVE_ANON]);
 
-		if (scanning_global_lru(sc))
-			zone->pages_scanned += nr_scan;
+		nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
+		nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
+		__mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
+		__mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
 
 		reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
 		reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
@@ -1123,18 +1188,12 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		}
 
 		nr_reclaimed += nr_freed;
+
 		local_irq_disable();
-		if (current_is_kswapd()) {
-			__count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
+		if (current_is_kswapd())
 			__count_vm_events(KSWAPD_STEAL, nr_freed);
-		} else if (scanning_global_lru(sc))
-			__count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
-
 		__count_zone_vm_events(PGSTEAL, zone, nr_freed);
 
-		if (nr_taken == 0)
-			goto done;
-
 		spin_lock(&zone->lru_lock);
 		/*
 		 * Put back any unfreeable pages.
@@ -1153,8 +1212,8 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 			SetPageLRU(page);
 			lru = page_lru(page);
 			add_page_to_lru_list(zone, page, lru);
-			if (PageActive(page)) {
-				int file = !!page_is_file_cache(page);
+			if (is_active_lru(lru)) {
+				int file = is_file_lru(lru);
 				reclaim_stat->recent_rotated[file]++;
 			}
 			if (!pagevec_add(&pvec, page)) {
@@ -1163,10 +1222,13 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 				spin_lock_irq(&zone->lru_lock);
 			}
 		}
+		__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
+		__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+
   	} while (nr_scanned < max_scan);
-	spin_unlock(&zone->lru_lock);
+
 done:
-	local_irq_enable();
+	spin_unlock_irq(&zone->lru_lock);
 	pagevec_release(&pvec);
 	return nr_reclaimed;
 }
@@ -1215,15 +1277,10 @@ static void move_active_pages_to_lru(struct zone *zone,
 
 	while (!list_empty(list)) {
 		page = lru_to_page(list);
-		prefetchw_prev_lru_page(page, list, flags);
 
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 
-		VM_BUG_ON(!PageActive(page));
-		if (!is_active_lru(lru))
-			ClearPageActive(page);	/* we are de-activating */
-
 		list_move(&page->lru, &zone->lru[lru].list);
 		mem_cgroup_add_lru_list(page, lru);
 		pgmoved++;
@@ -1244,7 +1301,7 @@
 static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			struct scan_control *sc, int priority, int file)
 {
-	unsigned long pgmoved;
+	unsigned long nr_taken;
 	unsigned long pgscanned;
 	unsigned long vm_flags;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
@@ -1252,10 +1309,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	LIST_HEAD(l_inactive);
 	struct page *page;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	unsigned long nr_rotated = 0;
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
-	pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
+	nr_taken = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order,
 					ISOLATE_ACTIVE, zone,
 					sc->mem_cgroup, 1, file);
 	/*
@@ -1265,16 +1323,16 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	if (scanning_global_lru(sc)) {
 		zone->pages_scanned += pgscanned;
 	}
-	reclaim_stat->recent_scanned[!!file] += pgmoved;
+	reclaim_stat->recent_scanned[file] += nr_taken;
 
 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	if (file)
-		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
+		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
 	else
-		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
+		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
+	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
 
-	pgmoved = 0;  /* count referenced (mapping) mapped pages */
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1288,7 +1346,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		/* page_referenced clears PageReferenced */
 		if (page_mapping_inuse(page) &&
 		    page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
-			pgmoved++;
+			nr_rotated++;
 			/*
 			 * Identify referenced, file-backed active pages and
 			 * give them one more trip around the active list. So
@@ -1304,6 +1362,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			}
 		}
 
+		ClearPageActive(page);	/* we are de-activating */
 		list_add(&page->lru, &l_inactive);
 	}
 
@@ -1317,13 +1376,13 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	 * helps balance scan pressure between file and anonymous pages in
 	 * get_scan_ratio.
 	 */
-	reclaim_stat->recent_rotated[!!file] += pgmoved;
+	reclaim_stat->recent_rotated[file] += nr_rotated;
 
 	move_active_pages_to_lru(zone, &l_active,
 						LRU_ACTIVE + file * LRU_FILE);
 	move_active_pages_to_lru(zone, &l_inactive,
 						LRU_BASE   + file * LRU_FILE);
-
+	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
 }
 
@@ -1429,10 +1488,10 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	unsigned long ap, fp;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
-	anon  = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) +
-		zone_nr_pages(zone, sc, LRU_INACTIVE_ANON);
-	file  = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) +
-		zone_nr_pages(zone, sc, LRU_INACTIVE_FILE);
+	anon  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
+		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
+	file  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
+		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
 
 	if (scanning_global_lru(sc)) {
 		free  = zone_page_state(zone, NR_FREE_PAGES);
@@ -1526,6 +1585,7 @@ static void shrink_zone(int priority, struct zone *zone,
 	enum lru_list l;
 	unsigned long nr_reclaimed = sc->nr_reclaimed;
 	unsigned long swap_cluster_max = sc->swap_cluster_max;
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 	int noswap = 0;
 
 	/* If we have no swap space, do not bother scanning anon pages. */
@@ -1540,17 +1600,14 @@
 		int file = is_file_lru(l);
 		unsigned long scan;
 
-		scan = zone_nr_pages(zone, sc, l);
+		scan = zone_nr_lru_pages(zone, sc, l);
 		if (priority || noswap) {
 			scan >>= priority;
 			scan = (scan * percent[file]) / 100;
 		}
-		if (scanning_global_lru(sc))
-			nr[l] = nr_scan_try_batch(scan,
-						  &zone->lru[l].nr_saved_scan,
-						  swap_cluster_max);
-		else
-			nr[l] = scan;
+		nr[l] = nr_scan_try_batch(scan,
+					  &reclaim_stat->nr_saved_scan[l],
+					  swap_cluster_max);
 	}
 
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -1685,7 +1742,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
 
-			lru_pages += zone_lru_pages(zone);
+			lru_pages += zone_reclaimable_pages(zone);
 		}
 	}
 
@@ -1902,7 +1959,7 @@ loop_again:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
 
-			lru_pages += zone_lru_pages(zone);
+			lru_pages += zone_reclaimable_pages(zone);
 		}
 
 		/*
@@ -1946,7 +2003,7 @@ loop_again:
 			if (zone_is_all_unreclaimable(zone))
 				continue;
 			if (nr_slab == 0 && zone->pages_scanned >=
-						(zone_lru_pages(zone) * 6))
+					(zone_reclaimable_pages(zone) * 6))
 					zone_set_flag(zone,
 						      ZONE_ALL_UNRECLAIMABLE);
 			/*
@@ -2113,12 +2170,39 @@ void wakeup_kswapd(struct zone *zone, int order)
 	wake_up_interruptible(&pgdat->kswapd_wait);
 }
 
-unsigned long global_lru_pages(void)
+/*
+ * The reclaimable count would be mostly accurate.
+ * The less reclaimable pages may be
+ * - mlocked pages, which will be moved to unevictable list when encountered
+ * - mapped pages, which may require several travels to be reclaimed
+ * - dirty pages, which is not "instantly" reclaimable
+ */
+unsigned long global_reclaimable_pages(void)
+{
+	int nr;
+
+	nr = global_page_state(NR_ACTIVE_FILE) +
+	     global_page_state(NR_INACTIVE_FILE);
+
+	if (nr_swap_pages > 0)
+		nr += global_page_state(NR_ACTIVE_ANON) +
+		      global_page_state(NR_INACTIVE_ANON);
+
+	return nr;
+}
+
+unsigned long zone_reclaimable_pages(struct zone *zone)
 {
-	return global_page_state(NR_ACTIVE_ANON)
-		+ global_page_state(NR_ACTIVE_FILE)
-		+ global_page_state(NR_INACTIVE_ANON)
-		+ global_page_state(NR_INACTIVE_FILE);
+	int nr;
+
+	nr = zone_page_state(zone, NR_ACTIVE_FILE) +
+	     zone_page_state(zone, NR_INACTIVE_FILE);
+
+	if (nr_swap_pages > 0)
+		nr += zone_page_state(zone, NR_ACTIVE_ANON) +
+		      zone_page_state(zone, NR_INACTIVE_ANON);
+
+	return nr;
 }
 
 #ifdef CONFIG_HIBERNATION
@@ -2133,6 +2217,7 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
 {
 	struct zone *zone;
 	unsigned long nr_reclaimed = 0;
+	struct zone_reclaim_stat *reclaim_stat;
 
 	for_each_populated_zone(zone) {
 		enum lru_list l;
@@ -2149,11 +2234,14 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
 						l == LRU_ACTIVE_FILE))
 				continue;
 
-			zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1;
-			if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) {
+			reclaim_stat = get_reclaim_stat(zone, sc);
+			reclaim_stat->nr_saved_scan[l] +=
+						(lru_pages >> prio) + 1;
+			if (reclaim_stat->nr_saved_scan[l]
+						>= nr_pages || pass > 3) {
 				unsigned long nr_to_scan;
 
-				zone->lru[l].nr_saved_scan = 0;
+				reclaim_stat->nr_saved_scan[l] = 0;
 				nr_to_scan = min(nr_pages, lru_pages);
 				nr_reclaimed += shrink_list(l, nr_to_scan, zone,
 								sc, prio);
@@ -2190,7 +2278,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 
 	current->reclaim_state = &reclaim_state;
 
-	lru_pages = global_lru_pages();
+	lru_pages = global_reclaimable_pages();
 	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
 	/* If slab caches are huge, it's better to hit them first */
 	while (nr_slab >= lru_pages) {
@@ -2232,7 +2320,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 
 			reclaim_state.reclaimed_slab = 0;
 			shrink_slab(sc.nr_scanned, sc.gfp_mask,
-					global_lru_pages());
+				    global_reclaimable_pages());
 			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
 			if (sc.nr_reclaimed >= nr_pages)
 				goto out;
@@ -2249,7 +2337,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
 	if (!sc.nr_reclaimed) {
 		do {
 			reclaim_state.reclaimed_slab = 0;
-			shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
+			shrink_slab(nr_pages, sc.gfp_mask,
+				    global_reclaimable_pages());
 			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
 		} while (sc.nr_reclaimed < nr_pages &&
 				reclaim_state.reclaimed_slab > 0);
@@ -2569,7 +2658,7 @@ static void check_move_unevictable_page(struct page *page, struct zone *zone)
 retry:
 	ClearPageUnevictable(page);
 	if (page_evictable(page, NULL)) {
-		enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
+		enum lru_list l = page_lru_base_type(page);
 
 		__dec_zone_state(zone, NR_UNEVICTABLE);
 		list_move(&page->lru, &zone->lru[l].list);
