Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--  mm/page-writeback.c | 123
1 file changed, 51 insertions, 72 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f5236f804aa..e0c943014eb 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -156,24 +156,6 @@ static unsigned long writeout_period_time = 0;
 #define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
 
 /*
- * Work out the current dirty-memory clamping and background writeout
- * thresholds.
- *
- * The main aim here is to lower them aggressively if there is a lot of mapped
- * memory around.  To avoid stressing page reclaim with lots of unreclaimable
- * pages.  It is better to clamp down on writers than to start swapping, and
- * performing lots of scanning.
- *
- * We only allow 1/2 of the currently-unmapped memory to be dirtied.
- *
- * We don't permit the clamping level to fall below 5% - that is getting rather
- * excessive.
- *
- * We make sure that the background writeout level is below the adjusted
- * clamping level.
- */
-
-/*
  * In a memory zone, there is a certain amount of pages we consider
  * available for the page cache, which is essentially the number of
  * free and reclaimable pages, minus some zone reserves to protect
@@ -191,6 +173,26 @@ static unsigned long writeout_period_time = 0;
  * global dirtyable memory first.
  */
 
+/**
+ * zone_dirtyable_memory - number of dirtyable pages in a zone
+ * @zone: the zone
+ *
+ * Returns the zone's number of pages potentially available for dirty
+ * page cache.  This is the base value for the per-zone dirty limits.
+ */
+static unsigned long zone_dirtyable_memory(struct zone *zone)
+{
+	unsigned long nr_pages;
+
+	nr_pages = zone_page_state(zone, NR_FREE_PAGES);
+	nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
+
+	nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
+	nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
+
+	return nr_pages;
+}
+
 static unsigned long highmem_dirtyable_memory(unsigned long total)
 {
 #ifdef CONFIG_HIGHMEM
@@ -198,11 +200,9 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
 	unsigned long x = 0;
 
 	for_each_node_state(node, N_HIGH_MEMORY) {
-		struct zone *z =
-			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
+		struct zone *z = &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 
-		x += zone_page_state(z, NR_FREE_PAGES) +
-		     zone_reclaimable_pages(z) - z->dirty_balance_reserve;
+		x += zone_dirtyable_memory(z);
 	}
 	/*
 	 * Unreclaimable memory (kernel memory or anonymous memory
@@ -238,9 +238,12 @@ static unsigned long global_dirtyable_memory(void)
 {
 	unsigned long x;
 
-	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
+	x = global_page_state(NR_FREE_PAGES);
 	x -= min(x, dirty_balance_reserve);
 
+	x += global_page_state(NR_INACTIVE_FILE);
+	x += global_page_state(NR_ACTIVE_FILE);
+
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
 
@@ -289,32 +292,6 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
 }
 
 /**
- * zone_dirtyable_memory - number of dirtyable pages in a zone
- * @zone: the zone
- *
- * Returns the zone's number of pages potentially available for dirty
- * page cache.  This is the base value for the per-zone dirty limits.
- */
-static unsigned long zone_dirtyable_memory(struct zone *zone)
-{
-	/*
-	 * The effective global number of dirtyable pages may exclude
-	 * highmem as a big-picture measure to keep the ratio between
-	 * dirty memory and lowmem reasonable.
-	 *
-	 * But this function is purely about the individual zone and a
-	 * highmem zone can hold its share of dirty pages, so we don't
-	 * care about vm_highmem_is_dirtyable here.
-	 */
-	unsigned long nr_pages = zone_page_state(zone, NR_FREE_PAGES) +
-		zone_reclaimable_pages(zone);
-
-	/* don't allow this to underflow */
-	nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
-	return nr_pages;
-}
-
-/**
  * zone_dirty_limit - maximum number of dirty pages allowed in a zone
  * @zone: the zone
  *
@@ -598,14 +575,14 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
  * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
  *     => fast response on large errors; small oscillation near setpoint
  */
-static inline long long pos_ratio_polynom(unsigned long setpoint,
+static long long pos_ratio_polynom(unsigned long setpoint,
 					  unsigned long dirty,
 					  unsigned long limit)
 {
 	long long pos_ratio;
 	long x;
 
-	x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
+	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
 		    limit - setpoint + 1);
 	pos_ratio = x;
 	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
@@ -847,7 +824,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
 	x_intercept = bdi_setpoint + span;
 
 	if (bdi_dirty < x_intercept - span / 4) {
-		pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty),
+		pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
 				    x_intercept - bdi_setpoint + 1);
 	} else
 		pos_ratio /= 4;
@@ -1210,11 +1187,11 @@ static unsigned long dirty_poll_interval(unsigned long dirty,
 	return 1;
 }
 
-static long bdi_max_pause(struct backing_dev_info *bdi,
-			  unsigned long bdi_dirty)
+static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
+				   unsigned long bdi_dirty)
 {
-	long bw = bdi->avg_write_bandwidth;
-	long t;
+	unsigned long bw = bdi->avg_write_bandwidth;
+	unsigned long t;
 
 	/*
 	 * Limit pause time for small memory systems. If sleeping for too long
@@ -1226,7 +1203,7 @@ static long bdi_max_pause(struct backing_dev_info *bdi,
 	t = bdi_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
 	t++;
 
-	return min_t(long, t, MAX_PAUSE);
+	return min_t(unsigned long, t, MAX_PAUSE);
 }
 
 static long bdi_min_pause(struct backing_dev_info *bdi,
@@ -1329,9 +1306,9 @@ static inline void bdi_dirty_limits(struct backing_dev_info *bdi,
 	*bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
 
 	if (bdi_bg_thresh)
-		*bdi_bg_thresh = div_u64((u64)*bdi_thresh *
-					 background_thresh,
-					 dirty_thresh);
+		*bdi_bg_thresh = dirty_thresh ? div_u64((u64)*bdi_thresh *
+							background_thresh,
+							dirty_thresh) : 0;
 
 	/*
 	 * In order to avoid the stacked BDI deadlock we need
@@ -1567,9 +1544,9 @@ pause:
 		bdi_start_background_writeback(bdi);
 }
 
-void set_page_dirty_balance(struct page *page, int page_mkwrite)
+void set_page_dirty_balance(struct page *page)
 {
-	if (set_page_dirty(page) || page_mkwrite) {
+	if (set_page_dirty(page)) {
 		struct address_space *mapping = page_mapping(page);
 
 		if (mapping)
@@ -1628,7 +1605,7 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
 	 * 1000+ tasks, all of them start dirtying pages at exactly the same
 	 * time, hence all honoured too large initial task->nr_dirtied_pause.
 	 */
-	p =  &__get_cpu_var(bdp_ratelimits);
+	p =  this_cpu_ptr(&bdp_ratelimits);
 	if (unlikely(current->nr_dirtied >= ratelimit))
 		*p = 0;
 	else if (unlikely(*p >= ratelimit_pages)) {
@@ -1640,7 +1617,7 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
 	 * short-lived tasks (eg. gcc invocations in a kernel build) escaping
 	 * the dirty throttling and livelock other long-run dirtiers.
 	 */
-	p = &__get_cpu_var(dirty_throttle_leaks);
+	p = this_cpu_ptr(&dirty_throttle_leaks);
 	if (*p > 0 && current->nr_dirtied < ratelimit) {
 		unsigned long nr_pages_dirtied;
 		nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
@@ -1687,7 +1664,7 @@ void throttle_vm_writeout(gfp_t gfp_mask)
 /*
  * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
  */
-int dirty_writeback_centisecs_handler(ctl_table *table, int write,
+int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
 	proc_dointvec(table, write, buffer, length, ppos);
@@ -2178,11 +2155,12 @@ int __set_page_dirty_nobuffers(struct page *page)
 	if (!TestSetPageDirty(page)) {
 		struct address_space *mapping = page_mapping(page);
 		struct address_space *mapping2;
+		unsigned long flags;
 
 		if (!mapping)
 			return 1;
 
-		spin_lock_irq(&mapping->tree_lock);
+		spin_lock_irqsave(&mapping->tree_lock, flags);
 		mapping2 = page_mapping(page);
 		if (mapping2) { /* Race with truncate? */
 			BUG_ON(mapping2 != mapping);
@@ -2191,7 +2169,7 @@ int __set_page_dirty_nobuffers(struct page *page)
 			radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 		}
-		spin_unlock_irq(&mapping->tree_lock);
+		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 		if (mapping->host) {
 			/* !PageAnon && !swapper_space */
 			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -2402,7 +2380,7 @@ int test_clear_page_writeback(struct page *page)
 	return ret;
 }
 
-int test_set_page_writeback(struct page *page)
+int __test_set_page_writeback(struct page *page, bool keep_write)
 {
 	struct address_space *mapping = page_mapping(page);
 	int ret;
@@ -2427,9 +2405,10 @@ int test_set_page_writeback(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
-		radix_tree_tag_clear(&mapping->page_tree,
-				     page_index(page),
-				     PAGECACHE_TAG_TOWRITE);
+		if (!keep_write)
+			radix_tree_tag_clear(&mapping->page_tree,
+						page_index(page),
+						PAGECACHE_TAG_TOWRITE);
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
@@ -2440,7 +2419,7 @@ int test_set_page_writeback(struct page *page)
 	return ret;
 
 }
-EXPORT_SYMBOL(test_set_page_writeback);
+EXPORT_SYMBOL(__test_set_page_writeback);
 
 /*
  * Return true if any of the pages in the mapping are marked with the
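
Note on the final hunks: renaming test_set_page_writeback() to __test_set_page_writeback(page, keep_write) implies that callers keep their old behaviour through thin wrappers, with the keep_write argument chosen per call site (keeping the PAGECACHE_TAG_TOWRITE tag lets tagged writeback revisit a page that is being redirtied during writeback). Those wrappers live outside this file and are not part of this diff; the sketch below is only an assumption of how they would typically look, with illustrative placement in a header such as include/linux/page-flags.h.

/*
 * Sketch only: wrapper names and location are assumed, not shown in this
 * diff.  The plain wrapper preserves the old test_set_page_writeback()
 * semantics (TOWRITE tag cleared); the _keepwrite variant passes
 * keep_write=true so PAGECACHE_TAG_TOWRITE survives.
 */
static inline int test_set_page_writeback(struct page *page)
{
	return __test_set_page_writeback(page, false);
}

static inline int test_set_page_writeback_keepwrite(struct page *page)
{
	return __test_set_page_writeback(page, true);
}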

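The __get_cpu_var() to this_cpu_ptr() conversions in balance_dirty_pages_ratelimited() are mechanical: &__get_cpu_var(var) and this_cpu_ptr(&var) both yield a pointer to the current CPU's instance of a per-CPU variable. A minimal sketch of the pattern, using an illustrative variable name rather than anything from this file:

#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(int, demo_counter);	/* illustrative per-CPU variable */

static void demo_bump(void)
{
	int *p;

	preempt_disable();			/* stay on one CPU while touching it */
	p = this_cpu_ptr(&demo_counter);	/* old spelling: &__get_cpu_var(demo_counter) */
	(*p)++;
	preempt_enable();
}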