aboutsummaryrefslogtreecommitdiff
path: root/mm/page-writeback.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--mm/page-writeback.c123
1 files changed, 51 insertions, 72 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f5236f804aa..e0c943014eb 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -156,24 +156,6 @@ static unsigned long writeout_period_time = 0;
#define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
/*
- * Work out the current dirty-memory clamping and background writeout
- * thresholds.
- *
- * The main aim here is to lower them aggressively if there is a lot of mapped
- * memory around. To avoid stressing page reclaim with lots of unreclaimable
- * pages. It is better to clamp down on writers than to start swapping, and
- * performing lots of scanning.
- *
- * We only allow 1/2 of the currently-unmapped memory to be dirtied.
- *
- * We don't permit the clamping level to fall below 5% - that is getting rather
- * excessive.
- *
- * We make sure that the background writeout level is below the adjusted
- * clamping level.
- */
-
-/*
* In a memory zone, there is a certain amount of pages we consider
* available for the page cache, which is essentially the number of
* free and reclaimable pages, minus some zone reserves to protect
@@ -191,6 +173,26 @@ static unsigned long writeout_period_time = 0;
* global dirtyable memory first.
*/
+/**
+ * zone_dirtyable_memory - number of dirtyable pages in a zone
+ * @zone: the zone
+ *
+ * Returns the zone's number of pages potentially available for dirty
+ * page cache. This is the base value for the per-zone dirty limits.
+ */
+static unsigned long zone_dirtyable_memory(struct zone *zone)
+{
+ unsigned long nr_pages;
+
+ nr_pages = zone_page_state(zone, NR_FREE_PAGES);
+ nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
+
+ nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
+ nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
+
+ return nr_pages;
+}
+
static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
@@ -198,11 +200,9 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
unsigned long x = 0;
for_each_node_state(node, N_HIGH_MEMORY) {
- struct zone *z =
- &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
+ struct zone *z = &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
- x += zone_page_state(z, NR_FREE_PAGES) +
- zone_reclaimable_pages(z) - z->dirty_balance_reserve;
+ x += zone_dirtyable_memory(z);
}
/*
* Unreclaimable memory (kernel memory or anonymous memory
@@ -238,9 +238,12 @@ static unsigned long global_dirtyable_memory(void)
{
unsigned long x;
- x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
+ x = global_page_state(NR_FREE_PAGES);
x -= min(x, dirty_balance_reserve);
+ x += global_page_state(NR_INACTIVE_FILE);
+ x += global_page_state(NR_ACTIVE_FILE);
+
if (!vm_highmem_is_dirtyable)
x -= highmem_dirtyable_memory(x);
@@ -289,32 +292,6 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
}
/**
- * zone_dirtyable_memory - number of dirtyable pages in a zone
- * @zone: the zone
- *
- * Returns the zone's number of pages potentially available for dirty
- * page cache. This is the base value for the per-zone dirty limits.
- */
-static unsigned long zone_dirtyable_memory(struct zone *zone)
-{
- /*
- * The effective global number of dirtyable pages may exclude
- * highmem as a big-picture measure to keep the ratio between
- * dirty memory and lowmem reasonable.
- *
- * But this function is purely about the individual zone and a
- * highmem zone can hold its share of dirty pages, so we don't
- * care about vm_highmem_is_dirtyable here.
- */
- unsigned long nr_pages = zone_page_state(zone, NR_FREE_PAGES) +
- zone_reclaimable_pages(zone);
-
- /* don't allow this to underflow */
- nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
- return nr_pages;
-}
-
-/**
* zone_dirty_limit - maximum number of dirty pages allowed in a zone
* @zone: the zone
*
@@ -598,14 +575,14 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
* (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
* => fast response on large errors; small oscillation near setpoint
*/
-static inline long long pos_ratio_polynom(unsigned long setpoint,
+static long long pos_ratio_polynom(unsigned long setpoint,
unsigned long dirty,
unsigned long limit)
{
long long pos_ratio;
long x;
- x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
+ x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
limit - setpoint + 1);
pos_ratio = x;
pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
@@ -847,7 +824,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
x_intercept = bdi_setpoint + span;
if (bdi_dirty < x_intercept - span / 4) {
- pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty),
+ pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
x_intercept - bdi_setpoint + 1);
} else
pos_ratio /= 4;
@@ -1210,11 +1187,11 @@ static unsigned long dirty_poll_interval(unsigned long dirty,
return 1;
}
-static long bdi_max_pause(struct backing_dev_info *bdi,
- unsigned long bdi_dirty)
+static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
+ unsigned long bdi_dirty)
{
- long bw = bdi->avg_write_bandwidth;
- long t;
+ unsigned long bw = bdi->avg_write_bandwidth;
+ unsigned long t;
/*
* Limit pause time for small memory systems. If sleeping for too long
@@ -1226,7 +1203,7 @@ static long bdi_max_pause(struct backing_dev_info *bdi,
t = bdi_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
t++;
- return min_t(long, t, MAX_PAUSE);
+ return min_t(unsigned long, t, MAX_PAUSE);
}
static long bdi_min_pause(struct backing_dev_info *bdi,
@@ -1329,9 +1306,9 @@ static inline void bdi_dirty_limits(struct backing_dev_info *bdi,
*bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
if (bdi_bg_thresh)
- *bdi_bg_thresh = div_u64((u64)*bdi_thresh *
- background_thresh,
- dirty_thresh);
+ *bdi_bg_thresh = dirty_thresh ? div_u64((u64)*bdi_thresh *
+ background_thresh,
+ dirty_thresh) : 0;
/*
* In order to avoid the stacked BDI deadlock we need
@@ -1567,9 +1544,9 @@ pause:
bdi_start_background_writeback(bdi);
}
-void set_page_dirty_balance(struct page *page, int page_mkwrite)
+void set_page_dirty_balance(struct page *page)
{
- if (set_page_dirty(page) || page_mkwrite) {
+ if (set_page_dirty(page)) {
struct address_space *mapping = page_mapping(page);
if (mapping)
@@ -1628,7 +1605,7 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
* 1000+ tasks, all of them start dirtying pages at exactly the same
* time, hence all honoured too large initial task->nr_dirtied_pause.
*/
- p = &__get_cpu_var(bdp_ratelimits);
+ p = this_cpu_ptr(&bdp_ratelimits);
if (unlikely(current->nr_dirtied >= ratelimit))
*p = 0;
else if (unlikely(*p >= ratelimit_pages)) {
@@ -1640,7 +1617,7 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
* short-lived tasks (eg. gcc invocations in a kernel build) escaping
* the dirty throttling and livelock other long-run dirtiers.
*/
- p = &__get_cpu_var(dirty_throttle_leaks);
+ p = this_cpu_ptr(&dirty_throttle_leaks);
if (*p > 0 && current->nr_dirtied < ratelimit) {
unsigned long nr_pages_dirtied;
nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
@@ -1687,7 +1664,7 @@ void throttle_vm_writeout(gfp_t gfp_mask)
/*
* sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
*/
-int dirty_writeback_centisecs_handler(ctl_table *table, int write,
+int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
proc_dointvec(table, write, buffer, length, ppos);
@@ -2178,11 +2155,12 @@ int __set_page_dirty_nobuffers(struct page *page)
if (!TestSetPageDirty(page)) {
struct address_space *mapping = page_mapping(page);
struct address_space *mapping2;
+ unsigned long flags;
if (!mapping)
return 1;
- spin_lock_irq(&mapping->tree_lock);
+ spin_lock_irqsave(&mapping->tree_lock, flags);
mapping2 = page_mapping(page);
if (mapping2) { /* Race with truncate? */
BUG_ON(mapping2 != mapping);
@@ -2191,7 +2169,7 @@ int __set_page_dirty_nobuffers(struct page *page)
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
- spin_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
if (mapping->host) {
/* !PageAnon && !swapper_space */
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -2402,7 +2380,7 @@ int test_clear_page_writeback(struct page *page)
return ret;
}
-int test_set_page_writeback(struct page *page)
+int __test_set_page_writeback(struct page *page, bool keep_write)
{
struct address_space *mapping = page_mapping(page);
int ret;
@@ -2427,9 +2405,10 @@ int test_set_page_writeback(struct page *page)
radix_tree_tag_clear(&mapping->page_tree,
page_index(page),
PAGECACHE_TAG_DIRTY);
- radix_tree_tag_clear(&mapping->page_tree,
- page_index(page),
- PAGECACHE_TAG_TOWRITE);
+ if (!keep_write)
+ radix_tree_tag_clear(&mapping->page_tree,
+ page_index(page),
+ PAGECACHE_TAG_TOWRITE);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
} else {
ret = TestSetPageWriteback(page);
@@ -2440,7 +2419,7 @@ int test_set_page_writeback(struct page *page)
return ret;
}
-EXPORT_SYMBOL(test_set_page_writeback);
+EXPORT_SYMBOL(__test_set_page_writeback);
/*
* Return true if any of the pages in the mapping are marked with the