/*
* mm/page-writeback.c
*
* Copyright (C) 2002, Linus Torvalds.
*
* Contains functions related to writing back dirty pages at the
* address_space level.
*
* 10Apr2002 akpm@zip.com.au
* Initial version
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/blkdev.h>
#include <linux/mpage.h>
#include <linux/rmap.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
/*
* The maximum number of pages to writeout in a single bdflush/kupdate
* operation. We do this so we don't hold I_LOCK against an inode for
* enormous amounts of time, which would block a userspace task which has
* been forced to throttle against that inode. Also, the code reevaluates
* its dirty limits each time it has written this many pages.
*/
#define MAX_WRITEBACK_PAGES 1024
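/*
 * Illustrative sketch, kept out of the build with #if 0: one way a
 * writeout loop can honour MAX_WRITEBACK_PAGES, issuing a bounded batch
 * and then re-reading the dirty limits before deciding whether to
 * continue.  The function name is hypothetical; struct writeback_control
 * and writeback_inodes() are the real writeback interfaces of this era.
 */
#if 0
static void example_writeout_batches(long nr_pages)
{
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_NONE,
		.nonblocking	= 1,
		.range_cyclic	= 1,
	};

	while (nr_pages > 0) {
		long background_thresh, dirty_thresh;

		/* Re-evaluate the limits after every bounded batch */
		get_dirty_limits(&background_thresh, &dirty_thresh, NULL);
		if (global_page_state(NR_FILE_DIRTY) +
		    global_page_state(NR_UNSTABLE_NFS) < background_thresh)
			break;

		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
		writeback_inodes(&wbc);

		/* nr_to_write is decremented as pages are written out */
		nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
		if (wbc.nr_to_write > 0)
			break;	/* wrote less than a full batch: stop */
	}
}
#endif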
/*
* After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
* will look to see if it needs to force writeback or throttling.
*/
static long ratelimit_pages = 32;
static int dirty_exceeded __cacheline_aligned_in_smp; /* Dirty mem may be over limit */
/*
* When balance_dirty_pages decides that the caller needs to perform some
* non-background writeback, this is how many pages it will attempt to write.
* It should be somewhat larger than ratelimit_pages to ensure that reasonably
* large amounts of I/O are submitted.
*/
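/*
 * With ratelimit_pages at its initial value of 32, this works out to 48
 * pages per throttled writeout; ratelimit_pages itself is retuned at boot
 * and on CPU hotplug according to memory size and CPU count.
 */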
static inline long sync_writeback_pages(void)
{
return ratelimit_pages + ratelimit_pages / 2;
}
/* The following parameters are exported via /proc/sys/vm */
/*
* Start background writeback (via pdflush) at this percentage
*/
int dirty_background_ratio = 5;
/*
* The generator of dirty data starts writeback at this percentage
*/
int vm_dirty_ratio = 10;
/*
* The interval between `kupdate'-style writebacks, in jiffies
*/
int dirty_writeback_interval = 5 * HZ;
/*
* The longest number of jiffies for which data is allowed to remain dirty
*/
int dirty_expire_interval = 30 * HZ;
/*
* Flag that makes the machine dump writes/reads and block dirtyings.
*/
int block_dump;
/*
* Flag that puts the machine in "laptop mode". Doubles as a timeout in jiffies:
* a full sync is triggered after this time elapses without any disk activity.
*/
int laptop_mode;
EXPORT_SYMBOL(laptop_mode);
/* End of sysctl-exported parameters */
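/*
 * Note on naming: vm_dirty_ratio is exposed as /proc/sys/vm/dirty_ratio,
 * and the two jiffies-based intervals are exposed in centiseconds as
 * /proc/sys/vm/dirty_writeback_centisecs and
 * /proc/sys/vm/dirty_expire_centisecs (the unit conversion is done by the
 * sysctl handlers in kernel/sysctl.c).  For example,
 *	echo 500 > /proc/sys/vm/dirty_writeback_centisecs
 * keeps the default 5-second kupdate interval.
 */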
static void background_writeout(unsigned long _min_pages);
/*
* Work out the current dirty-memory clamping and background writeout
* thresholds.
*
* The main aim here is to lower them aggressively if there is a lot of mapped
* memory around, to avoid stressing page reclaim with lots of unreclaimable
* pages. It is better to clamp down on writers than to start swapping and
* performing lots of scanning.
*
* We only allow 1/2 of the currently-unmapped memory to be dirtied.
*
* We don't permit the clamping level to fall below 5% - that is getting rather
* excessive.
*
* We make sure that the background writeout level is below the adjusted
* clamping level.
*/
static unsigned long highmem_dirtyable_memory(unsigned long total)
{
#ifdef CONFIG_HIGHMEM
int node;
unsigned long x = 0;
for_each_node_state(node, N_HIGH_MEMORY) {
struct zone *z =
&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
x += zone_page_state(z, NR_FREE_PAGES)
+ zone_page_state(z, NR_INACTIVE)
+ zone_page_state(z, NR_ACTIVE);
}
/*
* Make sure that the number of highmem pages is never larger
* than the total amount of dirtyable memory. This can only
* occur in very strange VM situations, but we want to make
* sure that it does not.
*/
return min(x, total);
#else
return 0;
#endif
}
static unsigned long determine_dirtyable_memory(void)
{
unsigned long x;
x = global_page_state(NR_FREE_PAGES)
+ global_page_state(NR_INACTIVE)
+ global_page_state(NR_ACTIVE);
x -= highmem_dirtyable_memory(x);
return x + 1; /* Ensure that we never return 0 */
}
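/*
 * Worked example (made-up figures): with 100000 free, 50000 inactive and
 * 150000 active pages and no highmem correction, the function above
 * returns 300001 pages; get_dirty_limits() below applies the ratio
 * clamping to that figure.
 */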
static void
get_dirty_limits(long *pbackground, long *pdirty,
struct address_space *mapping)
{
int background_ratio; /* Percentages */
int dirty_ratio;
int unmapped_ratio;
long background;
long dirty;
unsigned long available_memory = determine_dirtyable_memory();
struct task_struct *tsk;
unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) +
global_page_state(NR_ANON_PAGES)) * 100) /
available_memory;
dirty_ratio = vm_dirty_ratio;
if (dirty_ratio > unmapped_ratio / 2)
dirty_ratio = unmapped_ratio / 2;
if (dirty_ratio < 5)
dirty_ratio = 5;
background_ratio = dirty_background_ratio;
if (background_ratio >= dirty_ratio)
background_ratio = dirty_ratio / 2;
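/*
 * Worked example: if 90% of dirtyable memory is mapped, unmapped_ratio
 * is 10, so dirty_ratio is clamped to unmapped_ratio / 2 = 5; the
 * default background_ratio of 5 is then >= dirty_ratio and is halved
 * to 2.
 */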
background = (background_ratio * available_memory) / 100;
dirty = (dirty_ratio * available_memory) / 100;
tsk = current;
if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
background += background / 4;
dirty += dirty / 4;
}
*pbackground = background;