aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRik van Riel <riel@redhat.com>2008-10-18 20:26:32 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2008-10-20 08:50:25 -0700
commit4f98a2fee8acdb4ac84545df98cccecfd130f8db (patch)
tree035a2937f4c3e2f7b4269412041c073ac646937c
parentb2e185384f534781fd22f5ce170b2ad26f97df70 (diff)
vmscan: split LRU lists into anon & file sets
Split the LRU lists in two, one set for pages that are backed by real file systems ("file") and one for pages that are backed by memory and swap ("anon"). The latter includes tmpfs. The advantage of doing this is that the VM will not have to scan over lots of anonymous pages (which we generally do not want to swap out), just to find the page cache pages that it should evict. This patch has the infrastructure and a basic policy to balance how much we scan the anon lists and how much we scan the file lists. The big policy changes are in separate patches. [lee.schermerhorn@hp.com: collect lru meminfo statistics from correct offset] [kosaki.motohiro@jp.fujitsu.com: prevent incorrect oom under split_lru] [kosaki.motohiro@jp.fujitsu.com: fix pagevec_move_tail() doesn't treat unevictable page] [hugh@veritas.com: memcg swapbacked pages active] [hugh@veritas.com: splitlru: BDI_CAP_SWAP_BACKED] [akpm@linux-foundation.org: fix /proc/vmstat units] [nishimura@mxp.nes.nec.co.jp: memcg: fix handling of shmem migration] [kosaki.motohiro@jp.fujitsu.com: adjust Quicklists field of /proc/meminfo] [kosaki.motohiro@jp.fujitsu.com: fix style issue of get_scan_ratio()] Signed-off-by: Rik van Riel <riel@redhat.com> Signed-off-by: Lee Schermerhorn <Lee.Schermerhorn@hp.com> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--drivers/base/node.c56
-rw-r--r--fs/cifs/file.c4
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/ntfs/file.c4
-rw-r--r--fs/proc/proc_misc.c77
-rw-r--r--fs/ramfs/file-nommu.c4
-rw-r--r--include/linux/backing-dev.h13
-rw-r--r--include/linux/memcontrol.h2
-rw-r--r--include/linux/mm_inline.h50
-rw-r--r--include/linux/mmzone.h47
-rw-r--r--include/linux/pagevec.h29
-rw-r--r--include/linux/swap.h20
-rw-r--r--include/linux/vmstat.h10
-rw-r--r--mm/filemap.c22
-rw-r--r--mm/hugetlb.c10
-rw-r--r--mm/memcontrol.c88
-rw-r--r--mm/memory.c6
-rw-r--r--mm/page-writeback.c8
-rw-r--r--mm/page_alloc.c25
-rw-r--r--mm/readahead.c2
-rw-r--r--mm/shmem.c2
-rw-r--r--mm/swap.c14
-rw-r--r--mm/swap_state.c4
-rw-r--r--mm/vmscan.c416
-rw-r--r--mm/vmstat.c14
25 files changed, 562 insertions, 367 deletions
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 5116b78c632..fc7e9bf0cdb 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -61,34 +61,44 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
si_meminfo_node(&i, nid);
n = sprintf(buf, "\n"
- "Node %d MemTotal: %8lu kB\n"
- "Node %d MemFree: %8lu kB\n"
- "Node %d MemUsed: %8lu kB\n"
- "Node %d Active: %8lu kB\n"
- "Node %d Inactive: %8lu kB\n"
+ "Node %d MemTotal: %8lu kB\n"
+ "Node %d MemFree: %8lu kB\n"
+ "Node %d MemUsed: %8lu kB\n"
+ "Node %d Active: %8lu kB\n"
+ "Node %d Inactive: %8lu kB\n"
+ "Node %d Active(anon): %8lu kB\n"
+ "Node %d Inactive(anon): %8lu kB\n"
+ "Node %d Active(file): %8lu kB\n"
+ "Node %d Inactive(file): %8lu kB\n"
#ifdef CONFIG_HIGHMEM
- "Node %d HighTotal: %8lu kB\n"
- "Node %d HighFree: %8lu kB\n"
- "Node %d LowTotal: %8lu kB\n"
- "Node %d LowFree: %8lu kB\n"
+ "Node %d HighTotal: %8lu kB\n"
+ "Node %d HighFree: %8lu kB\n"
+ "Node %d LowTotal: %8lu kB\n"
+ "Node %d LowFree: %8lu kB\n"
#endif
- "Node %d Dirty: %8lu kB\n"
- "Node %d Writeback: %8lu kB\n"
- "Node %d FilePages: %8lu kB\n"
- "Node %d Mapped: %8lu kB\n"
- "Node %d AnonPages: %8lu kB\n"
- "Node %d PageTables: %8lu kB\n"
- "Node %d NFS_Unstable: %8lu kB\n"
- "Node %d Bounce: %8lu kB\n"
- "Node %d WritebackTmp: %8lu kB\n"
- "Node %d Slab: %8lu kB\n"
- "Node %d SReclaimable: %8lu kB\n"
- "Node %d SUnreclaim: %8lu kB\n",
+ "Node %d Dirty: %8lu kB\n"
+ "Node %d Writeback: %8lu kB\n"
+ "Node %d FilePages: %8lu kB\n"
+ "Node %d Mapped: %8lu kB\n"
+ "Node %d AnonPages: %8lu kB\n"
+ "Node %d PageTables: %8lu kB\n"
+ "Node %d NFS_Unstable: %8lu kB\n"
+ "Node %d Bounce: %8lu kB\n"
+ "Node %d WritebackTmp: %8lu kB\n"
+ "Node %d Slab: %8lu kB\n"
+ "Node %d SReclaimable: %8lu kB\n"
+ "Node %d SUnreclaim: %8lu kB\n",
nid, K(i.totalram),
nid, K(i.freeram),
nid, K(i.totalram - i.freeram),
- nid, K(node_page_state(nid, NR_ACTIVE)),
- nid, K(node_page_state(nid, NR_INACTIVE)),
+ nid, K(node_page_state(nid, NR_ACTIVE_ANON) +
+ node_page_state(nid, NR_ACTIVE_FILE)),
+ nid, K(node_page_state(nid, NR_INACTIVE_ANON) +
+ node_page_state(nid, NR_INACTIVE_FILE)),
+ nid, K(node_page_state(nid, NR_ACTIVE_ANON)),
+ nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
+ nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
+ nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
#ifdef CONFIG_HIGHMEM
nid, K(i.totalhigh),
nid, K(i.freehigh),
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c4a8a060512..62d8bd8f14c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1791,7 +1791,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
SetPageUptodate(page);
unlock_page(page);
if (!pagevec_add(plru_pvec, page))
- __pagevec_lru_add(plru_pvec);
+ __pagevec_lru_add_file(plru_pvec);
data += PAGE_CACHE_SIZE;
}
return;
@@ -1925,7 +1925,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
bytes_read = 0;
}
- pagevec_lru_add(&lru_pvec);
+ pagevec_lru_add_file(&lru_pvec);
/* need to free smb_read_data buf before exit */
if (smb_read_data) {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2ab70d46ecb..efdba2e802d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1517,7 +1517,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
GFP_KERNEL)) {
pagevec_add(&lru_pvec, page);
- pagevec_lru_add(&lru_pvec);
+ pagevec_lru_add_file(&lru_pvec);
SetPageUptodate(page);
unlock_page(page);
} else
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index d020866d423..3140a4429af 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -439,7 +439,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
pages[nr] = *cached_page;
page_cache_get(*cached_page);
if (unlikely(!pagevec_add(lru_pvec, *cached_page)))
- __pagevec_lru_add(lru_pvec);
+ __pagevec_lru_add_file(lru_pvec);
*cached_page = NULL;
}
index++;
@@ -2084,7 +2084,7 @@ err_out:
OSYNC_METADATA|OSYNC_DATA);
}
}
- pagevec_lru_add(&lru_pvec);
+ pagevec_lru_add_file(&lru_pvec);
ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
written ? "written" : "status", (unsigned long)written,
(long)status);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 59ea42e1ef0..b8edb286055 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -136,6 +136,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
unsigned long allowed;
struct vmalloc_info vmi;
long cached;
+ unsigned long pages[NR_LRU_LISTS];
+ int lru;
/*
* display in kilobytes.
@@ -154,51 +156,62 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
get_vmalloc_info(&vmi);
+ for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
+ pages[lru] = global_page_state(NR_LRU_BASE + lru);
+
/*
* Tagged format, for easy grepping and expansion.
*/
len = sprintf(page,
- "MemTotal: %8lu kB\n"
- "MemFree: %8lu kB\n"
- "Buffers: %8lu kB\n"
- "Cached: %8lu kB\n"
- "SwapCached: %8lu kB\n"
- "Active: %8lu kB\n"
- "Inactive: %8lu kB\n"
+ "MemTotal: %8lu kB\n"
+ "MemFree: %8lu kB\n"
+ "Buffers: %8lu kB\n"
+ "Cached: %8lu kB\n"
+ "SwapCached: %8lu kB\n"
+ "Active: %8lu kB\n"
+ "Inactive: %8lu kB\n"
+ "Active(anon): %8lu kB\n"
+ "Inactive(anon): %8lu kB\n"
+ "Active(file): %8lu kB\n"
+ "Inactive(file): %8lu kB\n"
#ifdef CONFIG_HIGHMEM
- "HighTotal: %8lu kB\n"
- "HighFree: %8lu kB\n"
- "LowTotal: %8lu kB\n"
- "LowFree: %8lu kB\n"
+ "HighTotal: %8lu kB\n"
+ "HighFree: %8lu kB\n"
+ "LowTotal: %8lu kB\n"
+ "LowFree: %8lu kB\n"
#endif
- "SwapTotal: %8lu kB\n"
- "SwapFree: %8lu kB\n"
- "Dirty: %8lu kB\n"
- "Writeback: %8lu kB\n"
- "AnonPages: %8lu kB\n"
- "Mapped: %8lu kB\n"
- "Slab: %8lu kB\n"
- "SReclaimable: %8lu kB\n"
- "SUnreclaim: %8lu kB\n"
- "PageTables: %8lu kB\n"
+ "SwapTotal: %8lu kB\n"
+ "SwapFree: %8lu kB\n"
+ "Dirty: %8lu kB\n"
+ "Writeback: %8lu kB\n"
+ "AnonPages: %8lu kB\n"
+ "Mapped: %8lu kB\n"
+ "Slab: %8lu kB\n"
+ "SReclaimable: %8lu kB\n"
+ "SUnreclaim: %8lu kB\n"
+ "PageTables: %8lu kB\n"
#ifdef CONFIG_QUICKLIST
- "Quicklists: %8lu kB\n"
+ "Quicklists: %8lu kB\n"
#endif
- "NFS_Unstable: %8lu kB\n"
- "Bounce: %8lu kB\n"
- "WritebackTmp: %8lu kB\n"
- "CommitLimit: %8lu kB\n"
- "Committed_AS: %8lu kB\n"
- "VmallocTotal: %8lu kB\n"
- "VmallocUsed: %8lu kB\n"
- "VmallocChunk: %8lu kB\n",
+ "NFS_Unstable: %8lu kB\n"
+ "Bounce: %8lu kB\n"
+ "WritebackTmp: %8lu kB\n"
+ "CommitLimit: %8lu kB\n"
+ "Committed_AS: %8lu kB\n"
+ "VmallocTotal: %8lu kB\n"
+ "VmallocUsed: %8lu kB\n"
+ "VmallocChunk: %8lu kB\n",
K(i.totalram),
K(i.freeram),
K(i.bufferram),
K(cached),
K(total_swapcache_pages),
- K(global_page_state(NR_ACTIVE)),
- K(global_page_state(NR_INACTIVE)),
+ K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
+ K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
+ K(pages[LRU_ACTIVE_ANON]),
+ K(pages[LRU_INACTIVE_ANON]),
+ K(pages[LRU_ACTIVE_FILE]),
+ K(pages[LRU_INACTIVE_FILE]),
#ifdef CONFIG_HIGHMEM
K(i.totalhigh),
K(i.freehigh),
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5145cb9125a..76acdbc3461 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,12 +112,12 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
goto add_error;
if (!pagevec_add(&lru_pvec, page))
- __pagevec_lru_add(&lru_pvec);
+ __pagevec_lru_add_file(&lru_pvec);
unlock_page(page);
}
- pagevec_lru_add(&lru_pvec);
+ pagevec_lru_add_file(&lru_pvec);
return 0;
fsize_exceeded:
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0a24d5550eb..bee52abb8a4 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -175,6 +175,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
* BDI_CAP_READ_MAP: Can be mapped for reading
* BDI_CAP_WRITE_MAP: Can be mapped for writing
* BDI_CAP_EXEC_MAP: Can be mapped for execution
+ *
+ * BDI_CAP_SWAP_BACKED: Count shmem/tmpfs objects as swap-backed.
*/
#define BDI_CAP_NO_ACCT_DIRTY 0x00000001
#define BDI_CAP_NO_WRITEBACK 0x00000002
@@ -184,6 +186,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
#define BDI_CAP_WRITE_MAP 0x00000020
#define BDI_CAP_EXEC_MAP 0x00000040
#define BDI_CAP_NO_ACCT_WB 0x00000080
+#define BDI_CAP_SWAP_BACKED 0x00000100
#define BDI_CAP_VMFLAGS \
(BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -248,6 +251,11 @@ static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
BDI_CAP_NO_WRITEBACK));
}
+static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
+{
+ return bdi->capabilities & BDI_CAP_SWAP_BACKED;
+}
+
static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
return bdi_cap_writeback_dirty(mapping->backing_dev_info);
@@ -258,4 +266,9 @@ static inline bool mapping_cap_account_dirty(struct address_space *mapping)
return bdi_cap_account_dirty(mapping->backing_dev_info);
}
+static inline bool mapping_cap_swap_backed(struct address_space *mapping)
+{
+ return bdi_cap_swap_backed(mapping->backing_dev_info);
+}
+
#endif /* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a6ac0d491fe..8d8f05c1515 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -44,7 +44,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
unsigned long *scanned, int order,
int mode, struct zone *z,
struct mem_cgroup *mem_cont,
- int active);
+ int active, int file);
extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 96e970485b6..2eb599465d5 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -5,7 +5,7 @@
* page_is_file_cache - should the page be on a file LRU or anon LRU?
* @page: the page to test
*
- * Returns !0 if @page is page cache page backed by a regular filesystem,
+ * Returns LRU_FILE if @page is page cache page backed by a regular filesystem,
* or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
* Used by functions that manipulate the LRU lists, to sort a page
* onto the right LRU list.
@@ -20,7 +20,7 @@ static inline int page_is_file_cache(struct page *page)
return 0;
/* The page is page cache backed by a normal filesystem. */
- return 1;
+ return LRU_FILE;
}
static inline void
@@ -38,39 +38,64 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
}
static inline void
-add_page_to_active_list(struct zone *zone, struct page *page)
+add_page_to_inactive_anon_list(struct zone *zone, struct page *page)
{
- add_page_to_lru_list(zone, page, LRU_ACTIVE);
+ add_page_to_lru_list(zone, page, LRU_INACTIVE_ANON);
}
static inline void
-add_page_to_inactive_list(struct zone *zone, struct page *page)
+add_page_to_active_anon_list(struct zone *zone, struct page *page)
{
- add_page_to_lru_list(zone, page, LRU_INACTIVE);
+ add_page_to_lru_list(zone, page, LRU_ACTIVE_ANON);
}
static inline void
-del_page_from_active_list(struct zone *zone, struct page *page)
+add_page_to_inactive_file_list(struct zone *zone, struct page *page)
{
- del_page_from_lru_list(zone, page, LRU_ACTIVE);
+ add_page_to_lru_list(zone, page, LRU_INACTIVE_FILE);
}
static inline void
-del_page_from_inactive_list(struct zone *zone, struct page *page)
+add_page_to_active_file_list(struct zone *zone, struct page *page)
{
- del_page_from_lru_list(zone, page, LRU_INACTIVE);
+ add_page_to_lru_list(zone, page, LRU_ACTIVE_FILE);
+}
+
+static inline void
+del_page_from_inactive_anon_list(struct zone *zone, struct page *page)
+{
+ del_page_from_lru_list(zone, page, LRU_INACTIVE_ANON);
+}
+
+static inline void
+del_page_from_active_anon_list(struct zone *zone, struct page *page)
+{
+ del_page_from_lru_list(zone, page, LRU_ACTIVE_ANON);
+}
+
+static inline void
+del_page_from_inactive_file_list(struct zone *zone, struct page *page)
+{
+ del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
+}
+
+static inline void
+del_page_from_active_file_list(struct zone *zone, struct page *page)
+{
+ del_page_from_lru_list(zone, page, LRU_INACTIVE_FILE);
}
static inline void
del_page_from_lru(struct zone *zone, struct page *page)
{
- enum lru_list l = LRU_INACTIVE;
+ enum lru_list l = LRU_BASE;
list_del(&page->lru);
if (PageActive(page)) {
__ClearPageActive(page);
- l = LRU_ACTIVE;
+ l += LRU_ACTIVE;
}
+ l += page_is_file_cache(page);
__dec_zone_state(zone, NR_LRU_BASE + l);
}
@@ -87,6 +112,7 @@ static inline enum lru_list page_lru(struct page *page)
if (PageActive(page))
lru += LRU_ACTIVE;
+ lru += page_is_file_cache(page);
return lru;
}
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 156e18f3919..59a4c8fd6eb 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -82,21 +82,23 @@ enum zone_stat_item {
/* First 128 byte cacheline (assuming 64 bit words) */
NR_FREE_PAGES,
NR_LRU_BASE,
- NR_INACTIVE = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
- NR_ACTIVE, /* " " " " " */
+ NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
+ NR_ACTIVE_ANON, /* " " " " " */
+ NR_INACTIVE_FILE, /* " " " " " */
+ NR_ACTIVE_FILE, /* " " " " " */
NR_ANON_PAGES, /* Mapped anonymous pages */
NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
only modified from process context */
NR_FILE_PAGES,
NR_FILE_DIRTY,
NR_WRITEBACK,
- /* Second 128 byte cacheline */
NR_SLAB_RECLAIMABLE,
NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */
NR_UNSTABLE_NFS, /* NFS unstable pages */
NR_BOUNCE,
NR_VMSCAN_WRITE,
+ /* Second 128 byte cacheline */
NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
#ifdef CONFIG_NUMA
NUMA_HIT, /* allocated in intended node */
@@ -108,17 +110,36 @@ enum zone_stat_item {
#endif
NR_VM_ZONE_STAT_ITEMS };
+/*
+ * We do arithmetic on the LRU lists in various places in the code,
+ * so it is important to keep the active lists LRU_ACTIVE higher in
+ * the array than the corresponding inactive lists, and to keep
+ * the *_FILE lists LRU_FILE higher than the corresponding _ANON lists.
+ *
+ * This has to be kept in sync with the statistics in zone_stat_item
+ * above and the descriptions in vmstat_text in mm/vmstat.c
+ */
+#define LRU_BASE 0
+#define LRU_ACTIVE 1
+#define LRU_FILE 2
+
enum lru_list {
- LRU_BASE,
- LRU_INACTIVE=LRU_BASE, /* must match order of NR_[IN]ACTIVE */
- LRU_ACTIVE, /* " " " " " */
+ LRU_INACTIVE_ANON = LRU_BASE,
+ LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
+ LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
+ LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
NR_LRU_LISTS };
#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
+static inline int is_file_lru(enum lru_list l)
+{
+ return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
+}
+
static inline int is_active_lru(enum lru_list l)
{
- return (l == LRU_ACTIVE);
+ return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
}
struct per_cpu_pages {
@@ -269,6 +290,18 @@ struct zone {
struct list_head list;
unsigned long nr_scan;
} lru[NR_LRU_LISTS];
+
+ /*
+ * The pageout code in vmscan.c keeps track of how many of the
+ * mem/swap backed and file backed pages are refeferenced.
+ * The higher the rotated/scanned ratio, the more valuable
+ * that cache is.
+ *
+ * The anon LRU stats live in [0], file LRU stats in [1]
+ */
+ unsigned long recent_rotated[2];
+ unsigned long recent_scanned[2];
+
unsigned long pages_scanned; /* since last reclaim */
unsigned long flags; /* zone flags, see below */
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index fea3a982ee5..5fc96a4e760 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -81,20 +81,37 @@ static inline void pagevec_free(struct pagevec *pvec)
__pagevec_free(pvec);
}
-static inline void __pagevec_lru_add(struct pagevec *pvec)
+static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
{
- ____pagevec_lru_add(pvec, LRU_INACTIVE);
+ ____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
}
-static inline void __pagevec_lru_add_active(struct pagevec *pvec)
+static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec)
{
- ____pagevec_lru_add(pvec, LRU_ACTIVE);
+ ____pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
}
-static inline void pagevec_lru_add(struct pagevec *pvec)
+static inline void __pagevec_lru_add_file(struct pagevec *pvec)
+{
+ ____pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
+}
+
+static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
+{
+ ____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
+}
+
+
+static inline void pagevec_lru_add_file(struct pagevec *pvec)
+{
+ if (pagevec_count(pvec))
+ __pagevec_lru_add_file(pvec);
+}
+
+static inline void pagevec_lru_add_anon(struct pagevec *pvec)
{
if (pagevec_count(pvec))
- __pagevec_lru_add(pvec);
+ __pagevec_lru_add_anon(pvec);
}
#endif /* _LINUX_PAGEVEC_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 833be56ad83..7d09d79997a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -184,14 +184,24 @@ extern void swap_setup(void);
* lru_cache_add: add a page to the page lists
* @page: the page to add
*/
-static inline void lru_cache_add(struct page *page)
+static inline void lru_cache_add_anon(struct page *page)
{
- __lru_cache_add(page, LRU_INACTIVE);
+ __lru_cache_add(page, LRU_INACTIVE_ANON);
}
-static inline void lru_cache_add_active(struct page *page)
+static inline void lru_cache_add_active_anon(struct page *page)
{
- __lru_cache_add(page, LRU_ACTIVE);
+ __lru_cache_add(page, LRU_ACTIVE_ANON);
+}
+
+static inline void lru_cache_add_file(struct page *page)
+{
+ __lru_cache_add(page, LRU_INACTIVE_FILE);
+}
+
+static inline void lru_cache_add_active_file(struct page *page)
+{
+ __lru_cache_add(page, LRU_ACTIVE_FILE);
}
/* linux/mm/vmscan.c */
@@ -199,7 +209,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask);
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
gfp_t gfp_mask);
-extern int __isolate_lru_page(struct page *page, int mode);
+extern int __isolate_lru_page(struct page *page, int mode, int file);
extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
extern int remove_mapping(struct address_space *mapping, struct page *page);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 58334d43951..ff5179f2b15 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -159,6 +159,16 @@ static inline unsigned long zone_page_state(struct zone *zone,
return x;
}
+extern unsigned long global_lru_pages(void);
+
+static inline unsigned long zone_lru_pages(struct zone *zone)
+{
+ return (zone_page_state(zone, NR_ACTIVE_ANON)
+ + zone_page_state(zone, NR_ACTIVE_FILE)
+ + zone_page_state(zone, NR_INACTIVE_ANON)
+ + zone_page_state(zone, NR_INACTIVE_FILE));
+}
+
#ifdef CONFIG_NUMA
/*
* Determine the per node value of a stat item. This function
diff --git a/mm/filemap.c b/mm/filemap.c
index 903bf316912..a1ddd2557af 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -33,6 +33,7 @@
#include <linux/cpuset.h>
#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
#include <linux/memcontrol.h>
+#include <linux/mm_inline.h> /* for page_is_file_cache() */
#include "internal.h"
/*
@@ -492,9 +493,24 @@ EXPORT_SYMBOL(add_to_page_cache_locked);
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask)
{
- int ret = add_to_page_cache(page, mapping, offset, gfp_mask);
- if (ret == 0)
- lru_cache_add(page);
+ int ret;
+
+ /*
+ * Splice_read and readahead add shmem/tmpfs pages into the page cache
+ * before shmem_readpage has a chance to mark them as SwapBacked: they
+ * need to go on the active_anon lru below, and mem_cgroup_cache_charge
+ * (called in add_to_page_cache) needs to know where they're going too.
+ */
+ if (mapping_cap_swap_backed(mapping))
+ SetPageSwapBacked(page);
+
+ ret = add_to_page_cache(page, mapping, offset, gfp_mask);
+ if (ret == 0) {
+ if (page_is_file_cache(page))
+ lru_cache_add_file(page);
+ else
+ lru_cache_add_active_anon(page);
+ }
return ret;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 38633864a93..2fc7fddd9b1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1459,11 +1459,11 @@ int hugetlb_report_meminfo(char *buf)
{
struct hstate *h = &default_hstate;
return sprintf(buf,
- "HugePages_Total: %5lu\n"
- "HugePages_Free: %5lu\n"
- "HugePages_Rsvd: %5lu\n"
- "HugePages_Surp: %5lu\n"
- "Hugepagesize: %5lu kB\n",
+ "HugePages_Total: %5lu\n"
+ "HugePages_Free: %5lu\n"
+ "HugePages_Rsvd: %5lu\n"
+ "HugePages_Surp: %5lu\n"
+ "Hugepagesize: %8lu kB\n",
h->nr_huge_pages,
h->free_huge_pages,
h->resv_huge_pages,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c0cbd7790c5..27e9e75f4ea 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -162,6 +162,7 @@ struct page_cgroup {
};
#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
#define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */
+#define PAGE_CGROUP_FLAG_FILE (0x4) /* page is file system backed */
static int page_cgroup_nid(struct page_cgroup *pc)
{
@@ -177,6 +178,7 @@ enum charge_type {
MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
MEM_CGROUP_CHARGE_TYPE_MAPPED,
MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */
+ MEM_CGROUP_CHARGE_TYPE_SHMEM, /* used by page migration of shmem */
};
/*
@@ -288,8 +290,12 @@ static void unlock_page_cgroup(struct page *page)
static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
struct page_cgroup *pc)
{
- int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
- int lru = !!from;
+ int lru = LRU_BASE;
+
+ if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
+ lru += LRU_ACTIVE;
+ if (pc->flags & PAGE_CGROUP_FLAG_FILE)
+ lru += LRU_FILE;
MEM_CGROUP_ZSTAT(mz, lru) -= 1;
@@ -300,10 +306,12 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
struct page_cgroup *pc)
{
- int lru = LRU_INACTIVE;
+ int lru = LRU_BASE;
if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
lru += LRU_ACTIVE;
+ if (pc->flags & PAGE_CGROUP_FLAG_FILE)
+ lru += LRU_FILE;
MEM_CGROUP_ZSTAT(mz, lru) += 1;
list_add(&pc->lru, &mz->lists[lru]);
@@ -314,10 +322,9 @@ static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
{
struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
- int lru = LRU_INACTIVE;
-
- if (pc->flags & PAGE_CGROUP_FLAG_ACTIVE)
- lru += LRU_ACTIVE;
+ int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
+ int file = pc->flags & PAGE_CGROUP_FLAG_FILE;
+ int lru = LRU_FILE * !!file + !!from;
MEM_CGROUP_ZSTAT(mz, lru) -= 1;
@@ -326,7 +333,7 @@ static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
else
pc->flags &= ~PAGE_CGROUP_FLAG_ACTIVE;
- lru = !!active;
+ lru = LRU_FILE * !!file + !!active;
MEM_CGROUP_ZSTAT(mz, lru) += 1;
list_move(&pc->lru, &mz->lists[lru]);
}
@@ -391,21 +398,6 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
}
/*
- * This function is called from vmscan.c. In page reclaiming loop. balance
- * between active and inactive list is calculated. For memory controller
- * page reclaiming, we should use using mem_cgroup's imbalance rather than
- * zone's global lru imbalance.
- */
-long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
-{
- unsigned long active, inactive;
- /* active and inactive are the number of pages. 'long' is ok.*/
- active = mem_cgroup_get_all_zonestat(mem, LRU_ACTIVE);
- inactive = mem_cgroup_get_all_zonestat(mem, LRU_INACTIVE);
- return (long) (active / (inactive + 1));
-}
-
-/*
* prev_priority control...this will be used in memory reclaim path.
*/
int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
@@ -450,7 +442,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
unsigned long *scanned, int order,
int mode, struct zone *z,
struct mem_cgroup *mem_cont,
- int active)
+ int active, int file)
{
unsigned long nr_taken = 0;
struct page *page;
@@ -461,7 +453,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
int nid = z->zone_pgdat->node_id;
int zid = zone_idx(z);
struct mem_cgroup_per_zone *mz;
- int lru = !!active;
+ int lru = LRU_FILE * !!file + !!active;
BUG_ON(!mem_cont);
mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
@@ -477,6 +469,9 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
if (unlikely(!PageLRU(page)))
continue;
+ /*
+ * TODO: play better with lumpy reclaim, grabbing anything.
+ */
if (PageActive(page) && !active) {
__mem_cgroup_move_lists(pc, true);
continue;
@@ -489,7 +484,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
scan++;
list_move(&pc->lru, &pc_list);
- if (__isolate_lru_page(page, mode) == 0) {
+ if (__isolate_lru_page(page, mode, file) == 0) {
list_move(&page->lru, dst);
nr_taken++;
}
@@ -575,10 +570,16 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
* If a page is accounted as a page cache, insert to inactive list.
* If anon, insert to active list.
*/
- if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
+ if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) {
pc->flags = PAGE_CGROUP_FLAG_CACHE;
- else
+ if (page_is_file_cache(page))
+ pc->flags |= PAGE_CGROUP_FLAG_FILE;
+ else
+ pc->flags |= PAGE_CGROUP_FLAG_ACTIVE;
+ } else if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
+ else /* MEM_CGROUP_CHARGE_TYP