aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Makefile2
-rw-r--r--mm/filemap.c8
-rw-r--r--mm/highmem.c6
-rw-r--r--mm/memory.c8
-rw-r--r--mm/mempolicy.c6
-rw-r--r--mm/mmap.c2
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/page-writeback.c93
-rw-r--r--mm/page_alloc.c472
-rw-r--r--mm/page_io.c4
-rw-r--r--mm/rmap.c7
-rw-r--r--mm/shmem.c4
-rw-r--r--mm/slab.c124
-rw-r--r--mm/swap.c4
-rw-r--r--mm/swap_state.c4
-rw-r--r--mm/vmscan.c68
-rw-r--r--mm/vmstat.c614
17 files changed, 773 insertions, 655 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 0b8f73f2ed1..9dd824c11ee 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -10,7 +10,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
page_alloc.o page-writeback.o pdflush.o \
readahead.o swap.o truncate.o vmscan.o \
- prio_tree.o util.o mmzone.o $(mmu-y)
+ prio_tree.o util.o mmzone.o vmstat.o $(mmu-y)
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
diff --git a/mm/filemap.c b/mm/filemap.c
index f02ca30372c..d087fc3d328 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -119,7 +119,7 @@ void __remove_from_page_cache(struct page *page)
radix_tree_delete(&mapping->page_tree, page->index);
page->mapping = NULL;
mapping->nrpages--;
- pagecache_acct(-1);
+ __dec_zone_page_state(page, NR_FILE_PAGES);
}
void remove_from_page_cache(struct page *page)
@@ -448,7 +448,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
page->mapping = mapping;
page->index = offset;
mapping->nrpages++;
- pagecache_acct(1);
+ __inc_zone_page_state(page, NR_FILE_PAGES);
}
write_unlock_irq(&mapping->tree_lock);
radix_tree_preload_end();
@@ -1415,7 +1415,7 @@ retry_find:
*/
if (!did_readaround) {
majmin = VM_FAULT_MAJOR;
- inc_page_state(pgmajfault);
+ count_vm_event(PGMAJFAULT);
}
did_readaround = 1;
ra_pages = max_sane_readahead(file->f_ra.ra_pages);
@@ -1486,7 +1486,7 @@ no_cached_page:
page_not_uptodate:
if (!did_readaround) {
majmin = VM_FAULT_MAJOR;
- inc_page_state(pgmajfault);
+ count_vm_event(PGMAJFAULT);
}
lock_page(page);
diff --git a/mm/highmem.c b/mm/highmem.c
index 9b274fdf9d0..9b2a5403c44 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -315,8 +315,8 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
if (bvec->bv_page == org_vec->bv_page)
continue;
- mempool_free(bvec->bv_page, pool);
- dec_page_state(nr_bounce);
+ dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
+ mempool_free(bvec->bv_page, pool);
}
bio_endio(bio_orig, bio_orig->bi_size, err);
@@ -397,7 +397,7 @@ static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
to->bv_page = mempool_alloc(pool, q->bounce_gfp);
to->bv_len = from->bv_len;
to->bv_offset = from->bv_offset;
- inc_page_state(nr_bounce);
+ inc_zone_page_state(to->bv_page, NR_BOUNCE);
if (rw == WRITE) {
char *vto, *vfrom;
diff --git a/mm/memory.c b/mm/memory.c
index 247b5c312b9..7e2a4b1580e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -126,7 +126,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
pmd_clear(pmd);
pte_lock_deinit(page);
pte_free_tlb(tlb, page);
- dec_page_state(nr_page_table_pages);
+ dec_zone_page_state(page, NR_PAGETABLE);
tlb->mm->nr_ptes--;
}
@@ -311,7 +311,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
pte_free(new);
} else {
mm->nr_ptes++;
- inc_page_state(nr_page_table_pages);
+ inc_zone_page_state(new, NR_PAGETABLE);
pmd_populate(mm, pmd, new);
}
spin_unlock(&mm->page_table_lock);
@@ -1951,7 +1951,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
- inc_page_state(pgmajfault);
+ count_vm_event(PGMAJFAULT);
grab_swap_token();
}
@@ -2324,7 +2324,7 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
__set_current_state(TASK_RUNNING);
- inc_page_state(pgfault);
+ count_vm_event(PGFAULT);
if (unlikely(is_vm_hugetlb_page(vma)))
return hugetlb_fault(mm, vma, address, write_access);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 6b9740bbf4c..e07e27e846a 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1209,10 +1209,8 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
page = __alloc_pages(gfp, order, zl);
- if (page && page_zone(page) == zl->zones[0]) {
- zone_pcp(zl->zones[0],get_cpu())->interleave_hit++;
- put_cpu();
- }
+ if (page && page_zone(page) == zl->zones[0])
+ inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
return page;
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 6446c6134b0..c1868ecdbc5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -96,7 +96,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
unsigned long n;
- free = get_page_cache_size();
+ free = global_page_state(NR_FILE_PAGES);
free += nr_swap_pages;
/*
diff --git a/mm/nommu.c b/mm/nommu.c
index 029fadac0fb..5151c44a825 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1122,7 +1122,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
unsigned long n;
- free = get_page_cache_size();
+ free = global_page_state(NR_FILE_PAGES);
free += nr_swap_pages;
/*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4ec7026c7ba..e630188ccc4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -99,22 +99,6 @@ EXPORT_SYMBOL(laptop_mode);
static void background_writeout(unsigned long _min_pages);
-struct writeback_state
-{
- unsigned long nr_dirty;
- unsigned long nr_unstable;
- unsigned long nr_mapped;
- unsigned long nr_writeback;
-};
-
-static void get_writeback_state(struct writeback_state *wbs)
-{
- wbs->nr_dirty = read_page_state(nr_dirty);
- wbs->nr_unstable = read_page_state(nr_unstable);
- wbs->nr_mapped = read_page_state(nr_mapped);
- wbs->nr_writeback = read_page_state(nr_writeback);
-}
-
/*
* Work out the current dirty-memory clamping and background writeout
* thresholds.
@@ -133,8 +117,8 @@ static void get_writeback_state(struct writeback_state *wbs)
* clamping level.
*/
static void
-get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty,
- struct address_space *mapping)
+get_dirty_limits(long *pbackground, long *pdirty,
+ struct address_space *mapping)
{
int background_ratio; /* Percentages */
int dirty_ratio;
@@ -144,8 +128,6 @@ get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty,
unsigned long available_memory = total_pages;
struct task_struct *tsk;
- get_writeback_state(wbs);
-
#ifdef CONFIG_HIGHMEM
/*
* If this mapping can only allocate from low memory,
@@ -156,7 +138,9 @@ get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty,
#endif
- unmapped_ratio = 100 - (wbs->nr_mapped * 100) / total_pages;
+ unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) +
+ global_page_state(NR_ANON_PAGES)) * 100) /
+ total_pages;
dirty_ratio = vm_dirty_ratio;
if (dirty_ratio > unmapped_ratio / 2)
@@ -189,7 +173,6 @@ get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty,
*/
static void balance_dirty_pages(struct address_space *mapping)
{
- struct writeback_state wbs;
long nr_reclaimable;
long background_thresh;
long dirty_thresh;
@@ -207,11 +190,12 @@ static void balance_dirty_pages(struct address_space *mapping)
.range_cyclic = 1,
};
- get_dirty_limits(&wbs, &background_thresh,
- &dirty_thresh, mapping);
- nr_reclaimable = wbs.nr_dirty + wbs.nr_unstable;
- if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh)
- break;
+ get_dirty_limits(&background_thresh, &dirty_thresh, mapping);
+ nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS);
+ if (nr_reclaimable + global_page_state(NR_WRITEBACK) <=
+ dirty_thresh)
+ break;
if (!dirty_exceeded)
dirty_exceeded = 1;
@@ -224,11 +208,14 @@ static void balance_dirty_pages(struct address_space *mapping)
*/
if (nr_reclaimable) {
writeback_inodes(&wbc);
- get_dirty_limits(&wbs, &background_thresh,
- &dirty_thresh, mapping);
- nr_reclaimable = wbs.nr_dirty + wbs.nr_unstable;
- if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh)
- break;
+ get_dirty_limits(&background_thresh,
+ &dirty_thresh, mapping);
+ nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS);
+ if (nr_reclaimable +
+ global_page_state(NR_WRITEBACK)
+ <= dirty_thresh)
+ break;
pages_written += write_chunk - wbc.nr_to_write;
if (pages_written >= write_chunk)
break; /* We've done our duty */
@@ -236,8 +223,9 @@ static void balance_dirty_pages(struct address_space *mapping)
blk_congestion_wait(WRITE, HZ/10);
}
- if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh && dirty_exceeded)
- dirty_exceeded = 0;
+ if (nr_reclaimable + global_page_state(NR_WRITEBACK)
+ <= dirty_thresh && dirty_exceeded)
+ dirty_exceeded = 0;
if (writeback_in_progress(bdi))
return; /* pdflush is already working this queue */
@@ -299,12 +287,11 @@ EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
void throttle_vm_writeout(void)
{
- struct writeback_state wbs;
long background_thresh;
long dirty_thresh;
for ( ; ; ) {
- get_dirty_limits(&wbs, &background_thresh, &dirty_thresh, NULL);
+ get_dirty_limits(&background_thresh, &dirty_thresh, NULL);
/*
* Boost the allowable dirty threshold a bit for page
@@ -312,8 +299,9 @@ void throttle_vm_writeout(void)
*/
dirty_thresh += dirty_thresh / 10; /* wheeee... */
- if (wbs.nr_unstable + wbs.nr_writeback <= dirty_thresh)
- break;
+ if (global_page_state(NR_UNSTABLE_NFS) +
+ global_page_state(NR_WRITEBACK) <= dirty_thresh)
+ break;
blk_congestion_wait(WRITE, HZ/10);
}
}
@@ -336,12 +324,12 @@ static void background_writeout(unsigned long _min_pages)
};
for ( ; ; ) {
- struct writeback_state wbs;
long background_thresh;
long dirty_thresh;
- get_dirty_limits(&wbs, &background_thresh, &dirty_thresh, NULL);
- if (wbs.nr_dirty + wbs.nr_unstable < background_thresh
+ get_dirty_limits(&background_thresh, &dirty_thresh, NULL);
+ if (global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS) < background_thresh
&& min_pages <= 0)
break;
wbc.encountered_congestion = 0;
@@ -365,12 +353,9 @@ static void background_writeout(unsigned long _min_pages)
*/
int wakeup_pdflush(long nr_pages)
{
- if (nr_pages == 0) {
- struct writeback_state wbs;
-
- get_writeback_state(&wbs);
- nr_pages = wbs.nr_dirty + wbs.nr_unstable;
- }
+ if (nr_pages == 0)
+ nr_pages = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS);
return pdflush_operation(background_writeout, nr_pages);
}
@@ -401,7 +386,6 @@ static void wb_kupdate(unsigned long arg)
unsigned long start_jif;
unsigned long next_jif;
long nr_to_write;
- struct writeback_state wbs;
struct writeback_control wbc = {
.bdi = NULL,
.sync_mode = WB_SYNC_NONE,
@@ -414,11 +398,11 @@ static void wb_kupdate(unsigned long arg)
sync_supers();
- get_writeback_state(&wbs);
oldest_jif = jiffies - dirty_expire_interval;
start_jif = jiffies;
next_jif = start_jif + dirty_writeback_interval;
- nr_to_write = wbs.nr_dirty + wbs.nr_unstable +
+ nr_to_write = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS) +
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
while (nr_to_write > 0) {
wbc.encountered_congestion = 0;
@@ -640,7 +624,8 @@ int __set_page_dirty_nobuffers(struct page *page)
if (mapping2) { /* Race with truncate? */
BUG_ON(mapping2 != mapping);
if (mapping_cap_account_dirty(mapping))
- inc_page_state(nr_dirty);
+ __inc_zone_page_state(page,
+ NR_FILE_DIRTY);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
@@ -727,9 +712,9 @@ int test_clear_page_dirty(struct page *page)
radix_tree_tag_clear(&mapping->page_tree,
page_index(page),
PAGECACHE_TAG_DIRTY);
- write_unlock_irqrestore(&mapping->tree_lock, flags);
if (mapping_cap_account_dirty(mapping))
- dec_page_state(nr_dirty);
+ __dec_zone_page_state(page, NR_FILE_DIRTY);
+ write_unlock_irqrestore(&mapping->tree_lock, flags);
return 1;
}
write_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -760,7 +745,7 @@ int clear_page_dirty_for_io(struct page *page)
if (mapping) {
if (TestClearPageDirty(page)) {
if (mapping_cap_account_dirty(mapping))
- dec_page_state(nr_dirty);
+ dec_zone_page_state(page, NR_FILE_DIRTY);
return 1;
}
return 0;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 60f2feddbe5..3e792a583f3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -455,7 +455,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
kernel_map_pages(page, 1 << order, 0);
local_irq_save(flags);
- __mod_page_state(pgfree, 1 << order);
+ __count_vm_events(PGFREE, 1 << order);
free_one_page(page_zone(page), page, order);
local_irq_restore(flags);
}
@@ -708,27 +708,6 @@ void drain_local_pages(void)
}
#endif /* CONFIG_PM */
-static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
-{
-#ifdef CONFIG_NUMA
- pg_data_t *pg = z->zone_pgdat;
- pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
- struct per_cpu_pageset *p;
-
- p = zone_pcp(z, cpu);
- if (pg == orig) {
- p->numa_hit++;
- } else {
- p->numa_miss++;
- zone_pcp(zonelist->zones[0], cpu)->numa_foreign++;
- }
- if (pg == NODE_DATA(numa_node_id()))
- p->local_node++;
- else
- p->other_node++;
-#endif
-}
-
/*
* Free a 0-order page
*/
@@ -749,7 +728,7 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
local_irq_save(flags);
- __inc_page_state(pgfree);
+ __count_vm_event(PGFREE);
list_add(&page->lru, &pcp->list);
pcp->count++;
if (pcp->count >= pcp->high) {
@@ -825,8 +804,8 @@ again:
goto failed;
}
- __mod_page_state_zone(zone, pgalloc, 1 << order);
- zone_statistics(zonelist, zone, cpu);
+ __count_zone_vm_events(PGALLOC, zone, 1 << order);
+ zone_statistics(zonelist, zone);
local_irq_restore(flags);
put_cpu();
@@ -1230,141 +1209,6 @@ static void show_node(struct zone *zone)
#define show_node(zone) do { } while (0)
#endif
-/*
- * Accumulate the page_state information across all CPUs.
- * The result is unavoidably approximate - it can change
- * during and after execution of this function.
- */
-static DEFINE_PER_CPU(struct page_state, page_states) = {0};
-
-atomic_t nr_pagecache = ATOMIC_INIT(0);
-EXPORT_SYMBOL(nr_pagecache);
-#ifdef CONFIG_SMP
-DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
-#endif
-
-static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
-{
- unsigned cpu;
-
- memset(ret, 0, nr * sizeof(unsigned long));
- cpus_and(*cpumask, *cpumask, cpu_online_map);
-
- for_each_cpu_mask(cpu, *cpumask) {
- unsigned long *in;
- unsigned long *out;
- unsigned off;
- unsigned next_cpu;
-
- in = (unsigned long *)&per_cpu(page_states, cpu);
-
- next_cpu = next_cpu(cpu, *cpumask);
- if (likely(next_cpu < NR_CPUS))
- prefetch(&per_cpu(page_states, next_cpu));
-
- out = (unsigned long *)ret;
- for (off = 0; off < nr; off++)
- *out++ += *in++;
- }
-}
-
-void get_page_state_node(struct page_state *ret, int node)
-{
- int nr;
- cpumask_t mask = node_to_cpumask(node);
-
- nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
- nr /= sizeof(unsigned long);
-
- __get_page_state(ret, nr+1, &mask);
-}
-
-void get_page_state(struct page_state *ret)
-{
- int nr;
- cpumask_t mask = CPU_MASK_ALL;
-
- nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
- nr /= sizeof(unsigned long);
-
- __get_page_state(ret, nr + 1, &mask);
-}
-
-void get_full_page_state(struct page_state *ret)
-{
- cpumask_t mask = CPU_MASK_ALL;
-
- __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
-}
-
-unsigned long read_page_state_offset(unsigned long offset)
-{
- unsigned long ret = 0;
- int cpu;
-
- for_each_online_cpu(cpu) {
- unsigned long in;
-
- in = (unsigned long)&per_cpu(page_states, cpu) + offset;
- ret += *((unsigned long *)in);
- }
- return ret;
-}
-
-void __mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
- void *ptr;
-
- ptr = &__get_cpu_var(page_states);
- *(unsigned long *)(ptr + offset) += delta;
-}
-EXPORT_SYMBOL(__mod_page_state_offset);
-
-void mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
- unsigned long flags;
- void *ptr;
-
- local_irq_save(flags);
- ptr = &__get_cpu_var(page_states);
- *(unsigned long *)(ptr + offset) += delta;
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL(mod_page_state_offset);
-
-void __get_zone_counts(unsigned long *active, unsigned long *inactive,
- unsigned long *free, struct pglist_data *pgdat)
-{
- struct zone *zones = pgdat->node_zones;
- int i;
-
- *active = 0;
- *inactive = 0;
- *free = 0;
- for (i = 0; i < MAX_NR_ZONES; i++) {
- *active += zones[i].nr_active;
- *inactive += zones[i].nr_inactive;
- *free += zones[i].free_pages;
- }
-}
-
-void get_zone_counts(unsigned long *active,
- unsigned long *inactive, unsigned long *free)
-{
- struct pglist_data *pgdat;
-
- *active = 0;
- *inactive = 0;
- *free = 0;
- for_each_online_pgdat(pgdat) {
- unsigned long l, m, n;
- __get_zone_counts(&l, &m, &n, pgdat);
- *active += l;
- *inactive += m;
- *free += n;
- }
-}
-
void si_meminfo(struct sysinfo *val)
{
val->totalram = totalram_pages;
@@ -1405,7 +1249,6 @@ void si_meminfo_node(struct sysinfo *val, int nid)
*/
void show_free_areas(void)
{
- struct page_state ps;
int cpu, temperature;
unsigned long active;
unsigned long inactive;
@@ -1437,7 +1280,6 @@ void show_free_areas(void)
}
}
- get_page_state(&ps);
get_zone_counts(&active, &inactive, &free);
printk("Free pages: %11ukB (%ukB HighMem)\n",
@@ -1448,13 +1290,13 @@ void show_free_areas(void)
"unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n",
active,
inactive,
- ps.nr_dirty,
- ps.nr_writeback,
- ps.nr_unstable,
+ global_page_state(NR_FILE_DIRTY),
+ global_page_state(NR_WRITEBACK),
+ global_page_state(NR_UNSTABLE_NFS),
nr_free_pages(),
- ps.nr_slab,
- ps.nr_mapped,
- ps.nr_page_table_pages);
+ global_page_state(NR_SLAB),
+ global_page_state(NR_FILE_MAPPED),
+ global_page_state(NR_PAGETABLE));
for_each_zone(zone) {
int i;
@@ -2179,6 +2021,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
zone->nr_scan_inactive = 0;
zone->nr_active = 0;
zone->nr_inactive = 0;
+ zap_zone_vm_stats(zone);
atomic_set(&zone->reclaim_in_progress, 0);
if (!size)
continue;
@@ -2252,307 +2095,18 @@ void __init free_area_init(unsigned long *zones_size)
__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
}
-#ifdef CONFIG_PROC_FS
-
-#include <linux/seq_file.h>
-
-static void *frag_start(struct seq_file *m, loff_t *pos)
-{
- pg_data_t *pgdat;
- loff_t node = *pos;
- for (pgdat = first_online_pgdat();
- pgdat && node;
- pgdat = next_online_pgdat(pgdat))
- --node;
-
- return pgdat;
-}
-
-static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
-{
- pg_data_t *pgdat = (pg_data_t *)arg;
-
- (*pos)++;
- return next_online_pgdat(pgdat);
-}
-
-static void frag_stop(struct seq_file *m, void *arg)
-{
-}
-
-/*
- * This walks the free areas for each zone.
- */
-static int frag_show(struct seq_file *m, void *arg)
-{
- pg_data_t *pgdat = (pg_data_t *)arg;
- struct zone *zone;
- struct zone *node_zones = pgdat->node_zones;
- unsigned long flags;
- int order;
-
- for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
- if (!populated_zone(zone))
- continue;
-
- spin_lock_irqsave(&zone->lock, flags);
- seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
- for (order = 0; order < MAX_ORDER; ++order)
- seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
- spin_unlock_irqrestore(&zone->lock, flags);
- seq_putc(m, '\n');
- }
- return 0;
-}
-
-struct seq_operations fragmentation_op = {
- .start = frag_start,
- .next = frag_next,
- .stop = frag_stop,
- .show = frag_show,
-};
-
-/*
- * Output information about zones in @pgdat.
- */
-static int zoneinfo_show(struct seq_file *m, void *arg)
-{
- pg_data_t *pgdat = arg;
- struct zone *zone;
- struct zone *node_zones = pgdat->node_zones;
- unsigned long flags;
-
- for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
- int i;
-
- if (!populated_zone(zone))
- continue;
-
- spin_lock_irqsave(&zone->lock, flags);
- seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
- seq_printf(m,
- "\n pages free %lu"
- "\n min %lu"
- "\n low %lu"
- "\n high %lu"
- "\n active %lu"
- "\n inactive %lu"
- "\n scanned %lu (a: %lu i: %lu)"
- "\n spanned %lu"
- "\n present %lu",
- zone->free_pages,
- zone->pages_min,
- zone->pages_low,
- zone->pages_high,
- zone->nr_active,
- zone->nr_inactive,
- zone->pages_scanned,
- zone->nr_scan_active, zone->nr_scan_inactive,
- zone->spanned_pages,
- zone->present_pages);
- seq_printf(m,
- "\n protection: (%lu",
- zone->lowmem_reserve[0]);
- for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
- seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
- seq_printf(m,
- ")"
- "\n pagesets");
- for_each_online_cpu(i) {
- struct per_cpu_pageset *pageset;
- int j;
-
- pageset = zone_pcp(zone, i);
- for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
- if (pageset->pcp[j].count)
- break;
- }
- if (j == ARRAY_SIZE(pageset->pcp))
- continue;
- for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
- seq_printf(m,
- "\n cpu: %i pcp: %i"
- "\n count: %i"
- "\n high: %i"
- "\n batch: %i",
- i, j,
- pageset->pcp[j].count,
- pageset->pcp[j].high,
- pageset->pcp[j].batch);
- }
-#ifdef CONFIG_NUMA
- seq_printf(m,
- "\n numa_hit: %lu"
- "\n numa_miss: %lu"
- "\n numa_foreign: %lu"
- "\n interleave_hit: %lu"
- "\n local_node: %lu"
- "\n other_node: %lu",
- pageset->numa_hit,
- pageset->numa_miss,
- pageset->numa_foreign,
- pageset->interleave_hit,
- pageset->local_node,
- pageset->other_node);
-#endif
- }
- seq_printf(m,
- "\n all_unreclaimable: %u"
- "\n prev_priority: %i"
- "\n temp_priority: %i"
- "\n start_pfn: %lu",
- zone->all_unreclaimable,
- zone->prev_priority,
- zone->temp_priority,
- zone->zone_start_pfn);
- spin_unlock_irqrestore(&zone->lock, flags);
- seq_putc(m, '\n');
- }
- return 0;
-}
-
-struct seq_operations zoneinfo_op = {
- .start = frag_start, /* iterate over all zones. The same as in
- * fragmentation. */
- .next = frag_next,
- .stop = frag_stop,
- .show = zoneinfo_show,
-};
-
-static char *vmstat_text[] = {
- "nr_dirty",
- "nr_writeback",
- "nr_unstable",
- "nr_page_table_pages",
- "nr_mapped",
- "nr_slab",
-
- "pgpgin",
- "pgpgout",
- "pswpin",
- "pswpout",
-
- "pgalloc_high",
- "pgalloc_normal",
- "pgalloc_dma32",
- "pgalloc_dma",
-
- "pgfree",
- "pgactivate",
- "pgdeactivate",
-
- "pgfault",
- "pgmajfault",
-
- "pgrefill_high",
- "pgrefill_normal",
- "pgrefill_dma32",
- "pgrefill_dma",
-
- "pgsteal_high",
- "pgsteal_normal",
- "pgsteal_dma32",
- "pgsteal_dma",
-
- "pgscan_kswapd_high",
- "pgscan_kswapd_normal",
- "pgscan_kswapd_dma32",
- "pgscan_kswapd_dma",
-
- "pgscan_direct_high",
- "pgscan_direct_normal",
- "pgscan_direct_dma32",
- "pgscan_direct_dma",
-
- "pginodesteal",
- "slabs_scanned",
- "kswapd_steal",
- "kswapd_inodesteal",
- "pageoutrun",
- "allocstall",
-
- "pgrotated",
- "nr_bounce",
-};
-
-static void *vmstat_start(struct seq_file *m, loff_t *pos)
-{
- struct page_state *ps;
-
- if (*pos >= ARRAY_SIZE(vmstat_text))
- return NULL;
-
- ps = kmalloc(sizeof(*ps), GFP_KERNEL);
- m->private = ps;
- if (!ps)
- return ERR_PTR(-ENOMEM);
- get_full_page_state(ps);
- ps->pgpgin /= 2; /* sectors -> kbytes */
- ps->pgpgout /= 2;
- return (unsigned long *)ps + *pos;
-}
-
-static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
-{
- (*pos)++;
- if (*pos >= ARRAY_SIZE(vmstat_text))
- return NULL;
- return (unsigned long *)m->private + *pos;
-}
-
-static int vmstat_show(struct seq_file *m, void *arg)
-{
- unsigned long *l = arg;
- unsigned long off = l - (unsigned long *)m->private;
-
- seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
- return 0;
-}
-
-static void vmstat_stop(struct seq_file *m, void *arg)
-{
- kfree(m->private);
- m->private = NULL;
-}
-
-struct seq_operations vmstat_op = {
- .start = vmstat_start,
- .next = vmstat_next,
- .stop = vmstat_stop,
- .show = vmstat_show,
-};
-
-#endif /* CONFIG_PROC_FS */
-
#ifdef CONFIG_HOTPLUG_CPU
static int page_alloc_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
int cpu = (unsigned long)hcpu;
- long *count;
- unsigned long *src, *dest;
if (action == CPU_DEAD) {
- int i;
-
- /* Drain local pagecache count. */
- count = &per_cpu(nr_pagecache_local, cpu);
- atomic_add(*count, &nr_pagecache);
- *count = 0;
local_irq_disable();
__drain_pages(cpu);
-
- /* Add dead cpu's page_states to our own. */
- dest = (unsigned long *)&__get_cpu_var(page_states);
- src = (unsigned long *)&per_cpu(page_states, cpu);
-
- for (i = 0; i < sizeof(struct page_state)/sizeof(unsigned long);
- i++) {
- dest[i] += src[i];
- src[i] = 0;
- }
-
+ vm_events_fold_cpu(cpu);
local_irq_enable();
+ refresh_cpu_vm_stats(cpu);
}
return NOTIFY_OK;
}
diff --git a/mm/page_io.c b/mm/page_io.c
index bb2b0d53889..88029948d00 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -101,7 +101,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
}
if (wbc->sync_mode == WB_SYNC_ALL)
rw |= (1 << BIO_RW_SYNC);
- inc_page_state(pswpout);
+ count_vm_event(PSWPOUT);
set_page_writeback(page);
unlock_page(page);
submit_bio(rw, bio);
@@ -123,7 +123,7 @@ int swap_readpage(struct file *file, struct page *page)
ret = -ENOMEM;
goto out;
}
- inc_page_state(pswpin);
+ count_vm_event(PSWPIN);
submit_bio(READ, bio);
out:
return ret;
diff --git a/mm/rmap.c b/mm/rmap.c
index e76909e880c..40158b59729 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -455,7 +455,7 @@ static void __page_set_anon_rmap(struct page *page,
* nr_mapped state can be updated without turning off
* interrupts because it is not modified via interrupt.
*/
- __inc_page_state(nr_mapped);
+ __inc_zone_page_state(page, NR_ANON_PAGES);
}
/**
@@ -499,7 +499,7 @@ void page_add_new_anon_rmap(struct page *page,
void page_add_file_rmap(struct page *page)
{
if (atomic_inc_and_test(&page->_mapcount))
- __inc_page_state(nr_mapped);
+ __inc_zone_page_state(page, NR_FILE_MAPPED);
}
/**
@@ -531,7 +531,8 @@ void page_remove_rmap(struct page *page)
*/
if (page_test_and_clear_dirty(page))
set_page_dirty(page);
- __dec_page_state(nr_mapped);
+ __dec_zone_page_state(page,
+ PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
}
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 83c9fea1e0e..db21c51531c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1044,12 +1044,12 @@ repeat:
swappage = lookup_swap_cache(swap);
if (!swappage) {
shmem_swp_unmap(entry);
- spin_unlock(&info->lock);
/* here we actually do the io */
if (type && *type == VM_FAULT_MINOR) {
- inc_page_state(pgmajfault);
+ __count_vm_event(PGMAJFAULT);
*type = VM_FAULT_MAJOR;
}
+ spin_unlock(&info->lock);
swappage = shmem_swapin(info, swap, idx);
if (!swappage) {
spin_lock(&info->lock);
diff --git a/mm/slab.c b/mm/slab.c
index 233e39d14ca..3936af34454 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -309,6 +309,13 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
#define SIZE_AC 1
#define SIZE_L3 (1 + MAX_NUMNODES)
+static int drain_freelist(struct kmem_cache *cache,
+ struct kmem_list3 *l3, int tofree);
+static void free_block(struct kmem_cache *cachep, void **objpp, int len,
+ int node);
+static void enable_cpucache(struct kmem_cache *cachep);
+static void cache_reap(void *unused);
+
/*
* This function must be completely optimized away if a constant is passed to
* it. Mostly the same as what is in linux/slab.h except it returns an index.
@@ -456,7 +463,7 @@ struct kmem_cache {
#define STATS_DEC_ACTIVE(x) ((x)->num_active--)
#define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
#define STATS_INC_GROWN(x) ((x)->grown++)
-#define STATS_INC_REAPED(x) ((x)->reaped++)
+#define STATS_ADD_REAPED(x,y) ((x)->reaped += (y))
#define STATS_SET_HIGH(x) \
do { \
if ((x)->num_active > (x)->high_mark) \
@@ -480,7 +487,7 @@ struct kmem_cache {
#define STATS_DEC_ACTIVE(x) do { } while (0)
#define STATS_INC_ALLOCED(x) do { } while (0)
#define STATS_INC_GROWN(x) do { } while (0)
-#define STATS_INC_REAPED(x) do { } while (0)
+#define STATS_ADD_REAPED(x,y) do { } while (0)
#define STATS_SET_HIGH(x) do { } while (0)
#define STATS_INC_ERR(x) do { } while (0)
#define STATS_INC_NODEALLOCS(x) do { } while (0)
@@ -700,12 +707,6 @@ int slab_is_available(void)
static DEFINE_PER_CPU(struct work_struct, reap_work);
-static void free_block(struct kmem_cache *cachep, void **objpp, int len,
- int node);
-static void enable_cpucache(struct kmem_cache *cachep);
-static void cache_reap(void *unused);
-static int __node_shrink(struct kmem_cache *cachep, int node);
-
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
return cachep->array[smp_processor_id()];
@@ -1241,10 +1242,7 @@ free_array_cache:
l3 = cachep->nodelists[node];
if (!l3)
continue;
- spin_lock_irq(&l3->list_lock);
- /* free slabs belonging to this node */
- __node_shrink(cachep, node);
- spin_unlock_irq(&l3->list_lock);
+ drain_freelist(cachep, l3, l3->free_objects);
}
mutex_unlock(&cache_chain_mutex);
break;
@@ -1507,7 +1505,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
nr_pages = (1 << cachep->gfporder);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
atomic_add(nr_pages, &slab_reclaim_pages);
- add_page_state(nr_slab, nr_pages);
+ add_zone_page_state(page_zone(page), NR_SLAB, nr_pages);
for (i = 0; i < nr_pages; i++)
__SetPageSlab(page + i);
return page_address(page);
@@ -1522,12 +1520,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
struct page *page = virt_to_page(addr);
const unsigned long nr_freed = i;
+ sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed);
while (i--) {
BUG_ON(!PageSlab(page));
__ClearPageSlab(page);
page++;
}
- sub_page_state(nr_slab, nr_freed);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += nr_freed;
free_pages((unsigned long)addr, cachep->gfporder);
@@ -2248,32 +2246,45 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
}
}
-static int __node_shrink(struct kmem_cache *cachep, int node)
+/*
+ * Remove slabs from the list of free slabs.
+ * Specify the number of slabs to drain in tofree.
+ *
+ * Returns the actual number of slabs released.
+ */
+static int drain_freelist(struct kmem_cache *cache,
+ struct kmem_list3 *l3, int tofree)
{
+ struct list_head *p;
+ int nr_freed;
struct slab *slabp;
- struct kmem_list3 *l3 = cachep->nodelists[node];
- int ret;
- for (;;) {
- struct list_head *p;
+ nr_freed = 0;
+ while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
+ spin_lock_irq(&l3->list_lock);
p = l3->slabs_free.prev;
- if (p == &l3->slabs_free)
- break;
+ if (p == &l3->slabs_free) {
+ spin_unlock_irq(&l3->list_lock);
+ goto out;
+ }
- slabp = list_entry(l3->slabs_free.prev, struct slab, list);
+ slabp = list_entry(p, struct slab, list);
#if DEBUG
BUG_ON(slabp->inuse);
#endif
list_del(&slabp->list);
-
- l3->free_objects -= cachep->num;
+ /*
+ * Safe to drop the lock. The slab is no longer linked
+ * to the cache.
+ */
+ l3->free_objects -= cache->num;
spin_unlock_irq(&l3->list_lock);
- slab_destroy(cachep, slabp);
- spin_lock_irq(&l3->list_lock);
+ slab_destroy(cache, slabp);
+ nr_freed++;
}
- ret = !list_empty(&l3->slabs_full) || !list_empty(&l3->slabs_partial);
- return ret;
+out:
+ return nr_freed;
}
static int __cache_shrink(struct kmem_cache *cachep)
@@ -2286,11 +2297,13 @@ static int __cache_shrink(struct kmem_cache *cachep)
check_irq_on();
for_each_online_node(i) {
l3 = cachep->nodelists[i];
- if (l3) {
- spin_lock_irq(&l3->list_lock);
- ret += __node_shrink(cachep, i);
- spin_unlock_irq(&l3->list_lock);
- }
+ if (!l3)
+ continue;
+
+ drain_freelist(cachep, l3, l3->free_objects);
+
+ ret += !list_empty(&l3->slabs_full) ||
+ !list_empty(&l3->slabs_partial);
}
return (ret ? 1 : 0);
}
@@ -3694,10 +3707,6 @@ static void cache_reap(void *unused)
}
list_for_each_entry(searchp, &cache_chain, next) {
- struct list_head *p;
- int tofree;
- struct slab *slabp;
-
check_irq_on();
/*
@@ -3722,47 +3731,22 @@ static void cache_reap(void *unused)
drain_array(searchp, l3, l3->shared, 0, node);
- if (l3->free_touched) {
+ if (l3->free_touched)
l3->free_touched = 0;
- goto next;
- }
-
- tofree = (l3->free_limit + 5 * searchp->num - 1) /
- (5 * searchp->num);
- do {
- /*
- * Do not lock if there are no free blocks.
- */
- if (list_empty(&l3->slabs_free))
- break;
-
- spin_lock_irq(&l3->list_lock);
- p = l3->slabs_free.next;
- if (p == &(l3->slabs_free)) {
- spin_unlock_irq(&l3->list_lock);
- break;
- }
+ else {
+ int freed;
- slabp = list_entry(p, struct slab, list);
- BUG_ON(slabp->inuse);
- list_del(&slabp->list);
- STATS_INC_REAPED(searchp);
-
- /*
- * Safe to drop the lock. The slab is no longer linked
- * to the cache. searchp cannot disappear, we hold
- * cache_chain_lock
- */
- l3->free_objects -= searchp->num;
- spin_unlock_irq(&l3->list_lock);
- slab_destroy(searchp, slabp);
- } while (--tofree > 0);
+ freed = drain_freelist(searchp, l3, (l3->free_limit +
+ 5 * searchp->num - 1) / (5 * searchp->num));
+ STATS_ADD_REAPED(searchp, freed);
+ }
next:
cond_resched();
}
check_irq_on();
mutex_unlock(&cache_chain_mutex);
next_reap_node();
+ refresh_cpu_vm_stats(smp_processor_id());
/* Set up the next iteration */
schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
}
diff --git a/mm/swap.c b/mm/swap.c
index 990868afc1c..8fd095c4ae5 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -87,7 +87,7 @@ int rotate_reclaimable_page(struct page *page)
spin_lock_irqsave(&zone->lru_lock, flags);
if (PageLRU(page) && !PageActive(page)) {
list_move_tail(&page->lru, &zone->inactive_list);
- inc_page_state(pgrotated);
+ __count_vm_event(PGROTATED);
}
if (!test_clear_page_writeback(page))
BUG();
@@ -107,7 +107,7 @@ void fastcall activate_page(struct page *page)
del_page_from_inactive_list(zone, page);
SetPageActive(page);
add_page_to_active_list(zone, page);
- inc_page_state(pgactivate);
+ __count_vm_event(PGACTIVATE);
}
spin_unlock_irq(&zone->lru_lock);
}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 7535211bb49..fccbd9bba77 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -87,7 +87,7 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
SetPageSwapCache(page);
set_page_private(page, entry.val);
total_swapcache_pages++;
- pagecache_acct(1);
+ __inc_zone_page_state(page, NR_FILE_PAGES);
}
write_unlock_irq(&swapper_space.tree_lock);
radix_tree_preload_end();
@@ -132,7 +132,7 @@ void __delete_from_swap_cache(struct page *page)
set_page_private(page, 0);
ClearPageSwapCache(page);
total_swapcache_pages--;
- pagecache_acct(-1);
+ __dec_zone_page_state(page, NR_FILE_PAGES);
INC_CACHE_INFO(del_total);
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eeacb0d695c..ff2ebe9458a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -47,8 +47,6 @@ struct scan_control {
/* Incremented by the number of inactive pages that were scanned */
unsigned long nr_scanned;
- unsigned long nr_mapped; /* From page_state */
-
/* This context's GFP mask */
gfp_t gfp_mask;
@@ -217,7 +215,7 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
break;
if (shrink_ret < nr_before)
ret += nr_before - shrink_ret;
- mod_page_state(slabs_scanned, this_scan);
+ count_vm_events(SLABS_SCANNED, this_scan);
total_scan -= this_scan;
cond_resched();
@@ -571,7 +569,7 @@ keep:
list_splice(&ret_pages, page_list);
if (pagevec_count(&freed_pvec))
__pagevec_release_nonlru(&freed_pvec);
- mod_page_state(pgactivate, pgactivate);
+ count_vm_events(PGACTIVATE, pgactivate);
return nr_reclaimed;
}
@@ -661,11 +659,11 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
nr_reclaimed += nr_freed;
local_irq_disable();
if (current_is_kswapd()) {
- __mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
- __mod_page_state(kswapd_steal, nr_freed);
+ __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
+ __count_vm_events(KSWAPD_STEAL, nr_freed);
} else
- __mod_page_state_zone(zone, pgscan_direct, nr_scan);
- __mod_page_state_zone(zone, pgsteal, nr_freed);
+ __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
+ __count_vm_events(PGACTIVATE, nr_freed);
if (nr_taken == 0)
goto done;
@@ -744,7 +742,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
* how much memory
* is mapped.
*/
- mapped_ratio = (sc->nr_mapped * 100) / vm_total_pages;
+ mapped_ratio = ((global_page_state(NR_FILE_MAPPED) +
+ global_page_state(NR_ANON_PAGES)) * 100) /
+ vm_total_pages;
/*
* Now decide how much we really want to unmap some pages. The
@@ -841,11 +841,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
}
}
zone->nr_active += pgmoved;
- spin_unlock(&zone->lru_lock);
- __mod_page_state_zone(zone, pgrefill, pgscanned);
- __mod_page_state(pgdeactivate, pgdeactivate);
- local_irq_enable();
+ __count_zone_vm_events(PGREFILL, zone, pgscanned);
+ __count_vm_events(PGDEACTIVATE, pgdeactivate);
+ spin_unlock_irq(&zone->lru_lock);
pagevec_release(&pvec);
}
@@ -977,7 +976,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
.swappiness = vm_swappiness,
};
- inc_page_state(allocstall);
+ count_vm_event(ALLOCSTALL);
for (i = 0; zones[i] != NULL; i++) {
struct zone *zone = zones[i];
@@ -990,7 +989,6 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
}
for (priority = DEF_PRIORITY; priority >= 0; priority--) {
- sc.nr_mapped = read_page_state(nr_mapped);
sc.nr_scanned = 0;
if (!priority)
disable_swap_token();
@@ -1075,9 +1073,7 @@ loop_again:
total_scanned = 0;
nr_reclaimed = 0;
sc.may_writepage = !laptop_mode;
- sc.nr_mapped = read_page_state(nr_mapped);
-
- inc_page_state(pageoutrun);
+ count_vm_event(PAGEOUTRUN);
for (i = 0; i < pgdat->nr_zones; i++) {
struct zone *zone = pgdat->node_zones + i;
@@ -1365,7 +1361,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
for_each_zone(zone)
lru_pages += zone->nr_active + zone->nr_inactive;
- nr_slab = read_page_state(nr_slab);
+ nr_slab = global_page_state(NR_SLAB);
/* If slab caches are huge, it's better to hit them first */
while (nr_slab >= lru_pages) {
reclaim_state.reclaimed_slab = 0;
@@ -1407,9 +1403,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
for (prio = DEF_PRIORITY; prio >= 0; prio--) {
unsigned long nr_to_scan = nr_pages - ret;
- sc.nr_mapped = read_page_state(nr_mapped);
sc.nr_scanned = 0;
-
ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);
if (ret >= nr_pages)
goto out;
@@ -1523,11 +1517,6 @@ int zone_reclaim_mode __read_mostly;
#define RECLAIM_SLAB (1<<3) /* Do a global slab shrink if the zone is out of memory */
/*
- * Mininum time between zone reclaim scans
- */
-int zone_reclaim_interval __read_mostly = 30*HZ;
-
-/*
* Priority for ZONE_RECLAIM. This determines the fraction of pages
* of a node considered for each zone_reclaim. 4 scans 1/16th of
* a zone.
@@ -1548,7 +1537,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
struct scan_control sc = {
.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP),
- .nr_mapped = read_page_state(nr_mapped),
.swap_cluster_max = max_t(unsigned long, nr_pages,
SWAP_CLUSTER_MAX),
.gfp_mask = gfp_mask,
@@ -1593,16 +1581,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
p->reclaim_state = NULL;
current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
-
- if (nr_reclaimed == 0) {
- /*
- * We were unable to reclaim enough pages to stay on node. We
- * now allow off node accesses for a certain time period before
- * trying again to reclaim pages from the local zone.
- */
- zone->last_unsuccessful_zone_reclaim = jiffies;
- }
-
return nr_reclaimed >= nr_pages;
}
@@ -1612,13 +1590,17 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
int node_id;
/*
- * Do not reclaim if there was a recent unsuccessful attempt at zone
- * reclaim. In that case we let allocations go off node for the
- * zone_reclaim_interval. Otherwise we would scan for each off-node
- * page allocation.
+ * Do not reclaim if there are not enough reclaimable pages in this
+ * zone that would satify this allocations.
+ *
+ * All unmapped pagecache pages are reclaimable.
+ *
+ * Both counters may be temporarily off a bit so we use
+ * SWAP_CLUSTER_MAX as the boundary. It may also be good to
+ * leave a few frequently used unmapped pagecache pages around.
*/
- if (time_before(jiffies,
- zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval))
+ if (zone_page_state(zone, NR_FILE_PAGES) -
+ zone_page_state(zone, NR_FILE_MAPPED) < SWAP_CLUSTER_MAX)
return 0;
/*
diff --git a/mm/vmstat.c b/mm/vmstat.c
new file mode 100644
index 00000000000..73b83d67bab
--- /dev/null
+++ b/mm/vmstat.c
@@ -0,0 +1,614 @@
+/*
+ * linux/mm/vmstat.c
+ *
+ * Manages VM statistics
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ *
+ * zoned VM statistics
+ * Copyright (C) 2006 Silicon Graphics, Inc.,
+ * Christoph Lameter <christoph@lameter.com>
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+void __get_zone_counts(unsigned long *active, unsigned long *inactive,
+ unsigned long *free, struct pglist_data *pgdat)
+{
+ struct zone *zones = pgdat->node_zones;
+ int i;
+
+ *active = 0;
+ *inactive = 0;
+ *free = 0;
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ *active += zones[i].nr_active;
+ *inactive += zones[i].nr_inactive;
+ *free += zones[i].free_pages;
+ }
+}
+
+void get_zone_counts(unsigned long *active,
+ unsigned long *inactive, unsigned long *free)
+{
+ struct pglist_data *pgdat;
+
+ *active = 0;
+ *inactive = 0;
+ *free = 0;
+ for_each_online_pgdat(pgdat) {
+ unsigned long l, m, n;
+ __get_zone_counts(&l, &m, &n, pgdat);
+ *active += l;
+ *inactive += m;
+ *free += n;
+ }
+}
+
+#ifdef CONFIG_VM_EVENT_COUNTERS
+DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
+EXPORT_PER_CPU_SYMBOL(vm_event_states);
+
+static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
+{
+ int cpu = 0;
+ int i;
+
+ memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
+
+ cpu = first_cpu(*cpumask);
+ while (cpu < NR_CPUS) {
+ struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
+
+ cpu = next_cpu(cpu, *cpumask);
+
+ if (cpu < NR_CPUS)
+ prefetch(&per_cpu(vm_event_states, cpu));
+
+
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+ ret[i] += this->event[i];
+ }
+}
+
+/*
+ * Accumulate the vm event counters across all CPUs.
+ * The result is unavoidably approximate - it can change
+ * during and after execution of this function.
+*/
+void all_vm_events(unsigned long *ret)
+{
+ sum_vm_events(ret, &cpu_online_map);
+}
+
+#ifdef CONFIG_HOTPLUG
+/*
+ * Fold the foreign cpu events into our own.
+ *
+ * This is adding to the events on one processor
+ * but keeps the global counts constant.
+ */
+void vm_events_fold_cpu(int cpu)
+{
+ struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
+ int i;
+
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
+ count_vm_events(i, fold_state->event[i]);
+ fold_state->event[i] = 0;
+ }
+}
+#endif /* CONFIG_HOTPLUG */
+
+#endif /* CONFIG_VM_EVENT_COUNTERS */
+
+/*
+ * Manage combined zone based / global counters
+ *
+ * vm_stat contains the global counters
+ */
+atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
+EXPORT_SYMBOL(vm_stat);
+
+#ifdef CONFIG_SMP
+
+#define STAT_THRESHOLD 32
+
+/*
+ * Determine pointer to currently valid differential byte given a zone and
+ * the item number.
+ *
+ * Preemption must be off
+ */
+static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item)
+{
+ return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item];
+}
+
+/*
+ * For use when we know that interrupts are disabled.
+ */
+void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+ int delta)
+{
+ s8 *p;
+ long x;
+
+ p = diff_pointer(zone, item);
+ x = delta + *p;
+
+ if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) {
+ zone_page_state_add(x, zone, item);
+ x = 0;
+ }
+
+ *p = x;
+}
+EXPORT_SYMBOL(__mod_zone_page_state);
+
+/*
+ * For an unknown interrupt state
+ */
+void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+ int delta)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __mod_zone_page_state(zone, item, delta);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(mod_zone_page_state);
+
+/*
+ * Optimized increment and decrement functions.
+ *
+ * These are only for a single page and therefore can take a struct page *
+ * argument instead of struct zone *. This allows the inclusion of the code
+ * generated for page_zone(page) into the optimized functions.
+ *
+ * No overflow check is necessary and therefore the differential can be
+ * incremented or decremented in place which may allow the compilers to
+ * generate better code.
+ *
+ * The increment or decrement is known and therefore one boundary check can
+ * be omitted.
+ *
+ * Some processors have inc/dec instructions that are atomic vs an interrupt.
+ * However, the code must first determine the differential location in a zone
+ * based on the processor number and then inc/dec the counter. There is no
+ * guarantee without disabling preemption that the processor will not change
+ * in between and therefore the atomicity vs. interrupt cannot be exploited
+ * in a useful way here.
+ */
+static void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
+{
+ s8 *p = diff_pointer(zone, item);
+
+ (*p)++;
+
+ if (unlikely(*p > STAT_THRESHOLD)) {
+ zone_page_state_add(*p, zone, item);
+ *p = 0;
+ }
+}
+
+void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+ __inc_zone_state(page_zone(page), item);
+}
+EXPORT_SYMBOL(__inc_zone_page_state);
+
+void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+ struct zone *zone = page_zone(page);
+ s8 *p = diff_pointer(zone, item);
+
+ (*p)--;
+
+ if (unlikely(*p < -STAT_THRESHOLD)) {
+ zone_page_state_add(*p, zone, item);
+ *p = 0;
+ }
+}
+EXPORT_SYMBOL(__dec_zone_page_state);
+
+void inc_zone_state(struct zone *zone, enum zone_stat_item item)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __inc_zone_state(zone, item);
+ local_irq_restore(flags);
+}
+
+void inc_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+ unsigned long flags;
+ struct zone *zone;
+
+ zone = page_zone(page);
+ local_irq_save(flags);
+ __inc_zone_state(zone, item);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(inc_zone_page_state);
+
+void dec_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+ unsigned long flags;
+ struct zone *zone;
+ s8 *p;
+
+ zone = page_zone(page);
+ local_irq_save(flags);
+ p = diff_pointer(zone, item);
+
+ (*p)--;
+
+ if (unlikely(*p < -STAT_THRESHOLD)) {
+ zone_page_state_add(*p, zone, item);
+ *p = 0;
+ }
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(dec_zone_page_state);
+
+/*
+ * Update the zone counters for one cpu.
+ */
+void refresh_cpu_vm_stats(int cpu)
+{
+ struct zone *zone;
+ int i;
+ unsigned long flags;
+
+ for_each_zone(zone) {
+ struct per_cpu_pageset *pcp;
+
+ pcp = zone_pcp(zone, cpu);
+
+ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+ if (pcp->vm_stat_diff[i]) {
+ local_irq_save(flags);
+ zone_page_state_add(pcp->vm_stat_diff[i],
+ zone, i);
+ pcp->vm_stat_diff[i] = 0;
+ local_irq_restore(flags);
+ }
+ }
+}
+
+static void __refresh_cpu_vm_stats(void *dummy)
+{
+ refresh_cpu_vm_stats(smp_processor_id());
+}
+
+/*
+ * Consolidate all counters.
+ *
+ * Note that the result is less inaccurate but still inaccurate
+ * if concurrent processes are allowed to run.
+ */
+void refresh_vm_stats(void)
+{
+ on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1);
+}
+EXPORT_SYMBOL(refresh_vm_stats);
+
+#endif
+
+#ifdef CONFIG_NUMA
+/*
+ * zonelist = the list of zones passed to the allocator
+ * z = the zone from which the allocation occurred.
+ *
+ * Must be called with interrupts disabled.
+ */
+void zone_statistics(struct zonelist *zonelist, struct zone *z)
+{
+ if (z->zone_pgdat == zonelist->zones[0]->zone_pgdat) {
+ __inc_zone_state(z, NUMA_HIT);
+ } else {
+ __inc_zone_state(z, NUMA_MISS);
+ __inc_zone_state(zonelist->zones[0], NUMA_FOREIGN);
+ }
+ if (z->zone_pgdat == NODE_DATA(numa_node_id()))
+ __inc_zone_state(z, NUMA_LOCAL);
+ else
+ __inc_zone_state(z, NUMA_OTHER);
+}
+#endif
+
+#ifdef CONFIG_PROC_FS
+
+#include <linux/seq_file.h>
+
+static void *frag_start(struct seq_file *m, loff_t *pos)
+{
+ pg_data_t *pgdat;
+ loff_t node = *pos;
+ for (pgdat = first_online_pgdat();
+ pgdat && node;
+ pgdat = next_online_pgdat(pgdat))
+ --node;
+
+ return pgdat;
+}
+
+static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
+{
+ pg_data_t *pgdat = (pg_data_t *)arg;
+
+ (*pos)++;
+ return next_online_pgdat(pgdat);
+}
+
+static void frag_stop(struct seq_file *m, void *arg)
+{
+}
+
+/*
+ * This walks the free areas for each zone.
+ */
+static int frag_show(struct seq_file *m, void *arg)
+{
+ pg_data_t *pgdat = (pg_data_t *)arg;
+ struct zone *zone;
+ struct zone *node_zones = pgdat->node_zones;
+ unsigned long flags;
+ int order;
+
+ for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
+ if (!populated_zone(zone))
+ continue;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
+ for (order = 0; order < MAX_ORDER; ++order)
+ seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
+ spin_unlock_irqrestore(&zone->lock, flags);
+ seq_putc(m, '\n');
+ }
+ return 0;
+}
+
+struct seq_operations fragmentation_op = {
+ .start = frag_start,
+ .next = frag_next,
+ .stop = frag_stop,
+ .show = frag_show,
+};
+
+static char *vmstat_text[] = {
+ /* Zoned VM counters */
+ "nr_anon_pages",
+ "nr_mapped",
+ "nr_file_pages",
+ "nr_slab",
+ "nr_page_table_pages",
+ "nr_dirty",
+ "nr_writeback",
+ "nr_unstable",
+ "nr_bounce",
+
+#ifdef CONFIG_NUMA
+ "numa_hit",
+ "numa_miss",
+ "numa_foreign",
+ "numa_interleave",
+ "numa_local",
+ "numa_other",
+#endif
+
+#ifdef CONFIG_VM_EVENT_COUNTERS
+ "pgpgin",
+ "pgpgout",
+ "pswpin",
+ "pswpout",
+
+ "pgalloc_dma",
+ "pgalloc_dma32",
+ "pgalloc_normal",
+ "pgalloc_high",
+
+ "pgfree",
+ "pgactivate",
+ "pgdeactivate",
+
+ "pgfault",
+ "pgmajfault",
+
+ "pgrefill_dma",
+ "pgrefill_dma32",
+ "pgrefill_normal",
+ "pgrefill_high",
+
+ "pgsteal_dma",
+ "pgsteal_dma32",
+ "pgsteal_normal",
+ "pgsteal_high",
+
+ "pgscan_kswapd_dma",
+ "pgscan_kswapd_dma32",
+ "pgscan_kswapd_normal",
+ "pgscan_kswapd_high",
+
+ "pgscan_direct_dma",
+ "pgscan_direct_dma32",
+ "pgscan_direct_normal",
+ "pgscan_direct_high",
+
+ "pginodesteal",
+ "slabs_scanned",
+ "kswapd_steal",
+ "kswapd_inodesteal",
+ "pageoutrun",
+ "allocstall",
+
+ "pgrotated",
+#endif
+};
+
+/*
+ * Output information about zones in @pgdat.
+ */
+static int zoneinfo_show(struct seq_file *m, void *arg)
+{
+ pg_data_t *pgdat = arg;
+ struct zone *zone;
+ struct zone *node_zones = pgdat->node_zones;
+ unsigned long flags;
+
+ for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
+ int i;
+
+ if (!populated_zone(zone))
+ continue;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
+ seq_printf(m,
+ "\n pages free %lu"
+ "\n min %lu"
+ "\n low %lu"
+ "\n high %lu"
+ "\n active %lu"
+ "\n inactive %lu"
+ "\n scanned %lu (a: %lu i: %lu)"
+ "\n spanned %lu"
+ "\n present %lu",
+ zone->free_pages,
+ zone->pages_min,
+ zone->pages_low,
+ zone->pages_high,
+ zone->nr_active,
+ zone->nr_inactive,
+ zone->pages_scanned,
+ zone->nr_scan_active, zone->nr_scan_inactive,
+ zone->spanned_pages,
+ zone->present_pages);
+
+ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+ seq_printf(m, "\n %-12s %lu", vmstat_text[i],
+ zone_page_state(zone, i));
+
+ seq_printf(m,
+ "\n protection: (%lu",
+ zone->lowmem_reserve[0]);
+ for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
+ seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
+ seq_printf(m,
+ ")"
+ "\n pagesets");
+ for_each_online_cpu(i) {
+ struct per_cpu_pageset *pageset;
+ int j;
+
+ pageset = zone_pcp(zone, i);
+ for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
+ if (pageset->pcp[j].count)
+ break;
+ }
+ if (j == ARRAY_SIZE(pageset->pcp))
+ continue;
+ for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
+ seq_printf(m,
+ "\n cpu: %i pcp: %i"
+ "\n count: %i"
+ "\n high: %i"
+ "\n batch: %i",
+ i, j,
+ pageset->pcp[j].count,
+ pageset->pcp[j].high,
+ pageset->pcp[j].batch);
+ }
+ }
+ seq_printf(m,
+ "\n all_unreclaimable: %u"
+ "\n prev_priority: %i"
+ "\n temp_priority: %i"
+ "\n start_pfn: %lu",
+ zone->all_unreclaimable,
+ zone->prev_priority,
+ zone->temp_priority,
+ zone->zone_start_pfn);
+ spin_unlock_irqrestore(&zone->lock, flags);
+ seq_putc(m, '\n');
+ }
+ return 0;
+}
+
+struct seq_operations zoneinfo_op = {
+ .start = frag_start, /* iterate over all zones. The same as in
+ * fragmentation. */
+ .next = frag_next,
+ .stop = frag_stop,
+ .show = zoneinfo_show,
+};
+
+static void *vmstat_start(struct seq_file *m, loff_t *pos)
+{
+ unsigned long *v;
+#ifdef CONFIG_VM_EVENT_COUNTERS
+ unsigned long *e;
+#endif
+ int i;
+
+ if (*pos >= ARRAY_SIZE(vmstat_text))
+ return NULL;
+
+#ifdef CONFIG_VM_EVENT_COUNTERS
+ v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
+ + sizeof(struct vm_event_state), GFP_KERNEL);
+#else
+ v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
+ GFP_KERNEL);
+#endif
+ m->private = v;
+ if (!v)
+ return ERR_PTR(-ENOMEM);
+ for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+ v[i] = global_page_state(i);
+#ifdef CONFIG_VM_EVENT_COUNTERS
+ e = v + NR_VM_ZONE_STAT_ITEMS;
+ all_vm_events(e);
+ e[PGPGIN] /= 2; /* sectors -> kbytes */
+ e[PGPGOUT] /= 2;
+#endif
+ return v + *pos;
+}
+
+static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
+{
+ (*pos)++;
+ if (*pos >= ARRAY_SIZE(vmstat_text))
+ return NULL;
+ return (unsigned long *)m->private + *pos;
+}
+
+static int vmstat_show(struct seq_file *m, void *arg)
+{
+ unsigned long *l = arg;
+ unsigned long off = l - (unsigned long *)m->private;
+
+ seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
+ return 0;
+}
+
+static void vmstat_stop(struct seq_file *m, void *arg)
+{
+ kfree(m->private);
+ m->private = NULL;
+}
+
+struct seq_operations vmstat_op = {
+ .start = vmstat_start,
+ .next = vmstat_next,
+ .stop = vmstat_stop,
+ .show = vmstat_show,
+};
+
+#endif /* CONFIG_PROC_FS */
+