author | Paul Mackerras <paulus@samba.org> | 2007-05-08 13:37:51 +1000
committer | Paul Mackerras <paulus@samba.org> | 2007-05-08 13:37:51 +1000
commit | 02bbc0f09c90cefdb2837605c96a66c5ce4ba2e1 (patch)
tree | 04ef573cd4de095c500c9fc3477f4278c0b36300 /mm
parent | 7487a2245b8841c77ba9db406cf99a483b9334e9 (diff)
parent | 5b94f675f57e4ff16c8fda09088d7480a84dcd91 (diff)
Merge branch 'linux-2.6'
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 5
-rw-r--r-- | mm/Makefile | 3
-rw-r--r-- | mm/filemap.c | 85
-rw-r--r-- | mm/highmem.c | 9
-rw-r--r-- | mm/internal.h | 2
-rw-r--r-- | mm/madvise.c | 33
-rw-r--r-- | mm/memory.c | 106
-rw-r--r-- | mm/mmap.c | 47
-rw-r--r-- | mm/oom_kill.c | 17
-rw-r--r-- | mm/page-writeback.c | 50
-rw-r--r-- | mm/page_alloc.c | 50
-rw-r--r-- | mm/quicklist.c | 88
-rw-r--r-- | mm/readahead.c | 29
-rw-r--r-- | mm/rmap.c | 3
-rw-r--r-- | mm/shmem.c | 3
-rw-r--r-- | mm/slab.c | 200
-rw-r--r-- | mm/slob.c | 57
-rw-r--r-- | mm/slub.c | 3520
-rw-r--r-- | mm/sparse.c | 2
-rw-r--r-- | mm/swap.c | 2
-rw-r--r-- | mm/swapfile.c | 3
-rw-r--r-- | mm/vmalloc.c | 14
-rw-r--r-- | mm/vmscan.c | 13
23 files changed, 4100 insertions, 241 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 7942b333e46..1ac718f636e 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -163,3 +163,8 @@ config ZONE_DMA_FLAG default "0" if !ZONE_DMA default "1" +config NR_QUICK + int + depends on QUICKLIST + default "1" + diff --git a/mm/Makefile b/mm/Makefile index f3c077eb0b8..a9148ea329a 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -25,7 +25,10 @@ obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_SLAB) += slab.o +obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_SMP) += allocpercpu.o +obj-$(CONFIG_QUICKLIST) += quicklist.o + diff --git a/mm/filemap.c b/mm/filemap.c index 5dfc093ceb3..5631d6b2a62 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -868,6 +868,7 @@ void do_generic_mapping_read(struct address_space *mapping, unsigned long last_index; unsigned long next_index; unsigned long prev_index; + unsigned int prev_offset; loff_t isize; struct page *cached_page; int error; @@ -876,7 +877,8 @@ void do_generic_mapping_read(struct address_space *mapping, cached_page = NULL; index = *ppos >> PAGE_CACHE_SHIFT; next_index = index; - prev_index = ra.prev_page; + prev_index = ra.prev_index; + prev_offset = ra.prev_offset; last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; offset = *ppos & ~PAGE_CACHE_MASK; @@ -924,10 +926,10 @@ page_ok: flush_dcache_page(page); /* - * When (part of) the same page is read multiple times - * in succession, only mark it as accessed the first time. + * When a sequential read accesses a page several times, + * only mark it as accessed the first time. */ - if (prev_index != index) + if (prev_index != index || offset != prev_offset) mark_page_accessed(page); prev_index = index; @@ -945,6 +947,8 @@ page_ok: offset += ret; index += offset >> PAGE_CACHE_SHIFT; offset &= ~PAGE_CACHE_MASK; + prev_offset = offset; + ra.prev_offset = offset; page_cache_release(page); if (ret == nr && desc->count) @@ -1446,30 +1450,6 @@ page_not_uptodate: majmin = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); } - lock_page(page); - - /* Did it get unhashed while we waited for it? */ - if (!page->mapping) { - unlock_page(page); - page_cache_release(page); - goto retry_all; - } - - /* Did somebody else get it up-to-date? */ - if (PageUptodate(page)) { - unlock_page(page); - goto success; - } - - error = mapping->a_ops->readpage(file, page); - if (!error) { - wait_on_page_locked(page); - if (PageUptodate(page)) - goto success; - } else if (error == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto retry_find; - } /* * Umm, take care of errors if the page isn't up-to-date. @@ -1726,7 +1706,7 @@ int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma) EXPORT_SYMBOL(generic_file_mmap); EXPORT_SYMBOL(generic_file_readonly_mmap); -static inline struct page *__read_cache_page(struct address_space *mapping, +static struct page *__read_cache_page(struct address_space *mapping, unsigned long index, int (*filler)(void *,struct page*), void *data) @@ -1763,17 +1743,11 @@ repeat: return page; } -/** - * read_cache_page - read into page cache, fill it if needed - * @mapping: the page's address_space - * @index: the page index - * @filler: function to perform the read - * @data: destination for read data - * - * Read into the page cache. If a page already exists, - * and PageUptodate() is not set, try to fill the page. 
+/* + * Same as read_cache_page, but don't wait for page to become unlocked + * after submitting it to the filler. */ -struct page *read_cache_page(struct address_space *mapping, +struct page *read_cache_page_async(struct address_space *mapping, unsigned long index, int (*filler)(void *,struct page*), void *data) @@ -1805,6 +1779,39 @@ retry: page = ERR_PTR(err); } out: + mark_page_accessed(page); + return page; +} +EXPORT_SYMBOL(read_cache_page_async); + +/** + * read_cache_page - read into page cache, fill it if needed + * @mapping: the page's address_space + * @index: the page index + * @filler: function to perform the read + * @data: destination for read data + * + * Read into the page cache. If a page already exists, and PageUptodate() is + * not set, try to fill the page then wait for it to become unlocked. + * + * If the page does not get brought uptodate, return -EIO. + */ +struct page *read_cache_page(struct address_space *mapping, + unsigned long index, + int (*filler)(void *,struct page*), + void *data) +{ + struct page *page; + + page = read_cache_page_async(mapping, index, filler, data); + if (IS_ERR(page)) + goto out; + wait_on_page_locked(page); + if (!PageUptodate(page)) { + page_cache_release(page); + page = ERR_PTR(-EIO); + } + out: return page; } EXPORT_SYMBOL(read_cache_page); diff --git a/mm/highmem.c b/mm/highmem.c index 51e1c1995fe..be8f8d36a8b 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -99,6 +99,15 @@ static void flush_all_zero_pkmaps(void) flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP)); } +/* Flush all unused kmap mappings in order to remove stray + mappings. */ +void kmap_flush_unused(void) +{ + spin_lock(&kmap_lock); + flush_all_zero_pkmaps(); + spin_unlock(&kmap_lock); +} + static inline unsigned long map_new_virtual(struct page *page) { unsigned long vaddr; diff --git a/mm/internal.h b/mm/internal.h index d527b80b292..a3110c02aea 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -24,7 +24,7 @@ static inline void set_page_count(struct page *page, int v) */ static inline void set_page_refcounted(struct page *page) { - VM_BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page); + VM_BUG_ON(PageCompound(page) && PageTail(page)); VM_BUG_ON(atomic_read(&page->_count)); set_page_count(page, 1); } diff --git a/mm/madvise.c b/mm/madvise.c index 603c5257ed6..e75096b5a6d 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -12,6 +12,24 @@ #include <linux/hugetlb.h> /* + * Any behaviour which results in changes to the vma->vm_flags needs to + * take mmap_sem for writing. Others, which simply traverse vmas, need + * to only take it for reading. + */ +static int madvise_need_mmap_write(int behavior) +{ + switch (behavior) { + case MADV_REMOVE: + case MADV_WILLNEED: + case MADV_DONTNEED: + return 0; + default: + /* be safe, default to 1. list exceptions explicitly */ + return 1; + } +} + +/* * We can potentially split a vm area into separate * areas, each area with its own behavior. 
*/ @@ -183,9 +201,9 @@ static long madvise_remove(struct vm_area_struct *vma, + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); /* vmtruncate_range needs to take i_mutex and i_alloc_sem */ - up_write(¤t->mm->mmap_sem); + up_read(¤t->mm->mmap_sem); error = vmtruncate_range(mapping->host, offset, endoff); - down_write(¤t->mm->mmap_sem); + down_read(¤t->mm->mmap_sem); return error; } @@ -270,7 +288,10 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior) int error = -EINVAL; size_t len; - down_write(¤t->mm->mmap_sem); + if (madvise_need_mmap_write(behavior)) + down_write(¤t->mm->mmap_sem); + else + down_read(¤t->mm->mmap_sem); if (start & ~PAGE_MASK) goto out; @@ -332,6 +353,10 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior) vma = find_vma(current->mm, start); } out: - up_write(¤t->mm->mmap_sem); + if (madvise_need_mmap_write(behavior)) + up_write(¤t->mm->mmap_sem); + else + up_read(¤t->mm->mmap_sem); + return error; } diff --git a/mm/memory.c b/mm/memory.c index e7066e71dfa..1d647ab0ee7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1448,6 +1448,100 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL(remap_pfn_range); +static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, unsigned long end, + pte_fn_t fn, void *data) +{ + pte_t *pte; + int err; + struct page *pmd_page; + spinlock_t *uninitialized_var(ptl); + + pte = (mm == &init_mm) ? + pte_alloc_kernel(pmd, addr) : + pte_alloc_map_lock(mm, pmd, addr, &ptl); + if (!pte) + return -ENOMEM; + + BUG_ON(pmd_huge(*pmd)); + + pmd_page = pmd_page(*pmd); + + do { + err = fn(pte, pmd_page, addr, data); + if (err) + break; + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (mm != &init_mm) + pte_unmap_unlock(pte-1, ptl); + return err; +} + +static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end, + pte_fn_t fn, void *data) +{ + pmd_t *pmd; + unsigned long next; + int err; + + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return -ENOMEM; + do { + next = pmd_addr_end(addr, end); + err = apply_to_pte_range(mm, pmd, addr, next, fn, data); + if (err) + break; + } while (pmd++, addr = next, addr != end); + return err; +} + +static int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + pte_fn_t fn, void *data) +{ + pud_t *pud; + unsigned long next; + int err; + + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return -ENOMEM; + do { + next = pud_addr_end(addr, end); + err = apply_to_pmd_range(mm, pud, addr, next, fn, data); + if (err) + break; + } while (pud++, addr = next, addr != end); + return err; +} + +/* + * Scan a region of virtual memory, filling in page tables as necessary + * and calling a provided function on each leaf page table. + */ +int apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + pgd_t *pgd; + unsigned long next; + unsigned long end = addr + size; + int err; + + BUG_ON(addr >= end); + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end); + err = apply_to_pud_range(mm, pgd, addr, next, fn, data); + if (err) + break; + } while (pgd++, addr = next, addr != end); + return err; +} +EXPORT_SYMBOL_GPL(apply_to_page_range); + /* * handle_pte_fault chooses page fault handler according to an entry * which was read non-atomically. 
Before making any commitment, on @@ -2539,12 +2633,6 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) spin_unlock(&mm->page_table_lock); return 0; } -#else -/* Workaround for gcc 2.96 */ -int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) -{ - return 0; -} #endif /* __PAGETABLE_PUD_FOLDED */ #ifndef __PAGETABLE_PMD_FOLDED @@ -2573,12 +2661,6 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) spin_unlock(&mm->page_table_lock); return 0; } -#else -/* Workaround for gcc 2.96 */ -int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) -{ - return 0; -} #endif /* __PAGETABLE_PMD_FOLDED */ int make_pages_present(unsigned long addr, unsigned long end) diff --git a/mm/mmap.c b/mm/mmap.c index 84f997da78d..52646d61ff6 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -29,6 +29,7 @@ #include <asm/uaccess.h> #include <asm/cacheflush.h> #include <asm/tlb.h> +#include <asm/mmu_context.h> #ifndef arch_mmap_check #define arch_mmap_check(addr, len, flags) (0) @@ -1199,6 +1200,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, if (len > TASK_SIZE) return -ENOMEM; + if (flags & MAP_FIXED) + return addr; + if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); @@ -1272,6 +1276,9 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (len > TASK_SIZE) return -ENOMEM; + if (flags & MAP_FIXED) + return addr; + /* requesting a specific address */ if (addr) { addr = PAGE_ALIGN(addr); @@ -1360,38 +1367,21 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { unsigned long ret; - - if (!(flags & MAP_FIXED)) { - unsigned long (*get_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); - - get_area = current->mm->get_unmapped_area; - if (file && file->f_op && file->f_op->get_unmapped_area) - get_area = file->f_op->get_unmapped_area; - addr = get_area(file, addr, len, pgoff, flags); - if (IS_ERR_VALUE(addr)) - return addr; - } + unsigned long (*get_area)(struct file *, unsigned long, + unsigned long, unsigned long, unsigned long); + + get_area = current->mm->get_unmapped_area; + if (file && file->f_op && file->f_op->get_unmapped_area) + get_area = file->f_op->get_unmapped_area; + addr = get_area(file, addr, len, pgoff, flags); + if (IS_ERR_VALUE(addr)) + return addr; if (addr > TASK_SIZE - len) return -ENOMEM; if (addr & ~PAGE_MASK) return -EINVAL; - if (file && is_file_hugepages(file)) { - /* - * Check if the given range is hugepage aligned, and - * can be made suitable for hugepages. - */ - ret = prepare_hugepage_range(addr, len, pgoff); - } else { - /* - * Ensure that a normal request is not falling in a - * reserved hugepage range. For some archs like IA-64, - * there is a separate region for hugepages. - */ - ret = is_hugepage_only_range(current->mm, addr, len); - } - if (ret) - return -EINVAL; + return addr; } @@ -1979,6 +1969,9 @@ void exit_mmap(struct mm_struct *mm) unsigned long nr_accounted = 0; unsigned long end; + /* mm's last user has gone, and its about to be pulled down */ + arch_exit_mmap(mm); + lru_add_drain(); flush_cache_mm(mm); tlb = tlb_gather_mmu(mm, 1); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 3791edfffee..a7001410ab1 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -147,9 +147,11 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) * Adjust the score by oomkilladj. 
*/ if (p->oomkilladj) { - if (p->oomkilladj > 0) + if (p->oomkilladj > 0) { + if (!points) + points = 1; points <<= p->oomkilladj; - else + } else points >>= -(p->oomkilladj); } @@ -397,6 +399,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) struct task_struct *p; unsigned long points = 0; unsigned long freed = 0; + int constraint; blocking_notifier_call_chain(&oom_notify_list, 0, &freed); if (freed > 0) @@ -411,14 +414,18 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) show_mem(); } - cpuset_lock(); - read_lock(&tasklist_lock); + if (sysctl_panic_on_oom == 2) + panic("out of memory. Compulsory panic_on_oom is selected.\n"); /* * Check if there were limitations on the allocation (only relevant for * NUMA) that may require different handling. */ - switch (constrained_alloc(zonelist, gfp_mask)) { + constraint = constrained_alloc(zonelist, gfp_mask); + cpuset_lock(); + read_lock(&tasklist_lock); + + switch (constraint) { case CONSTRAINT_MEMORY_POLICY: oom_kill_process(current, points, "No available memory (MPOL_BIND)"); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a794945fd19..029dfad5a23 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -119,6 +119,44 @@ static void background_writeout(unsigned long _min_pages); * We make sure that the background writeout level is below the adjusted * clamping level. */ + +static unsigned long highmem_dirtyable_memory(unsigned long total) +{ +#ifdef CONFIG_HIGHMEM + int node; + unsigned long x = 0; + + for_each_online_node(node) { + struct zone *z = + &NODE_DATA(node)->node_zones[ZONE_HIGHMEM]; + + x += zone_page_state(z, NR_FREE_PAGES) + + zone_page_state(z, NR_INACTIVE) + + zone_page_state(z, NR_ACTIVE); + } + /* + * Make sure that the number of highmem pages is never larger + * than the number of the total dirtyable memory. This can only + * occur in very strange VM situations but we want to make sure + * that this does not occur. + */ + return min(x, total); +#else + return 0; +#endif +} + +static unsigned long determine_dirtyable_memory(void) +{ + unsigned long x; + + x = global_page_state(NR_FREE_PAGES) + + global_page_state(NR_INACTIVE) + + global_page_state(NR_ACTIVE); + x -= highmem_dirtyable_memory(x); + return x + 1; /* Ensure that we never return 0 */ +} + static void get_dirty_limits(long *pbackground, long *pdirty, struct address_space *mapping) @@ -128,20 +166,12 @@ get_dirty_limits(long *pbackground, long *pdirty, int unmapped_ratio; long background; long dirty; - unsigned long available_memory = vm_total_pages; + unsigned long available_memory = determine_dirtyable_memory(); struct task_struct *tsk; -#ifdef CONFIG_HIGHMEM - /* - * We always exclude high memory from our count. 
- */ - available_memory -= totalhigh_pages; -#endif - - unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) + global_page_state(NR_ANON_PAGES)) * 100) / - vm_total_pages; + available_memory; dirty_ratio = vm_dirty_ratio; if (dirty_ratio > unmapped_ratio / 2) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 353ce9039a8..59164313167 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -156,10 +156,8 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page) static int page_is_consistent(struct zone *zone, struct page *page) { -#ifdef CONFIG_HOLES_IN_ZONE - if (!pfn_valid(page_to_pfn(page))) + if (!pfn_valid_within(page_to_pfn(page))) return 0; -#endif if (zone != page_zone(page)) return 0; @@ -227,7 +225,7 @@ static void bad_page(struct page *page) static void free_compound_page(struct page *page) { - __free_pages_ok(page, (unsigned long)page[1].lru.prev); + __free_pages_ok(page, compound_order(page)); } static void prep_compound_page(struct page *page, unsigned long order) @@ -236,12 +234,13 @@ static void prep_compound_page(struct page *page, unsigned long order) int nr_pages = 1 << order; set_compound_page_dtor(page, free_compound_page); - page[1].lru.prev = (void *)order; - for (i = 0; i < nr_pages; i++) { + set_compound_order(page, order); + __SetPageHead(page); + for (i = 1; i < nr_pages; i++) { struct page *p = page + i; - __SetPageCompound(p); - set_page_private(p, (unsigned long)page); + __SetPageTail(p); + p->first_page = page; } } @@ -250,16 +249,19 @@ static void destroy_compound_page(struct page *page, unsigned long order) int i; int nr_pages = 1 << order; - if (unlikely((unsigned long)page[1].lru.prev != order)) + if (unlikely(compound_order(page) != order)) bad_page(page); - for (i = 0; i < nr_pages; i++) { + if (unlikely(!PageHead(page))) + bad_page(page); + __ClearPageHead(page); + for (i = 1; i < nr_pages; i++) { struct page *p = page + i; - if (unlikely(!PageCompound(p) | - (page_private(p) != (unsigned long)page))) + if (unlikely(!PageTail(p) | + (p->first_page != page))) bad_page(page); - __ClearPageCompound(p); + __ClearPageTail(p); } } @@ -346,10 +348,8 @@ __find_combined_index(unsigned long page_idx, unsigned int order) static inline int page_is_buddy(struct page *page, struct page *buddy, int order) { -#ifdef CONFIG_HOLES_IN_ZONE - if (!pfn_valid(page_to_pfn(buddy))) + if (!pfn_valid_within(page_to_pfn(buddy))) return 0; -#endif if (page_zone_id(page) != page_zone_id(buddy)) return 0; @@ -433,13 +433,18 @@ static inline int free_pages_check(struct page *page) 1 << PG_private | 1 << PG_locked | 1 << PG_active | - 1 << PG_reclaim | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_writeback | 1 << PG_reserved | 1 << PG_buddy )))) bad_page(page); + /* + * PageReclaim == PageTail. It is only an error + * for PageReclaim to be set if PageCompound is clear. 
+ */ + if (unlikely(!PageCompound(page) && PageReclaim(page))) + bad_page(page); if (PageDirty(page)) __ClearPageDirty(page); /* @@ -665,7 +670,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, } #if MAX_NUMNODES > 1 -int nr_node_ids __read_mostly; +int nr_node_ids __read_mostly = MAX_NUMNODES; EXPORT_SYMBOL(nr_node_ids); /* @@ -770,8 +775,8 @@ void mark_free_pages(struct zone *zone) if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); - if (!PageNosave(page)) - ClearPageNosaveFree(page); + if (!swsusp_page_is_forbidden(page)) + swsusp_unset_page_free(page); } for (order = MAX_ORDER - 1; order >= 0; --order) @@ -780,7 +785,7 @@ void mark_free_pages(struct zone *zone) pfn = page_to_pfn(list_entry(curr, struct page, lru)); for (i = 0; i < (1UL << order); i++) - SetPageNosaveFree(pfn_to_page(pfn + i)); + swsusp_set_page_free(pfn_to_page(pfn + i)); } spin_unlock_irqrestore(&zone->lock, flags); @@ -3203,7 +3208,8 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { proc_dointvec(table, write, file, buffer, length, ppos); - setup_per_zone_pages_min(); + if (write) + setup_per_zone_pages_min(); return 0; } diff --git a/mm/quicklist.c b/mm/quicklist.c new file mode 100644 index 00000000000..ae8189c2799 --- /dev/null +++ b/mm/quicklist.c @@ -0,0 +1,88 @@ +/* + * Quicklist support. + * + * Quicklists are light weight lists of pages that have a defined state + * on alloc and free. Pages must be in the quicklist specific defined state + * (zero by default) when the page is freed. It seems that the initial idea + * for such lists first came from Dave Miller and then various other people + * improved on it. + * + * Copyright (C) 2007 SGI, + * Christoph Lameter <clameter@sgi.com> + * Generalized, added support for multiple lists and + * constructors / destructors. + */ +#include <linux/kernel.h> + +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/quicklist.h> + +DEFINE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK]; + +#define FRACTION_OF_NODE_MEM 16 + +static unsigned long max_pages(unsigned long min_pages) +{ + unsigned long node_free_pages, max; + + node_free_pages = node_page_state(numa_node_id(), + NR_FREE_PAGES); + max = node_free_pages / FRACTION_OF_NODE_MEM; + return max(max, min_pages); +} + +static long min_pages_to_free(struct quicklist *q, + unsigned long min_pages, long max_free) +{ + long pages_to_free; + + pages_to_free = q->nr_pages - max_pages(min_pages); + + return min(pages_to_free, max_free); +} + +/* + * Trim down the number of pages in the quicklist + */ +void quicklist_trim(int nr, void (*dtor)(void *), + unsigned long min_pages, unsigned long max_free) +{ + long pages_to_free; + struct quicklist *q; + + q = &get_cpu_var(quicklist)[nr]; + if (q->nr_pages > min_pages) { + pages_to_free = min_pages_to_free(q, min_pages, max_free); + + while (pages_to_free > 0) { + /* + * We pass a gfp_t of 0 to quicklist_alloc here + * because we will never call into the page allocator. 
+ */ + void *p = quicklist_alloc(nr, 0, NULL); + + if (dtor) + dtor(p); + free_page((unsigned long)p); + pages_to_free--; + } + } + put_cpu_var(quicklist); +} + +unsigned long quicklist_total_size(void) +{ + unsigned long count = 0; + int cpu; + struct quicklist *ql, *q; + + for_each_online_cpu(cpu) { + ql = per_cpu(quicklist, cpu); + for (q = ql; q < ql + CONFIG_NR_QUICK; q++) + count += q->nr_pages; + } + return count; +} + diff --git a/mm/readahead.c b/mm/readahead.c index 93d9ee692fd..9861e883fe5 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -37,7 +37,7 @@ void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping) { ra->ra_pages = mapping->backing_dev_info->ra_pages; - ra->prev_page = -1; + ra->prev_index = -1; } EXPORT_SYMBOL_GPL(file_ra_state_init); @@ -202,17 +202,19 @@ out: * size: Number of pages in that read * Together, these form the "current window". * Together, start and size represent the `readahead window'. - * prev_page: The page which the readahead algorithm most-recently inspected. + * prev_index: The page which the readahead algorithm most-recently inspected. * It is mainly used to detect sequential file reading. * If page_cache_readahead sees that it is again being called for * a page which it just looked at, it can return immediately without * making any state changes. + * offset: Offset in the prev_index where the last read ended - used for + * detection of sequential file reading. * ahead_start, * ahead_size: Together, these form the "ahead window". * ra_pages: The externally controlled max readahead for this fd. * * When readahead is in the off state (size == 0), readahead is disabled. - * In this state, prev_page is used to detect the resumption of sequential I/O. + * In this state, prev_index is used to detect the resumption of sequential I/O. * * The readahead code manages two windows - the "current" and the "ahead" * windows. The intent is that while the application is walking the pages @@ -415,7 +417,7 @@ static int make_ahead_window(struct address_space *mapping, struct file *filp, ra->ahead_size = get_next_ra_size(ra); ra->ahead_start = ra->start + ra->size; - block = force || (ra->prev_page >= ra->ahead_start); + block = force || (ra->prev_index >= ra->ahead_start); ret = blockable_page_cache_readahead(mapping, filp, ra->ahead_start, ra->ahead_size, ra, block); @@ -467,12 +469,13 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, * We avoid doing extra work and bogusly perturbing the readahead * window expansion logic. */ - if (offset == ra->prev_page && --req_size) + if (offset == ra->prev_index && --req_size) ++offset; - /* Note that prev_page == -1 if it is a first read */ - sequential = (offset == ra->prev_page + 1); - ra->prev_page = offset; + /* Note that prev_index == -1 if it is a first read */ + sequential = (offset == ra->prev_index + 1); + ra->prev_index = offset; + ra->prev_offset = 0; max = get_max_readahead(ra); newsize = min(req_size, max); @@ -481,7 +484,7 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, if (newsize == 0 || (ra->flags & RA_FLAG_INCACHE)) goto out; - ra->prev_page += newsize - 1; + ra->prev_index += newsize - 1; /* * Special case - first read at start of file. We'll assume it's @@ -537,18 +540,18 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, * we get called back on the first page of the ahead window which * will allow us to submit more IO. 
*/ - if (ra->prev_page >= ra->ahead_start) { + if (ra->prev_index >= ra->ahead_start) { ra->start = ra->ahead_start; ra->size = ra->ahead_size; make_ahead_window(mapping, filp, ra, 0); recheck: - /* prev_page shouldn't overrun the ahead window */ - ra->prev_page = min(ra->prev_page, + /* prev_index shouldn't overrun the ahead window */ + ra->prev_index = min(ra->prev_index, ra->ahead_start + ra->ahead_size - 1); } out: - return ra->prev_page + 1; + return ra->prev_index + 1; } EXPORT_SYM |
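To illustrate the read_cache_page_async()/read_cache_page() split added in the mm/filemap.c hunks above: the async variant returns as soon as the filler has been run, and the synchronous one now waits on the page lock and checks uptodate-ness itself. A minimal kernel-context sketch follows; the wrapper name is invented, and it simply open-codes what read_cache_page() now does internally.

```c
#include <linux/pagemap.h>
#include <linux/err.h>

/*
 * Sketch: a caller that can overlap the wait with other work would call
 * read_cache_page_async() directly and defer the wait/uptodate check;
 * this wrapper performs it immediately, like read_cache_page().
 */
static struct page *example_read_page(struct address_space *mapping,
				      unsigned long index,
				      int (*filler)(void *, struct page *),
				      void *data)
{
	struct page *page;

	page = read_cache_page_async(mapping, index, filler, data);
	if (IS_ERR(page))
		return page;

	wait_on_page_locked(page);
	if (!PageUptodate(page)) {
		/* The read was submitted but never completed successfully. */
		page_cache_release(page);
		page = ERR_PTR(-EIO);
	}
	return page;
}
```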
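The madvise_need_mmap_write() helper added in mm/madvise.c means that purely page-oriented hints now take mmap_sem only for reading, while hints that rewrite vma->vm_flags still take it for writing. A small userspace sketch of ordinary madvise(2) usage shows the two classes of behaviour; nothing in it is specific to the patch.

```c
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 16 * 4096;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Hint that the range will be needed soon (read-lock path). */
	if (madvise(p, len, MADV_WILLNEED))
		perror("madvise(MADV_WILLNEED)");

	/* Drop the pages again (also a read-lock path after the patch). */
	if (madvise(p, len, MADV_DONTNEED))
		perror("madvise(MADV_DONTNEED)");

	/* MADV_SEQUENTIAL changes vm_flags, so the kernel still takes
	 * mmap_sem for writing here. */
	if (madvise(p, len, MADV_SEQUENTIAL))
		perror("madvise(MADV_SEQUENTIAL)");

	munmap(p, len);
	return 0;
}
```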
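apply_to_page_range(), added in the mm/memory.c hunk above, walks (and, where necessary, allocates) the page tables covering a range and calls a pte_fn_t on every leaf entry. Below is a hedged kernel-context sketch of a caller that write-protects an existing kernel mapping; the function names are invented, and real users of the interface (paravirtualized guests, for instance) would do arch-specific work in the callback.

```c
#include <linux/mm.h>
#include <asm/pgtable.h>

/* Callback signature matches the fn(pte, pmd_page, addr, data) call
 * made by apply_to_pte_range() in the hunk above. */
static int make_range_readonly_pte(pte_t *pte, struct page *pmd_page,
				   unsigned long addr, void *data)
{
	pte_t entry = *pte;

	if (!pte_none(entry))
		set_pte_at(&init_mm, addr, pte, pte_wrprotect(entry));
	return 0;	/* a non-zero return would abort the walk */
}

static int make_range_readonly(unsigned long start, unsigned long size)
{
	/* Page tables are filled in on demand for holes in the range. */
	return apply_to_page_range(&init_mm, start, size,
				   make_range_readonly_pte, NULL);
}
```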
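The mm/oom_kill.c hunk fixes a corner case in badness(): a task whose score had already been rounded down to zero could not be raised by a positive oomkilladj, because shifting zero left is still zero. The arithmetic, isolated as plain, runnable C:

```c
#include <stdio.h>

static unsigned long adjust(unsigned long points, int oomkilladj)
{
	if (oomkilladj) {
		if (oomkilladj > 0) {
			if (!points)
				points = 1;	/* the fix in the hunk above */
			points <<= oomkilladj;
		} else {
			points >>= -oomkilladj;
		}
	}
	return points;
}

int main(void)
{
	printf("%lu\n", adjust(0, 8));		/* 256 after the fix, 0 before */
	printf("%lu\n", adjust(100, 2));	/* 400 */
	printf("%lu\n", adjust(100, -2));	/* 25 */
	return 0;
}
```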
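In mm/page-writeback.c, get_dirty_limits() now scales the dirty thresholds against determine_dirtyable_memory() (free + active + inactive pages, with the highmem share capped so it can never exceed the total) instead of vm_total_pages. A toy calculation with made-up numbers; only the formula comes from the hunk above, and the real code additionally clamps vm_dirty_ratio against the unmapped ratio.

```c
#include <stdio.h>

int main(void)
{
	/* Made-up global counts, in pages. */
	unsigned long nr_free = 50000, nr_inactive = 120000, nr_active = 200000;
	unsigned long highmem = 80000;		/* highmem share of the above */
	unsigned long dirty_ratio = 10;		/* vm.dirty_ratio, percent */
	unsigned long background_ratio = 5;	/* vm.dirty_background_ratio */

	unsigned long total = nr_free + nr_inactive + nr_active;
	/* highmem_dirtyable_memory() never exceeds the total */
	unsigned long highmem_dirtyable = highmem < total ? highmem : total;
	unsigned long available = total - highmem_dirtyable + 1; /* never 0 */

	printf("dirtyable: %lu pages\n", available);
	printf("background threshold: %lu pages\n",
	       background_ratio * available / 100);
	printf("dirty threshold: %lu pages\n",
	       dirty_ratio * available / 100);
	return 0;
}
```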
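The mm/page_alloc.c changes switch compound pages over to an explicit head/tail representation: the head page carries the order (compound_order()), and every tail page is marked PageTail and points back at the head through first_page. A kernel-context sketch of reading that metadata back; the helper names are invented, while the accessors are the ones the hunk itself uses.

```c
#include <linux/mm.h>

/* Resolve any constituent page of a compound allocation to its head. */
static struct page *example_compound_head(struct page *page)
{
	if (PageTail(page))
		return page->first_page;	/* set by prep_compound_page() */
	return page;
}

/* How many base pages does the allocation containing @page span? */
static unsigned long example_compound_pages(struct page *page)
{
	struct page *head = example_compound_head(page);

	if (!PageHead(head))
		return 1;			/* not a compound page */
	return 1UL << compound_order(head);
}
```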
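mm/quicklist.c, new in this merge, keeps per-CPU lists of pages that are returned in a known state, so page-table pages can be recycled without being re-zeroed. The sketch below shows how an architecture might wire its PTE-page allocation to quicklist 0; the function names are invented, and the quicklist_free() signature is assumed from the quicklist header of this era rather than shown in the hunk above.

```c
#include <linux/quicklist.h>
#include <linux/gfp.h>

/* Allocate a PTE page: reuses a parked (already zeroed) page from this
 * CPU's list if possible, otherwise falls back to the page allocator. */
static void *example_pte_alloc_one_kernel(void)
{
	return quicklist_alloc(0, GFP_KERNEL, NULL);	/* NULL ctor */
}

/* Free a PTE page: park it on the quicklist instead of freeing it. */
static void example_pte_free_kernel(void *pte)
{
	quicklist_free(0, NULL, pte);			/* NULL dtor */
}

/* Called periodically (check_pgt_cache) to keep the list bounded:
 * keep at most 25 spare pages per CPU, trimming 16 at a time. */
static void example_check_pgt_cache(void)
{
	quicklist_trim(0, NULL, 25, 16);
}
```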
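Finally, the rename from prev_page to prev_index in mm/readahead.c and mm/filemap.c comes with a new prev_offset field recording where the previous read stopped inside that page; do_generic_mapping_read() now calls mark_page_accessed() again only when a read does not simply continue from that exact spot. The heuristic, isolated as plain C with illustrative values:

```c
#include <stdio.h>

struct ra_like {
	unsigned long prev_index;	/* page index the last read ended in */
	unsigned int prev_offset;	/* byte offset it ended at */
};

/* Mirror of the condition guarding mark_page_accessed() above. */
static int marks_page_accessed(const struct ra_like *ra,
			       unsigned long index, unsigned int offset)
{
	return ra->prev_index != index || ra->prev_offset != offset;
}

int main(void)
{
	struct ra_like ra = { .prev_index = 3, .prev_offset = 1024 };

	/* A read resuming exactly where the last one ended: no new mark. */
	printf("%d\n", marks_page_accessed(&ra, 3, 1024));	/* 0 */
	/* A fresh read elsewhere in the same page counts as a new access. */
	printf("%d\n", marks_page_accessed(&ra, 3, 0));		/* 1 */
	/* A different page always counts. */
	printf("%d\n", marks_page_accessed(&ra, 4, 1024));	/* 1 */
	return 0;
}
```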