Diffstat (limited to 'mm')
-rw-r--r--  mm/allocpercpu.c      |   9
-rw-r--r--  mm/bootmem.c          |   6
-rw-r--r--  mm/bounce.c           |   4
-rw-r--r--  mm/fadvise.c          |   2
-rw-r--r--  mm/filemap.c          |  13
-rw-r--r--  mm/filemap_xip.c      |   4
-rw-r--r--  mm/fremap.c           |   4
-rw-r--r--  mm/hugetlb.c          |  30
-rw-r--r--  mm/memory.c           |  51
-rw-r--r--  mm/memory_hotplug.c   |   7
-rw-r--r--  mm/mempolicy.c        |  12
-rw-r--r--  mm/migrate.c          |  19
-rw-r--r--  mm/mincore.c          | 183
-rw-r--r--  mm/mlock.c            |   2
-rw-r--r--  mm/mmap.c             |  14
-rw-r--r--  mm/mmzone.c           |   5
-rw-r--r--  mm/nommu.c            |  30
-rw-r--r--  mm/oom_kill.c         |  62
-rw-r--r--  mm/page-writeback.c   | 106
-rw-r--r--  mm/page_alloc.c       | 406
-rw-r--r--  mm/page_io.c          |  45
-rw-r--r--  mm/pdflush.c          |   1
-rw-r--r--  mm/readahead.c        |  12
-rw-r--r--  mm/rmap.c             |  36
-rw-r--r--  mm/shmem.c            |  35
-rw-r--r--  mm/slab.c             | 411
-rw-r--r--  mm/slob.c             |  27
-rw-r--r--  mm/sparse.c           |  23
-rw-r--r--  mm/swap.c             |  10
-rw-r--r--  mm/swapfile.c         | 102
-rw-r--r--  mm/thrash.c           | 116
-rw-r--r--  mm/tiny-shmem.c       |   4
-rw-r--r--  mm/truncate.c         |  41
-rw-r--r--  mm/vmscan.c           |  60
-rw-r--r--  mm/vmstat.c           |  22
35 files changed, 1224 insertions, 690 deletions
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index eaa9abeea53..b2486cf887a 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -17,10 +17,9 @@
void percpu_depopulate(void *__pdata, int cpu)
{
struct percpu_data *pdata = __percpu_disguise(__pdata);
- if (pdata->ptrs[cpu]) {
- kfree(pdata->ptrs[cpu]);
- pdata->ptrs[cpu] = NULL;
- }
+
+ kfree(pdata->ptrs[cpu]);
+ pdata->ptrs[cpu] = NULL;
}
EXPORT_SYMBOL_GPL(percpu_depopulate);
@@ -123,6 +122,8 @@ EXPORT_SYMBOL_GPL(__percpu_alloc_mask);
*/
void percpu_free(void *__pdata)
{
+ if (unlikely(!__pdata))
+ return;
__percpu_depopulate_mask(__pdata, &cpu_possible_map);
kfree(__percpu_disguise(__pdata));
}
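
Both allocpercpu.c hunks lean on the same convention: kfree(NULL) is a no-op, so percpu_depopulate() can drop its per-slot NULL test once percpu_free() rejects a NULL handle at the single entry point. A userspace sketch of the idiom (standard free() gives the identical guarantee; names here are illustrative, not kernel code):

#include <stdlib.h>

struct slot { void *ptr; };

/* free(NULL) is defined to be a no-op, so no per-slot NULL test is needed. */
static void slot_depopulate(struct slot *s)
{
	free(s->ptr);
	s->ptr = NULL;		/* stays idempotent across repeated calls */
}

/* Reject a NULL handle once at the entry point, mirroring percpu_free(). */
static void slots_free(struct slot *s, int n)
{
	int i;

	if (!s)
		return;
	for (i = 0; i < n; i++)
		slot_depopulate(&s[i]);
	free(s);
}
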
diff --git a/mm/bootmem.c b/mm/bootmem.c
index d53112fcb40..00a96970b23 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -27,8 +27,6 @@ unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
-EXPORT_UNUSED_SYMBOL(max_pfn); /* June 2006 */
-
static LIST_HEAD(bdata_list);
#ifdef CONFIG_CRASH_DUMP
/*
@@ -196,6 +194,10 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
if (limit && bdata->node_boot_start >= limit)
return NULL;
+ /* on nodes without memory - bootmem_map is NULL */
+ if (!bdata->node_bootmem_map)
+ return NULL;
+
end_pfn = bdata->node_low_pfn;
limit = PFN_DOWN(limit);
if (limit && end_pfn > limit)
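
The added test matters on NUMA systems with memoryless nodes: such a node never gets a bootmem bitmap, so node_bootmem_map stays NULL and any scan of it would oops. A reduced sketch of the guard, with field names simplified from the kernel's bootmem_data:

struct node_mem {
	unsigned long *bitmap;		/* NULL on a memoryless node */
	unsigned long start_pfn, end_pfn;
};

static void *node_bootmem_alloc(struct node_mem *nm, unsigned long size)
{
	/* on nodes without memory - the bitmap was never populated */
	if (!nm->bitmap)
		return NULL;
	/* ... bitmap scan for a free range elided in this sketch ... */
	return NULL;
}
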
diff --git a/mm/bounce.c b/mm/bounce.c
index e4b62d2a402..643efbe8240 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -237,6 +237,8 @@ static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
if (!bio)
return;
+ blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
+
/*
* at least one page was bounced, fill in possible non-highmem
* pages
@@ -291,8 +293,6 @@ void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
pool = isa_page_pool;
}
- blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
-
/*
* slow path
*/
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 168c78a121b..0df4c899e97 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -38,7 +38,7 @@ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
if (!file)
return -EBADF;
- if (S_ISFIFO(file->f_dentry->d_inode->i_mode)) {
+ if (S_ISFIFO(file->f_path.dentry->d_inode->i_mode)) {
ret = -ESPIPE;
goto out;
}
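
This one-liner is part of the tree-wide struct path conversion in this merge: struct file gained an embedded struct path, and f_dentry/f_vfsmnt users were rewritten as f_path.dentry/f_path.mnt (the same substitution recurs in filemap.c, filemap_xip.c, and mempolicy.c below). The shape of the change, assuming the 2.6.20-era declarations:

struct path {
	struct vfsmount *mnt;
	struct dentry *dentry;
};

struct file {
	struct path f_path;
	/* ... remaining members unchanged ... */
};

/* Compatibility macros were kept while callers were converted: */
#define f_dentry	f_path.dentry
#define f_vfsmnt	f_path.mnt
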
diff --git a/mm/filemap.c b/mm/filemap.c
index 7b84dc81434..8332c77b1bd 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1181,8 +1181,6 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
if (pos < size) {
retval = generic_file_direct_IO(READ, iocb,
iov, pos, nr_segs);
- if (retval > 0 && !is_sync_kiocb(iocb))
- retval = -EIOCBQUEUED;
if (retval > 0)
*ppos = pos + retval;
}
@@ -1445,7 +1443,6 @@ no_cached_page:
* effect.
*/
error = page_cache_read(file, pgoff);
- grab_swap_token();
/*
* The page we want has now been added to the page cache.
@@ -1893,6 +1890,7 @@ int should_remove_suid(struct dentry *dentry)
return 0;
}
+EXPORT_SYMBOL(should_remove_suid);
int __remove_suid(struct dentry *dentry, int kill)
{
@@ -2047,15 +2045,14 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
* Sync the fs metadata but not the minor inode changes and
* of course not the data as we did direct DMA for the IO.
* i_mutex is held, which protects generic_osync_inode() from
- * livelocking.
+ * livelocking. AIO O_DIRECT ops attempt to sync metadata here.
*/
- if (written >= 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
+ if ((written >= 0 || written == -EIOCBQUEUED) &&
+ ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
int err = generic_osync_inode(inode, mapping, OSYNC_METADATA);
if (err < 0)
written = err;
}
- if (written == count && !is_sync_kiocb(iocb))
- written = -EIOCBQUEUED;
return written;
}
EXPORT_SYMBOL(generic_file_direct_write);
@@ -2269,7 +2266,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
if (count == 0)
goto out;
- err = remove_suid(file->f_dentry);
+ err = remove_suid(file->f_path.dentry);
if (err)
goto out;
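
The theme of the filemap.c hunks is the AIO O_DIRECT completion convention: rather than the generic layer rewriting a positive byte count into -EIOCBQUEUED, the direct-IO path now returns -EIOCBQUEUED itself for queued requests, so generic_file_direct_write()'s O_SYNC metadata sync must treat that value as "in flight" rather than failure. A sketch of the resulting check, with a stub standing in for generic_osync_inode():

#define EIOCBQUEUED	529	/* kernel-internal errno of this era */

static long osync_metadata(void)	/* stub for generic_osync_inode() */
{
	return 0;
}

static long finish_direct_write(long written, int need_sync)
{
	/*
	 * A queued AIO request is not a failure here: sync the
	 * metadata now, the data completion is signalled later.
	 */
	if ((written >= 0 || written == -EIOCBQUEUED) && need_sync) {
		long err = osync_metadata();
		if (err < 0)
			written = err;
	}
	return written;
}
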
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index b4fd0d7c9bf..45b3553865c 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -189,7 +189,7 @@ __xip_unmap (struct address_space * mapping,
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
pteval = ptep_clear_flush(vma, address, pte);
- page_remove_rmap(page);
+ page_remove_rmap(page, vma);
dec_mm_counter(mm, file_rss);
BUG_ON(pte_dirty(pteval));
pte_unmap_unlock(pte, ptl);
@@ -379,7 +379,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
if (count == 0)
goto out_backing;
- ret = remove_suid(filp->f_dentry);
+ ret = remove_suid(filp->f_path.dentry);
if (ret)
goto out_backing;
diff --git a/mm/fremap.c b/mm/fremap.c
index 7a9d0f5d246..4e3f53dd5fd 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -33,7 +33,7 @@ static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
if (page) {
if (pte_dirty(pte))
set_page_dirty(page);
- page_remove_rmap(page);
+ page_remove_rmap(page, vma);
page_cache_release(page);
}
} else {
@@ -101,7 +101,6 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
{
int err = -ENOMEM;
pte_t *pte;
- pte_t pte_val;
spinlock_t *ptl;
pte = get_locked_pte(mm, addr, &ptl);
@@ -114,7 +113,6 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
}
set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
- pte_val = *pte;
/*
* We don't need to run update_mmu_cache() here because the "file pte"
* being installed by install_file_pte() is not a real pte - it's a
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a088f593a80..cb362f761f1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -44,14 +44,14 @@ static void clear_huge_page(struct page *page, unsigned long addr)
}
static void copy_huge_page(struct page *dst, struct page *src,
- unsigned long addr)
+ unsigned long addr, struct vm_area_struct *vma)
{
int i;
might_sleep();
for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) {
cond_resched();
- copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE);
+ copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
}
}
@@ -73,7 +73,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
for (z = zonelist->zones; *z; z++) {
nid = zone_to_nid(*z);
- if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
+ if (cpuset_zone_allowed_softwall(*z, GFP_HIGHUSER) &&
!list_empty(&hugepage_freelists[nid]))
break;
}
@@ -109,7 +109,7 @@ static int alloc_fresh_huge_page(void)
if (nid == MAX_NUMNODES)
nid = first_node(node_online_map);
if (page) {
- page[1].lru.next = (void *)free_huge_page; /* dtor */
+ set_compound_page_dtor(page, free_huge_page);
spin_lock(&hugetlb_lock);
nr_huge_pages++;
nr_huge_pages_node[page_to_nid(page)]++;
@@ -344,7 +344,6 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
entry = *src_pte;
ptepage = pte_page(entry);
get_page(ptepage);
- add_mm_counter(dst, file_rss, HPAGE_SIZE / PAGE_SIZE);
set_huge_pte_at(dst, addr, dst_pte, entry);
}
spin_unlock(&src->page_table_lock);
@@ -365,6 +364,11 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
pte_t pte;
struct page *page;
struct page *tmp;
+ /*
+ * A page gathering list, protected by per file i_mmap_lock. The
+ * lock is used to avoid list corruption from multiple unmapping
+ * of the same page since we are using page->lru.
+ */
LIST_HEAD(page_list);
WARN_ON(!is_vm_hugetlb_page(vma));
@@ -372,24 +376,21 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
BUG_ON(end & ~HPAGE_MASK);
spin_lock(&mm->page_table_lock);
-
- /* Update high watermark before we lower rss */
- update_hiwater_rss(mm);
-
for (address = start; address < end; address += HPAGE_SIZE) {
ptep = huge_pte_offset(mm, address);
if (!ptep)
continue;
+ if (huge_pmd_unshare(mm, &address, ptep))
+ continue;
+
pte = huge_ptep_get_and_clear(mm, address, ptep);
if (pte_none(pte))
continue;
page = pte_page(pte);
list_add(&page->lru, &page_list);
- add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE));
}
-
spin_unlock(&mm->page_table_lock);
flush_tlb_range(vma, start, end);
list_for_each_entry_safe(page, tmp, &page_list, lru) {
@@ -441,7 +442,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
}
spin_unlock(&mm->page_table_lock);
- copy_huge_page(new_page, old_page, address);
+ copy_huge_page(new_page, old_page, address, vma);
spin_lock(&mm->page_table_lock);
ptep = huge_pte_offset(mm, address & HPAGE_MASK);
@@ -515,7 +516,6 @@ retry:
if (!pte_none(*ptep))
goto backout;
- add_mm_counter(mm, file_rss, HPAGE_SIZE / PAGE_SIZE);
new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
&& (vma->vm_flags & VM_SHARED)));
set_huge_pte_at(mm, address, ptep, new_pte);
@@ -653,11 +653,14 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
BUG_ON(address >= end);
flush_cache_range(vma, address, end);
+ spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
spin_lock(&mm->page_table_lock);
for (; address < end; address += HPAGE_SIZE) {
ptep = huge_pte_offset(mm, address);
if (!ptep)
continue;
+ if (huge_pmd_unshare(mm, &address, ptep))
+ continue;
if (!pte_none(*ptep)) {
pte = huge_ptep_get_and_clear(mm, address, ptep);
pte = pte_mkhuge(pte_modify(pte, newprot));
@@ -666,6 +669,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
}
}
spin_unlock(&mm->page_table_lock);
+ spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
flush_tlb_range(vma, start, end);
}
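
Two themes run through the hugetlb hunks: a huge PMD shared between mappings must be unshared (huge_pmd_unshare()) before its entry may be cleared, and hugetlb_change_protection() now nests mm->page_table_lock inside the file's i_mmap_lock, matching the order used on the unmap side so the shared-PMD bookkeeping stays consistent. A reduced sketch of the nesting, with pthread mutexes standing in for the kernel spinlocks:

#include <pthread.h>

static pthread_mutex_t i_mmap_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t page_table_lock = PTHREAD_MUTEX_INITIALIZER;

/* Take i_mmap_lock first, page_table_lock second -- every path that
 * touches shared huge PMDs uses this order, so none can deadlock. */
static void change_protection_range(void)
{
	pthread_mutex_lock(&i_mmap_lock);
	pthread_mutex_lock(&page_table_lock);
	/* ... walk huge PTEs, unshare PMDs, rewrite protections ... */
	pthread_mutex_unlock(&page_table_lock);
	pthread_mutex_unlock(&i_mmap_lock);
}
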
diff --git a/mm/memory.c b/mm/memory.c
index 156861fcac4..af227d26e10 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -681,7 +681,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
mark_page_accessed(page);
file_rss--;
}
- page_remove_rmap(page);
+ page_remove_rmap(page, vma);
tlb_remove_page(tlb, page);
continue;
}
@@ -1091,7 +1091,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (pages) {
pages[i] = page;
- flush_anon_page(page, start);
+ flush_anon_page(vma, page, start);
flush_dcache_page(page);
}
if (vmas)
@@ -1110,23 +1110,29 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
{
pte_t *pte;
spinlock_t *ptl;
+ int err = 0;
pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (!pte)
- return -ENOMEM;
+ return -EAGAIN;
arch_enter_lazy_mmu_mode();
do {
struct page *page = ZERO_PAGE(addr);
pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
+
+ if (unlikely(!pte_none(*pte))) {
+ err = -EEXIST;
+ pte++;
+ break;
+ }
page_cache_get(page);
page_add_file_rmap(page);
inc_mm_counter(mm, file_rss);
- BUG_ON(!pte_none(*pte));
set_pte_at(mm, addr, pte, zero_pte);
} while (pte++, addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
pte_unmap_unlock(pte - 1, ptl);
- return 0;
+ return err;
}
static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
@@ -1134,16 +1140,18 @@ static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
{
pmd_t *pmd;
unsigned long next;
+ int err;
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
- return -ENOMEM;
+ return -EAGAIN;
do {
next = pmd_addr_end(addr, end);
- if (zeromap_pte_range(mm, pmd, addr, next, prot))
- return -ENOMEM;
+ err = zeromap_pte_range(mm, pmd, addr, next, prot);
+ if (err)
+ break;
} while (pmd++, addr = next, addr != end);
- return 0;
+ return err;
}
static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
@@ -1151,16 +1159,18 @@ static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
{
pud_t *pud;
unsigned long next;
+ int err;
pud = pud_alloc(mm, pgd, addr);
if (!pud)
- return -ENOMEM;
+ return -EAGAIN;
do {
next = pud_addr_end(addr, end);
- if (zeromap_pmd_range(mm, pud, addr, next, prot))
- return -ENOMEM;
+ err = zeromap_pmd_range(mm, pud, addr, next, prot);
+ if (err)
+ break;
} while (pud++, addr = next, addr != end);
- return 0;
+ return err;
}
int zeromap_page_range(struct vm_area_struct *vma,
@@ -1431,7 +1441,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
return pte;
}
-static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va)
+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
{
/*
* If the source page was a PFN mapping, we don't have
@@ -1454,9 +1464,9 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
kunmap_atomic(kaddr, KM_USER0);
flush_dcache_page(dst);
return;
-
+
}
- copy_user_highpage(dst, src, va);
+ copy_user_highpage(dst, src, va, vma);
}
/*
@@ -1567,7 +1577,7 @@ gotten:
new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
if (!new_page)
goto oom;
- cow_user_page(new_page, old_page, address);
+ cow_user_page(new_page, old_page, address, vma);
}
/*
@@ -1576,7 +1586,7 @@ gotten:
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (likely(pte_same(*page_table, orig_pte))) {
if (old_page) {
- page_remove_rmap(old_page);
+ page_remove_rmap(old_page, vma);
if (!PageAnon(old_page)) {
dec_mm_counter(mm, file_rss);
inc_mm_counter(mm, anon_rss);
@@ -1902,7 +1912,6 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
return 0;
}
-EXPORT_UNUSED_SYMBOL(vmtruncate_range); /* June 2006 */
/**
* swapin_readahead - swap in pages in hope we need them soon
@@ -1991,6 +2000,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
delayacct_set_flag(DELAYACCT_PF_SWAPIN);
page = lookup_swap_cache(entry);
if (!page) {
+ grab_swap_token(); /* Contend for token _before_ read-in */
swapin_readahead(entry, address, vma);
page = read_swap_cache_async(entry, vma, address);
if (!page) {
@@ -2008,7 +2018,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
- grab_swap_token();
}
delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
@@ -2191,7 +2200,7 @@ retry:
page = alloc_page_vma(GFP_HIGHUSER, vma, address);
if (!page)
goto oom;
- copy_user_highpage(page, new_page, address);
+ copy_user_highpage(page, new_page, address, vma);
page_cache_release(new_page);
new_page = page;
anon = 1;
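
Among the memory.c changes, the zeromap_*() levels swap their error convention: allocation failure now yields -EAGAIN, an already-populated PTE yields -EEXIST in place of a BUG_ON, and each level hands the first failure upward instead of flattening everything into -ENOMEM. A minimal sketch of that propagation shape:

#include <errno.h>

static int leaf_busy[4] = { 0, 0, 1, 0 };	/* third entry populated */

static int map_leaf(int i)
{
	if (leaf_busy[i])
		return -EEXIST;		/* entry already present */
	leaf_busy[i] = 1;
	return 0;
}

static int map_level(void)
{
	int i, err = 0;

	for (i = 0; i < 4; i++) {
		err = map_leaf(i);
		if (err)
			break;		/* report the first failure as-is */
	}
	return err;
}
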
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index fd678a662ea..84279127fcd 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -67,12 +67,13 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
zone_type = zone - pgdat->node_zones;
if (!populated_zone(zone)) {
int ret = 0;
- ret = init_currently_empty_zone(zone, phys_start_pfn, nr_pages);
+ ret = init_currently_empty_zone(zone, phys_start_pfn,
+ nr_pages, MEMMAP_HOTPLUG);
if (ret < 0)
return ret;
}
- memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn);
- zonetable_add(zone, nid, zone_type, phys_start_pfn, nr_pages);
+ memmap_init_zone(nr_pages, nid, zone_type,
+ phys_start_pfn, MEMMAP_HOTPLUG);
return 0;
}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 617fb31086e..da946394655 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -141,9 +141,11 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
enum zone_type k;
max = 1 + MAX_NR_ZONES * nodes_weight(*nodes);
+ max++; /* space for zlcache_ptr (see mmzone.h) */
zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL);
if (!zl)
return NULL;
+ zl->zlcache_ptr = NULL;
num = 0;
/* First put in the highest zones from all nodes, then all the next
lower zones etc. Avoid empty zones because the memory allocator
@@ -219,7 +221,7 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
do {
struct page *page;
- unsigned int nid;
+ int nid;
if (!pte_present(*pte))
continue;
@@ -1324,7 +1326,7 @@ struct mempolicy *__mpol_copy(struct mempolicy *old)
atomic_set(&new->refcnt, 1);
if (new->policy == MPOL_BIND) {
int sz = ksize(old->v.zonelist);
- new->v.zonelist = kmemdup(old->v.zonelist, sz, SLAB_KERNEL);
+ new->v.zonelist = kmemdup(old->v.zonelist, sz, GFP_KERNEL);
if (!new->v.zonelist) {
kmem_cache_free(policy_cache, new);
return ERR_PTR(-ENOMEM);
@@ -1705,8 +1707,8 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
* Display pages allocated per node and memory policy via /proc.
*/
-static const char *policy_types[] = { "default", "prefer", "bind",
- "interleave" };
+static const char * const policy_types[] =
+ { "default", "prefer", "bind", "interleave" };
/*
* Convert a mempolicy into a string.
@@ -1855,7 +1857,7 @@ int show_numa_map(struct seq_file *m, void *v)
if (file) {
seq_printf(m, " file=");
- seq_path(m, file->f_vfsmnt, file->f_dentry, "\n\t= ");
+ seq_path(m, file->f_path.mnt, file->f_path.dentry, "\n\t= ");
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
seq_printf(m, " heap");
} else if (vma->vm_start <= mm->start_stack &&
diff --git a/mm/migrate.c b/mm/migrate.c
index b4979d423d2..e9b161bde95 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -294,7 +294,7 @@ out:
static int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page)
{
- struct page **radix_pointer;
+ void **pslot;
if (!mapping) {
/* Anonymous page */
@@ -305,12 +305,11 @@ static int migrate_page_move_mapping(struct address_space *mapping,
write_lock_irq(&mapping->tree_lock);
- radix_pointer = (struct page **)radix_tree_lookup_slot(
- &mapping->page_tree,
- page_index(page));
+ pslot = radix_tree_lookup_slot(&mapping->page_tree,
+ page_index(page));
if (page_count(page) != 2 + !!PagePrivate(page) ||
- *radix_pointer != page) {
+ (struct page *)radix_tree_deref_slot(pslot) != page) {
write_unlock_irq(&mapping->tree_lock);
return -EAGAIN;
}
@@ -318,7 +317,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
/*
* Now we know that no one else is looking at the page.
*/
- get_page(newpage);
+ get_page(newpage); /* add cache reference */
#ifdef CONFIG_SWAP
if (PageSwapCache(page)) {
SetPageSwapCache(newpage);
@@ -326,8 +325,14 @@ static int migrate_page_move_mapping(struct address_space *mapping,
}
#endif
- *radix_pointer = newpage;
+ radix_tree_replace_slot(pslot, newpage);
+
+ /*
+ * Drop cache reference from old page.
+ * We know this isn't the last reference.
+ */
__put_page(page);
+
write_unlock_irq(&mapping->tree_lock);
return 0;
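
The migration hunk stops treating the radix-tree slot as a bare struct page ** and moves to the slot API: look the slot up under tree_lock, compare through radix_tree_deref_slot(), then radix_tree_replace_slot() installs the new page while the cache references move across. A userspace analogue of replace-under-lock with explicit refcounts:

#include <pthread.h>
#include <stddef.h>

struct page { int refcount; };

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
static struct page *slot;		/* stands in for one tree slot */

static int replace_slot(struct page *old, struct page *new)
{
	pthread_mutex_lock(&tree_lock);
	if (slot != old) {		/* lost a race: caller retries */
		pthread_mutex_unlock(&tree_lock);
		return -1;		/* -EAGAIN in the kernel */
	}
	new->refcount++;		/* add cache reference */
	slot = new;			/* radix_tree_replace_slot() */
	old->refcount--;		/* drop old cache reference */
	pthread_mutex_unlock(&tree_lock);
	return 0;
}
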
diff --git a/mm/mincore.c b/mm/mincore.c
index 72890780c1c..8aca6f7167b 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -1,7 +1,7 @@
/*
* linux/mm/mincore.c
*
- * Copyright (C) 1994-1999 Linus Torvalds
+ * Copyright (C) 1994-2006 Linus Torvalds
*/
/*
@@ -38,46 +38,51 @@ static unsigned char mincore_page(struct vm_area_struct * vma,
return present;
}
-static long mincore_vma(struct vm_area_struct * vma,
- unsigned long start, unsigned long end, unsigned char __user * vec)
+/*
+ * Do a chunk of "sys_mincore()". We've already checked
+ * all the arguments, we hold the mmap semaphore: we should
+ * just return the amount of info we're asked for.
+ */
+static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages)
{
- long error, i, remaining;
- unsigned char * tmp;
-
- error = -ENOMEM;
- if (!vma->vm_file)
- return error;
-
- start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
- if (end > vma->vm_end)
- end = vma->vm_end;
- end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+ unsigned long i, nr, pgoff;
+ struct vm_area_struct *vma = find_vma(current->mm, addr);
- error = -EAGAIN;
- tmp = (unsigned char *) __get_free_page(GFP_KERNEL);
- if (!tmp)
- return error;
+ /*
+ * find_vma() didn't find anything above us, or we're
+ * in an unmapped hole in the address space: ENOMEM.
+ */
+ if (!vma || addr < vma->vm_start)
+ return -ENOMEM;
- /* (end - start) is # of pages, and also # of bytes in "vec */
- remaining = (end - start),
+ /*
+ * Ok, got it. But check whether it's a segment we support
+ * mincore() on. Right now, we don't do any anonymous mappings.
+ *
+ * FIXME: This is just stupid. And returning ENOMEM is
+ * stupid too. We should just look at the page tables. But
+ * this is what we've traditionally done, so we'll just
+ * continue doing it.
+ */
+ if (!vma->vm_file)
+ return -ENOMEM;
- error = 0;
- for (i = 0; remaining > 0; remaining -= PAGE_SIZE, i++) {
- int j = 0;
- long thispiece = (remaining < PAGE_SIZE) ?
- remaining : PAGE_SIZE;
+ /*
+ * Calculate how many pages there are left in the vma, and
+ * what the pgoff is for our address.
+ */
+ nr = (vma->vm_end - addr) >> PAGE_SHIFT;
+ if (nr > pages)
+ nr = pages;
- while (j < thispiece)
- tmp[j++] = mincore_page(vma, start++);
+ pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
+ pgoff += vma->vm_pgoff;
- if (copy_to_user(vec + PAGE_SIZE * i, tmp, thispiece)) {
- error = -EFAULT;
- break;
- }
- }
+ /* And then we just fill the sucker in.. */
+ for (i = 0 ; i < nr; i++, pgoff++)
+ vec[i] = mincore_page(vma, pgoff);
- free_page((unsigned long) tmp);
- return error;
+ return nr;
}
/*
@@ -107,82 +112,50 @@ static long mincore_vma(struct vm_area_struct * vma,
asmlinkage long sys_mincore(unsigned long start, size_t len,
unsigned char __user * vec)
{
- int index = 0;
- unsigned long end, limit;
- struct vm_area_struct * vma;
- size_t max;
- int unmapped_error = 0;
- long error;
-
- /* check the arguments */
- if (start & ~PAGE_CACHE_MASK)
- goto einval;
-
- limit = TASK_SIZE;
- if (start >= limit)
- goto enomem;
-
- if (!len)
- return 0;
-
- max = limit - start;
- len = PAGE_CACHE_ALIGN(len);
- if (len > max || !len)
- goto enomem;
+ long retval;
+ unsigned long pages;
+ unsigned char *tmp;
- end = start + len;
+ /* Check the start address: needs to be page-aligned.. */
+ if (start & ~PAGE_CACHE_MASK)
+ return -EINVAL;
- /* check the output buffer whilst holding the lock */
- error = -EFAULT;
- down_read(&current->mm->mmap_sem);
+ /* ..and we need to be passed a valid user-space range */
+ if (!access_ok(VERIFY_READ, (void __user *) start, len))
+ return -ENOMEM;
- if (!access_ok(VERIFY_WRITE, vec, len >> PAGE_SHIFT))
- goto out;
+ /* This also avoids any overflows on PAGE_CACHE_ALIGN */
+ pages = len >> PAGE_SHIFT;
+ pages += (len & ~PAGE_MASK) != 0;
- /*
- * If the interval [start,end) covers some unmapped address
- * ranges, just ignore them, but return -ENOMEM at the end.
- */
- error = 0;
-
- vma = find_vma(current->mm, start);
- while (vma) {
- /* Here start < vma->vm_end. */
- if (start < vma->vm_start) {
- unmapped_error = -ENOMEM;
- start = vma->vm_start;
- }
+ if (!access_ok(VERIFY_WRITE, vec, pages))
+ return -EFAULT;
- /* Here vma->vm_start <= start < vma->vm_end. */
- if (end <= vma->vm_end) {
- if (start < end) {
- error = mincore_vma(vma, start, end,
- &vec[index]);
- if (error)
- goto out;
- }
- error = unmapped_error;
- goto out;
+ tmp = (void *) __get_free_page(GFP_USER);
+ if (!tmp)
+ return -EAGAIN;
+
+ retval = 0;
+ while (pages) {
+ /*
+ * Do at most PAGE_SIZE entries per iteration, due to
+ * the temporary buffer size.
+ */
+ down_read(&current->mm->mmap_sem);
+ retval = do_mincore(start, tmp, min(pages, PAGE_SIZE));
+ up_read(&current->mm->mmap_sem);
+
+ if (retval <= 0)
+ break;
+ if (copy_to_user(vec, tmp, retval)) {
+ retval = -EFAULT;
+ break;
}
-
- /* Here vma->vm_start <= start < vma->vm_end < end. */
- error = mincore_vma(vma, start, vma->vm_end, &vec[index]);
- if (error)
- goto out;
- index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT;
- start = vma->vm_end;
- vma = vma->vm_next;
+ pages -= retval;
+ vec += retval;
+ start += retval << PAGE_SHIFT;
+ retval = 0;
}
-
- /* we found a hole in the area queried if we arrive here */
- error = -ENOMEM;
-
-out:
- up_read(&current->mm->mmap_sem);
- return error;
-
-einval:
- return -EINVAL;
-enomem:
- return -ENOMEM;
+ free_page((unsigned long) tmp);
+ return retval;
}
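
The rewritten sys_mincore() drains the request in bounded chunks: take mmap_sem, have do_mincore() fill at most one page of result bytes into a kernel bounce buffer, drop the lock, copy_to_user(), advance, repeat. That caps both the lock hold time and the temporary allocation at one page. A userspace sketch of the same chunked-drain loop, with a stub in place of do_mincore():

#include <string.h>

#define CHUNK 4096

/* Stub for do_mincore(): pretend every page is resident. */
static long fill_chunk(unsigned char *buf, unsigned long pages)
{
	memset(buf, 1, pages);
	return (long)pages;	/* pages handled, or negative errno */
}

static long mincore_like(unsigned char *vec, unsigned long pages)
{
	unsigned char tmp[CHUNK];
	long n;

	while (pages) {
		/* the lock is held only across the fill, not the copy */
		n = fill_chunk(tmp, pages < CHUNK ? pages : CHUNK);
		if (n <= 0)
			return n;
		memcpy(vec, tmp, n);	/* copy_to_user() in the kernel */
		vec += n;
		pages -= n;
	}
	return 0;
}
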
diff --git a/mm/mlock.c b/mm/mlock.c
index b90c59573ab..3446b7ef731 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -65,7 +65,7 @@ success:
ret = make_pages_present(start, end);
}
- vma->vm_mm->locked_vm -= pages;
+ mm->locked_vm -= pages;
out:
if (ret == -ENOMEM)
ret = -EAGAIN;
diff --git a/mm/mmap.c b/mm/mmap.c
index 7b40abd7cba..9717337293c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -188,7 +188,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
struct file *file, struct address_space *mapping)
{
if (vma->vm_flags & VM_DENYWRITE)
- atomic_inc(&file->f_dentry->d_inode->i_writecount);