diff options
Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- | mm/rmap.c | 65 |
1 files changed, 55 insertions, 10 deletions
diff --git a/mm/rmap.c b/mm/rmap.c index a4fd3680038..c8454e06b6c 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -272,6 +272,51 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) } /* + * Some rmap walks need to find all ptes/hugepmds without false + * negatives (like migrate and split_huge_page) while running concurrently + * with operations that copy or move pagetables (like mremap() and + * fork()). To be safe, they depend on the anon_vma "same_anon_vma" + * list being in a certain order: the dst_vma must be placed after the + * src_vma in the list. This is always guaranteed by fork() but + * mremap() needs to call this function to enforce it in case the + * dst_vma isn't newly allocated and chained with the anon_vma_clone() + * function but just an extension of a pre-existing vma through + * vma_merge. + * + * NOTE: the same_anon_vma list can still be changed by other + * processes while mremap runs because mremap doesn't hold the + * anon_vma mutex to prevent modifications to the list while it + * runs. All we need to enforce is that the relative order of this + * process's vmas isn't changing (we don't care about other vmas' + * order). Each vma corresponds to an anon_vma_chain structure so + * there's no risk that other processes calling anon_vma_moveto_tail() + * and changing the same_anon_vma list under mremap() will screw with + * the relative order of this process's vmas in the list, because + * they can't alter the order of any vma that belongs to this + * process. And there can't be another anon_vma_moveto_tail() running + * concurrently with mremap() coming from this process because we hold + * the mmap_sem for the whole mremap(). fork() ordering dependency + * also shouldn't be affected because fork() only cares that the + * parent vmas are placed in the list before the child vmas and + * anon_vma_moveto_tail() won't reorder vmas from either the fork() + * parent or child. 
+ */ +void anon_vma_moveto_tail(struct vm_area_struct *dst) +{ /* relinks each of dst's anon_vma_chain entries at the tail of its anon_vma's list */ + struct anon_vma_chain *pavc; + struct anon_vma *root = NULL; /* NOTE(review): presumably the root whose lock is currently held -- confirm against lock_anon_vma_root() */ + + list_for_each_entry_reverse(pavc, &dst->anon_vma_chain, same_vma) { /* walk dst's chain from last entry to first */ + struct anon_vma *anon_vma = pavc->anon_vma; + VM_BUG_ON(pavc->vma != dst); /* every entry on dst's chain must point back at dst */ + root = lock_anon_vma_root(root, anon_vma); + list_del(&pavc->same_anon_vma); /* unlink the entry from its current position ... */ + list_add_tail(&pavc->same_anon_vma, &anon_vma->head); /* ... and re-add it at the tail of the same anon_vma's list */ + } + unlock_anon_vma_root(root); /* root is still NULL here if the chain was empty; NOTE(review): confirm the helper accepts NULL */ +} + +/* * Attach vma to its own anon_vma, as well as to the anon_vmas that * the corresponding VMA in the parent process is attached to. * Returns 0 on success, non-zero on failure. @@ -728,7 +773,7 @@ out: } static int page_referenced_anon(struct page *page, - struct mem_cgroup *mem_cont, + struct mem_cgroup *memcg, unsigned long *vm_flags) { unsigned int mapcount; @@ -751,7 +796,7 @@ static int page_referenced_anon(struct page *page, * counting on behalf of references from different * cgroups */ - if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) + if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) continue; referenced += page_referenced_one(page, vma, address, &mapcount, vm_flags); @@ -766,7 +811,7 @@ static int page_referenced_anon(struct page *page, /** * page_referenced_file - referenced check for object-based rmap * @page: the page we're checking references on. - * @mem_cont: target memory controller + * @memcg: target memory control group * @vm_flags: collect encountered vma->vm_flags who actually referenced the page * * For an object-based mapped page, find all the places it is mapped and @@ -777,7 +822,7 @@ static int page_referenced_anon(struct page *page, * This function is only called from page_referenced for object-based pages. 
*/ static int page_referenced_file(struct page *page, - struct mem_cgroup *mem_cont, + struct mem_cgroup *memcg, unsigned long *vm_flags) { unsigned int mapcount; @@ -819,7 +864,7 @@ static int page_referenced_file(struct page *page, * counting on behalf of references from different * cgroups */ - if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) + if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) continue; referenced += page_referenced_one(page, vma, address, &mapcount, vm_flags); @@ -835,7 +880,7 @@ static int page_referenced_file(struct page *page, * page_referenced - test if the page was referenced * @page: the page to test * @is_locked: caller holds lock on the page - * @mem_cont: target memory controller + * @memcg: target memory cgroup * @vm_flags: collect encountered vma->vm_flags who actually referenced the page * * Quick test_and_clear_referenced for all mappings to a page, @@ -843,7 +888,7 @@ static int page_referenced_file(struct page *page, */ int page_referenced(struct page *page, int is_locked, - struct mem_cgroup *mem_cont, + struct mem_cgroup *memcg, unsigned long *vm_flags) { int referenced = 0; @@ -859,13 +904,13 @@ int page_referenced(struct page *page, } } if (unlikely(PageKsm(page))) - referenced += page_referenced_ksm(page, mem_cont, + referenced += page_referenced_ksm(page, memcg, vm_flags); else if (PageAnon(page)) - referenced += page_referenced_anon(page, mem_cont, + referenced += page_referenced_anon(page, memcg, vm_flags); else if (page->mapping) - referenced += page_referenced_file(page, mem_cont, + referenced += page_referenced_file(page, memcg, vm_flags); if (we_locked) unlock_page(page); |