/*
* linux/mm/swap.c
*
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*/
/*
* This file contains the default values for the operation of the
* Linux VM subsystem. Fine-tuning documentation can be found in
* Documentation/sysctl/vm.txt.
* Started 18.12.91
* Swap aging added 23.2.95, Stephen Tweedie.
* Buffermem limits added 12.3.98, Rik van Riel.
*/
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/mm_inline.h>
#include <linux/percpu_counter.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/backing-dev.h>
#include <linux/memcontrol.h>
#include <linux/gfp.h>
#include <linux/uio.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/pagemap.h>
/* How many pages do we try to swap or page in/out together? */
int page_cluster;
/*
 * Per-CPU pagevecs, one instance of each per processor.
 * NOTE(review): going by their names these batch LRU add, rotate and
 * deactivate operations respectively; the code that fills and drains
 * them is not visible in this part of the file - confirm against the
 * users before relying on that.
 */
static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
/*
* This path almost never happens for VM activity - pages are normally
* freed via pagevecs. But it gets used by networking.
*/
static void __page_cache_release(struct page *page)
{
	struct lruvec *lruvec;
	struct zone *zone;
	unsigned long irq_flags;

	/* Nothing to undo unless PageLRU() reports the page as on an LRU. */
	if (!PageLRU(page))
		return;

	zone = page_zone(page);

	/*
	 * The whole removal runs under zone->lru_lock with the interrupt
	 * state saved in irq_flags and restored on unlock.
	 */
	spin_lock_irqsave(&zone->lru_lock, irq_flags);
	lruvec = mem_cgroup_page_lruvec(page, zone);
	/* PageLRU() was true before locking; assert it still is. */
	VM_BUG_ON_PAGE(!PageLRU(page), page);
	__ClearPageLRU(page);
	del_page_from_lru_list(page, lruvec, page_off_lru(page));
	spin_unlock_irqrestore(&zone->lru_lock, irq_flags);
}
/*
 * Release path for a page that is not a compound head
 * (put_compound_page() takes this branch when PageHead() is false):
 * run the page through __page_cache_release(), then hand it to
 * free_hot_cold_page().
 */
static void __put_single_page(struct page *page)
{
	/*
	 * Second argument of free_hot_cold_page(); presumably the "cold"
	 * hint, passed as 0 here - confirm against the allocator prototype.
	 */
	const int cold = 0;

	__page_cache_release(page);
	free_hot_cold_page(page, cold);
}
static void __put_compound_page(struct page *page)
{
compound_page_dtor *dtor;
__page_cache_release(page);
dtor = get_compound_page_dtor(page);
(*dtor)(page);
}
static void put_compound_page(struct page *page)
{
struct page *page_head;
if (likely(!PageTail(page))) {
if (put_page_testzero(page)) {
/*
* By the time all refcounts have been released
* split_huge_page cannot run anymore from under us.
*/
if (PageHead(page))
__put_compound_page(page);
else
__put_single_page(page);
}
return;
}
/* __split_huge_page_refcount can run under us */
page_head = compound_head(page);
/*
* THP can not break up slab pages so avoid taking
* compound_lock() and skip the tail page refcounting (in
* _mapcount) too. Slab performs non-atomic bit ops on
* page->flags for better performance. In particular
* slab_unlock() in slub used to be a hot path. It is still
* hot on arches that do not support
* this_cpu_cmpxchg_double().
*
* If "page" is part of a slab or hugetlbfs page it cannot be
* splitted and the head page cannot change from under us. And
* if "page" is part of a THP page under splitting, if the
* head page pointed by the THP tail isn't a THP head anymore,
* we'll find PageTail clear after smp_rmb() and we'll treat
* it as a single page.
*/
if (!__compound_tail_refcounted(page_head)) {
/*
* If "page" is a THP tail, we must read the tail page
* flags after the head page flags. The
* split_huge_page side enforces write memory barriers
* between clearing PageTail and before the head page
* can be freed and reallocated.
*/
smp_rmb();
if (likely(PageTail(page))) {
/*
* __split_huge_page_refcount cannot race
* here.
*/
VM_BUG_ON_PAGE(!PageHead(page_head), page_head);
VM_BUG_ON_PAGE(page_mapcount(page) != 0, page);
if (put_page_testzero(page_head)) {
/*
* If this is the tail of a slab
* compound page, the tail pin must
* not be the last reference held on
* the page, because the PG_slab
* cannot be cleared before all tail
* pins (which skips the _mapcount
* tail refcounting) have been
* released. For hugetlbfs the tail
* pin may be the last reference on
* the page instead, because
* PageHeadHuge will not go away until
* the compound page enters the buddy
* allocator.
*/
VM_BUG_ON_PAGE(PageSlab(page_head), page_head);
__put_compound_page(page_head);
}
return;
} else
/*
* __split_huge_page_refcount run before us,
* "page" was a THP tail. The split page_head
* has been freed and reallocated as slab or
* hugetlbfs page of smaller order (only
* possible if reallocated as slab on x86).
*/
goto out_put_single;
}
if (likely(page != page_head && get_page_unless_zero(page_head))) {
unsigned long flags;
/*
* page_head wasn't a dangling pointer but it may not
* be a head page anymore by the time we obtain the
* lock. That is ok as long as it can't be freed from
* under us.
*/
flags = compound_lock_irqsave(page_head);
if (unlikely(!PageTail(page))) {
/* __split_huge_page_refcount run before us */
compound_unlock_irqrestore(page_head, flags);
if (put_page_testzero(page_head)) {
/*
* The head page may have been freed
* and reallocated as a compound page
* of smaller order and then freed
* again. All we know is that it
* cannot have become: a THP page, a
* compound page of higher order, a
* tail page. That is because we
* still hold the refcount of the
* split THP tail and page_head was
* the THP head before the split.
*/
if (PageHead(page_head))
__put_compound_page(page_head);