/*
* linux/mm/swap.c
*
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
*/
/*
* This file contains the default values for the operation of the
* Linux VM subsystem. Fine-tuning documentation can be found in
* Documentation/sysctl/vm.txt.
* Started 18.12.91
* Swap aging added 23.2.95, Stephen Tweedie.
* Buffermem limits added 12.3.98, Rik van Riel.
*/
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/mm_inline.h>
#include <linux/percpu_counter.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/backing-dev.h>
#include <linux/memcontrol.h>
#include <linux/gfp.h>
#include <linux/uio.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/pagemap.h>
/* How many pages do we try to swap or page in/out together? */
int page_cluster;
/*
 * Per-CPU pagevec instances (one of each per processor).
 * NOTE(review): judging by their names these batch LRU add, rotate and
 * deactivate work respectively; their users are outside this view, so
 * confirm against the callers before relying on that.
 */
static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
/*
* This path almost never happens for VM activity - pages are normally
* freed via pagevecs. But it gets used by networking.
*/
/*
 * Take @page off its LRU list if it is currently on one.
 *
 * Pages without PageLRU set are left untouched. Otherwise the lru_lock of
 * the page's zone is taken with spin_lock_irqsave(), the LRU flag is
 * cleared and the page is unlinked from the lruvec reported by
 * mem_cgroup_page_lruvec().
 */
static void __page_cache_release(struct page *page)
{
	struct zone *pgzone;
	struct lruvec *lru;
	unsigned long irqflags;

	/* Nothing to undo for a page that is not on an LRU list. */
	if (!PageLRU(page))
		return;

	pgzone = page_zone(page);

	spin_lock_irqsave(&pgzone->lru_lock, irqflags);
	lru = mem_cgroup_page_lruvec(page, pgzone);
	/* The flag was tested before locking; it must still be set now. */
	VM_BUG_ON_PAGE(!PageLRU(page), page);
	__ClearPageLRU(page);
	del_page_from_lru_list(page, lru, page_off_lru(page));
	spin_unlock_irqrestore(&pgzone->lru_lock, irqflags);
}
/*
 * Release a single page: detach it from the LRU (if it is on one) via
 * __page_cache_release(), then hand it to free_hot_cold_page().
 */
static void __put_single_page(struct page *page)
{
__page_cache_release(page);
/*
 * NOTE(review): the 'false' argument presumably selects the "hot"
 * (not cold) free path - confirm against free_hot_cold_page().
 */
free_hot_cold_page(page, false);
}
static void __put_compound_page(struct page *page)
{
compound_page_dtor *dtor;
__page_cache_release(page);
dtor = get_compound_page_dtor(page);
(*dtor)(page);
}
/**
* Two special cases here: we could avoid taking compound_lock_irqsave
* and could skip the tail refcounting (in _mapcount).
*
* 1. Hugetlbfs page:
*
* PageHeadHuge will remain true until the compound page
* is released and enters the buddy allocator, and it could
* not be split by __split_huge_page_refcount().
*
* So if we see PageHeadHuge set, and we have the tail page pin,
* then we could safely put head page.
*
* 2. Slab THP page:
*
* PG_slab is cleared before the slab frees the head page, and
* tail pin cannot be the last reference left on the head page,
* because the slab code is free to reuse the compound page
* after a kfree/kmem_cache_free without having to check if
* there's any tail pin left. In turn all tail pins must always be
* released while the head is still pinned by the slab code
* and so we know PG_slab will be still set too.
*
* So if we see PageSlab set, and we have the tail page pin,
* then we could safely put head page.
*/
static __always_inline
void put_unrefcounted_compound_page(struct page *page_head, struct page *page)
{
	/*
	 * If @page is a THP tail, its flags must be read after the head
	 * page flags. The __split_huge_page_refcount side provides the
	 * matching write memory barriers between clearing PageTail and
	 * the point where the head page can be freed and reallocated.
	 */
	smp_rmb();

	if (unlikely(!PageTail(page))) {
		/*
		 * __split_huge_page_refcount ran before us and @page was
		 * a THP tail. The split @page_head has been freed and
		 * reallocated as a slab or hugetlbfs page of smaller
		 * order (only possible if reallocated as slab on x86).
		 * Drop the reference on @page itself.
		 */
		if (put_page_testzero(page))
			__put_single_page(page);
		return;
	}

	/*
	 * Still a tail page: __split_huge_page_refcount cannot race
	 * here, see the comment above this function.
	 */
	VM_BUG_ON_PAGE(!PageHead(page_head), page_head);
	VM_BUG_ON_PAGE(page_mapcount(page) != 0, page);

	/* Drop the pin on the head; stop unless it was the last one. */
	if (!put_page_testzero(page_head))
		return;

	/*
	 * For the tail of a slab THP page, the tail pin must not be
	 * the last reference held on the page, because PG_slab cannot
	 * be cleared before all tail pins (which skip the _mapcount
	 * tail refcounting) have been released.
	 *
	 * For the tail of a hugetlbfs page, the tail pin may instead
	 * be the last reference on the page, because PageHeadHuge will
	 * not go away until the compound page enters the buddy
	 * allocator.
	 */
	VM_BUG_ON_PAGE(PageSlab(page_head), page_head);
	__put_compound_page(page_head);
}
static __always_inline
void put_refcounted_compound_page(struct page *page_head, struct page *page)
{
if (likely(page != page_head && get_page_unless_zero(page_head))) {
unsigned long flags;
/*
* @page_head wasn't a dangling pointer but it may not
* be a head page anymore by the time we obtain the
* lock. That is ok as long as it can't be freed from
* under us.
*/
flags = compound_lock_irqsave(page_head);
if (unlikely(!PageTail(page))) {
/* __split_huge_page_refcount run before us */
compound_unlock_irqrestore(page_head, flags);
if (put_page_testzero(page_head)) {
/*
* The @page_head may have been freed
* and reallocated as a compound page
* of smaller order and then freed
* again. All we know is that it
* cannot have become: a THP pag