/*P:700
* The pagetable code, on the other hand, still shows the scars of
* previous encounters. It's functional, and as neat as it can be in the
* circumstances, but be wary, for these things are subtle and break easily.
* The Guest provides a virtual to physical mapping, but we can neither trust
* it nor use it: we verify and convert it here then point the CPU to the
* converted Guest pages when running the Guest.
:*/
/* Copyright (C) Rusty Russell IBM Corporation 2006.
* GPL v2 and any later version */
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/random.h>
#include <linux/percpu.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/bootparam.h>
#include "lg.h"
/*M:008
* We hold references to pages, which prevents them from being swapped.
* It'd be nice to have a callback in the "struct mm_struct" when Linux wants
* to swap out. If we had this, and a shrinker callback to trim PTE pages, we
* could probably consider launching Guests as non-root.
:*/
/*H:300
* The Page Table Code
*
* We use two-level page tables for the Guest, or three-level with PAE. If
* you're not entirely comfortable with virtual addresses, physical addresses
* and page tables then I recommend you review arch/x86/lguest/boot.c's "Page
* Table Handling" (with diagrams!).
*
* The Guest keeps page tables, but we maintain the actual ones here: these are
* called "shadow" page tables. Which is a very Guest-centric name: these are
* the real page tables the CPU uses, although we keep them up to date to
* reflect the Guest's. (See what I mean about weird naming? Since when do
* shadows reflect anything?)
*
* Anyway, this is the most complicated part of the Host code. There are seven
* parts to this:
* (i) Looking up a page table entry when the Guest faults,
* (ii) Making sure the Guest stack is mapped,
* (iii) Setting up a page table entry when the Guest tells us one has changed,
* (iv) Switching page tables,
* (v) Flushing (throwing away) page tables,
* (vi) Mapping the Switcher when the Guest is about to run,
* (vii) Setting up the page tables initially.
:*/
/*
 * The Switcher uses the complete top PTE page. That's 1024 PTE entries (4MB)
 * or 512 PTE entries with PAE (2MB).
 *
 * SWITCHER_PGD_INDEX is the index of the last entry in a top-level page
 * directory (PTRS_PER_PGD - 1).
 */
#define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1)
/*
 * For PAE we need the PMD index as well. We use the last 2MB, so we
 * will need the last pmd entry of the last pmd page.
 *
 * The remaining two constants also differ between the PAE and non-PAE builds:
 *
 * RESERVE_MEM is 2 with PAE and 4 without.
 * NOTE(review): presumably the size in MB of the region kept for the
 * Switcher (2MB with PAE, 4MB without) -- confirm against its users.
 *
 * CHECK_GPGD_MASK is _PAGE_PRESENT with PAE and _PAGE_TABLE without.
 * NOTE(review): presumably the flag bits checked on Guest top-level
 * entries -- confirm against its users.
 */
#ifdef CONFIG_X86_PAE
#define SWITCHER_PMD_INDEX (PTRS_PER_PMD - 1)
#define RESERVE_MEM 2U
#define CHECK_GPGD_MASK _PAGE_PRESENT
#else /* !CONFIG_X86_PAE */
#define RESERVE_MEM 4U
#define CHECK_GPGD_MASK _PAGE_TABLE
#endif /* CONFIG_X86_PAE */
/*
 * We actually need a separate PTE page for each CPU. Remember that after the
 * Switcher code itself comes two pages for each CPU, and we don't want this
 * CPU's guest to see the pages of any other CPU.
 *
 * switcher_pte_pages is the per-CPU variable holding that pointer.
 */
static DEFINE_PER_CPU(pte_t *, switcher_pte_pages);
/* Shorthand for the given CPU's instance of switcher_pte_pages. */
#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu)
/*H:320
* The page table code is curly enough to need helper functions to keep it
* clear and clean. The kernel itself provides many of them; one advantage
* of insisting that the Guest and Host use the same CONFIG_PAE setting.
*
* There are two functions which return pointers to the shadow (aka "real")
* page tables.
*
* spgd_addr() takes the virtual address and returns a pointer to the top-level
* page directory entry (PGD) for that address. Since we keep track of several
* page tables, the "i" argument tells us which one we're interested in (it's
* usually the current one).
*/
/*
 * Locate the shadow top-level (PGD) entry covering @vaddr in the @i'th
 * shadow page table of @cpu's Guest, and return a pointer to it.
 */
static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr)
{
	unsigned int slot = pgd_index(vaddr);

#ifndef CONFIG_X86_PAE
	/*
	 * Without PAE, the Switcher's top-level slot and anything at or
	 * beyond it is off limits: the Guest is killed, and we fall back to
	 * slot 0 so the pointer we hand back still lies inside the table.
	 */
	if (slot >= SWITCHER_PGD_INDEX) {
		kill_guest(cpu, "attempt to access switcher pages");
		slot = 0;
	}
#endif
	/* Return a pointer to the slot'th pgd entry of the i'th page table. */
	return cpu->lg->pgdirs[i].pgdir + slot;
}
#ifdef CONFIG_X86_PAE
/*
 * Given a shadow PGD entry (which holds the frame number of a PMD page),
 * return a pointer to the PMD entry within that page which covers @vaddr.
 *
 * The caller must pass a present @spgd; anything else trips the BUG_ON().
 */
static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
{
	unsigned int slot = pmd_index(vaddr);
	pmd_t *pmd_page;

	/*
	 * The Switcher's PMD slot (and anything past it) in the Switcher's
	 * top-level slot is off limits: kill the Guest and fall back to
	 * slot 0.
	 */
	if (slot >= SWITCHER_PMD_INDEX &&
	    pgd_index(vaddr) == SWITCHER_PGD_INDEX) {
		kill_guest(cpu, "attempt to access switcher pages");
		slot = 0;
	}

	/* Calling this with a PGD entry that isn't present is a Host bug. */
	BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));

	/* Convert the PMD page's frame number to a kernel virtual address. */
	pmd_page = __va(pgd_pfn(spgd) << PAGE_SHIFT);

	return pmd_page + slot;
}
#endif
/*
* This routine then takes the page directory entry returned above, which
* contains the address of the page table entry (PTE) page. It then returns a