aboutsummaryrefslogtreecommitdiff
path: root/arch/x86_64/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64/mm')
-rw-r--r--arch/x86_64/mm/Makefile11
-rw-r--r--arch/x86_64/mm/extable.c35
-rw-r--r--arch/x86_64/mm/fault.c571
-rw-r--r--arch/x86_64/mm/init.c623
-rw-r--r--arch/x86_64/mm/ioremap.c270
-rw-r--r--arch/x86_64/mm/k8topology.c168
-rw-r--r--arch/x86_64/mm/numa.c304
-rw-r--r--arch/x86_64/mm/pageattr.c235
-rw-r--r--arch/x86_64/mm/srat.c225
9 files changed, 0 insertions, 2442 deletions
diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile
deleted file mode 100644
index 1d232a87f11..00000000000
--- a/arch/x86_64/mm/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-#
-# Makefile for the linux x86_64-specific parts of the memory manager.
-#
-
-obj-y := init.o fault.o ioremap.o extable.o pageattr.o
-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_NUMA) += numa.o
-obj-$(CONFIG_K8_NUMA) += k8topology.o
-obj-$(CONFIG_ACPI_NUMA) += srat.o
-
-hugetlbpage-y = ../../i386/mm/hugetlbpage.o
diff --git a/arch/x86_64/mm/extable.c b/arch/x86_64/mm/extable.c
deleted file mode 100644
index 2d78f9fb403..00000000000
--- a/arch/x86_64/mm/extable.c
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * linux/arch/x86_64/mm/extable.c
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <asm/uaccess.h>
-
-/* Simple binary search */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *first,
- const struct exception_table_entry *last,
- unsigned long value)
-{
- /* Work around a B stepping K8 bug */
- if ((value >> 32) == 0)
- value |= 0xffffffffUL << 32;
-
- while (first <= last) {
- const struct exception_table_entry *mid;
- long diff;
-
- mid = (last - first) / 2 + first;
- diff = mid->insn - value;
- if (diff == 0)
- return mid;
- else if (diff < 0)
- first = mid+1;
- else
- last = mid-1;
- }
- return NULL;
-}
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
deleted file mode 100644
index 13792721037..00000000000
--- a/arch/x86_64/mm/fault.c
+++ /dev/null
@@ -1,571 +0,0 @@
-/*
- * linux/arch/x86-64/mm/fault.c
- *
- * Copyright (C) 1995 Linus Torvalds
- * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
- */
-
-#include <linux/config.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/tty.h>
-#include <linux/vt_kern.h> /* For unblank_screen() */
-#include <linux/compiler.h>
-#include <linux/module.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/pgalloc.h>
-#include <asm/smp.h>
-#include <asm/tlbflush.h>
-#include <asm/proto.h>
-#include <asm/kdebug.h>
-#include <asm-generic/sections.h>
-#include <asm/kdebug.h>
-
-void bust_spinlocks(int yes)
-{
- int loglevel_save = console_loglevel;
- if (yes) {
- oops_in_progress = 1;
- } else {
-#ifdef CONFIG_VT
- unblank_screen();
-#endif
- oops_in_progress = 0;
- /*
- * OK, the message is on the console. Now we call printk()
- * without oops_in_progress set so that printk will give klogd
- * a poke. Hold onto your hats...
- */
- console_loglevel = 15; /* NMI oopser may have shut the console up */
- printk(" ");
- console_loglevel = loglevel_save;
- }
-}
-
-/* Sometimes the CPU reports invalid exceptions on prefetch.
- Check that here and ignore.
- Opcode checker based on code by Richard Brunner */
-static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
- unsigned long error_code)
-{
- unsigned char *instr;
- int scan_more = 1;
- int prefetch = 0;
- unsigned char *max_instr;
-
- /* If it was a exec fault ignore */
- if (error_code & (1<<4))
- return 0;
-
- instr = (unsigned char *)convert_rip_to_linear(current, regs);
- max_instr = instr + 15;
-
- if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
- return 0;
-
- while (scan_more && instr < max_instr) {
- unsigned char opcode;
- unsigned char instr_hi;
- unsigned char instr_lo;
-
- if (__get_user(opcode, instr))
- break;
-
- instr_hi = opcode & 0xf0;
- instr_lo = opcode & 0x0f;
- instr++;
-
- switch (instr_hi) {
- case 0x20:
- case 0x30:
- /* Values 0x26,0x2E,0x36,0x3E are valid x86
- prefixes. In long mode, the CPU will signal
- invalid opcode if some of these prefixes are
- present so we will never get here anyway */
- scan_more = ((instr_lo & 7) == 0x6);
- break;
-
- case 0x40:
- /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes
- Need to figure out under what instruction mode the
- instruction was issued ... */
- /* Could check the LDT for lm, but for now it's good
- enough to assume that long mode only uses well known
- segments or kernel. */
- scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
- break;
-
- case 0x60:
- /* 0x64 thru 0x67 are valid prefixes in all modes. */
- scan_more = (instr_lo & 0xC) == 0x4;
- break;
- case 0xF0:
- /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
- scan_more = !instr_lo || (instr_lo>>1) == 1;
- break;
- case 0x00:
- /* Prefetch instruction is 0x0F0D or 0x0F18 */
- scan_more = 0;
- if (__get_user(opcode, instr))
- break;
- prefetch = (instr_lo == 0xF) &&
- (opcode == 0x0D || opcode == 0x18);
- break;
- default:
- scan_more = 0;
- break;
- }
- }
- return prefetch;
-}
-
-static int bad_address(void *p)
-{
- unsigned long dummy;
- return __get_user(dummy, (unsigned long *)p);
-}
-
-void dump_pagetable(unsigned long address)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- asm("movq %%cr3,%0" : "=r" (pgd));
-
- pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
- pgd += pgd_index(address);
- printk("PGD %lx ", pgd_val(*pgd));
- if (bad_address(pgd)) goto bad;
- if (!pgd_present(*pgd)) goto ret;
-
- pud = __pud_offset_k((pud_t *)pgd_page(*pgd), address);
- if (bad_address(pud)) goto bad;
- printk("PUD %lx ", pud_val(*pud));
- if (!pud_present(*pud)) goto ret;
-
- pmd = pmd_offset(pud, address);
- if (bad_address(pmd)) goto bad;
- printk("PMD %lx ", pmd_val(*pmd));
- if (!pmd_present(*pmd)) goto ret;
-
- pte = pte_offset_kernel(pmd, address);
- if (bad_address(pte)) goto bad;
- printk("PTE %lx", pte_val(*pte));
-ret:
- printk("\n");
- return;
-bad:
- printk("BAD\n");
-}
-
-static const char errata93_warning[] =
-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
-KERN_ERR "******* Please consider a BIOS update.\n"
-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
-
-/* Workaround for K8 erratum #93 & buggy BIOS.
- BIOS SMM functions are required to use a specific workaround
- to avoid corruption of the 64bit RIP register on C stepping K8.
- A lot of BIOS that didn't get tested properly miss this.
- The OS sees this as a page fault with the upper 32bits of RIP cleared.
- Try to work around it here.
- Note we only handle faults in kernel here. */
-
-static int is_errata93(struct pt_regs *regs, unsigned long address)
-{
- static int warned;
- if (address != regs->rip)
- return 0;
- if ((address >> 32) != 0)
- return 0;
- address |= 0xffffffffUL << 32;
- if ((address >= (u64)_stext && address <= (u64)_etext) ||
- (address >= MODULES_VADDR && address <= MODULES_END)) {
- if (!warned) {
- printk(errata93_warning);
- warned = 1;
- }
- regs->rip = address;
- return 1;
- }
- return 0;
-}
-
-int unhandled_signal(struct task_struct *tsk, int sig)
-{
- if (tsk->pid == 1)
- return 1;
- /* Warn for strace, but not for gdb */
- if (!test_ti_thread_flag(tsk->thread_info, TIF_SYSCALL_TRACE) &&
- (tsk->ptrace & PT_PTRACED))
- return 0;
- return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) ||
- (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL);
-}
-
-static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
- unsigned long error_code)
-{
- oops_begin();
- printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
- current->comm, address);
- dump_pagetable(address);
- __die("Bad pagetable", regs, error_code);
- oops_end();
- do_exit(SIGKILL);
-}
-
-/*
- * Handle a fault on the vmalloc or module mapping area
- *
- * This assumes no large pages in there.
- */
-static int vmalloc_fault(unsigned long address)
-{
- pgd_t *pgd, *pgd_ref;
- pud_t *pud, *pud_ref;
- pmd_t *pmd, *pmd_ref;
- pte_t *pte, *pte_ref;
-
- /* Copy kernel mappings over when needed. This can also
- happen within a race in page table update. In the later
- case just flush. */
-
- pgd = pgd_offset(current->mm ?: &init_mm, address);
- pgd_ref = pgd_offset_k(address);
- if (pgd_none(*pgd_ref))
- return -1;
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
-
- /* Below here mismatches are bugs because these lower tables
- are shared */
-
- pud = pud_offset(pgd, address);
- pud_ref = pud_offset(pgd_ref, address);
- if (pud_none(*pud_ref))
- return -1;
- if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
- BUG();
- pmd = pmd_offset(pud, address);
- pmd_ref = pmd_offset(pud_ref, address);
- if (pmd_none(*pmd_ref))
- return -1;
- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
- BUG();
- pte_ref = pte_offset_kernel(pmd_ref, address);
- if (!pte_present(*pte_ref))
- return -1;
- pte = pte_offset_kernel(pmd, address);
- /* Don't use pte_page here, because the mappings can point
- outside mem_map, and the NUMA hash lookup cannot handle
- that. */
- if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
- BUG();
- __flush_tlb_all();
- return 0;
-}
-
-int page_fault_trace = 0;
-int exception_trace = 1;
-
-/*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
- *
- * error_code:
- * bit 0 == 0 means no page found, 1 means protection fault
- * bit 1 == 0 means read, 1 means write
- * bit 2 == 0 means kernel, 1 means user-mode
- * bit 3 == 1 means fault was an instruction fetch
- */
-asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
-{
- struct task_struct *tsk;
- struct mm_struct *mm;
- struct vm_area_struct * vma;
- unsigned long address;
- const struct exception_table_entry *fixup;
- int write;
- siginfo_t info;
-
-#ifdef CONFIG_CHECKING
- {
- unsigned long gs;
- struct x8664_pda *pda = cpu_pda + stack_smp_processor_id();
- rdmsrl(MSR_GS_BASE, gs);
- if (gs != (unsigned long)pda) {
- wrmsrl(MSR_GS_BASE, pda);
- printk("page_fault: wrong gs %lx expected %p\n", gs, pda);
- }
- }
-#endif
-
- /* get the address */
- __asm__("movq %%cr2,%0":"=r" (address));
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
- SIGSEGV) == NOTIFY_STOP)
- return;
-
- if (likely(regs->eflags & X86_EFLAGS_IF))
- local_irq_enable();
-
- if (unlikely(page_fault_trace))
- printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
- regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
-
- tsk = current;
- mm = tsk->mm;
- info.si_code = SEGV_MAPERR;
-
-
- /*
- * We fault-in kernel-space virtual memory on-demand. The
- * 'reference' page table is init_mm.pgd.
- *
- * NOTE! We MUST NOT take any locks for this case. We may
- * be in an interrupt or a critical region, and should
- * only copy the information from the master page table,
- * nothing more.
- *
- * This verifies that the fault happens in kernel space
- * (error_code & 4) == 0, and that the fault was not a
- * protection error (error_code & 1) == 0.
- */
- if (unlikely(address >= TASK_SIZE64)) {
- if (!(error_code & 5) &&
- ((address >= VMALLOC_START && address < VMALLOC_END) ||
- (address >= MODULES_VADDR && address < MODULES_END))) {
- if (vmalloc_fault(address) < 0)
- goto bad_area_nosemaphore;
- return;
- }
- /*
- * Don't take the mm semaphore here. If we fixup a prefetch
- * fault we could otherwise deadlock.
- */
- goto bad_area_nosemaphore;
- }
-
- if (unlikely(error_code & (1 << 3)))
- pgtable_bad(address, regs, error_code);
-
- /*
- * If we're in an interrupt or have no user
- * context, we must not take the fault..
- */
- if (unlikely(in_atomic() || !mm))
- goto bad_area_nosemaphore;
-
- again:
- /* When running in the kernel we expect faults to occur only to
- * addresses in user space. All other faults represent errors in the
- * kernel and should generate an OOPS. Unfortunatly, in the case of an
- * erroneous fault occuring in a code path which already holds mmap_sem
- * we will deadlock attempting to validate the fault against the
- * address space. Luckily the kernel only validly references user
- * space from well defined areas of code, which are listed in the
- * exceptions table.
- *
- * As the vast majority of faults will be valid we will only perform
- * the source reference check when there is a possibilty of a deadlock.
- * Attempt to lock the address space, if we cannot we then validate the
- * source. If this is invalid we can skip the address space check,
- * thus avoiding the deadlock.
- */
- if (!down_read_trylock(&mm->mmap_sem)) {
- if ((error_code & 4) == 0 &&
- !search_exception_tables(regs->rip))
- goto bad_area_nosemaphore;
- down_read(&mm->mmap_sem);
- }
-
- vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (likely(vma->vm_start <= address))
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (error_code & 4) {
- // XXX: align red zone size with ABI
- if (address + 128 < regs->rsp)
- goto bad_area;
- }
- if (expand_stack(vma, address))
- goto bad_area;
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
-good_area:
- info.si_code = SEGV_ACCERR;
- write = 0;
- switch (error_code & 3) {
- default: /* 3: write, present */
- /* fall through */
- case 2: /* write, not present */
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- write++;
- break;
- case 1: /* read, present */
- goto bad_area;
- case 0: /* read, not present */
- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
- goto bad_area;
- }
-
- /*
- * If for any reason at all we couldn't handle the fault,
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
- */
- switch (handle_mm_fault(mm, vma, address, write)) {
- case 1:
- tsk->min_flt++;
- break;
- case 2:
- tsk->maj_flt++;
- break;
- case 0:
- goto do_sigbus;
- default:
- goto out_of_memory;
- }
-
- up_read(&mm->mmap_sem);
- return;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
- up_read(&mm->mmap_sem);
-
-bad_area_nosemaphore:
- /* User mode accesses just cause a SIGSEGV */
- if (error_code & 4) {
- if (is_prefetch(regs, address, error_code))
- return;
-
- /* Work around K8 erratum #100 K8 in compat mode
- occasionally jumps to illegal addresses >4GB. We
- catch this here in the page fault handler because
- these addresses are not reachable. Just detect this
- case and return. Any code segment in LDT is
- compatibility mode. */
- if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
- (address >> 32))
- return;
-
- if (exception_trace && unhandled_signal(tsk, SIGSEGV)) {
- printk(
- "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
- tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
- tsk->comm, tsk->pid, address, regs->rip,
- regs->rsp, error_code);
- }
-
- tsk->thread.cr2 = address;
- /* Kernel addresses are always protection faults */
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
- tsk->thread.trap_no = 14;
- info.si_signo = SIGSEGV;
- info.si_errno = 0;
- /* info.si_code has been set above */
- info.si_addr = (void __user *)address;
- force_sig_info(SIGSEGV, &info, tsk);
- return;
- }
-
-no_context:
-
- /* Are we prepared to handle this kernel fault? */
- fixup = search_exception_tables(regs->rip);
- if (fixup) {
- regs->rip = fixup->fixup;
- return;
- }
-
- /*
- * Hall of shame of CPU/BIOS bugs.
- */
-
- if (is_prefetch(regs, address, error_code))
- return;
-
- if (is_errata93(regs, address))
- return;
-
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- */
-
- oops_begin();
-
- if (address < PAGE_SIZE)
- printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
- else
- printk(KERN_ALERT "Unable to handle kernel paging request");
- printk(" at %016lx RIP: \n" KERN_ALERT,address);
- printk_address(regs->rip);
- printk("\n");
- dump_pagetable(address);
- __die("Oops", regs, error_code);
- /* Executive summary in case the body of the oops scrolled away */
- printk(KERN_EMERG "CR2: %016lx\n", address);
- oops_end();
- do_exit(SIGKILL);
-
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
- up_read(&mm->mmap_sem);
- if (current->pid == 1) {
- yield();
- goto again;
- }
- printk("VM: killing process %s\n", tsk->comm);
- if (error_code & 4)
- do_exit(SIGKILL);
- goto no_context;
-
-do_sigbus:
- up_read(&mm->mmap_sem);
-
- /* Kernel mode? Handle exceptions or die */
- if (!(error_code & 4))
- goto no_context;
-
- tsk->thread.cr2 = address;
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = 14;
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, tsk);
- return;
-}
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
deleted file mode 100644
index 72e4b364ed7..00000000000
--- a/arch/x86_64/mm/init.c
+++ /dev/null
@@ -1,623 +0,0 @@
-/*
- * linux/arch/x86_64/mm/init.c
- *
- * Copyright (C) 1995 Linus Torvalds
- * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
- * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
- */
-
-#include <linux/config.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/smp.h>
-#include <linux/init.h>
-#include <linux/pagemap.h>
-#include <linux/bootmem.h>
-#include <linux/proc_fs.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/dma.h>
-#include <asm/fixmap.h>
-#include <asm/e820.h>
-#include <asm/apic.h>
-#include <asm/tlb.h>
-#include <asm/mmu_context.h>
-#include <asm/proto.h>
-#include <asm/smp.h>
-
-#ifndef Dprintk
-#define Dprintk(x...)
-#endif
-
-#ifdef CONFIG_GART_IOMMU
-extern int swiotlb;
-#endif
-
-extern char _stext[];
-
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
-/*
- * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
- * physical space so we can cache the place of the first one and move
- * around without checking the pgd every time.
- */
-
-void show_mem(void)
-{
- int i, total = 0, reserved = 0;
- int shared = 0, cached = 0;
- pg_data_t *pgdat;
- struct page *page;
-
- printk("Mem-info:\n");
- show_free_areas();
- printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
-
- for_each_pgdat(pgdat) {
- for (i = 0; i < pgdat->node_spanned_pages; ++i) {
- page = pfn_to_page(pgdat->node_start_pfn + i);
- total++;
- if (PageReserved(page))
- reserved++;
- else if (PageSwapCache(page))
- cached++;
- else if (page_count(page))
- shared += page_count(page) - 1;
- }
- }
- printk("%d pages of RAM\n", total);
- printk("%d reserved pages\n",reserved);
- printk("%d pages shared\n",shared);
- printk("%d pages swap cached\n",cached);
-}
-
-/* References to section boundaries */
-
-extern char _text, _etext, _edata, __bss_start, _end[];
-extern char __init_begin, __init_end;
-
-int after_bootmem;
-
-static void *spp_getpage(void)
-{
- void *ptr;
- if (after_bootmem)
- ptr = (void *) get_zeroed_page(GFP_ATOMIC);
- else
- ptr = alloc_bootmem_pages(PAGE_SIZE);
- if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
- panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
-
- Dprintk("spp_getpage %p\n", ptr);
- return ptr;
-}
-
-static void set_pte_phys(unsigned long vaddr,
- unsigned long phys, pgprot_t prot)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte, new_pte;
-
- Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
-
- pgd = pgd_offset_k(vaddr);
- if (pgd_none(*pgd)) {
- printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
- return;
- }
- pud = pud_offset(pgd, vaddr);
- if (pud_none(*pud)) {
- pmd = (pmd_t *) spp_getpage();
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
- if (pmd != pmd_offset(pud, 0)) {
- printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
- return;
- }
- }
- pmd = pmd_offset(pud, vaddr);
- if (pmd_none(*pmd)) {
- pte = (pte_t *) spp_getpage();
- set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
- if (pte != pte_offset_kernel(pmd, 0)) {
- printk("PAGETABLE BUG #02!\n");
- return;
- }
- }
- new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
-
- pte = pte_offset_kernel(pmd, vaddr);
- if (!pte_none(*pte) &&
- pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
- pte_ERROR(*pte);
- set_pte(pte, new_pte);
-
- /*
- * It's enough to flush this one mapping.
- * (PGE mappings get flushed as well)
- */
- __flush_tlb_one(vaddr);
-}
-
-/* NOTE: this is meant to be run only at boot */
-void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
-{
- unsigned long address = __fix_to_virt(idx);
-
- if (idx >= __end_of_fixed_addresses) {
- printk("Invalid __set_fixmap\n");
- return;
- }
- set_pte_phys(address, phys, prot);
-}
-
-unsigned long __initdata table_start, table_end;
-
-extern pmd_t temp_boot_pmds[];
-
-static struct temp_map {
- pmd_t *pmd;
- void *address;
- int allocated;
-} temp_mappings[] __initdata = {
- { &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
- { &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
- {}
-};
-
-static __init void *alloc_low_page(int *index, unsigned long *phys)
-{
- struct temp_map *ti;
- int i;
- unsigned long pfn = table_end++, paddr;
- void *adr;
-
- if (pfn >= end_pfn)
- panic("alloc_low_page: ran out of memory");
- for (i = 0; temp_mappings[i].allocated; i++) {
- if (!temp_mappings[i].pmd)
- panic("alloc_low_page: ran out of temp mappings");
- }
- ti = &temp_mappings[i];
- paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
- set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
- ti->allocated = 1;
- __flush_tlb();
- adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
- *index = i;
- *phys = pfn * PAGE_SIZE;
- return adr;
-}
-
-static __init void unmap_low_page(int i)
-{
- struct temp_map *ti = &temp_mappings[i];
- set_pmd(ti->pmd, __pmd(0));
- ti->allocated = 0;
-}
-
-static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
-{
- long i, j;
-
- i = pud_index(address);
- pud = pud + i;
- for (; i < PTRS_PER_PUD; pud++, i++) {
- int map;
- unsigned long paddr, pmd_phys;
- pmd_t *pmd;
-
- paddr = address + i*PUD_SIZE;
- if (paddr >= end) {
- for (; i < PTRS_PER_PUD; i++, pud++)
- set_pud(pud, __pud(0));
- break;
- }
-
- if (!e820_mapped(paddr, paddr+PUD_SIZE, 0)) {
- set_pud(pud, __pud(0));
- continue;
- }
-
- pmd = alloc_low_page(&map, &pmd_phys);
- set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
- for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
- unsigned long pe;
-
- if (paddr >= end) {
- for (; j < PTRS_PER_PMD; j++, pmd++)
- set_pmd(pmd, __pmd(0));
- break;
- }
- pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr;
- pe &= __supported_pte_mask;
- set_pmd(pmd, __pmd(pe));
- }
- unmap_low_page(map);
- }
- __flush_tlb();
-}
-
-static void __init find_early_table_space(unsigned long end)
-{
- unsigned long puds, pmds, tables;
-
- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
- tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
- round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
-
- table_start = find_e820_area(0x8000, __pa_symbol(&_text), tables);
- if (table_start == -1UL)
- panic("Cannot find space for the kernel page tables");
-
- table_start >>= PAGE_SHIFT;
- table_end = table_start;
-}
-
-/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
- This runs before bootmem is initialized and gets pages directly from the
- physical memory. To access them they are temporarily mapped. */
-void __init init_memory_mapping(unsigned long start, unsigned long end)
-{
- unsigned long next;
-
- Dprintk("init_memory_mapping\n");
-
- /*
- * Find space for the kernel direct mapping tables.
- * Later we should allocate these tables in the local node of the memory
- * mapped. Unfortunately this is done currently before the nodes are
- * discovered.
- */
- find_early_table_space(end);
-
- start = (unsigned long)__va(start);
- end = (unsigned long)__va(end);
-
- for (; start < end; start = next) {
- int map;
- unsigned long pud_phys;
- pud_t *pud = alloc_low_page(&map, &pud_phys);
- next = start + PGDIR_SIZE;
- if (next > end)
- next = end;
- phys_pud_init(pud, __pa(start), __pa(next));
- set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
- unmap_low_page(map);
- }
-
- asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
- __flush_tlb_all();
- early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
- table_start<<PAGE_SHIFT,
- table_end<<PAGE_SHIFT);
-}
-
-extern struct x8664_pda cpu_pda[NR_CPUS];
-
-/* Assumes all CPUs still execute in init_mm */
-void zap_low_mappings(void)
-{
- pgd_t *pgd = pgd_offset_k(0UL);
- pgd_clear(pgd);
- flush_tlb_all();
-}
-
-#ifndef CONFIG_NUMA
-void __init paging_init(void)
-{
- {
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
- unsigned int max_dma;
-
- max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
- if (end_pfn < max_dma)
- zones_size[ZONE_DMA] = end_pfn;
- else {
- zones_size[ZONE_DMA] = max_dma;
- zones_size[ZONE_NORMAL] = end_pfn - max_dma;
- }
- free_area_init(zones_size);
- }
- return;
-}
-#endif
-
-/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
- from the CPU leading to inconsistent cache lines. address and size
- must be aligned to 2MB boundaries.
- Does nothing when the mapping doesn't exist. */
-void __init clear_kernel_mapping(unsigned long address, unsigned long size)
-{
- unsigned long end = address + size;
-
- BUG_ON(address & ~LARGE_PAGE_MASK);
- BUG_ON(size & ~LARGE_PAGE_MASK);
-
- for (; address < end; address += LARGE_PAGE_SIZE) {
- pgd_t *pgd = pgd_offset_k(address);
- pud_t *pud;
- pmd_t *pmd;
- if (pgd_none(*pgd))
- continue;
- pud = pud_offset(pgd, address);
- if (pud_none(*pud))
- continue;
- pmd = pmd_offset(pud, address);
- if (!pmd || pmd_none(*pmd))
- continue;
- if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
- /* Could handle this, but it should not happen currently. */
- printk(KERN_ERR
- "clear_kernel_mapping: mapping has been split. will leak memory\n");
- pmd_ERROR(*pmd);
- }
- set_pmd(pmd, __pmd(0));
- }
- __flush_tlb_all();
-}
-
-static inline int page_is_ram (unsigned long pagenr)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- unsigned long addr, end;
-
- if (e820.map[i].type != E820_RAM) /* not usable memory */
- continue;
- /*
- * !!!FIXME!!! Some BIOSen report areas as RAM that
- * are not. Notably the 640->1Mb area. We need a sanity
- * check here.
- */
- addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
- end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
- if ((pagenr >= addr) && (pagenr < end))
- return 1;
- }
- return 0;
-}
-
-extern int swiotlb_force;
-
-static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
- kcore_vsyscall;
-
-void __init mem_init(void)
-{
- int codesize, reservedpages, datasize, initsize;
- int tmp;
-
-#ifdef CONFIG_SWIOTLB
- if (swiotlb_force)
- swiotlb = 1;
- if (!iommu_aperture &&
- (end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu))
- swiotlb = 1;
- if (swiotlb)
- swiotlb_init();
-#endif
-
- /* How many end-of-memory variables you have, grandma! */
- max_low_pfn = end_pfn;
- max_pfn = end_pfn;
- num_physpages = end_pfn;
- high_memory = (void *) __va(end_pfn * PAGE_SIZE);
-
- /* clear the zero-page */
- memset(empty_zero_page, 0, PAGE_SIZE);
-
- reservedpages = 0;
-
- /* this will put all low memory onto the freelists */
-#ifdef CONFIG_NUMA
- totalram_pages += numa_free_all_bootmem();
- tmp = 0;
- /* should count reserved pages here for all nodes */
-#else
-
-#ifdef CONFIG_FLATMEM
- max_mapnr = end_pfn;
- if (!mem_map) BUG();
-#endif
-
- totalram_pages += free_all_bootmem();
-
- for (tmp = 0; tmp < end_pfn; tmp++)
- /*
- * Only count reserved RAM pages
- */
- if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
- reservedpages++;
-#endif
-
- after_bootmem = 1;
-
- codesize = (unsigned long) &_etext - (unsigned long) &_text;
- datasize = (unsigned long) &_edata - (unsigned long) &_etext;
- initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
- /* Register memory areas for /proc/kcore */
- kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
- kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
- VMALLOC_END-VMALLOC_START);
- kclist_add(&kcore_kernel, &_stext, _end - _stext);
- kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
- kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
- VSYSCALL_END - VSYSCALL_START);
-
- printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
- (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
- end_pfn << (PAGE_SHIFT-10),
- codesize >> 10,
- reservedpages << (PAGE_SHIFT-10),
- datasize >> 10,
- initsize >> 10);
-
- /*
- * Subtle. SMP is doing its boot stuff late (because it has to
- * fork idle threads) - but it also needs low mappings for the
- * protected-mode entry to work. We zap these entries only after
- * the WP-bit has been tested.
- */
-#ifndef CONFIG_SMP
- zap_low_mappings();
-#endif
-}
-
-extern char __initdata_begin[], __initdata_end[];
-
-void free_initmem(void)
-{
- unsigned long addr;
-
- addr = (unsigned long)(&__init_begin);
- for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
- memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
- }
- memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
- printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
-}
-
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
- if (start < (unsigned long)&_end)
- return;
- printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
- free_page(start);
- totalram_pages++;
- }
-}
-#endif
-
-void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
-{
- /* Should check here against the e820 map to avoid double free */
-#ifdef CONFIG_NUMA
- int nid = phys_to_nid(phys);
- reserve_bootmem_node(NODE_DATA(nid), phys, len);
-#else
- reserve_bootmem(phys, len);
-#endif
-}
-
-int kern_addr_valid(unsigned long addr)
-{
- unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- if (above != 0 && above != -1UL)
- return 0;
-
- pgd = pgd_offset_k(addr);
- if (pgd_none(*pgd))
- return 0;
-
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud))
- return 0;
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
- return 0;
- if (pmd_large(*pmd))
- return pfn_valid(pmd_pfn(*pmd));
-
- pte = pte_offset_kernel(pmd, addr);
- if (pte_none(*pte))
- return 0;
- return pfn_valid(pte_pfn(*pte));
-}
-
-#ifdef CONFIG_SYSCTL
-#include <linux/sysctl.h>
-
-extern int exception_trace, page_fault_trace;
-
-static ctl_table debug_table2[] = {
- { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
- proc_dointvec },
-#ifdef CONFIG_CHECKING
- { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL,
- proc_dointvec },
-#endif
- { 0, }
-};
-
-static ctl_table debug_root_table2[] = {
- { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
- .child = debug_table2 },
- { 0 },
-};
-
-static __init int x8664_sysctl_init(void)
-{
- register_sysctl_table(debug_root_table2, 1);
- return 0;
-}
-__initcall(x8664_sysctl_init);
-#endif
-
-/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only
- covers the 64bit vsyscall page now. 32bit has a real VMA now and does
- not need special handling anymore. */
-
-static struct vm_area_struct gate_vma = {
- .vm_start = VSYSCALL_START,
- .vm_end = VSYSCALL_END,
- .vm_page_prot = PAGE_READONLY
-};
-
-struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
-{
-#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(tsk, TIF_IA32))
- return NULL;
-#endif
- return &gate_vma;
-}
-
-int in_gate_area(struct task_struct *task, unsigned long addr)
-{
- struct vm_area_struct *vma = get_gate_vma(task);
- if (!vma)
- return 0;
- return (addr >= vma->vm_start) && (addr < vma->vm_end);
-}
-
-/* Use this when you have no reliable task/vma, typically from interrupt
- * context. It is less reliable than using the task's vma and may give
- * false positives.
- */
-int in_gate_area_no_task(unsigned long addr)
-{
- return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
-}
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
deleted file mode 100644
index 6972df480d2..00000000000
--- a/arch/x86_64/mm/ioremap.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * arch/x86_64/mm/ioremap.c
- *
- * Re-map IO memory to kernel address space so that we can access it.
- * This is needed for high PCI addresses that aren't mapped in the
- * 640k-1MB IO memory area on PC's
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- */
-
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <asm/io.h>
-#include <asm/pgalloc.h>
-#include <asm/fixmap.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/proto.h>
-
-#define ISA_START_ADDRESS 0xa0000
-#define ISA_END_ADDRESS 0x100000
-
-static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
- unsigned long phys_addr, unsigned long flags)
-{
- unsigned long end;
- unsigned long pfn;
-
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
- if (address >= end)
- BUG();
- pfn = phys_addr >> PAGE_SHIFT;
- do {
- if (!pte_none(*pte)) {
- printk("remap_area_pte: page already exists\n");
- BUG();
- }
- set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW |
- _PAGE_GLOBAL | _PAGE_DIRTY | _PAGE_ACCESSED | flags)));
- address += PAGE_SIZE;
- pfn++;
- pte++;
- } while (address && (address < end));
-}
-
-static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
- unsigned long phys_addr, unsigned long flags)
-{
- unsigned long end;
-
- address &= ~PUD_MASK;
- end = address + size;
- if (end > PUD_SIZE)
- end = PUD_SIZE;
- phys_addr -= address;
- if (address >= end)
- BUG();
- do {
- pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
- if (!pte)
- return -ENOMEM;
- remap_area_pte(pte, address, end - address, address + phys_addr, flags);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
- return 0;
-}
-
-static inline int remap_area_pud(pud_t * pud, unsigned long address, unsigned long size,
- unsigned long phys_addr, unsigned long flags)
-{
- unsigned long end;
-
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- phys_addr -= address;
- if (address >= end)
- BUG();
- do {
- pmd_t * pmd = pmd_alloc(&init_mm, pud, address);
- if (!pmd)
- return -ENOMEM;
- remap_area_pmd(pmd, address, end - address, address + phys_addr, flags);
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
- return 0;
-}
-
-static int remap_area_pages(unsigned long address, unsigned long phys_addr,
- unsigned long size, unsigned long flags)
-{
- int error;
- pgd_t *pgd;
- unsigned long end = address + size;
-
- phys_addr -= address;
- pgd = pgd_offset_k(address);
- flush_cache_all();
- if (address >= end)
- BUG();
- spin_lock(&init_mm.page_table_lock);
- do {
- pud_t *pud;
- pud = pud_alloc(&init_mm, pgd, address);
- error = -ENOMEM;
- if (!pud)
- break;
- if (remap_area_pud(pud, address, end - address,
- phys_addr + address, flags))
- break;
- error = 0;
- address = (address + PGDIR_SIZE) & PGDIR_MASK;
- pgd++;
- } while (address && (address < end));
- spin_unlock(&init_mm.page_table_lock);
- flush_tlb_all();
- return error;
-}
-
-/*
- * Fix up the linear direct mapping of the kernel to avoid cache attribute
- * conflicts.
- */
-static int
-ioremap_change_attr(unsigned long phys_addr, unsigned long size,
- unsigned long flags)
-{
- int err = 0;
- if (phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) {
- unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long vaddr = (unsigned long) __va(phys_addr);
-
- /*
- * Must use a address here and not struct page because the phys addr
- * can be a in hole between nodes and not have an memmap entry.
- */
- err = change_page_attr_addr(vaddr,npages,__pgprot(__PAGE_KERNEL|flags));
- if (!err)
- global_flush_tlb();
- }
- return err;
-}
-
-/*
- * Generic mapping function
- */
-
-/*
- * Remap an arbitrary physical address space into the kernel virtual
- * address space. Needed when the kernel wants to access high addresses
- * directly.
- *
- * NOTE! We need to allow non-page-aligned mappings too: we will obviously
- * have to convert them into an offset in a page-aligned mapping, but the
- * caller shouldn't need to know that small detail.
- */
-void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
-{
- void * addr;
- struct vm_struct * area;
- unsigned long offset, last_addr;
-
- /* Don't allow wraparound or zero size */
- last_addr = phys_addr + size - 1;
- if (!size || last_addr < phys_addr)
- return NULL;
-
- /*
- * Don't remap the low PCI/ISA area, it's always mapped..
- */
- if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
- return (__force void __iomem *)phys_to_virt(phys_addr);
-
-#ifdef CONFIG_FLATMEM
- /*
- * Don't allow anybody to remap normal RAM that we're using..
- */
- if (last_addr < virt_to_phys(high_memory)) {
- char *t_addr, *t_end;
- struct page *page;
-
- t_addr = __va(phys_addr);
- t_end = t_addr + (size - 1);
-
- for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
- if(!PageReserved(page))
- return NULL;
- }
-#endif
-
- /*
- * Mappings have to be page-aligned
- */
- offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
- size = PAGE_ALIGN(last_addr+1) - phys_addr;
-
- /*
- * Ok, go for it..
- */
- area = get_vm_area(size, VM_IOREMAP | (flags << 20));
- if (!area)
- return NULL;
- area->phys_addr = phys_addr;
- addr = area->addr;
- if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
- remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
- return NULL;
- }
- if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) {
- area->flags &= 0xffffff;
- vunmap(addr);
- return NULL;
- }
- return (__force void __iomem *) (offset + (char *)addr);
-}
-
-/**
- * ioremap_nocache - map bus memory into CPU space
- * @offset: bus address of the memory
- * @size: size of the resource to map
- *
- * ioremap_nocache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * on the CPU as well as honouring existing caching rules from things like
- * the PCI bus. Note that there are other caches and buffers on many
- * busses. In particular driver authors should read up on PCI writes
- *
- * It's useful if some control registers are in such an area and
- * write combining or read caching is not desirable:
- *
- * Must be freed with iounmap.
- */
-
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
-{
- return __ioremap(phys_addr, size, _PAGE_PCD);
-}
-
-void iounmap(volatile void __iomem *addr)
-{
- struct vm_struct *p;
-
- if (addr <= high_memory)
- return;
- if (addr >= phys_to_virt(ISA_START_ADDRESS) &&
- addr < phys_to_virt(ISA_END_ADDRESS))
- return;
-
- write_lock(&vmlist_lock);
- p = __remove_vm_area((void *)((unsigned long)addr & PAGE_MASK));
- if (!p)
- printk("iounmap: bad address %p\n", addr);
- else if (p->flags >> 20)
- ioremap_change_attr(p->phys_addr, p->size, 0);
- write_unlock(&vmlist_lock);
- kfree(p);
-}
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
deleted file mode 100644
index ec35747aacd..00000000000
--- a/arch/x86_64/mm/k8topology.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * AMD K8 NUMA support.
- * Discover the memory map and associated nodes.
- *
- * This version reads it directly from the K8 northbridge.
- *
- * Copyright 2002,2003 Andi Kleen, SuSE Labs.
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/nodemask.h>
-#include <asm/io.h>
-#include <linux/pci_ids.h>
-#include <asm/types.h>
-#include <asm/mmzone.h>
-#include <asm/proto.h>
-#include <asm/e820.h>
-#include <asm/pci-direct.h>
-#include <asm/numa.h>
-
-static __init int find_northbridge(void)
-{
- int num;
-
- for (num = 0; num < 32; num++) {
- u32 header;
-
- header = read_pci_config(0, num, 0, 0x00);
- if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)))
- continue;
-
- header = read_pci_config(0, num, 1, 0x00);
- if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)))
- continue;
- return num;
- }
-
- return -1;
-}
-
-int __init k8_scan_nodes(unsigned long start, unsigned long end)
-{
- unsigned long prevbase;
- struct node nodes[8];
- int nodeid, i, nb;
- int found = 0;
- u32 reg;
- unsigned numnodes;
- nodemask_t nodes_parsed;
-
- nodes_clear(nodes_parsed);
-
- nb = find_northbridge();
- if (nb < 0)
- return nb;
-
- printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
-
- reg = read_pci_config(0, nb, 0, 0x60);
- numnodes = ((reg >> 4) & 0xF) + 1;
-
- printk(KERN_INFO "Number of nodes %d\n", numnodes);
-
- memset(&nodes,0,sizeof(nodes));
- prevbase = 0;
- for (i = 0; i < 8; i++) {
- unsigned long base,limit;
-
- base = read_pci_config(0, nb, 1, 0x40 + i*8);
- limit = read_pci_config(0, nb, 1, 0x44 + i*8);
-
- nodeid = limit & 7;
- if ((base & 3) == 0) {
- if (i < numnodes)
- printk("Skipping disabled node %d\n", i);
- continue;
- }
- if (nodeid >= numnodes) {
- printk("Ignoring excess node %d (%lx:%lx)\n", nodeid,
- base, limit);
- continue;
- }
-
- if (!limit) {
- printk(KERN_INFO "Skipping node entry %d (base %lx)\n", i,
- base);
- continue;
- }
- if ((base >> 8) & 3 || (limit >> 8) & 3) {
- printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n",
- nodeid, (base>>8)&3, (limit>>8) & 3);
- return -1;
- }
- if (node_isset(nodeid, nodes_parsed)) {
- printk(KERN_INFO "Node %d already present. Skipping\n",
- nodeid);
- continue;
- }
-
- limit >>= 16;
- limit <<= 24;
- limit |= (1<<24)-1;
-
- if (limit > end_pfn << PAGE_SHIFT)
- limit = end_pfn << PAGE_SHIFT;
- if (limit <= base)
- continue;
-
- base >>= 16;
- base <<= 24;
-
- if (base < start)
- base = start;
- if (limit > end)
- limit = end;
- if (limit == base) {
- printk(KERN_ERR "Empty node %d\n", nodeid);
- continue;
- }
- if (limit < base) {
- printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n",
- nodeid, base, limit);
- continue;
- }
-
- /* Could sort here, but pun for now. Should not happen anyroads. */
- if (prevbase > base) {
- printk(KERN_ERR "Node map not sorted %lx,%lx\n",
- prevbase,base);
- return -1;
- }
-
- printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n",
- nodeid, base, limit);
-
- found++;
-
- nodes[nodeid].start = base;
- nodes[nodeid].end = limit;
-
- prevbase = base;
-
- node_set(nodeid, nodes_parsed);
- }
-
- if (!found)
- return -1;
-
- memnode_shift = compute_hash_shift(nodes, numnodes);
- if (memnode_shift < 0) {
- printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
- return -1;
- }
- printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
-
- for (i = 0; i < 8; i++) {
- if (nodes[i].start != nodes[i].end) {
- /* assume 1:1 NODE:CPU */
- cpu_to_node[i] = i;
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
- }
-
- numa_init_array();
- return 0;
-}
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
deleted file mode 100644
index 70cb2904a90..00000000000
--- a/arch/x86_64/mm/numa.c
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Generic VM initialization for x86-64 NUMA setups.
- * Copyright 2002,2003 Andi Kleen, SuSE Labs.
- */
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/mmzone.h>
-#include <linux/ctype.h>
-#include <linux/module.h>
-#include <linux/nodemask.h>
-
-#include <asm/e820.h>
-#include <asm/proto.h>
-#include <asm/dma.h>
-#include <asm/numa.h>
-#include <asm/acpi.h>
-
-#ifndef Dprintk
-#define Dprintk(x...)
-#endif
-
-struct pglist_data *node_data[MAX_NUMNODES];
-bootmem_data_t plat_node_bdata[MAX_NUMNODES];
-
-int memnode_shift;
-u8 memnodemap[NODEMAPSIZE];
-
-unsigned char cpu_to_node[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
-cpumask_t node_to_cpumask[MAX_NUMNODES];
-
-int numa_off __initdata;
-
-int __init compute_hash_shift(struct node *nodes, int numnodes)
-{
- int i;
- int shift = 20;
- unsigned long addr,maxend=0;
-
- for (i = 0; i < numnodes; i++)
- if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend))
- maxend = nodes[i].end;
-
- while ((1UL << shift) < (maxend / NODEMAPSIZE))
- shift++;
-
- printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n",
- shift,maxend);
- memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE);
- for (i = 0; i < numnodes; i++) {
- if (nodes[i].start == nodes[i].end)
- continue;
- for (addr = nodes[i].start;
- addr < nodes[i].end;
- addr += (1UL << shift)) {
- if (memnodemap[addr >> shift] != 0xff) {
- printk(KERN_INFO
- "Your memory is not aligned you need to rebuild your kernel "
- "with a bigger NODEMAPSIZE shift=%d adder=%lu\n",
- shift,addr);
- return -1;
- }
- memnodemap[addr >> shift] = i;
- }
- }
- return shift;
-}
-
-#ifdef CONFIG_SPARSEMEM
-int early_pfn_to_nid(unsigned long pfn)
-{
- return phys_to_nid(pfn << PAGE_SHIFT);
-}
-#endif
-
-/* Initialize bootmem allocator for a node */
-void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
-{
- unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start;
- unsigned long nodedata_phys;
- const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
-
- start = round_up(start, ZONE_ALIGN);
-
- printk("Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end);
-
- start_pfn = start >> PAGE_SHIFT;
- end_pfn = end >> PAGE_SHIFT;
-
- memory_present(nodeid, start_pfn, end_pfn);
- nodedata_phys = find_e820_area(start, end, pgdat_size);
- if (nodedata_phys == -1L)
- panic("Cannot find memory pgdat in node %d\n", nodeid);
-
- Dprintk("nodedata_phys %lx\n", nodedata_phys);
-
- node_data[nodeid] = phys_to_virt(nodedata_phys);
- memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
- NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
- NODE_DATA(nodeid)->node_start_pfn = start_pfn;
- NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
-
- /* Find a place for the bootmem map */
- bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
- bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
- bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT);
- if (bootmap_start == -1L)
- panic("Not enough continuous space for bootmap on node %d", nodeid);
- Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
-
- bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
- bootmap_start >> PAGE_SHIFT,
- start_pfn, end_pfn);
-
- e820_bootmem_free(NODE_DATA(nodeid), start, end);
-
- reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
- reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
- node_set_online(nodeid);
-}
-
-/* Initialize final allocator for a zone */
-void __init setup_node_zones(int nodeid)
-{
- unsigned long start_pfn, end_pfn;
- unsigned long zones[MAX_NR_ZONES];
- unsigned long dma_end_pfn;
-
- memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES);
-
- start_pfn = node_start_pfn(nodeid);
- end_pfn = node_end_pfn(nodeid);
-
- Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn);
-
- /* All nodes > 0 have a zero length zone DMA */
- dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT;
- if (start_pfn < dma_end_pfn) {
- zones[ZONE_DMA] = dma_end_pfn - start_pfn;
- zones[ZONE_NORMAL] = end_pfn - dma_end_pfn;
- } else {
- zones[ZONE_NORMAL] = end_pfn - start_pfn;
- }
-
- free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
- start_pfn, NULL);
-}
-
-void __init numa_init_array(void)
-{
- int rr, i;
- /* There are unfortunately some poorly designed mainboards around
- that only connect memory to a single CPU. This breaks the 1:1 cpu->node
- mapping. To avoid this fill in the mapping for all possible
- CPUs, as the number of CPUs is not known yet.
- We round robin the existing nodes. */
- rr = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_to_node[i] != NUMA_NO_NODE)
- continue;
- rr = next_node(rr, node_online_map);
- if (rr == MAX_NUMNODES)
- rr = first_node(node_online_map);
- cpu_to_node[i] = rr;
- rr++;
- }
-
- set_bit(0, &node_to_cpumask[cpu_to_node(0)]);
-}
-
-#ifdef CONFIG_NUMA_EMU
-int numa_fake __initdata = 0;
-
-/* Numa emulation */
-static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
-{
- int i;
- struct node nodes[MAX_NUMNODES];
- unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
-
- /* Kludge needed for the hash function */
- if (hweight64(sz) > 1) {
- unsigned long x = 1;
- while ((x << 1) < sz)
- x <<= 1;
- if (x < sz/2)
- printk("Numa emulation unbalanced. Complain to maintainer\n");
- sz = x;
- }
-
- memset(&nodes,0,sizeof(nodes));
- for (i = 0; i < numa_fake; i++) {
- nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
- if (i == numa_fake-1)
- sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
- nodes[i].end = nodes[i].start + sz;
- if (i != numa_fake-1)
- nodes[i].end--;
- printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n",
- i,
- nodes[i].start, nodes[i].end,
- (nodes[i].end - nodes[i].start) >> 20);
- node_set_online(i);
- }
- memnode_shift = compute_hash_shift(nodes, numa_fake);
- if (memnode_shift < 0) {
- memnode_shift = 0;
- printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
- return -1;
- }
- for_each_online_node(i)
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- numa_init_array();
- return 0;
-}
-#endif
-
-void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{
- int i;
-
-#ifdef CONFIG_NUMA_EMU
- if (numa_fake && !numa_emulation(start_pfn, end_pfn))
- return;
-#endif
-
-#ifdef CONFIG_ACPI_NUMA
- if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
- end_pfn << PAGE_SHIFT))
- return;
-#endif
-
-#ifdef CONFIG_K8_NUMA
- if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
- return;
-#endif
- printk(KERN_INFO "%s\n",
- numa_off ? "NUMA turned off" : "No NUMA configuration found");
-
- printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
- start_pfn << PAGE_SHIFT,
- end_pfn << PAGE_SHIFT);
- /* setup dummy node covering all memory */
- memnode_shift = 63;
- memnodemap[0] = 0;
- nodes_clear(node_online_map);
- node_set_online(0);
- for (i = 0; i < NR_CPUS; i++)
- cpu_to_node[i] = 0;
- node_to_cpumask[0] = cpumask_of_cpu(0);
- setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
-}
-
-__cpuinit void numa_add_cpu(int cpu)
-{
- /* BP is initialized elsewhere */
- if (cpu)
- set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
-}
-
-unsigned long __init numa_free_all_bootmem(void)
-{
- int i;
- unsigned long pages = 0;
- for_each_online_node(i) {
- pages += free_all_bootmem_node(NODE_DATA(i));
- }
- return pages;
-}
-
-void __init paging_init(void)
-{
- int i;
- for_each_online_node(i) {
- setup_node_zones(i);
- }
-}
-
-/* [numa=off] */
-__init int numa_setup(char *opt)
-{
- if (!strncmp(opt,"off",3))
- numa_off = 1;
-#ifdef CONFIG_NUMA_EMU
- if(!strncmp(opt, "fake=", 5)) {
- numa_fake = simple_strtoul(opt+5,NULL,0); ;
- if (numa_fake >= MAX_NUMNODES)
- numa_fake = MAX_NUMNODES;
- }
-#endif
-#ifdef CONFIG_ACPI_NUMA
- if (!strncmp(opt,"noacpi",6))
- acpi_numa = -1;
-#endif
- return 1;
-}
-
-EXPORT_SYMBOL(cpu_to_node);
-EXPORT_SYMBOL(node_to_cpumask);
-EXPORT_SYMBOL(memnode_shift);
-EXPORT_SYMBOL(memnodemap);
-EXPORT_SYMBOL(node_data);
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
deleted file mode 100644
index 94862e1ec03..00000000000
--- a/arch/x86_64/mm/pageattr.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Copyright 2002 Andi Kleen, SuSE Labs.
- * Thanks to Ben LaHaise for precious feedback.
- */
-
-#include <linux/config.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/highmem.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-#include <asm/tlbflush.h>
-#include <asm/io.h>
-
-static inline pte_t *lookup_address(unsigned long address)
-{
- pgd_t *pgd = pgd_offset_k(address);
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- if (pgd_none(*pgd))
- return NULL;
- pud = pud_offset(pgd, address);
- if (!pud_present(*pud))
- return NULL;
- pmd = pmd_offset(pud, address);
- if (!pmd_present(*pmd))
- return NULL;
- if (pmd_large(*pmd))
- return (pte_t *)pmd;
- pte = pte_offset_kernel(pmd, address);
- if (pte && !pte_present(*pte))
- pte = NULL;
- return pte;
-}
-
-static struct page *split_large_page(unsigned long address, pgprot_t prot,
- pgprot_t ref_prot)
-{
- int i;
- unsigned long addr;
- struct page *base = alloc_pages(GFP_KERNEL, 0);
- pte_t *pbase;
- if (!base)
- return NULL;
- address = __pa(address);
- addr = address & LARGE_PAGE_MASK;
- pbase = (pte_t *)page_address(base);
- for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
- pbase[i] = pfn_pte(addr >> PAGE_SHIFT,
- addr == address ? prot : ref_prot);
- }
- return base;
-}
-
-
-static void flush_kernel_map(void *address)
-{
- if (0 && address && cpu_has_clflush) {
- /* is this worth it? */
- int i;
- for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
- asm volatile("clflush (%0)" :: "r" (address + i));
- } else
- asm volatile("wbinvd":::"memory");
- if (address)
- __flush_tlb_one(address);
- else
- __flush_tlb_all();
-}
-
-
-static inline void flush_map(unsigned long address)
-{
- on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
-}
-
-struct deferred_page {
- struct deferred_page *next;
- struct page *fpage;
- unsigned long address;
-};
-static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
-
-static inline void save_page(unsigned long address, struct page *fpage)
-{
- struct deferred_page *df;
- df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL);
- if (!df) {
- flush_map(address);
- __free_page(fpage);
- } else {
- df->next = df_list;
- df->fpage = fpage;
- df->address = address;
- df_list = df;
- }
-}
-
-/*
- * No more special protections in this 2/4MB area - revert to a
- * large page again.
- */
-static void revert_page(unsigned long address, pgprot_t ref_prot)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t large_pte;
-
- pgd = pgd_offset_k(address);
- BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd,address);
- BUG_ON(pud_none(*pud));
- pmd = pmd_offset(pud, address);
- BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
- pgprot_val(ref_prot) |= _PAGE_PSE;
- large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
- set_pte((pte_t *)pmd, large_pte);
-}
-
-static int
-__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
- pgprot_t ref_prot)
-{
- pte_t *kpte;
- struct page *kpte_page;
- unsigned kpte_flags;
- kpte = lookup_address(address);
- if (!kpte) return 0;
- kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
- kpte_flags = pte_val(*kpte);
- if (pgprot_val(prot) != pgprot_val(ref_prot)) {
- if ((kpte_flags & _PAGE_PSE) == 0) {
- set_pte(kpte, pfn_pte(pfn, prot));
- } else {
- /*
- * split_large_page will take the reference for this change_page_attr
- * on the split page.
- */
- struct page *split = split_large_page(address, prot, ref_prot);
- if (!split)
- return -ENOMEM;
- set_pte(kpte,mk_pte(split, ref_prot));
- kpte_page = split;
- }
- get_page(kpte_page);
- } else if ((kpte_flags & _PAGE_PSE) == 0) {
- set_pte(kpte, pfn_pte(pfn, ref_prot));
- __put_page(kpte_page);
- } else
- BUG();
-
- /* on x86-64 the direct mapping set at boot is not using 4k pages */
- BUG_ON(PageReserved(kpte_page));
-
- switch (page_count(kpte_page)) {
- case 1:
- save_page(address, kpte_page);
- revert_page(address, ref_prot);
- break;
- case 0:
- BUG(); /* memleak and failed 2M page regeneration */
- }
- return 0;
-}
-
-/*
- * Change the page attributes of an page in the linear mapping.
- *
- * This should be used when a page is mapped with a different caching policy
- * than write-back somewhere - some CPUs do not like it when mappings with
- * different caching policies exist. This changes the page attributes of the
- * in kernel linear mapping too.
- *
- * The caller needs to ensure that there are no conflicting mappings elsewhere.
- * This function only deals with the kernel linear map.
- *
- * Caller must call global_flush_tlb() after this.
- */
-int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
-{
- int err = 0;
- int i;
-
- down_write(&init_mm.mmap_sem);
- for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
- unsigned long pfn = __pa(address) >> PAGE_SHIFT;
-
- err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
- if (err)
- break;
- /* Handle kernel mapping too which aliases part of the
- * lowmem */
- if (__pa(address) < KERNEL_TEXT_SIZE) {
- unsigned long addr2;
- pgprot_t prot2 = prot;
- addr2 = __START_KERNEL_map + __pa(address);
- pgprot_val(prot2) &= ~_PAGE_NX;
- err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC);
- }
- }
- up_write(&init_mm.mmap_sem);
- return err;
-}
-
-/* Don't call this for MMIO areas that may not have a mem_map entry */
-int change_page_attr(struct page *page, int numpages, pgprot_t prot)
-{
- unsigned long addr = (unsigned long)page_address(page);
- return change_page_attr_addr(addr, numpages, prot);
-}
-
-void global_flush_tlb(void)
-{
- struct deferred_page *df, *next_df;
-
- down_read(&init_mm.mmap_sem);
- df = xchg(&df_list, NULL);
- up_read(&init_mm.mmap_sem);
- if (!df)
- return;
- flush_map((df && !df->next) ? df->address : 0);
- for (; df; df = next_df) {
- next_df = df->next;
- if (df->fpage)
- __free_page(df->fpage);
- kfree(df);
- }
-}
-
-EXPORT_SYMBOL(change_page_attr);
-EXPORT_SYMBOL(global_flush_tlb);
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
deleted file mode 100644
index 8e3d097a9dd..00000000000
--- a/arch/x86_64/mm/srat.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * ACPI 3.0 based NUMA setup
- * Copyright 2004 Andi Kleen, SuSE Labs.
- *
- * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
- *
- * Called from acpi_numa_init while reading the SRAT and SLIT tables.
- * Assumes all memory regions belonging to a single proximity domain
- * are in one chunk. Holes between them will be included in the node.
- */
-
-#include <linux/kernel.h>
-#include <linux/acpi.h>
-#include <linux/mmzone.h>
-#include <linux/bitmap.h>
-#include <linux/module.h>
-#include <linux/topology.h>
-#include <asm/proto.h>
-#include <asm/numa.h>
-
-static struct acpi_table_slit *acpi_slit;
-
-/* Internal processor count */
-static unsigned int __initdata num_processors = 0;
-
-static nodemask_t nodes_parsed __initdata;
-static nodemask_t nodes_found __initdata;
-static struct node nodes[MAX_NUMNODES] __initdata;
-static __u8 pxm2node[256] = { [0 ... 255] = 0xff };
-
-static __init int setup_node(int pxm)
-{
- unsigned node = pxm2node[pxm];
- if (node == 0xff) {
- if (nodes_weight(nodes_found) >= MAX_NUMNODES)
- return -1;
- node = first_unset_node(nodes_found);
- node_set(node, nodes_found);
- pxm2node[pxm] = node;
- }
- return pxm2node[pxm];
-}
-
-static __init int conflicting_nodes(unsigned long start, unsigned long end)
-{
- int i;
- for_each_online_node(i) {
- struct node *nd = &nodes[i];
- if (nd->start == nd->end)
- continue;
- if (nd->end > start && nd->start < end)
- return 1;
- if (nd->end == end && nd->start == start)
- return 1;
- }
- return -1;
-}
-
-static __init void cutoff_node(int i, unsigned long start, unsigned long end)
-{
- struct node *nd = &nodes[i];
- if (nd->start < start) {
- nd->start = start;
- if (nd->end < nd->start)
- nd->start = nd->end;
- }
- if (nd->end > end) {
- if (!(end & 0xfff))
- end--;
- nd->end = end;
- if (nd->start > nd->end)
- nd->start = nd->end;
- }
-}
-
-static __init void bad_srat(void)
-{
- printk(KERN_ERR "SRAT: SRAT not used.\n");
- acpi_numa = -1;
-}
-
-static __init inline int srat_disabled(void)
-{
- return numa_off || acpi_numa < 0;
-}
-
-/* Callback for SLIT parsing */
-void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
-{
- acpi_slit = slit;
-}
-
-/* Callback for Proximity Domain -> LAPIC mapping */
-void __init
-acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
-{
- int pxm, node;
- if (srat_disabled() || pa->flags.enabled == 0)
- return;
- pxm = pa->proximity_domain;
- node = setup_node(pxm);
- if (node < 0) {
- printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
- bad_srat();
- return;
- }
- if (num_processors >= NR_CPUS) {
- printk(KERN_ERR "SRAT: Processor #%d (lapic %u) INVALID. (Max ID: %d).\n",
- num_processors, pa->apic_id, NR_CPUS);
- bad_srat();
- return;
- }
- cpu_to_node[num_processors] = node;
- acpi_numa = 1;
- printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> CPU %u -> Node %u\n",
- pxm, pa->apic_id, num_processors, node);
-
- num_processors++;
-}
-
-/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
-void __init
-acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
-{
- struct node *nd;
- unsigned long start, end;
- int node, pxm;
- int i;
-
- if (srat_disabled() || ma->flags.enabled == 0)
- return;
- pxm = ma->proximity_domain;
- node = setup_node(pxm);
- if (node < 0) {
- printk(KERN_ERR "SRAT: Too many proximity domains.\n");
- bad_srat();
- return;
- }
- start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32);
- end = start + (ma->length_lo | ((u64)ma->length_hi << 32));
- /* It is fine to add this area to the nodes data it will be used later*/
- if (ma->flags.hot_pluggable == 1)
- printk(KERN_INFO "SRAT: hot plug zone found %lx - %lx \n",
- start, end);
- i = conflicting_nodes(start, end);
- if (i >= 0) {
- printk(KERN_ERR
- "SRAT: pxm %d overlap %lx-%lx with node %d(%Lx-%Lx)\n",
- pxm, start, end, i, nodes[i].start, nodes[i].end);
- bad_srat();
- return;
- }
- nd = &nodes[node];
- if (!node_test_and_set(node, nodes_parsed)) {
- nd->start = start;
- nd->end = end;
- } else {
- if (start < nd->start)
- nd->start = start;
- if (nd->end < end)
- nd->end = end;
- }
- if (!(nd->end & 0xfff))
- nd->end--;
- printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
- nd->start, nd->end);
-}
-
-void __init acpi_numa_arch_fixup(void) {}
-
-/* Use the information discovered above to actually set up the nodes. */
-int __init acpi_scan_nodes(unsigned long start, unsigned long end)
-{
- int i;
- if (acpi_numa <= 0)
- return -1;
- memnode_shift = compute_hash_shift(nodes, nodes_weight(nodes_parsed));
- if (memnode_shift < 0) {
- printk(KERN_ERR
- "SRAT: No NUMA node hash function found. Contact maintainer\n");
- bad_srat();
- return -1;
- }
- for (i = 0; i < MAX_NUMNODES; i++) {
- if (!node_isset(i, nodes_parsed))
- continue;
- cutoff_node(i, start, end);
- if (nodes[i].start == nodes[i].end) {
- node_clear(i, nodes_parsed);
- continue;
- }
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_to_node[i] == NUMA_NO_NODE)
- continue;
- if (!node_isset(cpu_to_node[i], nodes_parsed))
- cpu_to_node[i] = NUMA_NO_NODE;
- }
- numa_init_array();
- return 0;
-}
-
-int node_to_pxm(int n)
-{
- int i;
- if (pxm2node[n] == n)
- return n;
- for (i = 0; i < 256; i++)
- if (pxm2node[i] == n)
- return i;
- return 0;
-}
-
-int __node_distance(int a, int b)
-{
- int index;
-
- if (!acpi_slit)
- return a == b ? 10 : 20;
- index = acpi_slit->localities * node_to_pxm(a);
- return acpi_slit->entry[index + node_to_pxm(b)];
-}
-
-EXPORT_SYMBOL(__node_distance);