diff options
Diffstat (limited to 'arch/x86_64/mm')
| -rw-r--r-- | arch/x86_64/mm/Makefile | 11 | ||||
| -rw-r--r-- | arch/x86_64/mm/extable.c | 35 | ||||
| -rw-r--r-- | arch/x86_64/mm/fault.c | 571 | ||||
| -rw-r--r-- | arch/x86_64/mm/init.c | 623 | ||||
| -rw-r--r-- | arch/x86_64/mm/ioremap.c | 270 | ||||
| -rw-r--r-- | arch/x86_64/mm/k8topology.c | 168 | ||||
| -rw-r--r-- | arch/x86_64/mm/numa.c | 304 | ||||
| -rw-r--r-- | arch/x86_64/mm/pageattr.c | 235 | ||||
| -rw-r--r-- | arch/x86_64/mm/srat.c | 225 |
9 files changed, 0 insertions, 2442 deletions
diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile deleted file mode 100644 index 1d232a87f11..00000000000 --- a/arch/x86_64/mm/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# -# Makefile for the linux x86_64-specific parts of the memory manager. -# - -obj-y := init.o fault.o ioremap.o extable.o pageattr.o -obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_NUMA) += numa.o -obj-$(CONFIG_K8_NUMA) += k8topology.o -obj-$(CONFIG_ACPI_NUMA) += srat.o - -hugetlbpage-y = ../../i386/mm/hugetlbpage.o diff --git a/arch/x86_64/mm/extable.c b/arch/x86_64/mm/extable.c deleted file mode 100644 index 2d78f9fb403..00000000000 --- a/arch/x86_64/mm/extable.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * linux/arch/x86_64/mm/extable.c - */ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/init.h> -#include <asm/uaccess.h> - -/* Simple binary search */ -const struct exception_table_entry * -search_extable(const struct exception_table_entry *first, - const struct exception_table_entry *last, - unsigned long value) -{ - /* Work around a B stepping K8 bug */ - if ((value >> 32) == 0) - value |= 0xffffffffUL << 32; - - while (first <= last) { - const struct exception_table_entry *mid; - long diff; - - mid = (last - first) / 2 + first; - diff = mid->insn - value; - if (diff == 0) - return mid; - else if (diff < 0) - first = mid+1; - else - last = mid-1; - } - return NULL; -} diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c deleted file mode 100644 index 13792721037..00000000000 --- a/arch/x86_64/mm/fault.c +++ /dev/null @@ -1,571 +0,0 @@ -/* - * linux/arch/x86-64/mm/fault.c - * - * Copyright (C) 1995 Linus Torvalds - * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. - */ - -#include <linux/config.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/tty.h> -#include <linux/vt_kern.h> /* For unblank_screen() */ -#include <linux/compiler.h> -#include <linux/module.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/pgalloc.h> -#include <asm/smp.h> -#include <asm/tlbflush.h> -#include <asm/proto.h> -#include <asm/kdebug.h> -#include <asm-generic/sections.h> -#include <asm/kdebug.h> - -void bust_spinlocks(int yes) -{ - int loglevel_save = console_loglevel; - if (yes) { - oops_in_progress = 1; - } else { -#ifdef CONFIG_VT - unblank_screen(); -#endif - oops_in_progress = 0; - /* - * OK, the message is on the console. Now we call printk() - * without oops_in_progress set so that printk will give klogd - * a poke. Hold onto your hats... - */ - console_loglevel = 15; /* NMI oopser may have shut the console up */ - printk(" "); - console_loglevel = loglevel_save; - } -} - -/* Sometimes the CPU reports invalid exceptions on prefetch. - Check that here and ignore. - Opcode checker based on code by Richard Brunner */ -static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, - unsigned long error_code) -{ - unsigned char *instr; - int scan_more = 1; - int prefetch = 0; - unsigned char *max_instr; - - /* If it was a exec fault ignore */ - if (error_code & (1<<4)) - return 0; - - instr = (unsigned char *)convert_rip_to_linear(current, regs); - max_instr = instr + 15; - - if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) - return 0; - - while (scan_more && instr < max_instr) { - unsigned char opcode; - unsigned char instr_hi; - unsigned char instr_lo; - - if (__get_user(opcode, instr)) - break; - - instr_hi = opcode & 0xf0; - instr_lo = opcode & 0x0f; - instr++; - - switch (instr_hi) { - case 0x20: - case 0x30: - /* Values 0x26,0x2E,0x36,0x3E are valid x86 - prefixes. In long mode, the CPU will signal - invalid opcode if some of these prefixes are - present so we will never get here anyway */ - scan_more = ((instr_lo & 7) == 0x6); - break; - - case 0x40: - /* In AMD64 long mode, 0x40 to 0x4F are valid REX prefixes - Need to figure out under what instruction mode the - instruction was issued ... */ - /* Could check the LDT for lm, but for now it's good - enough to assume that long mode only uses well known - segments or kernel. */ - scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS); - break; - - case 0x60: - /* 0x64 thru 0x67 are valid prefixes in all modes. */ - scan_more = (instr_lo & 0xC) == 0x4; - break; - case 0xF0: - /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */ - scan_more = !instr_lo || (instr_lo>>1) == 1; - break; - case 0x00: - /* Prefetch instruction is 0x0F0D or 0x0F18 */ - scan_more = 0; - if (__get_user(opcode, instr)) - break; - prefetch = (instr_lo == 0xF) && - (opcode == 0x0D || opcode == 0x18); - break; - default: - scan_more = 0; - break; - } - } - return prefetch; -} - -static int bad_address(void *p) -{ - unsigned long dummy; - return __get_user(dummy, (unsigned long *)p); -} - -void dump_pagetable(unsigned long address) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - asm("movq %%cr3,%0" : "=r" (pgd)); - - pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); - pgd += pgd_index(address); - printk("PGD %lx ", pgd_val(*pgd)); - if (bad_address(pgd)) goto bad; - if (!pgd_present(*pgd)) goto ret; - - pud = __pud_offset_k((pud_t *)pgd_page(*pgd), address); - if (bad_address(pud)) goto bad; - printk("PUD %lx ", pud_val(*pud)); - if (!pud_present(*pud)) goto ret; - - pmd = pmd_offset(pud, address); - if (bad_address(pmd)) goto bad; - printk("PMD %lx ", pmd_val(*pmd)); - if (!pmd_present(*pmd)) goto ret; - - pte = pte_offset_kernel(pmd, address); - if (bad_address(pte)) goto bad; - printk("PTE %lx", pte_val(*pte)); -ret: - printk("\n"); - return; -bad: - printk("BAD\n"); -} - -static const char errata93_warning[] = -KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" -KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" -KERN_ERR "******* Please consider a BIOS update.\n" -KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; - -/* Workaround for K8 erratum #93 & buggy BIOS. - BIOS SMM functions are required to use a specific workaround - to avoid corruption of the 64bit RIP register on C stepping K8. - A lot of BIOS that didn't get tested properly miss this. - The OS sees this as a page fault with the upper 32bits of RIP cleared. - Try to work around it here. - Note we only handle faults in kernel here. */ - -static int is_errata93(struct pt_regs *regs, unsigned long address) -{ - static int warned; - if (address != regs->rip) - return 0; - if ((address >> 32) != 0) - return 0; - address |= 0xffffffffUL << 32; - if ((address >= (u64)_stext && address <= (u64)_etext) || - (address >= MODULES_VADDR && address <= MODULES_END)) { - if (!warned) { - printk(errata93_warning); - warned = 1; - } - regs->rip = address; - return 1; - } - return 0; -} - -int unhandled_signal(struct task_struct *tsk, int sig) -{ - if (tsk->pid == 1) - return 1; - /* Warn for strace, but not for gdb */ - if (!test_ti_thread_flag(tsk->thread_info, TIF_SYSCALL_TRACE) && - (tsk->ptrace & PT_PTRACED)) - return 0; - return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) || - (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL); -} - -static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, - unsigned long error_code) -{ - oops_begin(); - printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", - current->comm, address); - dump_pagetable(address); - __die("Bad pagetable", regs, error_code); - oops_end(); - do_exit(SIGKILL); -} - -/* - * Handle a fault on the vmalloc or module mapping area - * - * This assumes no large pages in there. - */ -static int vmalloc_fault(unsigned long address) -{ - pgd_t *pgd, *pgd_ref; - pud_t *pud, *pud_ref; - pmd_t *pmd, *pmd_ref; - pte_t *pte, *pte_ref; - - /* Copy kernel mappings over when needed. This can also - happen within a race in page table update. In the later - case just flush. */ - - pgd = pgd_offset(current->mm ?: &init_mm, address); - pgd_ref = pgd_offset_k(address); - if (pgd_none(*pgd_ref)) - return -1; - if (pgd_none(*pgd)) - set_pgd(pgd, *pgd_ref); - - /* Below here mismatches are bugs because these lower tables - are shared */ - - pud = pud_offset(pgd, address); - pud_ref = pud_offset(pgd_ref, address); - if (pud_none(*pud_ref)) - return -1; - if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref)) - BUG(); - pmd = pmd_offset(pud, address); - pmd_ref = pmd_offset(pud_ref, address); - if (pmd_none(*pmd_ref)) - return -1; - if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) - BUG(); - pte_ref = pte_offset_kernel(pmd_ref, address); - if (!pte_present(*pte_ref)) - return -1; - pte = pte_offset_kernel(pmd, address); - /* Don't use pte_page here, because the mappings can point - outside mem_map, and the NUMA hash lookup cannot handle - that. */ - if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) - BUG(); - __flush_tlb_all(); - return 0; -} - -int page_fault_trace = 0; -int exception_trace = 1; - -/* - * This routine handles page faults. It determines the address, - * and the problem, and then passes it off to one of the appropriate - * routines. - * - * error_code: - * bit 0 == 0 means no page found, 1 means protection fault - * bit 1 == 0 means read, 1 means write - * bit 2 == 0 means kernel, 1 means user-mode - * bit 3 == 1 means fault was an instruction fetch - */ -asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) -{ - struct task_struct *tsk; - struct mm_struct *mm; - struct vm_area_struct * vma; - unsigned long address; - const struct exception_table_entry *fixup; - int write; - siginfo_t info; - -#ifdef CONFIG_CHECKING - { - unsigned long gs; - struct x8664_pda *pda = cpu_pda + stack_smp_processor_id(); - rdmsrl(MSR_GS_BASE, gs); - if (gs != (unsigned long)pda) { - wrmsrl(MSR_GS_BASE, pda); - printk("page_fault: wrong gs %lx expected %p\n", gs, pda); - } - } -#endif - - /* get the address */ - __asm__("movq %%cr2,%0":"=r" (address)); - if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, - SIGSEGV) == NOTIFY_STOP) - return; - - if (likely(regs->eflags & X86_EFLAGS_IF)) - local_irq_enable(); - - if (unlikely(page_fault_trace)) - printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", - regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); - - tsk = current; - mm = tsk->mm; - info.si_code = SEGV_MAPERR; - - - /* - * We fault-in kernel-space virtual memory on-demand. The - * 'reference' page table is init_mm.pgd. - * - * NOTE! We MUST NOT take any locks for this case. We may - * be in an interrupt or a critical region, and should - * only copy the information from the master page table, - * nothing more. - * - * This verifies that the fault happens in kernel space - * (error_code & 4) == 0, and that the fault was not a - * protection error (error_code & 1) == 0. - */ - if (unlikely(address >= TASK_SIZE64)) { - if (!(error_code & 5) && - ((address >= VMALLOC_START && address < VMALLOC_END) || - (address >= MODULES_VADDR && address < MODULES_END))) { - if (vmalloc_fault(address) < 0) - goto bad_area_nosemaphore; - return; - } - /* - * Don't take the mm semaphore here. If we fixup a prefetch - * fault we could otherwise deadlock. - */ - goto bad_area_nosemaphore; - } - - if (unlikely(error_code & (1 << 3))) - pgtable_bad(address, regs, error_code); - - /* - * If we're in an interrupt or have no user - * context, we must not take the fault.. - */ - if (unlikely(in_atomic() || !mm)) - goto bad_area_nosemaphore; - - again: - /* When running in the kernel we expect faults to occur only to - * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunatly, in the case of an - * erroneous fault occuring in a code path which already holds mmap_sem - * we will deadlock attempting to validate the fault against the - * address space. Luckily the kernel only validly references user - * space from well defined areas of code, which are listed in the - * exceptions table. - * - * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibilty of a deadlock. - * Attempt to lock the address space, if we cannot we then validate the - * source. If this is invalid we can skip the address space check, - * thus avoiding the deadlock. - */ - if (!down_read_trylock(&mm->mmap_sem)) { - if ((error_code & 4) == 0 && - !search_exception_tables(regs->rip)) - goto bad_area_nosemaphore; - down_read(&mm->mmap_sem); - } - - vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (likely(vma->vm_start <= address)) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (error_code & 4) { - // XXX: align red zone size with ABI - if (address + 128 < regs->rsp) - goto bad_area; - } - if (expand_stack(vma, address)) - goto bad_area; -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ -good_area: - info.si_code = SEGV_ACCERR; - write = 0; - switch (error_code & 3) { - default: /* 3: write, present */ - /* fall through */ - case 2: /* write, not present */ - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - write++; - break; - case 1: /* read, present */ - goto bad_area; - case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) - goto bad_area; - } - - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault. - */ - switch (handle_mm_fault(mm, vma, address, write)) { - case 1: - tsk->min_flt++; - break; - case 2: - tsk->maj_flt++; - break; - case 0: - goto do_sigbus; - default: - goto out_of_memory; - } - - up_read(&mm->mmap_sem); - return; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: - up_read(&mm->mmap_sem); - -bad_area_nosemaphore: - /* User mode accesses just cause a SIGSEGV */ - if (error_code & 4) { - if (is_prefetch(regs, address, error_code)) - return; - - /* Work around K8 erratum #100 K8 in compat mode - occasionally jumps to illegal addresses >4GB. We - catch this here in the page fault handler because - these addresses are not reachable. Just detect this - case and return. Any code segment in LDT is - compatibility mode. */ - if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && - (address >> 32)) - return; - - if (exception_trace && unhandled_signal(tsk, SIGSEGV)) { - printk( - "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", - tsk->pid > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, tsk->pid, address, regs->rip, - regs->rsp, error_code); - } - - tsk->thread.cr2 = address; - /* Kernel addresses are always protection faults */ - tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_no = 14; - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void __user *)address; - force_sig_info(SIGSEGV, &info, tsk); - return; - } - -no_context: - - /* Are we prepared to handle this kernel fault? */ - fixup = search_exception_tables(regs->rip); - if (fixup) { - regs->rip = fixup->fixup; - return; - } - - /* - * Hall of shame of CPU/BIOS bugs. - */ - - if (is_prefetch(regs, address, error_code)) - return; - - if (is_errata93(regs, address)) - return; - -/* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ - - oops_begin(); - - if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); - else - printk(KERN_ALERT "Unable to handle kernel paging request"); - printk(" at %016lx RIP: \n" KERN_ALERT,address); - printk_address(regs->rip); - printk("\n"); - dump_pagetable(address); - __die("Oops", regs, error_code); - /* Executive summary in case the body of the oops scrolled away */ - printk(KERN_EMERG "CR2: %016lx\n", address); - oops_end(); - do_exit(SIGKILL); - -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ -out_of_memory: - up_read(&mm->mmap_sem); - if (current->pid == 1) { - yield(); - goto again; - } - printk("VM: killing process %s\n", tsk->comm); - if (error_code & 4) - do_exit(SIGKILL); - goto no_context; - -do_sigbus: - up_read(&mm->mmap_sem); - - /* Kernel mode? Handle exceptions or die */ - if (!(error_code & 4)) - goto no_context; - - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 14; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, tsk); - return; -} diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c deleted file mode 100644 index 72e4b364ed7..00000000000 --- a/arch/x86_64/mm/init.c +++ /dev/null @@ -1,623 +0,0 @@ -/* - * linux/arch/x86_64/mm/init.c - * - * Copyright (C) 1995 Linus Torvalds - * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> - * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de> - */ - -#include <linux/config.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/smp.h> -#include <linux/init.h> -#include <linux/pagemap.h> -#include <linux/bootmem.h> -#include <linux/proc_fs.h> - -#include <asm/processor.h> -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> -#include <asm/dma.h> -#include <asm/fixmap.h> -#include <asm/e820.h> -#include <asm/apic.h> -#include <asm/tlb.h> -#include <asm/mmu_context.h> -#include <asm/proto.h> -#include <asm/smp.h> - -#ifndef Dprintk -#define Dprintk(x...) -#endif - -#ifdef CONFIG_GART_IOMMU -extern int swiotlb; -#endif - -extern char _stext[]; - -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - -/* - * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the - * physical space so we can cache the place of the first one and move - * around without checking the pgd every time. - */ - -void show_mem(void) -{ - int i, total = 0, reserved = 0; - int shared = 0, cached = 0; - pg_data_t *pgdat; - struct page *page; - - printk("Mem-info:\n"); - show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - - for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_spanned_pages; ++i) { - page = pfn_to_page(pgdat->node_start_pfn + i); - total++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page) - 1; - } - } - printk("%d pages of RAM\n", total); - printk("%d reserved pages\n",reserved); - printk("%d pages shared\n",shared); - printk("%d pages swap cached\n",cached); -} - -/* References to section boundaries */ - -extern char _text, _etext, _edata, __bss_start, _end[]; -extern char __init_begin, __init_end; - -int after_bootmem; - -static void *spp_getpage(void) -{ - void *ptr; - if (after_bootmem) - ptr = (void *) get_zeroed_page(GFP_ATOMIC); - else - ptr = alloc_bootmem_pages(PAGE_SIZE); - if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) - panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":""); - - Dprintk("spp_getpage %p\n", ptr); - return ptr; -} - -static void set_pte_phys(unsigned long vaddr, - unsigned long phys, pgprot_t prot) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte, new_pte; - - Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys); - - pgd = pgd_offset_k(vaddr); - if (pgd_none(*pgd)) { - printk("PGD FIXMAP MISSING, it should be setup in head.S!\n"); - return; - } - pud = pud_offset(pgd, vaddr); - if (pud_none(*pud)) { - pmd = (pmd_t *) spp_getpage(); - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); - if (pmd != pmd_offset(pud, 0)) { - printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0)); - return; - } - } - pmd = pmd_offset(pud, vaddr); - if (pmd_none(*pmd)) { - pte = (pte_t *) spp_getpage(); - set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER)); - if (pte != pte_offset_kernel(pmd, 0)) { - printk("PAGETABLE BUG #02!\n"); - return; - } - } - new_pte = pfn_pte(phys >> PAGE_SHIFT, prot); - - pte = pte_offset_kernel(pmd, vaddr); - if (!pte_none(*pte) && - pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask)) - pte_ERROR(*pte); - set_pte(pte, new_pte); - - /* - * It's enough to flush this one mapping. - * (PGE mappings get flushed as well) - */ - __flush_tlb_one(vaddr); -} - -/* NOTE: this is meant to be run only at boot */ -void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot) -{ - unsigned long address = __fix_to_virt(idx); - - if (idx >= __end_of_fixed_addresses) { - printk("Invalid __set_fixmap\n"); - return; - } - set_pte_phys(address, phys, prot); -} - -unsigned long __initdata table_start, table_end; - -extern pmd_t temp_boot_pmds[]; - -static struct temp_map { - pmd_t *pmd; - void *address; - int allocated; -} temp_mappings[] __initdata = { - { &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) }, - { &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) }, - {} -}; - -static __init void *alloc_low_page(int *index, unsigned long *phys) -{ - struct temp_map *ti; - int i; - unsigned long pfn = table_end++, paddr; - void *adr; - - if (pfn >= end_pfn) - panic("alloc_low_page: ran out of memory"); - for (i = 0; temp_mappings[i].allocated; i++) { - if (!temp_mappings[i].pmd) - panic("alloc_low_page: ran out of temp mappings"); - } - ti = &temp_mappings[i]; - paddr = (pfn << PAGE_SHIFT) & PMD_MASK; - set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE)); - ti->allocated = 1; - __flush_tlb(); - adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); - *index = i; - *phys = pfn * PAGE_SIZE; - return adr; -} - -static __init void unmap_low_page(int i) -{ - struct temp_map *ti = &temp_mappings[i]; - set_pmd(ti->pmd, __pmd(0)); - ti->allocated = 0; -} - -static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) -{ - long i, j; - - i = pud_index(address); - pud = pud + i; - for (; i < PTRS_PER_PUD; pud++, i++) { - int map; - unsigned long paddr, pmd_phys; - pmd_t *pmd; - - paddr = address + i*PUD_SIZE; - if (paddr >= end) { - for (; i < PTRS_PER_PUD; i++, pud++) - set_pud(pud, __pud(0)); - break; - } - - if (!e820_mapped(paddr, paddr+PUD_SIZE, 0)) { - set_pud(pud, __pud(0)); - continue; - } - - pmd = alloc_low_page(&map, &pmd_phys); - set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); - for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { - unsigned long pe; - - if (paddr >= end) { - for (; j < PTRS_PER_PMD; j++, pmd++) - set_pmd(pmd, __pmd(0)); - break; - } - pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr; - pe &= __supported_pte_mask; - set_pmd(pmd, __pmd(pe)); - } - unmap_low_page(map); - } - __flush_tlb(); -} - -static void __init find_early_table_space(unsigned long end) -{ - unsigned long puds, pmds, tables; - - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + - round_up(pmds * sizeof(pmd_t), PAGE_SIZE); - - table_start = find_e820_area(0x8000, __pa_symbol(&_text), tables); - if (table_start == -1UL) - panic("Cannot find space for the kernel page tables"); - - table_start >>= PAGE_SHIFT; - table_end = table_start; -} - -/* Setup the direct mapping of the physical memory at PAGE_OFFSET. - This runs before bootmem is initialized and gets pages directly from the - physical memory. To access them they are temporarily mapped. */ -void __init init_memory_mapping(unsigned long start, unsigned long end) -{ - unsigned long next; - - Dprintk("init_memory_mapping\n"); - - /* - * Find space for the kernel direct mapping tables. - * Later we should allocate these tables in the local node of the memory - * mapped. Unfortunately this is done currently before the nodes are - * discovered. - */ - find_early_table_space(end); - - start = (unsigned long)__va(start); - end = (unsigned long)__va(end); - - for (; start < end; start = next) { - int map; - unsigned long pud_phys; - pud_t *pud = alloc_low_page(&map, &pud_phys); - next = start + PGDIR_SIZE; - if (next > end) - next = end; - phys_pud_init(pud, __pa(start), __pa(next)); - set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); - unmap_low_page(map); - } - - asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features)); - __flush_tlb_all(); - early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end, - table_start<<PAGE_SHIFT, - table_end<<PAGE_SHIFT); -} - -extern struct x8664_pda cpu_pda[NR_CPUS]; - -/* Assumes all CPUs still execute in init_mm */ -void zap_low_mappings(void) -{ - pgd_t *pgd = pgd_offset_k(0UL); - pgd_clear(pgd); - flush_tlb_all(); -} - -#ifndef CONFIG_NUMA -void __init paging_init(void) -{ - { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; - unsigned int max_dma; - - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - - if (end_pfn < max_dma) - zones_size[ZONE_DMA] = end_pfn; - else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = end_pfn - max_dma; - } - free_area_init(zones_size); - } - return; -} -#endif - -/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches - from the CPU leading to inconsistent cache lines. address and size - must be aligned to 2MB boundaries. - Does nothing when the mapping doesn't exist. */ -void __init clear_kernel_mapping(unsigned long address, unsigned long size) -{ - unsigned long end = address + size; - - BUG_ON(address & ~LARGE_PAGE_MASK); - BUG_ON(size & ~LARGE_PAGE_MASK); - - for (; address < end; address += LARGE_PAGE_SIZE) { - pgd_t *pgd = pgd_offset_k(address); - pud_t *pud; - pmd_t *pmd; - if (pgd_none(*pgd)) - continue; - pud = pud_offset(pgd, address); - if (pud_none(*pud)) - continue; - pmd = pmd_offset(pud, address); - if (!pmd || pmd_none(*pmd)) - continue; - if (0 == (pmd_val(*pmd) & _PAGE_PSE)) { - /* Could handle this, but it should not happen currently. */ - printk(KERN_ERR - "clear_kernel_mapping: mapping has been split. will leak memory\n"); - pmd_ERROR(*pmd); - } - set_pmd(pmd, __pmd(0)); - } - __flush_tlb_all(); -} - -static inline int page_is_ram (unsigned long pagenr) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long addr, end; - - if (e820.map[i].type != E820_RAM) /* not usable memory */ - continue; - /* - * !!!FIXME!!! Some BIOSen report areas as RAM that - * are not. Notably the 640->1Mb area. We need a sanity - * check here. - */ - addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT; - end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT; - if ((pagenr >= addr) && (pagenr < end)) - return 1; - } - return 0; -} - -extern int swiotlb_force; - -static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, - kcore_vsyscall; - -void __init mem_init(void) -{ - int codesize, reservedpages, datasize, initsize; - int tmp; - -#ifdef CONFIG_SWIOTLB - if (swiotlb_force) - swiotlb = 1; - if (!iommu_aperture && - (end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu)) - swiotlb = 1; - if (swiotlb) - swiotlb_init(); -#endif - - /* How many end-of-memory variables you have, grandma! */ - max_low_pfn = end_pfn; - max_pfn = end_pfn; - num_physpages = end_pfn; - high_memory = (void *) __va(end_pfn * PAGE_SIZE); - - /* clear the zero-page */ - memset(empty_zero_page, 0, PAGE_SIZE); - - reservedpages = 0; - - /* this will put all low memory onto the freelists */ -#ifdef CONFIG_NUMA - totalram_pages += numa_free_all_bootmem(); - tmp = 0; - /* should count reserved pages here for all nodes */ -#else - -#ifdef CONFIG_FLATMEM - max_mapnr = end_pfn; - if (!mem_map) BUG(); -#endif - - totalram_pages += free_all_bootmem(); - - for (tmp = 0; tmp < end_pfn; tmp++) - /* - * Only count reserved RAM pages - */ - if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) - reservedpages++; -#endif - - after_bootmem = 1; - - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - /* Register memory areas for /proc/kcore */ - kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); - kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, - VMALLOC_END-VMALLOC_START); - kclist_add(&kcore_kernel, &_stext, _end - _stext); - kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN); - kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, - VSYSCALL_END - VSYSCALL_START); - - printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - end_pfn << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10); - - /* - * Subtle. SMP is doing its boot stuff late (because it has to - * fork idle threads) - but it also needs low mappings for the - * protected-mode entry to work. We zap these entries only after - * the WP-bit has been tested. - */ -#ifndef CONFIG_SMP - zap_low_mappings(); -#endif -} - -extern char __initdata_begin[], __initdata_end[]; - -void free_initmem(void) -{ - unsigned long addr; - - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - set_page_count(virt_to_page(addr), 1); - memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); - free_page(addr); - totalram_pages++; - } - memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin); - printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - if (start < (unsigned long)&_end) - return; - printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - totalram_pages++; - } -} -#endif - -void __init reserve_bootmem_generic(unsigned long phys, unsigned len) -{ - /* Should check here against the e820 map to avoid double free */ -#ifdef CONFIG_NUMA - int nid = phys_to_nid(phys); - reserve_bootmem_node(NODE_DATA(nid), phys, len); -#else - reserve_bootmem(phys, len); -#endif -} - -int kern_addr_valid(unsigned long addr) -{ - unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - if (above != 0 && above != -1UL) - return 0; - - pgd = pgd_offset_k(addr); - if (pgd_none(*pgd)) - return 0; - - pud = pud_offset(pgd, addr); - if (pud_none(*pud)) - return 0; - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) - return 0; - if (pmd_large(*pmd)) - return pfn_valid(pmd_pfn(*pmd)); - - pte = pte_offset_kernel(pmd, addr); - if (pte_none(*pte)) - return 0; - return pfn_valid(pte_pfn(*pte)); -} - -#ifdef CONFIG_SYSCTL -#include <linux/sysctl.h> - -extern int exception_trace, page_fault_trace; - -static ctl_table debug_table2[] = { - { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, - proc_dointvec }, -#ifdef CONFIG_CHECKING - { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL, - proc_dointvec }, -#endif - { 0, } -}; - -static ctl_table debug_root_table2[] = { - { .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555, - .child = debug_table2 }, - { 0 }, -}; - -static __init int x8664_sysctl_init(void) -{ - register_sysctl_table(debug_root_table2, 1); - return 0; -} -__initcall(x8664_sysctl_init); -#endif - -/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only - covers the 64bit vsyscall page now. 32bit has a real VMA now and does - not need special handling anymore. */ - -static struct vm_area_struct gate_vma = { - .vm_start = VSYSCALL_START, - .vm_end = VSYSCALL_END, - .vm_page_prot = PAGE_READONLY -}; - -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(tsk, TIF_IA32)) - return NULL; -#endif - return &gate_vma; -} - -int in_gate_area(struct task_struct *task, unsigned long addr) -{ - struct vm_area_struct *vma = get_gate_vma(task); - if (!vma) - return 0; - return (addr >= vma->vm_start) && (addr < vma->vm_end); -} - -/* Use this when you have no reliable task/vma, typically from interrupt - * context. It is less reliable than using the task's vma and may give - * false positives. - */ -int in_gate_area_no_task(unsigned long addr) -{ - return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); -} diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c deleted file mode 100644 index 6972df480d2..00000000000 --- a/arch/x86_64/mm/ioremap.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * arch/x86_64/mm/ioremap.c - * - * Re-map IO memory to kernel address space so that we can access it. - * This is needed for high PCI addresses that aren't mapped in the - * 640k-1MB IO memory area on PC's - * - * (C) Copyright 1995 1996 Linus Torvalds - */ - -#include <linux/vmalloc.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <asm/io.h> -#include <asm/pgalloc.h> -#include <asm/fixmap.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> -#include <asm/proto.h> - -#define ISA_START_ADDRESS 0xa0000 -#define ISA_END_ADDRESS 0x100000 - -static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) -{ - unsigned long end; - unsigned long pfn; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - if (address >= end) - BUG(); - pfn = phys_addr >> PAGE_SHIFT; - do { - if (!pte_none(*pte)) { - printk("remap_area_pte: page already exists\n"); - BUG(); - } - set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | - _PAGE_GLOBAL | _PAGE_DIRTY | _PAGE_ACCESSED | flags))); - address += PAGE_SIZE; - pfn++; - pte++; - } while (address && (address < end)); -} - -static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) -{ - unsigned long end; - - address &= ~PUD_MASK; - end = address + size; - if (end > PUD_SIZE) - end = PUD_SIZE; - phys_addr -= address; - if (address >= end) - BUG(); - do { - pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); - if (!pte) - return -ENOMEM; - remap_area_pte(pte, address, end - address, address + phys_addr, flags); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -static inline int remap_area_pud(pud_t * pud, unsigned long address, unsigned long size, - unsigned long phys_addr, unsigned long flags) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - phys_addr -= address; - if (address >= end) - BUG(); - do { - pmd_t * pmd = pmd_alloc(&init_mm, pud, address); - if (!pmd) - return -ENOMEM; - remap_area_pmd(pmd, address, end - address, address + phys_addr, flags); - address = (address + PUD_SIZE) & PUD_MASK; - pud++; - } while (address && (address < end)); - return 0; -} - -static int remap_area_pages(unsigned long address, unsigned long phys_addr, - unsigned long size, unsigned long flags) -{ - int error; - pgd_t *pgd; - unsigned long end = address + size; - - phys_addr -= address; - pgd = pgd_offset_k(address); - flush_cache_all(); - if (address >= end) - BUG(); - spin_lock(&init_mm.page_table_lock); - do { - pud_t *pud; - pud = pud_alloc(&init_mm, pgd, address); - error = -ENOMEM; - if (!pud) - break; - if (remap_area_pud(pud, address, end - address, - phys_addr + address, flags)) - break; - error = 0; - address = (address + PGDIR_SIZE) & PGDIR_MASK; - pgd++; - } while (address && (address < end)); - spin_unlock(&init_mm.page_table_lock); - flush_tlb_all(); - return error; -} - -/* - * Fix up the linear direct mapping of the kernel to avoid cache attribute - * conflicts. - */ -static int -ioremap_change_attr(unsigned long phys_addr, unsigned long size, - unsigned long flags) -{ - int err = 0; - if (phys_addr + size - 1 < (end_pfn_map << PAGE_SHIFT)) { - unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long vaddr = (unsigned long) __va(phys_addr); - - /* - * Must use a address here and not struct page because the phys addr - * can be a in hole between nodes and not have an memmap entry. - */ - err = change_page_attr_addr(vaddr,npages,__pgprot(__PAGE_KERNEL|flags)); - if (!err) - global_flush_tlb(); - } - return err; -} - -/* - * Generic mapping function - */ - -/* - * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. - * - * NOTE! We need to allow non-page-aligned mappings too: we will obviously - * have to convert them into an offset in a page-aligned mapping, but the - * caller shouldn't need to know that small detail. - */ -void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) -{ - void * addr; - struct vm_struct * area; - unsigned long offset, last_addr; - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; - - /* - * Don't remap the low PCI/ISA area, it's always mapped.. - */ - if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) - return (__force void __iomem *)phys_to_virt(phys_addr); - -#ifdef CONFIG_FLATMEM - /* - * Don't allow anybody to remap normal RAM that we're using.. - */ - if (last_addr < virt_to_phys(high_memory)) { - char *t_addr, *t_end; - struct page *page; - - t_addr = __va(phys_addr); - t_end = t_addr + (size - 1); - - for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) - if(!PageReserved(page)) - return NULL; - } -#endif - - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr+1) - phys_addr; - - /* - * Ok, go for it.. - */ - area = get_vm_area(size, VM_IOREMAP | (flags << 20)); - if (!area) - return NULL; - area->phys_addr = phys_addr; - addr = area->addr; - if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { - remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); - return NULL; - } - if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) { - area->flags &= 0xffffff; - vunmap(addr); - return NULL; - } - return (__force void __iomem *) (offset + (char *)addr); -} - -/** - * ioremap_nocache - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - * - * ioremap_nocache performs a platform specific sequence of operations to - * make bus memory CPU accessible via the readb/readw/readl/writeb/ - * writew/writel functions and the other mmio helpers. The returned - * address is not guaranteed to be usable directly as a virtual - * address. - * - * This version of ioremap ensures that the memory is marked uncachable - * on the CPU as well as honouring existing caching rules from things like - * the PCI bus. Note that there are other caches and buffers on many - * busses. In particular driver authors should read up on PCI writes - * - * It's useful if some control registers are in such an area and - * write combining or read caching is not desirable: - * - * Must be freed with iounmap. - */ - -void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) -{ - return __ioremap(phys_addr, size, _PAGE_PCD); -} - -void iounmap(volatile void __iomem *addr) -{ - struct vm_struct *p; - - if (addr <= high_memory) - return; - if (addr >= phys_to_virt(ISA_START_ADDRESS) && - addr < phys_to_virt(ISA_END_ADDRESS)) - return; - - write_lock(&vmlist_lock); - p = __remove_vm_area((void *)((unsigned long)addr & PAGE_MASK)); - if (!p) - printk("iounmap: bad address %p\n", addr); - else if (p->flags >> 20) - ioremap_change_attr(p->phys_addr, p->size, 0); - write_unlock(&vmlist_lock); - kfree(p); -} diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c deleted file mode 100644 index ec35747aacd..00000000000 --- a/arch/x86_64/mm/k8topology.c +++ /dev/null @@ -1,168 +0,0 @@ -/* - * AMD K8 NUMA support. - * Discover the memory map and associated nodes. - * - * This version reads it directly from the K8 northbridge. - * - * Copyright 2002,2003 Andi Kleen, SuSE Labs. - */ -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/module.h> -#include <linux/nodemask.h> -#include <asm/io.h> -#include <linux/pci_ids.h> -#include <asm/types.h> -#include <asm/mmzone.h> -#include <asm/proto.h> -#include <asm/e820.h> -#include <asm/pci-direct.h> -#include <asm/numa.h> - -static __init int find_northbridge(void) -{ - int num; - - for (num = 0; num < 32; num++) { - u32 header; - - header = read_pci_config(0, num, 0, 0x00); - if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16))) - continue; - - header = read_pci_config(0, num, 1, 0x00); - if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16))) - continue; - return num; - } - - return -1; -} - -int __init k8_scan_nodes(unsigned long start, unsigned long end) -{ - unsigned long prevbase; - struct node nodes[8]; - int nodeid, i, nb; - int found = 0; - u32 reg; - unsigned numnodes; - nodemask_t nodes_parsed; - - nodes_clear(nodes_parsed); - - nb = find_northbridge(); - if (nb < 0) - return nb; - - printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb); - - reg = read_pci_config(0, nb, 0, 0x60); - numnodes = ((reg >> 4) & 0xF) + 1; - - printk(KERN_INFO "Number of nodes %d\n", numnodes); - - memset(&nodes,0,sizeof(nodes)); - prevbase = 0; - for (i = 0; i < 8; i++) { - unsigned long base,limit; - - base = read_pci_config(0, nb, 1, 0x40 + i*8); - limit = read_pci_config(0, nb, 1, 0x44 + i*8); - - nodeid = limit & 7; - if ((base & 3) == 0) { - if (i < numnodes) - printk("Skipping disabled node %d\n", i); - continue; - } - if (nodeid >= numnodes) { - printk("Ignoring excess node %d (%lx:%lx)\n", nodeid, - base, limit); - continue; - } - - if (!limit) { - printk(KERN_INFO "Skipping node entry %d (base %lx)\n", i, - base); - continue; - } - if ((base >> 8) & 3 || (limit >> 8) & 3) { - printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n", - nodeid, (base>>8)&3, (limit>>8) & 3); - return -1; - } - if (node_isset(nodeid, nodes_parsed)) { - printk(KERN_INFO "Node %d already present. Skipping\n", - nodeid); - continue; - } - - limit >>= 16; - limit <<= 24; - limit |= (1<<24)-1; - - if (limit > end_pfn << PAGE_SHIFT) - limit = end_pfn << PAGE_SHIFT; - if (limit <= base) - continue; - - base >>= 16; - base <<= 24; - - if (base < start) - base = start; - if (limit > end) - limit = end; - if (limit == base) { - printk(KERN_ERR "Empty node %d\n", nodeid); - continue; - } - if (limit < base) { - printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n", - nodeid, base, limit); - continue; - } - - /* Could sort here, but pun for now. Should not happen anyroads. */ - if (prevbase > base) { - printk(KERN_ERR "Node map not sorted %lx,%lx\n", - prevbase,base); - return -1; - } - - printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n", - nodeid, base, limit); - - found++; - - nodes[nodeid].start = base; - nodes[nodeid].end = limit; - - prevbase = base; - - node_set(nodeid, nodes_parsed); - } - - if (!found) - return -1; - - memnode_shift = compute_hash_shift(nodes, numnodes); - if (memnode_shift < 0) { - printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); - return -1; - } - printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); - - for (i = 0; i < 8; i++) { - if (nodes[i].start != nodes[i].end) { - /* assume 1:1 NODE:CPU */ - cpu_to_node[i] = i; - setup_node_bootmem(i, nodes[i].start, nodes[i].end); - } - } - - numa_init_array(); - return 0; -} diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c deleted file mode 100644 index 70cb2904a90..00000000000 --- a/arch/x86_64/mm/numa.c +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Generic VM initialization for x86-64 NUMA setups. - * Copyright 2002,2003 Andi Kleen, SuSE Labs. - */ -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/mmzone.h> -#include <linux/ctype.h> -#include <linux/module.h> -#include <linux/nodemask.h> - -#include <asm/e820.h> -#include <asm/proto.h> -#include <asm/dma.h> -#include <asm/numa.h> -#include <asm/acpi.h> - -#ifndef Dprintk -#define Dprintk(x...) -#endif - -struct pglist_data *node_data[MAX_NUMNODES]; -bootmem_data_t plat_node_bdata[MAX_NUMNODES]; - -int memnode_shift; -u8 memnodemap[NODEMAPSIZE]; - -unsigned char cpu_to_node[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; -cpumask_t node_to_cpumask[MAX_NUMNODES]; - -int numa_off __initdata; - -int __init compute_hash_shift(struct node *nodes, int numnodes) -{ - int i; - int shift = 20; - unsigned long addr,maxend=0; - - for (i = 0; i < numnodes; i++) - if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend)) - maxend = nodes[i].end; - - while ((1UL << shift) < (maxend / NODEMAPSIZE)) - shift++; - - printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n", - shift,maxend); - memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE); - for (i = 0; i < numnodes; i++) { - if (nodes[i].start == nodes[i].end) - continue; - for (addr = nodes[i].start; - addr < nodes[i].end; - addr += (1UL << shift)) { - if (memnodemap[addr >> shift] != 0xff) { - printk(KERN_INFO - "Your memory is not aligned you need to rebuild your kernel " - "with a bigger NODEMAPSIZE shift=%d adder=%lu\n", - shift,addr); - return -1; - } - memnodemap[addr >> shift] = i; - } - } - return shift; -} - -#ifdef CONFIG_SPARSEMEM -int early_pfn_to_nid(unsigned long pfn) -{ - return phys_to_nid(pfn << PAGE_SHIFT); -} -#endif - -/* Initialize bootmem allocator for a node */ -void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) -{ - unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start; - unsigned long nodedata_phys; - const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); - - start = round_up(start, ZONE_ALIGN); - - printk("Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end); - - start_pfn = start >> PAGE_SHIFT; - end_pfn = end >> PAGE_SHIFT; - - memory_present(nodeid, start_pfn, end_pfn); - nodedata_phys = find_e820_area(start, end, pgdat_size); - if (nodedata_phys == -1L) - panic("Cannot find memory pgdat in node %d\n", nodeid); - - Dprintk("nodedata_phys %lx\n", nodedata_phys); - - node_data[nodeid] = phys_to_virt(nodedata_phys); - memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); - NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; - NODE_DATA(nodeid)->node_start_pfn = start_pfn; - NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; - - /* Find a place for the bootmem map */ - bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); - bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT); - if (bootmap_start == -1L) - panic("Not enough continuous space for bootmap on node %d", nodeid); - Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages); - - bootmap_size = init_bootmem_node(NODE_DATA(nodeid), - bootmap_start >> PAGE_SHIFT, - start_pfn, end_pfn); - - e820_bootmem_free(NODE_DATA(nodeid), start, end); - - reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); - reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT); - node_set_online(nodeid); -} - -/* Initialize final allocator for a zone */ -void __init setup_node_zones(int nodeid) -{ - unsigned long start_pfn, end_pfn; - unsigned long zones[MAX_NR_ZONES]; - unsigned long dma_end_pfn; - - memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); - - start_pfn = node_start_pfn(nodeid); - end_pfn = node_end_pfn(nodeid); - - Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); - - /* All nodes > 0 have a zero length zone DMA */ - dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; - if (start_pfn < dma_end_pfn) { - zones[ZONE_DMA] = dma_end_pfn - start_pfn; - zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; - } else { - zones[ZONE_NORMAL] = end_pfn - start_pfn; - } - - free_area_init_node(nodeid, NODE_DATA(nodeid), zones, - start_pfn, NULL); -} - -void __init numa_init_array(void) -{ - int rr, i; - /* There are unfortunately some poorly designed mainboards around - that only connect memory to a single CPU. This breaks the 1:1 cpu->node - mapping. To avoid this fill in the mapping for all possible - CPUs, as the number of CPUs is not known yet. - We round robin the existing nodes. */ - rr = 0; - for (i = 0; i < NR_CPUS; i++) { - if (cpu_to_node[i] != NUMA_NO_NODE) - continue; - rr = next_node(rr, node_online_map); - if (rr == MAX_NUMNODES) - rr = first_node(node_online_map); - cpu_to_node[i] = rr; - rr++; - } - - set_bit(0, &node_to_cpumask[cpu_to_node(0)]); -} - -#ifdef CONFIG_NUMA_EMU -int numa_fake __initdata = 0; - -/* Numa emulation */ -static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) -{ - int i; - struct node nodes[MAX_NUMNODES]; - unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake; - - /* Kludge needed for the hash function */ - if (hweight64(sz) > 1) { - unsigned long x = 1; - while ((x << 1) < sz) - x <<= 1; - if (x < sz/2) - printk("Numa emulation unbalanced. Complain to maintainer\n"); - sz = x; - } - - memset(&nodes,0,sizeof(nodes)); - for (i = 0; i < numa_fake; i++) { - nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz; - if (i == numa_fake-1) - sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; - nodes[i].end = nodes[i].start + sz; - if (i != numa_fake-1) - nodes[i].end--; - printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", - i, - nodes[i].start, nodes[i].end, - (nodes[i].end - nodes[i].start) >> 20); - node_set_online(i); - } - memnode_shift = compute_hash_shift(nodes, numa_fake); - if (memnode_shift < 0) { - memnode_shift = 0; - printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n"); - return -1; - } - for_each_online_node(i) - setup_node_bootmem(i, nodes[i].start, nodes[i].end); - numa_init_array(); - return 0; -} -#endif - -void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) -{ - int i; - -#ifdef CONFIG_NUMA_EMU - if (numa_fake && !numa_emulation(start_pfn, end_pfn)) - return; -#endif - -#ifdef CONFIG_ACPI_NUMA - if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, - end_pfn << PAGE_SHIFT)) - return; -#endif - -#ifdef CONFIG_K8_NUMA - if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT)) - return; -#endif - printk(KERN_INFO "%s\n", - numa_off ? "NUMA turned off" : "No NUMA configuration found"); - - printk(KERN_INFO "Faking a node at %016lx-%016lx\n", - start_pfn << PAGE_SHIFT, - end_pfn << PAGE_SHIFT); - /* setup dummy node covering all memory */ - memnode_shift = 63; - memnodemap[0] = 0; - nodes_clear(node_online_map); - node_set_online(0); - for (i = 0; i < NR_CPUS; i++) - cpu_to_node[i] = 0; - node_to_cpumask[0] = cpumask_of_cpu(0); - setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); -} - -__cpuinit void numa_add_cpu(int cpu) -{ - /* BP is initialized elsewhere */ - if (cpu) - set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); -} - -unsigned long __init numa_free_all_bootmem(void) -{ - int i; - unsigned long pages = 0; - for_each_online_node(i) { - pages += free_all_bootmem_node(NODE_DATA(i)); - } - return pages; -} - -void __init paging_init(void) -{ - int i; - for_each_online_node(i) { - setup_node_zones(i); - } -} - -/* [numa=off] */ -__init int numa_setup(char *opt) -{ - if (!strncmp(opt,"off",3)) - numa_off = 1; -#ifdef CONFIG_NUMA_EMU - if(!strncmp(opt, "fake=", 5)) { - numa_fake = simple_strtoul(opt+5,NULL,0); ; - if (numa_fake >= MAX_NUMNODES) - numa_fake = MAX_NUMNODES; - } -#endif -#ifdef CONFIG_ACPI_NUMA - if (!strncmp(opt,"noacpi",6)) - acpi_numa = -1; -#endif - return 1; -} - -EXPORT_SYMBOL(cpu_to_node); -EXPORT_SYMBOL(node_to_cpumask); -EXPORT_SYMBOL(memnode_shift); -EXPORT_SYMBOL(memnodemap); -EXPORT_SYMBOL(node_data); diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c deleted file mode 100644 index 94862e1ec03..00000000000 --- a/arch/x86_64/mm/pageattr.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright 2002 Andi Kleen, SuSE Labs. - * Thanks to Ben LaHaise for precious feedback. - */ - -#include <linux/config.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/highmem.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <asm/uaccess.h> -#include <asm/processor.h> -#include <asm/tlbflush.h> -#include <asm/io.h> - -static inline pte_t *lookup_address(unsigned long address) -{ - pgd_t *pgd = pgd_offset_k(address); - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - if (pgd_none(*pgd)) - return NULL; - pud = pud_offset(pgd, address); - if (!pud_present(*pud)) - return NULL; - pmd = pmd_offset(pud, address); - if (!pmd_present(*pmd)) - return NULL; - if (pmd_large(*pmd)) - return (pte_t *)pmd; - pte = pte_offset_kernel(pmd, address); - if (pte && !pte_present(*pte)) - pte = NULL; - return pte; -} - -static struct page *split_large_page(unsigned long address, pgprot_t prot, - pgprot_t ref_prot) -{ - int i; - unsigned long addr; - struct page *base = alloc_pages(GFP_KERNEL, 0); - pte_t *pbase; - if (!base) - return NULL; - address = __pa(address); - addr = address & LARGE_PAGE_MASK; - pbase = (pte_t *)page_address(base); - for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { - pbase[i] = pfn_pte(addr >> PAGE_SHIFT, - addr == address ? prot : ref_prot); - } - return base; -} - - -static void flush_kernel_map(void *address) -{ - if (0 && address && cpu_has_clflush) { - /* is this worth it? */ - int i; - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) - asm volatile("clflush (%0)" :: "r" (address + i)); - } else - asm volatile("wbinvd":::"memory"); - if (address) - __flush_tlb_one(address); - else - __flush_tlb_all(); -} - - -static inline void flush_map(unsigned long address) -{ - on_each_cpu(flush_kernel_map, (void *)address, 1, 1); -} - -struct deferred_page { - struct deferred_page *next; - struct page *fpage; - unsigned long address; -}; -static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */ - -static inline void save_page(unsigned long address, struct page *fpage) -{ - struct deferred_page *df; - df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL); - if (!df) { - flush_map(address); - __free_page(fpage); - } else { - df->next = df_list; - df->fpage = fpage; - df->address = address; - df_list = df; - } -} - -/* - * No more special protections in this 2/4MB area - revert to a - * large page again. - */ -static void revert_page(unsigned long address, pgprot_t ref_prot) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t large_pte; - - pgd = pgd_offset_k(address); - BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd,address); - BUG_ON(pud_none(*pud)); - pmd = pmd_offset(pud, address); - BUG_ON(pmd_val(*pmd) & _PAGE_PSE); - pgprot_val(ref_prot) |= _PAGE_PSE; - large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot); - set_pte((pte_t *)pmd, large_pte); -} - -static int -__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, - pgprot_t ref_prot) -{ - pte_t *kpte; - struct page *kpte_page; - unsigned kpte_flags; - kpte = lookup_address(address); - if (!kpte) return 0; - kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); - kpte_flags = pte_val(*kpte); - if (pgprot_val(prot) != pgprot_val(ref_prot)) { - if ((kpte_flags & _PAGE_PSE) == 0) { - set_pte(kpte, pfn_pte(pfn, prot)); - } else { - /* - * split_large_page will take the reference for this change_page_attr - * on the split page. - */ - struct page *split = split_large_page(address, prot, ref_prot); - if (!split) - return -ENOMEM; - set_pte(kpte,mk_pte(split, ref_prot)); - kpte_page = split; - } - get_page(kpte_page); - } else if ((kpte_flags & _PAGE_PSE) == 0) { - set_pte(kpte, pfn_pte(pfn, ref_prot)); - __put_page(kpte_page); - } else - BUG(); - - /* on x86-64 the direct mapping set at boot is not using 4k pages */ - BUG_ON(PageReserved(kpte_page)); - - switch (page_count(kpte_page)) { - case 1: - save_page(address, kpte_page); - revert_page(address, ref_prot); - break; - case 0: - BUG(); /* memleak and failed 2M page regeneration */ - } - return 0; -} - -/* - * Change the page attributes of an page in the linear mapping. - * - * This should be used when a page is mapped with a different caching policy - * than write-back somewhere - some CPUs do not like it when mappings with - * different caching policies exist. This changes the page attributes of the - * in kernel linear mapping too. - * - * The caller needs to ensure that there are no conflicting mappings elsewhere. - * This function only deals with the kernel linear map. - * - * Caller must call global_flush_tlb() after this. - */ -int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) -{ - int err = 0; - int i; - - down_write(&init_mm.mmap_sem); - for (i = 0; i < numpages; i++, address += PAGE_SIZE) { - unsigned long pfn = __pa(address) >> PAGE_SHIFT; - - err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); - if (err) - break; - /* Handle kernel mapping too which aliases part of the - * lowmem */ - if (__pa(address) < KERNEL_TEXT_SIZE) { - unsigned long addr2; - pgprot_t prot2 = prot; - addr2 = __START_KERNEL_map + __pa(address); - pgprot_val(prot2) &= ~_PAGE_NX; - err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC); - } - } - up_write(&init_mm.mmap_sem); - return err; -} - -/* Don't call this for MMIO areas that may not have a mem_map entry */ -int change_page_attr(struct page *page, int numpages, pgprot_t prot) -{ - unsigned long addr = (unsigned long)page_address(page); - return change_page_attr_addr(addr, numpages, prot); -} - -void global_flush_tlb(void) -{ - struct deferred_page *df, *next_df; - - down_read(&init_mm.mmap_sem); - df = xchg(&df_list, NULL); - up_read(&init_mm.mmap_sem); - if (!df) - return; - flush_map((df && !df->next) ? df->address : 0); - for (; df; df = next_df) { - next_df = df->next; - if (df->fpage) - __free_page(df->fpage); - kfree(df); - } -} - -EXPORT_SYMBOL(change_page_attr); -EXPORT_SYMBOL(global_flush_tlb); diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c deleted file mode 100644 index 8e3d097a9dd..00000000000 --- a/arch/x86_64/mm/srat.c +++ /dev/null @@ -1,225 +0,0 @@ -/* - * ACPI 3.0 based NUMA setup - * Copyright 2004 Andi Kleen, SuSE Labs. - * - * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. - * - * Called from acpi_numa_init while reading the SRAT and SLIT tables. - * Assumes all memory regions belonging to a single proximity domain - * are in one chunk. Holes between them will be included in the node. - */ - -#include <linux/kernel.h> -#include <linux/acpi.h> -#include <linux/mmzone.h> -#include <linux/bitmap.h> -#include <linux/module.h> -#include <linux/topology.h> -#include <asm/proto.h> -#include <asm/numa.h> - -static struct acpi_table_slit *acpi_slit; - -/* Internal processor count */ -static unsigned int __initdata num_processors = 0; - -static nodemask_t nodes_parsed __initdata; -static nodemask_t nodes_found __initdata; -static struct node nodes[MAX_NUMNODES] __initdata; -static __u8 pxm2node[256] = { [0 ... 255] = 0xff }; - -static __init int setup_node(int pxm) -{ - unsigned node = pxm2node[pxm]; - if (node == 0xff) { - if (nodes_weight(nodes_found) >= MAX_NUMNODES) - return -1; - node = first_unset_node(nodes_found); - node_set(node, nodes_found); - pxm2node[pxm] = node; - } - return pxm2node[pxm]; -} - -static __init int conflicting_nodes(unsigned long start, unsigned long end) -{ - int i; - for_each_online_node(i) { - struct node *nd = &nodes[i]; - if (nd->start == nd->end) - continue; - if (nd->end > start && nd->start < end) - return 1; - if (nd->end == end && nd->start == start) - return 1; - } - return -1; -} - -static __init void cutoff_node(int i, unsigned long start, unsigned long end) -{ - struct node *nd = &nodes[i]; - if (nd->start < start) { - nd->start = start; - if (nd->end < nd->start) - nd->start = nd->end; - } - if (nd->end > end) { - if (!(end & 0xfff)) - end--; - nd->end = end; - if (nd->start > nd->end) - nd->start = nd->end; - } -} - -static __init void bad_srat(void) -{ - printk(KERN_ERR "SRAT: SRAT not used.\n"); - acpi_numa = -1; -} - -static __init inline int srat_disabled(void) -{ - return numa_off || acpi_numa < 0; -} - -/* Callback for SLIT parsing */ -void __init acpi_numa_slit_init(struct acpi_table_slit *slit) -{ - acpi_slit = slit; -} - -/* Callback for Proximity Domain -> LAPIC mapping */ -void __init -acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) -{ - int pxm, node; - if (srat_disabled() || pa->flags.enabled == 0) - return; - pxm = pa->proximity_domain; - node = setup_node(pxm); - if (node < 0) { - printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); - bad_srat(); - return; - } - if (num_processors >= NR_CPUS) { - printk(KERN_ERR "SRAT: Processor #%d (lapic %u) INVALID. (Max ID: %d).\n", - num_processors, pa->apic_id, NR_CPUS); - bad_srat(); - return; - } - cpu_to_node[num_processors] = node; - acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> CPU %u -> Node %u\n", - pxm, pa->apic_id, num_processors, node); - - num_processors++; -} - -/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ -void __init -acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) -{ - struct node *nd; - unsigned long start, end; - int node, pxm; - int i; - - if (srat_disabled() || ma->flags.enabled == 0) - return; - pxm = ma->proximity_domain; - node = setup_node(pxm); - if (node < 0) { - printk(KERN_ERR "SRAT: Too many proximity domains.\n"); - bad_srat(); - return; - } - start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32); - end = start + (ma->length_lo | ((u64)ma->length_hi << 32)); - /* It is fine to add this area to the nodes data it will be used later*/ - if (ma->flags.hot_pluggable == 1) - printk(KERN_INFO "SRAT: hot plug zone found %lx - %lx \n", - start, end); - i = conflicting_nodes(start, end); - if (i >= 0) { - printk(KERN_ERR - "SRAT: pxm %d overlap %lx-%lx with node %d(%Lx-%Lx)\n", - pxm, start, end, i, nodes[i].start, nodes[i].end); - bad_srat(); - return; - } - nd = &nodes[node]; - if (!node_test_and_set(node, nodes_parsed)) { - nd->start = start; - nd->end = end; - } else { - if (start < nd->start) - nd->start = start; - if (nd->end < end) - nd->end = end; - } - if (!(nd->end & 0xfff)) - nd->end--; - printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, - nd->start, nd->end); -} - -void __init acpi_numa_arch_fixup(void) {} - -/* Use the information discovered above to actually set up the nodes. */ -int __init acpi_scan_nodes(unsigned long start, unsigned long end) -{ - int i; - if (acpi_numa <= 0) - return -1; - memnode_shift = compute_hash_shift(nodes, nodes_weight(nodes_parsed)); - if (memnode_shift < 0) { - printk(KERN_ERR - "SRAT: No NUMA node hash function found. Contact maintainer\n"); - bad_srat(); - return -1; - } - for (i = 0; i < MAX_NUMNODES; i++) { - if (!node_isset(i, nodes_parsed)) - continue; - cutoff_node(i, start, end); - if (nodes[i].start == nodes[i].end) { - node_clear(i, nodes_parsed); - continue; - } - setup_node_bootmem(i, nodes[i].start, nodes[i].end); - } - for (i = 0; i < NR_CPUS; i++) { - if (cpu_to_node[i] == NUMA_NO_NODE) - continue; - if (!node_isset(cpu_to_node[i], nodes_parsed)) - cpu_to_node[i] = NUMA_NO_NODE; - } - numa_init_array(); - return 0; -} - -int node_to_pxm(int n) -{ - int i; - if (pxm2node[n] == n) - return n; - for (i = 0; i < 256; i++) - if (pxm2node[i] == n) - return i; - return 0; -} - -int __node_distance(int a, int b) -{ - int index; - - if (!acpi_slit) - return a == b ? 10 : 20; - index = acpi_slit->localities * node_to_pxm(a); - return acpi_slit->entry[index + node_to_pxm(b)]; -} - -EXPORT_SYMBOL(__node_distance); |
