From 38cb47ba0187c481aa949d3bbf149e014e8cacda Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Feb 2008 16:47:54 +0100 Subject: x86: relax RAM check in ioremap() Kevin Winchester reported the loss of direct rendering, due to: [ 0.588184] agpgart: Detected AGP bridge 0 [ 0.588184] agpgart: unable to get memory for graphics translation table. [ 0.588184] agpgart: agp_backend_initialize() failed. [ 0.588207] agpgart-amd64: probe of 0000:00:00.0 failed with error -12 and bisected it down to: commit 266b9f8727976769e2ed2dad77ac9295f37e321e Author: Thomas Gleixner Date: Wed Jan 30 13:34:06 2008 +0100 x86: fix ioremap RAM check this check was too strict and caused an ioremap() failure. the problem is due to the somewhat unclean way of how the GART code reserves a memory range for its aperture, and how it utilizes it later on. Allow RAM pages to be ioremap()-ed too, as long as they are reserved. Bisected-by: Kevin Winchester Signed-off-by: Ingo Molnar Tested-by: Kevin Winchester Signed-off-by: Thomas Gleixner --- arch/x86/mm/ioremap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index c004d94608f..1a88d1572a7 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -116,7 +116,7 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, { void __iomem *addr; struct vm_struct *area; - unsigned long offset, last_addr; + unsigned long pfn, offset, last_addr; pgprot_t prot; /* Don't allow wraparound or zero size */ @@ -133,9 +133,10 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, /* * Don't allow anybody to remap normal RAM that we're using.. 
*/ - for (offset = phys_addr >> PAGE_SHIFT; offset < max_pfn_mapped && - (offset << PAGE_SHIFT) < last_addr; offset++) { - if (page_is_ram(offset)) + for (pfn = phys_addr >> PAGE_SHIFT; pfn < max_pfn_mapped && + (pfn << PAGE_SHIFT) < last_addr; pfn++) { + if (page_is_ram(pfn) && pfn_valid(pfn) && + !PageReserved(pfn_to_page(pfn))) return NULL; } -- cgit v1.2.3-18-g5258 From 262d5ee27271703a0396d63649430f43f3b5deb3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Feb 2008 16:47:54 +0100 Subject: x86: VMI fix Jeff Chua bisected down a vmware guest boot breakage (hang) to this paravirt change: commit 8d947344c47a40626730bb80d136d8daac9f2060 Author: Glauber de Oliveira Costa Date: Wed Jan 30 13:31:12 2008 +0100 x86: change write_idt_entry signature fix the off-by-one indexing bug ... Bisected-by: Jeff Chua Tested-by: Jeff Chua Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmi_32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 4525bc2c2e1..12affe1f9bc 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -220,21 +220,21 @@ static void vmi_set_tr(void) static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { u32 *idt_entry = (u32 *)g; - vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[2]); + vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]); } static void vmi_write_gdt_entry(struct desc_struct *dt, int entry, const void *desc, int type) { u32 *gdt_entry = (u32 *)desc; - vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[2]); + vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]); } static void vmi_write_ldt_entry(struct desc_struct *dt, int entry, const void *desc) { u32 *ldt_entry = (u32 *)desc; - vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[2]); + vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]); } static void vmi_load_sp0(struct tss_struct *tss, -- 
cgit v1.2.3-18-g5258 From 3a900d89db35c133bc0874e71d9156b22db362b4 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Mon, 4 Feb 2008 16:47:55 +0100 Subject: x86: restore correct module name for apm The apm module was renamed to apm_32 during the merge of 32 and 64 bit x86 which is unfortunate. As apm is 32 bit specific we would like to keep the _32 in the filename but the module should be named apm. Fix this in the Makefile. Reported-by: "A.E.Lawrence" Signed-off-by: Sam Ravnborg Cc: Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: "A.E.Lawrence" Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 6f813009d44..f0806358180 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -37,7 +37,8 @@ obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_PCI) += early-quirks.o -obj-$(CONFIG_APM) += apm_32.o +apm-y := apm_32.o +obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o obj-$(CONFIG_X86_32_SMP) += smpcommon_32.o obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o -- cgit v1.2.3-18-g5258 From 3bc9a77e84096148d5ada29c986d6e71a20eaeda Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Mon, 4 Feb 2008 16:47:55 +0100 Subject: x86: rename module scx200_32 to scx200 The module scx200 was renamed to scx200_32 by the merge of the 32 and 64 bit x86 arch trees. Keep the _32 suffix on the .c file as it is 32 bit specific and fix the module name in the Makefile. Signed-off-by: Sam Ravnborg Cc: "H. 
Peter Anvin" Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f0806358180..21dc1a061bf 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -75,7 +75,8 @@ ifdef CONFIG_INPUT_PCSPKR obj-y += pcspeaker.o endif -obj-$(CONFIG_SCx200) += scx200_32.o +obj-$(CONFIG_SCx200) += scx200.o +scx200-y += scx200_32.o ### # 64 bit specific files -- cgit v1.2.3-18-g5258 From 4cf31841762954ad2868156ccba94d798a16630f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 4 Feb 2008 16:47:55 +0100 Subject: x86: mach-rdc321x Kconfig fix The mach-rdc321x uses the leds-gpio driver and explicitly selects it. That driver also depends on the leds class module, so select it as well. Signed-off-by: Florian Fainelli Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7109037bdf7..77198f49b38 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -309,6 +309,7 @@ config X86_RDC321X select M486 select X86_REBOOTFIXUPS select GENERIC_GPIO + select LEDS_CLASS select LEDS_GPIO help This option is needed for RDC R-321x system-on-chip, also known -- cgit v1.2.3-18-g5258 From b50516fc20f756cf4d18a89f6f9977d60151ccba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:47:55 +0100 Subject: x86: CPA remove bogus NX clear In split_large_page we clear the NX bit for the new split ptes, but we need to preserve the original setting of it for the split ptes. 
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e297bd65e51..877b5cca2cb 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -225,7 +225,6 @@ static int split_large_page(pte_t *kpte, unsigned long address) paravirt_alloc_pt(&init_mm, page_to_pfn(base)); #endif - pgprot_val(ref_prot) &= ~_PAGE_NX; for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot)); -- cgit v1.2.3-18-g5258 From 6118f76fb7408bad7631345cc41a5f0efc49ce3e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 4 Feb 2008 16:47:56 +0100 Subject: x86: print out node_data addr and bootmap_start addr print out node_data addr and bootmap_start addr. helpful for debugging early crashes on high-end NUMA systems. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/numa_64.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index a920d09b919..5a02bf4c91e 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -202,6 +202,8 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, if (node_data[nodeid] == NULL) return; nodedata_phys = __pa(node_data[nodeid]); + printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, + nodedata_phys + pgdat_size - 1); memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; @@ -225,12 +227,15 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, return; } bootmap_start = __pa(bootmap); - Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages); bootmap_size = init_bootmem_node(NODE_DATA(nodeid), bootmap_start >> PAGE_SHIFT, start_pfn, end_pfn); + printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", + 
bootmap_start, bootmap_start + bootmap_size - 1, + bootmap_pages); + free_bootmem_with_active_regions(nodeid, end); reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size); -- cgit v1.2.3-18-g5258 From cf89ec924da5b76cbff293a1b378f312c7161411 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 4 Feb 2008 16:47:56 +0100 Subject: x86: reduce ifdef sections in fault.c Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/fault.c | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e4440d0abf8..3fff490254a 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -508,6 +508,10 @@ static int vmalloc_fault(unsigned long address) pmd_t *pmd, *pmd_ref; pte_t *pte, *pte_ref; + /* Make sure we are in vmalloc area */ + if (!(address >= VMALLOC_START && address < VMALLOC_END)) + return -1; + /* Copy kernel mappings over when needed. This can also happen within a race in page table update. In the later case just flush. */ @@ -603,6 +607,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) */ #ifdef CONFIG_X86_32 if (unlikely(address >= TASK_SIZE)) { +#else + if (unlikely(address >= TASK_SIZE64)) { +#endif if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && vmalloc_fault(address) >= 0) return; @@ -618,6 +625,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) goto bad_area_nosemaphore; } + +#ifdef CONFIG_X86_32 /* It's safe to allow irq's after cr2 has been saved and the vmalloc fault has been handled. 
*/ if (regs->flags & (X86_EFLAGS_IF|VM_MASK)) @@ -630,28 +639,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (in_atomic() || !mm) goto bad_area_nosemaphore; #else /* CONFIG_X86_64 */ - if (unlikely(address >= TASK_SIZE64)) { - /* - * Don't check for the module range here: its PML4 - * is always initialized because it's shared with the main - * kernel text. Only vmalloc may need PML4 syncups. - */ - if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && - ((address >= VMALLOC_START && address < VMALLOC_END))) { - if (vmalloc_fault(address) >= 0) - return; - } - - /* Can handle a stale RO->RW TLB */ - if (spurious_fault(address, error_code)) - return; - - /* - * Don't take the mm semaphore here. If we fixup a prefetch - * fault we could otherwise deadlock. - */ - goto bad_area_nosemaphore; - } if (likely(regs->flags & X86_EFLAGS_IF)) local_irq_enable(); -- cgit v1.2.3-18-g5258 From 1622ac23bd3568c3ae8bb391dd3adb51887d7141 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 4 Feb 2008 16:47:56 +0100 Subject: x86: define OBJCOPYFLAGS explicitly for each target. Do this rather than defining a global version and overriding it in almost all cases in order to make subsequent patches simpler. Signed-off-by: Ian Campbell Acked-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 1 - arch/x86/boot/Makefile | 2 +- arch/x86/boot/compressed/Makefile | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 8978e98bed5..364865b1b08 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -92,7 +92,6 @@ KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) LDFLAGS := -m elf_$(UTS_MACHINE) -OBJCOPYFLAGS := -O binary -R .note -R .comment -S # Speed up the build KBUILD_CFLAGS += -pipe diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 349b81a39c4..254a58398a6 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -80,6 +80,7 @@ $(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \ $(call if_changed,image) @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' +OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) @@ -90,7 +91,6 @@ $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE $(call if_changed,ld) OBJCOPYFLAGS_setup.bin := -O binary - $(obj)/setup.bin: $(obj)/setup.elf FORCE $(call if_changed,objcopy) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index fe24ceabd90..d2b9f3bb87c 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -22,6 +22,7 @@ $(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $ $(call if_changed,ld) @: +OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) -- cgit v1.2.3-18-g5258 From 92909098a3b27147c4b80f9c387ccd63676aa807 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 4 Feb 2008 16:47:56 +0100 Subject: x86: use _ASM_EXTABLE macro in arch/x86/kernel/test_nx.c Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding __ex_table entries in arch/x86/kernel/test_nx.c. 
Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/test_nx.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c index ae0ef2e304c..36c100c323a 100644 --- a/arch/x86/kernel/test_nx.c +++ b/arch/x86/kernel/test_nx.c @@ -12,6 +12,7 @@ #include #include #include +#include extern int rodata_test_data; @@ -89,16 +90,7 @@ static noinline int test_address(void *address) "2: mov %[zero], %[rslt]\n" " ret\n" ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 8\n" -#ifdef CONFIG_X86_32 - " .long 0b\n" - " .long 2b\n" -#else - " .quad 0b\n" - " .quad 2b\n" -#endif - ".previous\n" + _ASM_EXTABLE(0b,2b) : [rslt] "=r" (result) : [fake_code] "r" (address), [zero] "r" (0UL), "0" (result) ); -- cgit v1.2.3-18-g5258 From e7a40d268ec2afab7e0596667cabd2ae53fec8d8 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 4 Feb 2008 16:47:57 +0100 Subject: x86: use _ASM_EXTABLE macro in arch/x86/lib/mmx_32.c Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding __ex_table entries in arch/x86/lib/mmx_32.c. Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/lib/mmx_32.c | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index 28084d2e8dd..cc9b4a4450f 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c @@ -4,6 +4,7 @@ #include #include +#include #include @@ -50,10 +51,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ " jmp 2b\n" ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b, 3b\n" - ".previous" + _ASM_EXTABLE(1b,3b) : : "r" (from) ); @@ -81,10 +79,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ " jmp 2b\n" ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b, 3b\n" - ".previous" + _ASM_EXTABLE(1b,3b) : : "r" (from), "r" (to) : "memory"); from+=64; to+=64; @@ -181,10 +176,7 @@ static void fast_copy_page(void *to, void *from) "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ " jmp 2b\n" ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b, 3b\n" - ".previous" + _ASM_EXTABLE(1b,3b) : : "r" (from) ); for(i=0; i<(4096-320)/64; i++) @@ -211,10 +203,7 @@ static void fast_copy_page(void *to, void *from) "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ " jmp 2b\n" ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b, 3b\n" - ".previous" + _ASM_EXTABLE(1b,3b) : : "r" (from), "r" (to) : "memory"); from+=64; to+=64; @@ -311,10 +300,7 @@ static void fast_copy_page(void *to, void *from) "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ " jmp 2b\n" ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b, 3b\n" - ".previous" + _ASM_EXTABLE(1b,3b) : : "r" (from) ); for(i=0; i<4096/64; i++) @@ -341,10 +327,7 @@ static void fast_copy_page(void *to, void *from) "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ " jmp 2b\n" 
".previous\n" - ".section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b, 3b\n" - ".previous" + _ASM_EXTABLE(1b,3b) : : "r" (from), "r" (to) : "memory"); from+=64; to+=64; -- cgit v1.2.3-18-g5258 From 287774414568010855642518513f085491644061 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 4 Feb 2008 16:47:57 +0100 Subject: x86: use _ASM_EXTABLE macro in arch/x86/lib/usercopy_32.c Use the _ASM_EXTABLE macro from , instead of open-coding __ex_table entires in arch/x86/lib/usercopy_32.c. Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/lib/usercopy_32.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 9c4ffd5bedb..e849b9998b0 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -48,10 +48,7 @@ do { \ "3: movl %5,%0\n" \ " jmp 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - ".previous" \ + _ASM_EXTABLE(0b,3b) \ : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ "=&D" (__d2) \ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ @@ -132,11 +129,8 @@ do { \ "3: lea 0(%2,%0,4),%0\n" \ " jmp 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 4\n" \ - " .long 0b,3b\n" \ - " .long 1b,2b\n" \ - ".previous" \ + _ASM_EXTABLE(0b,3b) \ + _ASM_EXTABLE(1b,2b) \ : "=&c"(size), "=&D" (__d0) \ : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ } while (0) -- cgit v1.2.3-18-g5258 From 8da804f2b23913ef362c6a578bf482e5ccc93d1a Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 4 Feb 2008 16:47:57 +0100 Subject: x86: use _ASM_EXTABLE macro in arch/x86/lib/usercopy_64.c Use the _ASM_EXTABLE macro from , instead of open-coding __ex_table entires in arch/x86/lib/usercopy_64.c. Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/lib/usercopy_64.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 893d43f838c..0c89d1bb028 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -31,10 +31,7 @@ do { \ "3: movq %5,%0\n" \ " jmp 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n" \ - " .quad 0b,3b\n" \ - ".previous" \ + _ASM_EXTABLE(0b,3b) \ : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ "=&D" (__d2) \ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ @@ -87,11 +84,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size) "3: lea 0(%[size1],%[size8],8),%[size8]\n" " jmp 2b\n" ".previous\n" - ".section __ex_table,\"a\"\n" - " .align 8\n" - " .quad 0b,3b\n" - " .quad 1b,2b\n" - ".previous" + _ASM_EXTABLE(0b,3b) + _ASM_EXTABLE(1b,2b) : [size8] "=c"(size), [dst] "=&D" (__d0) : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), [zero] "r" (0UL), [eight] "r" (8UL)); -- cgit v1.2.3-18-g5258 From f832ff18e886ada0ff30a1edeab082ce218d107e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 4 Feb 2008 16:47:58 +0100 Subject: x86: use _ASM_EXTABLE macro in arch/x86/mm/init_32.c Use the _ASM_EXTABLE macro from <asm/asm.h>, instead of open-coding __ex_table entries in arch/x86/mm/init_32.c. Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/init_32.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index f2f36f8dae5..d1bc04006d1 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -718,10 +719,7 @@ static noinline int do_test_wp_bit(void) "1: movb %1, %0 \n" " xorl %2, %2 \n" "2: \n" - ".section __ex_table, \"a\"\n" - " .align 4 \n" - " .long 1b, 2b \n" - ".previous \n" + _ASM_EXTABLE(1b,2b) :"=m" (*(char *)fix_to_virt(FIX_WP_TEST)), "=q" (tmp_reg), "=r" (flag) -- cgit v1.2.3-18-g5258 From 2347d933b158932cf2b8aeebae3e5cc16b200bd1 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 4 Feb 2008 16:47:59 +0100 Subject: x86: cpuid: allow querying %ecx-sensitive CPUID levels After /dev/*/cpuid was introduced, Intel changed the semantics of the CPUID instruction to be sensitive to %ecx as well as %eax. This patch allows querying of %ecx-sensitive levels by placing the %ecx value in the upper 32 bits of the file position (lower 32 bits always were used for the %eax value.) Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpuid.c | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index a63432d800f..c10ebc4b8e4 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * - * - * Copyright 2000 H. Peter Anvin - All Rights Reserved + * + * Copyright 2000-2008 H. 
Peter Anvin - All Rights Reserved * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,6 +17,10 @@ * and then read in chunks of 16 bytes. A larger size means multiple * reads of consecutive levels. * + * The lower 32 bits of the file position is used as the incoming %eax, + * and the upper 32 bits of the file position as the incoming %ecx, + * the latter intended for "counting" eax levels like eax=4. + * * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on * an SMP box will direct the access to CPU %d. */ @@ -43,27 +47,16 @@ static struct class *cpuid_class; -struct cpuid_command { - u32 reg; - u32 *data; +struct cpuid_regs { + u32 eax, ebx, ecx, edx; }; static void cpuid_smp_cpuid(void *cmd_block) { - struct cpuid_command *cmd = cmd_block; - - cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2], - &cmd->data[3]); -} - -static inline void do_cpuid(int cpu, u32 reg, u32 * data) -{ - struct cpuid_command cmd; + struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block; - cmd.reg = reg; - cmd.data = data; - - smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); + cpuid_count(cmd->eax, cmd->ecx, + &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx); } static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) @@ -93,19 +86,21 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, size_t count, loff_t * ppos) { char __user *tmp = buf; - u32 data[4]; - u32 reg = *ppos; + struct cpuid_regs cmd; int cpu = iminor(file->f_path.dentry->d_inode); + u64 pos = *ppos; if (count % 16) return -EINVAL; /* Invalid chunk size */ for (; count; count -= 16) { - do_cpuid(cpu, reg, data); - if (copy_to_user(tmp, &data, 16)) + cmd.eax = pos; + cmd.ecx = pos >> 32; + smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); + if (copy_to_user(tmp, &cmd, 16)) return -EFAULT; tmp += 16; - *ppos = reg++; + *ppos = ++pos; } return tmp - buf; @@ 
-193,7 +188,7 @@ static int __init cpuid_init(void) } for_each_online_cpu(i) { err = cpuid_device_create(i); - if (err != 0) + if (err != 0) goto out_class; } register_hotcpu_notifier(&cpuid_class_cpu_notifier); @@ -208,7 +203,7 @@ out_class: } class_destroy(cpuid_class); out_chrdev: - unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); + unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); out: return err; } -- cgit v1.2.3-18-g5258 From 2b06ac867176d5d24757bda7e13f6255d6b96d7b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 4 Feb 2008 16:47:59 +0100 Subject: x86: cpuid, msr: use inode mutex instead of big kernel lock Instead of grabbing the BKL on seek, use the inode mutex in the style of generic_file_llseek(). Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpuid.c | 7 +++---- arch/x86/kernel/msr.c | 14 +++++++++----- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index c10ebc4b8e4..288e7a6598a 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -62,9 +62,9 @@ static void cpuid_smp_cpuid(void *cmd_block) static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) { loff_t ret; + struct inode *inode = file->f_mapping->host; - lock_kernel(); - + mutex_lock(&inode->i_mutex); switch (orig) { case 0: file->f_pos = offset; @@ -77,8 +77,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) default: ret = -EINVAL; } - - unlock_kernel(); + mutex_unlock(&inode->i_mutex); return ret; } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index bd82850e651..af51ea8400b 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * - * - * Copyright 2000 H. Peter Anvin - All Rights Reserved + * + * Copyright 2000-2008 H. 
Peter Anvin - All Rights Reserved * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -45,9 +45,10 @@ static struct class *msr_class; static loff_t msr_seek(struct file *file, loff_t offset, int orig) { - loff_t ret = -EINVAL; + loff_t ret; + struct inode *inode = file->f_mapping->host; - lock_kernel(); + mutex_lock(&inode->i_mutex); switch (orig) { case 0: file->f_pos = offset; @@ -56,8 +57,11 @@ static loff_t msr_seek(struct file *file, loff_t offset, int orig) case 1: file->f_pos += offset; ret = file->f_pos; + break; + default: + ret = -EINVAL; } - unlock_kernel(); + mutex_unlock(&inode->i_mutex); return ret; } -- cgit v1.2.3-18-g5258 From fa1408e4df53ec1e61f59c030b3488a1ef0c635d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 4 Feb 2008 16:48:00 +0100 Subject: x86: unify CPU feature string names Move the CPU feature string names to a separate file (common to 32 and 64 bits); additionally, make includable by host code in preparation for including the CPU feature strings in the boot code. Signed-off-by: H. 
Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/Makefile | 1 + arch/x86/kernel/cpu/feature_names.c | 83 +++++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/proc.c | 74 --------------------------------- arch/x86/kernel/setup_64.c | 76 --------------------------------- 4 files changed, 84 insertions(+), 150 deletions(-) create mode 100644 arch/x86/kernel/cpu/feature_names.c (limited to 'arch') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index cfdb2f3bd76..a0c4d7c5dbd 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -3,6 +3,7 @@ # obj-y := intel_cacheinfo.o addon_cpuid_features.o +obj-y += feature_names.o obj-$(CONFIG_X86_32) += common.o proc.o bugs.o obj-$(CONFIG_X86_32) += amd.o diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c new file mode 100644 index 00000000000..ee975ac6bbc --- /dev/null +++ b/arch/x86/kernel/cpu/feature_names.c @@ -0,0 +1,83 @@ +/* + * Strings for the various x86 capability flags. + * + * This file must not contain any executable code. + */ + +#include "asm/cpufeature.h" + +/* + * These flag bits must match the definitions in . + * NULL means this bit is undefined or reserved; either way it doesn't + * have meaning as far as Linux is concerned. Note that it's important + * to realize there is a difference between this table and CPUID -- if + * applications want to get the raw CPUID data, they should access + * /dev/cpu//cpuid instead. 
+ */ +const char * const x86_cap_flags[NCAPINTS*32] = { + /* Intel-defined */ + "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", + "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", + "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", + "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", + + /* AMD-defined */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, + NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", + "3dnowext", "3dnow", + + /* Transmeta-defined */ + "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Other (Linux-defined) */ + "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", + NULL, NULL, NULL, NULL, + "constant_tsc", "up", NULL, "arch_perfmon", + "pebs", "bts", NULL, NULL, + "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Intel-defined (#2) */ + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", + "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, + NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* VIA/Cyrix/Centaur-defined */ + NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", + "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* AMD-defined (#2) */ + "lahf_lm", "cmp_legacy", "svm", "extapic", + "cr8_legacy", "abm", "sse4a", "misalignsse", + "3dnowprefetch", "osvw", "ibs", "sse5", + "skinit", "wdt", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* Auxiliary (Linux-defined) */ + "ida", NULL, NULL, NULL, NULL, NULL, 
NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +}; + +const char *const x86_power_flags[32] = { + "ts", /* temperature sensor */ + "fid", /* frequency id control */ + "vid", /* voltage id control */ + "ttp", /* thermal trip */ + "tm", + "stc", + "100mhzsteps", + "hwpstate", + "", /* tsc invariant mapped to constant_tsc */ + /* nothing */ +}; diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 02821326014..af11d31dce0 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -10,80 +10,6 @@ */ static int show_cpuinfo(struct seq_file *m, void *v) { - /* - * These flag bits must match the definitions in . - * NULL means this bit is undefined or reserved; either way it doesn't - * have meaning as far as Linux is concerned. Note that it's important - * to realize there is a difference between this table and CPUID -- if - * applications want to get the raw CPUID data, they should access - * /dev/cpu//cpuid instead. 
- */ - static const char * const x86_cap_flags[] = { - /* Intel-defined */ - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", - - /* AMD-defined */ - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", - "3dnowext", "3dnow", - - /* Transmeta-defined */ - "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Other (Linux-defined) */ - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", - NULL, NULL, NULL, NULL, - "constant_tsc", "up", NULL, "arch_perfmon", - "pebs", "bts", NULL, "sync_rdtsc", - "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", - "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* VIA/Cyrix/Centaur-defined */ - NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", - "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", - "cr8_legacy", "abm", "sse4a", "misalignsse", - "3dnowprefetch", "osvw", "ibs", "sse5", - "skinit", "wdt", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Auxiliary (Linux-defined) */ - "ida", NULL, NULL, NULL, NULL, 
NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - }; - static const char * const x86_power_flags[] = { - "ts", /* temperature sensor */ - "fid", /* frequency id control */ - "vid", /* voltage id control */ - "ttp", /* thermal trip */ - "tm", - "stc", - "100mhzsteps", - "hwpstate", - "", /* constant_tsc - moved to flags */ - /* nothing */ - }; struct cpuinfo_x86 *c = v; int i, n = 0; int fpu_exception; diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 18df70c534b..c8939dfddfb 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -1068,82 +1068,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) struct cpuinfo_x86 *c = v; int cpu = 0, i; - /* - * These flag bits must match the definitions in . - * NULL means this bit is undefined or reserved; either way it doesn't - * have meaning as far as Linux is concerned. Note that it's important - * to realize there is a difference between this table and CPUID -- if - * applications want to get the raw CPUID data, they should access - * /dev/cpu//cpuid instead. 
- */ - static const char *const x86_cap_flags[] = { - /* Intel-defined */ - "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", - "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe", - - /* AMD-defined */ - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, - NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", - "3dnowext", "3dnow", - - /* Transmeta-defined */ - "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Other (Linux-defined) */ - "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", - NULL, NULL, NULL, NULL, - "constant_tsc", "up", NULL, "arch_perfmon", - "pebs", "bts", NULL, "sync_rdtsc", - "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", - "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* VIA/Cyrix/Centaur-defined */ - NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en", - "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", - "cr8_legacy", "abm", "sse4a", "misalignsse", - "3dnowprefetch", "osvw", "ibs", "sse5", - "skinit", "wdt", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - - /* Auxiliary (Linux-defined) */ - "ida", NULL, NULL, NULL, NULL, 
NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - }; - static const char *const x86_power_flags[] = { - "ts", /* temperature sensor */ - "fid", /* frequency id control */ - "vid", /* voltage id control */ - "ttp", /* thermal trip */ - "tm", - "stc", - "100mhzsteps", - "hwpstate", - "", /* tsc invariant mapped to constant_tsc */ - /* nothing */ - }; - - #ifdef CONFIG_SMP cpu = c->cpu_index; #endif -- cgit v1.2.3-18-g5258 From f0be6c6a697c2fe8e2efbe98cd157bdbcff969ae Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 4 Feb 2008 16:48:00 +0100 Subject: x86 setup: print missing CPU features in cleartext Instead of obscure numbers, print the list of missing CPU features in cleartext. To conserve space, use a host program (mkcpustr.c) to produce a compact list of mandatory features only. Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/boot/Makefile | 16 ++++++++++++---- arch/x86/boot/cpu.c | 26 ++++++++++++++++++++----- arch/x86/boot/mkcpustr.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 9 deletions(-) create mode 100644 arch/x86/boot/mkcpustr.c (limited to 'arch') diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 254a58398a6..f88458e83ef 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -26,7 +26,7 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA #RAMDISK := -DRAMDISK=512 targets := vmlinux.bin setup.bin setup.elf zImage bzImage -subdir- := compressed +subdir- := compressed setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o setup-y += header.o main.o mca.o memory.o pm.o pmjump.o @@ -43,9 +43,17 @@ setup-y += video-vesa.o setup-y += video-bios.o targets += $(setup-y) -hostprogs-y := tools/build +hostprogs-y := mkcpustr tools/build -HOSTCFLAGS_build.o := $(LINUXINCLUDE) +HOST_EXTRACFLAGS += $(LINUXINCLUDE) + 
+$(obj)/cpu.o: $(obj)/cpustr.h + +quiet_cmd_cpustr = CPUSTR $@ + cmd_cpustr = $(obj)/mkcpustr > $@ +targets += cpustr.h +$(obj)/cpustr.h: $(obj)/mkcpustr FORCE + $(call if_changed,cpustr) # --------------------------------------------------------------------------- @@ -98,7 +106,7 @@ $(obj)/compressed/vmlinux: FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@ # Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel -FDARGS = +FDARGS = # Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel FDINITRD = diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 2a5c32da585..00e19edd852 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -1,7 +1,7 @@ /* -*- linux-c -*- ------------------------------------------------------- * * * Copyright (C) 1991, 1992 Linus Torvalds - * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2007-2008 rPath, Inc. - All Rights Reserved * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -9,7 +9,7 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/cpu.c + * arch/x86/boot/cpu.c * * Check for obligatory CPU features and abort if the features are not * present. 
@@ -19,6 +19,8 @@ #include "bitops.h" #include +#include "cpustr.h" + static char *cpu_name(int level) { static char buf[6]; @@ -35,6 +37,7 @@ int validate_cpu(void) { u32 *err_flags; int cpu_level, req_level; + const unsigned char *msg_strs; check_cpu(&cpu_level, &req_level, &err_flags); @@ -51,13 +54,26 @@ int validate_cpu(void) puts("This kernel requires the following features " "not present on the CPU:\n"); + msg_strs = (const unsigned char *)x86_cap_strs; + for (i = 0; i < NCAPINTS; i++) { u32 e = err_flags[i]; for (j = 0; j < 32; j++) { - if (e & 1) - printf("%d:%d ", i, j); - + int n = (i << 5)+j; + if (*msg_strs < n) { + /* Skip to the next string */ + do { + msg_strs++; + } while (*msg_strs); + msg_strs++; + } + if (e & 1) { + if (*msg_strs == n && msg_strs[1]) + printf("%s ", msg_strs+1); + else + printf("%d:%d ", i, j); + } e >>= 1; } } diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c new file mode 100644 index 00000000000..bbe76953bae --- /dev/null +++ b/arch/x86/boot/mkcpustr.c @@ -0,0 +1,49 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2008 rPath, Inc. - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * This is a host program to preprocess the CPU strings into a + * compact format suitable for the setup code. 
+ */ + +#include + +#include "../kernel/cpu/feature_names.c" + +#if NCAPFLAGS > 8 +# error "Need to adjust the boot code handling of CPUID strings" +#endif + +int main(void) +{ + int i; + const char *str; + + printf("static const char x86_cap_strs[] = \n"); + + for (i = 0; i < NCAPINTS*32; i++) { + str = x86_cap_flags[i]; + + if (i == NCAPINTS*32-1) { + /* The last entry must be unconditional; this + also consumes the compiler-added null character */ + if (!str) + str = ""; + printf("\t\"\\x%02x\"\"%s\"\n", i, str); + } else if (str) { + printf("#if REQUIRED_MASK%d & (1 << %d)\n" + "\t\"\\x%02x\"\"%s\\0\"\n" + "#endif\n", + i >> 5, i & 31, i, str); + } + } + printf("\t;\n"); + return 0; +} -- cgit v1.2.3-18-g5258 From 9a6b344ea967efa0bb5ca4cb5405f840652b66c4 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 4 Feb 2008 16:48:01 +0100 Subject: x86: remove long dead cyrix mtrr code cyrix_arr_init was #if 0 all the way back to at least v2.6.12. This was the only place where arr3_protected was set to anything but zero. Eliminate this variable. 
Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mtrr/cyrix.c | 107 --------------------------------------- arch/x86/kernel/cpu/mtrr/main.c | 12 ----- 2 files changed, 119 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 8e139c70f88..ff14c320040 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -7,8 +7,6 @@ #include #include "mtrr.h" -int arr3_protected; - static void cyrix_get_arr(unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type * type) @@ -99,8 +97,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) case 4: return replace_reg; case 3: - if (arr3_protected) - break; case 2: case 1: case 0: @@ -115,8 +111,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) } else { for (i = 0; i < 7; i++) { cyrix_get_arr(i, &lbase, &lsize, &ltype); - if ((i == 3) && arr3_protected) - continue; if (lsize == 0) return i; } @@ -260,107 +254,6 @@ static void cyrix_set_all(void) post_set(); } -#if 0 -/* - * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection - * with the SMM (System Management Mode) mode. So we need the following: - * Check whether SMI_LOCK (CCR3 bit 0) is set - * if it is set, write a warning message: ARR3 cannot be changed!
- * (it cannot be changed until the next processor reset) - * if it is reset, then we can change it, set all the needed bits: - * - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset) - * - disable access to SMM memory (CCR1 bit 2 reset) - * - disable SMM mode (CCR1 bit 1 reset) - * - disable write protection of ARR3 (CCR6 bit 1 reset) - * - (maybe) disable ARR3 - * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set) - */ -static void __init -cyrix_arr_init(void) -{ - struct set_mtrr_context ctxt; - unsigned char ccr[7]; - int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 }; -#ifdef CONFIG_SMP - int i; -#endif - - /* flush cache and enable MAPEN */ - set_mtrr_prepare_save(&ctxt); - set_mtrr_cache_disable(&ctxt); - - /* Save all CCRs locally */ - ccr[0] = getCx86(CX86_CCR0); - ccr[1] = getCx86(CX86_CCR1); - ccr[2] = getCx86(CX86_CCR2); - ccr[3] = ctxt.ccr3; - ccr[4] = getCx86(CX86_CCR4); - ccr[5] = getCx86(CX86_CCR5); - ccr[6] = getCx86(CX86_CCR6); - - if (ccr[3] & 1) { - ccrc[3] = 1; - arr3_protected = 1; - } else { - /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and - * access to SMM memory through ARR3 (bit 7). - */ - if (ccr[1] & 0x80) { - ccr[1] &= 0x7f; - ccrc[1] |= 0x80; - } - if (ccr[1] & 0x04) { - ccr[1] &= 0xfb; - ccrc[1] |= 0x04; - } - if (ccr[1] & 0x02) { - ccr[1] &= 0xfd; - ccrc[1] |= 0x02; - } - arr3_protected = 0; - if (ccr[6] & 0x02) { - ccr[6] &= 0xfd; - ccrc[6] = 1; /* Disable write protection of ARR3 */ - setCx86(CX86_CCR6, ccr[6]); - } - /* Disable ARR3. This is safe now that we disabled SMM. */ - /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */ - } - /* If we changed CCR1 in memory, change it in the processor, too. 
*/ - if (ccrc[1]) - setCx86(CX86_CCR1, ccr[1]); - - /* Enable ARR usage by the processor */ - if (!(ccr[5] & 0x20)) { - ccr[5] |= 0x20; - ccrc[5] = 1; - setCx86(CX86_CCR5, ccr[5]); - } -#ifdef CONFIG_SMP - for (i = 0; i < 7; i++) - ccr_state[i] = ccr[i]; - for (i = 0; i < 8; i++) - cyrix_get_arr(i, - &arr_state[i].base, &arr_state[i].size, - &arr_state[i].type); -#endif - - set_mtrr_done(&ctxt); /* flush cache and disable MAPEN */ - - if (ccrc[5]) - printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n"); - if (ccrc[3]) - printk(KERN_INFO "mtrr: ARR3 cannot be changed\n"); -/* - if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n"); - if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n"); - if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n"); -*/ - if (ccrc[6]) - printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n"); -} -#endif - static struct mtrr_ops cyrix_mtrr_ops = { .vendor = X86_VENDOR_CYRIX, // .init = cyrix_arr_init, diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 71591958265..822d8f90c1e 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -59,12 +59,6 @@ struct mtrr_ops * mtrr_if = NULL; static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); -#ifndef CONFIG_X86_64 -extern int arr3_protected; -#else -#define arr3_protected 0 -#endif - void set_mtrr_ops(struct mtrr_ops * ops) { if (ops->vendor && ops->vendor < X86_VENDOR_NUM) @@ -513,12 +507,6 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) printk(KERN_WARNING "mtrr: register: %d too big\n", reg); goto out; } - if (is_cpu(CYRIX) && !use_intel()) { - if ((reg == 3) && arr3_protected) { - printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n"); - goto out; - } - } mtrr_if->get(reg, &lbase, &lsize, &ltype); if (lsize < 1) { printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); -- cgit v1.2.3-18-g5258 From
94a8a7acbe4d9aa83d53597516cc71101ebd2f6d Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 4 Feb 2008 16:48:01 +0100 Subject: x86: remove misleading comments in trampoline_*.S Both trampolines actually *do* set up stack. (Is the "we jump into compressed/head.S" comment still true?) Signed-off-by: Pavel Machek Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/trampoline_32.S | 7 +------ arch/x86/kernel/trampoline_64.S | 3 --- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index 9bcc1c6aca3..64580679861 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S @@ -11,12 +11,7 @@ * trampoline page to make our stack and everything else * is a mystery. * - * In fact we don't actually need a stack so we don't - * set one up. - * - * We jump into the boot/compressed/head.S code. So you'd - * better be running a compressed kernel image or you - * won't get very far. + * We jump into arch/x86/kernel/head_32.S. * * On entry to trampoline_data, the processor is in real mode * with 16-bit addressing and 16-bit data. CS has some value diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index e30b67c6a9f..4aedd0bcee4 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S @@ -10,9 +10,6 @@ * trampoline page to make our stack and everything else * is a mystery. * - * In fact we don't actually need a stack so we don't - * set one up. - * * On entry to trampoline_data, the processor is in real mode * with 16-bit addressing and 16-bit data. CS has some value * and IP is zero. 
Thus, data addresses need to be absolute -- cgit v1.2.3-18-g5258 From e618c9579c745742c422b7c3de1f802aa67e6110 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 4 Feb 2008 16:48:02 +0100 Subject: x86: unify PAE/non-PAE pgd_ctor The constructors for PAE and non-PAE pgd_ctors are more or less identical, and can be made into the same function. Signed-off-by: Jeremy Fitzhardinge Cc: William Irwin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pgtable_32.c | 54 ++++++++++++++++++------------------------------ 1 file changed, 20 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index cb3aa470249..f34e33d1844 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -219,50 +219,39 @@ static inline void pgd_list_del(pgd_t *pgd) list_del(&page->lru); } +#define UNSHARED_PTRS_PER_PGD \ + (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) - -#if (PTRS_PER_PMD == 1) -/* Non-PAE pgd constructor */ -static void pgd_ctor(void *pgd) +static void pgd_ctor(void *p) { + pgd_t *pgd = p; unsigned long flags; - /* !PAE, no pagetable sharing */ + /* Clear usermode parts of PGD */ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); spin_lock_irqsave(&pgd_lock, flags); - /* must happen under lock */ - clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, - swapper_pg_dir + USER_PTRS_PER_PGD, - KERNEL_PGD_PTRS); - paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, - __pa(swapper_pg_dir) >> PAGE_SHIFT, - USER_PTRS_PER_PGD, - KERNEL_PGD_PTRS); - pgd_list_add(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); -} -#else /* PTRS_PER_PMD > 1 */ -/* PAE pgd constructor */ -static void pgd_ctor(void *pgd) -{ - /* PAE, kernel PMD may be shared */ - - if (SHARED_KERNEL_PMD) { - clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, + /* If the pgd points to a shared pagetable level (either the + ptes in non-PAE, or shared PMD in PAE), then just copy the + references from swapper_pg_dir. 
*/ + if (PAGETABLE_LEVELS == 2 || + (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) { + clone_pgd_range(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, KERNEL_PGD_PTRS); - } else { - unsigned long flags; + paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, + __pa(swapper_pg_dir) >> PAGE_SHIFT, + USER_PTRS_PER_PGD, + KERNEL_PGD_PTRS); + } - memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); - spin_lock_irqsave(&pgd_lock, flags); + /* list required to sync kernel mapping updates */ + if (!SHARED_KERNEL_PMD) pgd_list_add(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); - } + + spin_unlock_irqrestore(&pgd_lock, flags); } -#endif /* PTRS_PER_PMD */ static void pgd_dtor(void *pgd) { @@ -276,9 +265,6 @@ static void pgd_dtor(void *pgd) spin_unlock_irqrestore(&pgd_lock, flags); } -#define UNSHARED_PTRS_PER_PGD \ - (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) - #ifdef CONFIG_X86_PAE /* * Mop up any pmd pages which may still be attached to the pgd. -- cgit v1.2.3-18-g5258 From a67ad9c9f82342a9b320fdad204a490727ef4a18 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 4 Feb 2008 16:48:02 +0100 Subject: x86: revert "defer cr3 reload when doing pud_clear()" Revert "defer cr3 reload when doing pud_clear()" since I'm going to replace it. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pgtable_32.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index f34e33d1844..c7db504be1e 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -373,13 +373,6 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { - /* This is called just after the pmd has been detached from - the pgd, which requires a full tlb flush to be recognized - by the CPU. 
Rather than incurring multiple tlb flushes - while the address space is being pulled down, make the tlb - gathering machinery do a full flush when we're done. */ - tlb->fullmm = 1; - paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); tlb_remove_page(tlb, virt_to_page(pmd)); } -- cgit v1.2.3-18-g5258 From fa0c864d998c9c97d11db097d5736028d5c80985 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 4 Feb 2008 16:48:03 +0100 Subject: x86: cleanup - eliminate numbers in LDT allocation code This patch eliminates numbers in LDT allocation code trying to make it clear to understand from where these numbers come. No code changed: text data bss dec hex filename 1896 0 0 1896 768 ldt.o.before 1896 0 0 1896 768 ldt.o.after md5: 6cbec8705008ddb4b704aade60bceda3 ldt.o.before.asm 6cbec8705008ddb4b704aade60bceda3 ldt.o.after.asm Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/ldt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 8a7660c8394..0224c3637c7 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -35,7 +35,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) if (mincount <= pc->size) return 0; oldsize = pc->size; - mincount = (mincount + 511) & (~511); + mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) & + (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1)); if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) newldt = vmalloc(mincount * LDT_ENTRY_SIZE); else -- cgit v1.2.3-18-g5258 From c7e844f0415252c7e1a2153a97e7a0c511d61ada Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 4 Feb 2008 16:48:03 +0100 Subject: x86: move NUMAQ io handling into arch/x86/pci/numa.c numa.c is the only user of the {in,out}*_quad functions. And it has only a few call sites. Change them to open code the magic NUMAQ port access. 
Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/numa.c | 52 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c index f5f165f69e0..55270c26237 100644 --- a/arch/x86/pci/numa.c +++ b/arch/x86/pci/numa.c @@ -5,36 +5,62 @@ #include #include #include +#include #include "pci.h" +#define XQUAD_PORTIO_BASE 0xfe400000 +#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ + #define BUS2QUAD(global) (mp_bus_id_to_node[global]) #define BUS2LOCAL(global) (mp_bus_id_to_local[global]) #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) +extern void *xquad_portio; /* Where the IO area was mapped */ +#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) + #define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3)) +static void write_cf8(unsigned bus, unsigned devfn, unsigned reg) +{ + unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg); + if (xquad_portio) + writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus))); + else + outl(val, 0xCF8); +} + static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value) { unsigned long flags; + void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus)); if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) return -EINVAL; spin_lock_irqsave(&pci_config_lock, flags); - outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus)); + write_cf8(bus, devfn, reg); switch (len) { case 1: - *value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus)); + if (xquad_portio) + *value = readb(adr + (reg & 3)); + else + *value = inb(0xCFC + (reg & 3)); break; case 2: - *value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus)); + if (xquad_portio) + *value = readw(adr + (reg & 2)); + else + *value = inw(0xCFC + (reg & 
2)); break; case 4: - *value = inl_quad(0xCFC, BUS2QUAD(bus)); + if (xquad_portio) + *value = readl(adr); + else + *value = inl(0xCFC); break; } @@ -47,23 +73,33 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 value) { unsigned long flags; + void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus)); if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) return -EINVAL; spin_lock_irqsave(&pci_config_lock, flags); - outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus)); + write_cf8(bus, devfn, reg); switch (len) { case 1: - outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus)); + if (xquad_portio) + writeb(value, adr + (reg & 3)); + else + outb((u8)value, 0xCFC + (reg & 3)); break; case 2: - outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus)); + if (xquad_portio) + writew(value, adr + (reg & 2)); + else + outw((u16)value, 0xCFC + (reg & 2)); break; case 4: - outl_quad((u32)value, 0xCFC, BUS2QUAD(bus)); + if (xquad_portio) + writel(value, adr + reg); + else + outl((u32)value, 0xCFC); break; } -- cgit v1.2.3-18-g5258 From 73bdb73f6666228289af4be55a77e2ed978061a7 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 4 Feb 2008 16:48:04 +0100 Subject: x86: add include to cpu/intel.c Fixes sparse warning: arch/x86/kernel/cpu/intel.c:48:15: warning: symbol 'ppro_with_ram_bug' was not declared. Should it be static? 
Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index d1c372b018d..fae31ce747b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "cpu.h" -- cgit v1.2.3-18-g5258 From 7bb308a1eae2a3b869c498017aed15a699d80799 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 4 Feb 2008 16:48:04 +0100 Subject: x86: small sparse fix in process_32.c arch/x86/kernel/process_32.c:254:43: warning: Using plain integer as NULL pointer Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 968371ab223..dabdbeff1f7 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -251,7 +251,7 @@ void cpu_idle_wait(void) * because it has nothing to do. * Give all the remaining CPUS a kick. */ - smp_call_function_mask(map, do_nothing, 0, 0); + smp_call_function_mask(map, do_nothing, NULL, 0); } while (!cpus_empty(map)); set_cpus_allowed(current, tmp); -- cgit v1.2.3-18-g5258 From b6d549a2967881af4f02d02062acbfeb807d44b4 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 4 Feb 2008 16:48:04 +0100 Subject: x86: add cpu init function definitions to cpu.h cpu.h was already included everywhere needed. Fixes following sparse warnings: arch/x86/kernel/cpu/amd.c:343:12: warning: symbol 'amd_init_cpu' was not declared. Should it be static? arch/x86/kernel/cpu/cyrix.c:444:12: warning: symbol 'cyrix_init_cpu' was not declared. Should it be static? arch/x86/kernel/cpu/cyrix.c:456:12: warning: symbol 'nsc_init_cpu' was not declared. Should it be static?
arch/x86/kernel/cpu/centaur.c:467:12: warning: symbol 'centaur_init_cpu' was not declared. Should it be static? arch/x86/kernel/cpu/transmeta.c:112:12: warning: symbol 'transmeta_init_cpu' was not declared. Should it be static? arch/x86/kernel/cpu/intel.c:296:12: warning: symbol 'intel_cpu_init' was not declared. Should it be static? arch/x86/kernel/cpu/nexgen.c:56:12: warning: symbol 'nexgen_init_cpu' was not declared. Should it be static? arch/x86/kernel/cpu/umc.c:22:12: warning: symbol 'umc_init_cpu' was not declared. Should it be static? Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/common.c | 10 ---------- arch/x86/kernel/cpu/cpu.h | 9 +++++++++ 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b7b2142b58e..d9313d9adce 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -623,16 +623,6 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; * They will insert themselves into the cpu_devs structure. * Then, when cpu_init() is called, we can just iterate over that array. 
*/ - -extern int intel_cpu_init(void); -extern int cyrix_init_cpu(void); -extern int nsc_init_cpu(void); -extern int amd_init_cpu(void); -extern int centaur_init_cpu(void); -extern int transmeta_init_cpu(void); -extern int nexgen_init_cpu(void); -extern int umc_init_cpu(void); - void __init early_cpu_init(void) { intel_cpu_init(); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index ad6527a5beb..e0b38c33d84 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -27,3 +27,12 @@ extern void display_cacheinfo(struct cpuinfo_x86 *c); extern void early_init_intel(struct cpuinfo_x86 *c); extern void early_init_amd(struct cpuinfo_x86 *c); +/* Specific CPU type init functions */ +int intel_cpu_init(void); +int amd_init_cpu(void); +int cyrix_init_cpu(void); +int nsc_init_cpu(void); +int centaur_init_cpu(void); +int transmeta_init_cpu(void); +int nexgen_init_cpu(void); +int umc_init_cpu(void); -- cgit v1.2.3-18-g5258 From 16c02ed74361433a4fc5d8bd5f67abbac6e1c5ca Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 4 Feb 2008 16:48:05 +0100 Subject: x86: add cpu mtrr init function definitions to mtrr.h mtrr.h was included everywhere needed. Fixes the following sparse warnings. Also, the return types in the extern definitions were incorrect. arch/x86/kernel/cpu/mtrr/amd.c:113:12: warning: symbol 'amd_init_mtrr' was not declared. Should it be static? arch/x86/kernel/cpu/mtrr/cyrix.c:268:12: warning: symbol 'cyrix_init_mtrr' was not declared. Should it be static? arch/x86/kernel/cpu/mtrr/centaur.c:218:12: warning: symbol 'centaur_init_mtrr' was not declared. Should it be static? 
Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mtrr/main.c | 4 ---- arch/x86/kernel/cpu/mtrr/mtrr.h | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 822d8f90c1e..1e27b69a7a0 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -554,10 +554,6 @@ EXPORT_SYMBOL(mtrr_del); * These should be called implicitly, but we can't yet until all the initcall * stuff is done... */ -extern void amd_init_mtrr(void); -extern void cyrix_init_mtrr(void); -extern void centaur_init_mtrr(void); - static void __init init_ifs(void) { #ifndef CONFIG_X86_64 diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index fb74a2c2081..2cc77eb6fea 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -97,3 +97,7 @@ void mtrr_state_warn(void); const char *mtrr_attrib_to_str(int x); void mtrr_wrmsr(unsigned, unsigned, unsigned); +/* CPU specific mtrr init functions */ +int amd_init_mtrr(void); +int cyrix_init_mtrr(void); +int centaur_init_mtrr(void); -- cgit v1.2.3-18-g5258 From cc0f21bbc12dc9f05b2e7f2469128f8717b2f4d3 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 4 Feb 2008 16:48:05 +0100 Subject: x86: teach the static_protection function about high mappings Right now, enforcing that the high mapping of the kernel text doesn't get the NX bit is done deep in the guts of CPA, rather than in the static_protection() function that enforces all other per-arch sanity checks. This patch moves this sanity check into the central static_protection() function instead, and makes it apply ONLY to the kernel text, not to all other areas in the high mapping. 
Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 877b5cca2cb..bf5e33f6a32 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -106,6 +106,22 @@ static void cpa_flush_range(unsigned long start, int numpages) } } +#define HIGH_MAP_START __START_KERNEL_map +#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE) + + +/* + * Converts a virtual address to a X86-64 highmap address + */ +static unsigned long virt_to_highmap(void *address) +{ +#ifdef CONFIG_X86_64 + return __pa((unsigned long)address) + HIGH_MAP_START - phys_base; +#else + return (unsigned long)address; +#endif +} + /* * Certain areas of memory on x86 require very specific protection flags, * for example the BIOS area or kernel text. Callers don't always get this @@ -129,12 +145,24 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address) */ if (within(address, (unsigned long)_text, (unsigned long)_etext)) pgprot_val(forbidden) |= _PAGE_NX; + /* + * Do the same for the x86-64 high kernel mapping + */ + if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext))) + pgprot_val(forbidden) |= _PAGE_NX; + #ifdef CONFIG_DEBUG_RODATA /* The .rodata section needs to be read-only */ if (within(address, (unsigned long)__start_rodata, (unsigned long)__end_rodata)) pgprot_val(forbidden) |= _PAGE_RW; + /* + * Do the same for the x86-64 high kernel mapping + */ + if (within(address, virt_to_highmap(__start_rodata), + virt_to_highmap(__end_rodata))) + pgprot_val(forbidden) |= _PAGE_RW; #endif prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); @@ -304,8 +332,6 @@ repeat: * Modules and drivers should use the set_memory_* APIs instead. 
*/ -#define HIGH_MAP_START __START_KERNEL_map -#define HIGH_MAP_END (__START_KERNEL_map + KERNEL_TEXT_SIZE) static int change_page_attr_addr(unsigned long address, pgprot_t mask_set, @@ -338,10 +364,11 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set, /* * Calc the high mapping address. See __phys_addr() * for the non obvious details. + * + * Note that NX and other required permissions are + * checked in static_protections(). */ address = phys_addr + HIGH_MAP_START - phys_base; - /* Make sure the kernel mappings stay executable */ - pgprot_val(mask_clr) |= _PAGE_NX; /* * Our high aliases are imprecise, because we check -- cgit v1.2.3-18-g5258 From 626c2c9d065da0cbd9997e112501487958fde690 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 4 Feb 2008 16:48:05 +0100 Subject: x86: use the pfn from the page when change its attributes When changing the attributes of a pte, we should use the PFN from the existing PTE rather than going through hoops calculating what we think it might have been; this is both fragile and totally unneeded. It also makes it more hairy to call any of these functions on non-direct maps for no good reason whatsoever. With this change, __change_page_attr() no longer takes a pfn as argument, which simplifies all the callers.
Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index bf5e33f6a32..6c55fbdbd7e 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -277,17 +277,12 @@ out_unlock: } static int -__change_page_attr(unsigned long address, unsigned long pfn, - pgprot_t mask_set, pgprot_t mask_clr) +__change_page_attr(unsigned long address, pgprot_t mask_set, pgprot_t mask_clr) { struct page *kpte_page; int level, err = 0; pte_t *kpte; -#ifdef CONFIG_X86_32 - BUG_ON(pfn > max_low_pfn); -#endif - repeat: kpte = lookup_address(address, &level); if (!kpte) @@ -298,17 +293,25 @@ repeat: BUG_ON(PageCompound(kpte_page)); if (level == PG_LEVEL_4K) { - pgprot_t new_prot = pte_pgprot(*kpte); pte_t new_pte, old_pte = *kpte; + pgprot_t new_prot = pte_pgprot(old_pte); + + if(!pte_val(old_pte)) { + WARN_ON_ONCE(1); + return -EINVAL; + } pgprot_val(new_prot) &= ~pgprot_val(mask_clr); pgprot_val(new_prot) |= pgprot_val(mask_set); new_prot = static_protections(new_prot, address); - new_pte = pfn_pte(pfn, canon_pgprot(new_prot)); - BUG_ON(pte_pfn(new_pte) != pte_pfn(old_pte)); - + /* + * We need to keep the pfn from the existing PTE, + * after all we're only going to change it's attributes + * not the memory it points to + */ + new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); set_pte_atomic(kpte, new_pte); } else { err = split_large_page(kpte, address); @@ -337,11 +340,11 @@ static int change_page_attr_addr(unsigned long address, pgprot_t mask_set, pgprot_t mask_clr) { - unsigned long phys_addr = __pa(address); - unsigned long pfn = phys_addr >> PAGE_SHIFT; int err; #ifdef CONFIG_X86_64 + unsigned long phys_addr = __pa(address); + /* * If we are inside the high mapped kernel range, then we * fixup the low mapping first. 
__va() returns the virtual @@ -351,7 +354,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set, address = (unsigned long) __va(phys_addr); #endif - err = __change_page_attr(address, pfn, mask_set, mask_clr); + err = __change_page_attr(address, mask_set, mask_clr); if (err) return err; @@ -375,7 +378,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set, * everything between 0 and KERNEL_TEXT_SIZE, so do * not propagate lookup failures back to users: */ - __change_page_attr(address, pfn, mask_set, mask_clr); + __change_page_attr(address, mask_set, mask_clr); } #endif return err; -- cgit v1.2.3-18-g5258 From 63c1dcf4bc9a26b1d8baa9a8c7cc1b2e1e694011 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:05 +0100 Subject: x86: CPA use the existing pfn in split as well When splitting large pages, we get the pfn from the existing entry instead of calculating it ourselves. This removes the last remaining range restriction of the cpa code. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 6c55fbdbd7e..a629cea5e46 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -221,8 +221,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) { pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte)); gfp_t gfp_flags = GFP_KERNEL; - unsigned long flags; - unsigned long addr; + unsigned long flags, addr, pfn; pte_t *pbase, *tmp; struct page *base; unsigned int i, level; @@ -253,8 +252,12 @@ static int split_large_page(pte_t *kpte, unsigned long address) paravirt_alloc_pt(&init_mm, page_to_pfn(base)); #endif - for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) - set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot)); + /* + * Get the target pfn from the original entry: + */ + pfn = pte_pfn(*kpte); + for (i = 0; i < PTRS_PER_PTE;
i++, pfn++) + set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); /* * Install the new, split up pagetable. Important detail here: -- cgit v1.2.3-18-g5258 From e66aadbe6cb90813b3bbf07e3bc2a6aedcef7cd1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:05 +0100 Subject: x86: simplify __ioremap Remove tons of castings which make the code hard to read. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 1a88d1572a7..2c3fa718950 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -114,9 +114,8 @@ static int ioremap_change_attr(unsigned long paddr, unsigned long size, static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, enum ioremap_mode mode) { - void __iomem *addr; + unsigned long pfn, offset, last_addr, vaddr; struct vm_struct *area; - unsigned long pfn, offset, last_addr; pgprot_t prot; /* Don't allow wraparound or zero size */ @@ -164,19 +163,18 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, if (!area) return NULL; area->phys_addr = phys_addr; - addr = (void __iomem *) area->addr; - if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, - phys_addr, prot)) { - remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); + vaddr = (unsigned long) area->addr; + if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) { + remove_vm_area((void *)(vaddr & PAGE_MASK)); return NULL; } if (ioremap_change_attr(phys_addr, size, mode) < 0) { - vunmap(addr); + vunmap(area->addr); return NULL; } - return (void __iomem *) (offset + (char __iomem *)addr); + return (void __iomem *) (vaddr + offset); } /** -- cgit v1.2.3-18-g5258 From 75ab43bfce51085ffd627c470f48ae49ba6e6da3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:05 +0100 Subject: x86: ioremap remove the range 
check of cpa Now that cpa works on non-direct mappings as well, we can safely remove the range check in ioremap_change_attr(). Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 2c3fa718950..4e21231a5ce 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -70,25 +70,12 @@ int page_is_ram(unsigned long pagenr) * Fix up the linear direct mapping of the kernel to avoid cache attribute * conflicts. */ -static int ioremap_change_attr(unsigned long paddr, unsigned long size, +static int ioremap_change_attr(unsigned long vaddr, unsigned long size, enum ioremap_mode mode) { - unsigned long vaddr = (unsigned long)__va(paddr); unsigned long nrpages = size >> PAGE_SHIFT; - unsigned int level; int err; - /* No change for pages after the last mapping */ - if ((paddr + size - 1) >= (max_pfn_mapped << PAGE_SHIFT)) - return 0; - - /* - * If there is no identity map for this address, - * change_page_attr_addr is unnecessary - */ - if (!lookup_address(vaddr, &level)) - return 0; - switch (mode) { case IOR_MODE_UNCACHED: default: @@ -169,7 +156,7 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, return NULL; } - if (ioremap_change_attr(phys_addr, size, mode) < 0) { + if (ioremap_change_attr(vaddr, size, mode) < 0) { vunmap(area->addr); return NULL; } -- cgit v1.2.3-18-g5258 From f56d005d30342a45d8af2b75ecccc82200f09600 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:05 +0100 Subject: x86: no CPA on iounmap When an ioremap is unmapped, do not change the page attributes. There might be another mapping of the same physical address. PAT might detect a conflicting mapping attribute for no good reason. The mapping is removed anyway. 
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 4e21231a5ce..ee6648fe6b1 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -240,9 +240,6 @@ void iounmap(volatile void __iomem *addr) return; } - /* Reset the direct mapping. Can block */ - ioremap_change_attr(p->phys_addr, p->size, IOR_MODE_CACHED); - /* Finally remove it */ o = remove_vm_area((void *)addr); BUG_ON(p != o || o == NULL); -- cgit v1.2.3-18-g5258 From 1c083eb2cbdd917149f6acaa55efca129d05c2a9 Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Mon, 4 Feb 2008 16:48:06 +0100 Subject: x86: fix EFI mapping The patch updates EFI runtime memory mapping code, by making EFI areas explicitly executable. Signed-off-by: Huang Ying Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/efi.c | 57 +++++++++++++++++++++++++----------------------- arch/x86/kernel/efi_64.c | 22 +++++++++---------- 2 files changed, 41 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 1411324a625..32dd62b36ff 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -379,11 +379,9 @@ void __init efi_init(void) #endif } -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) static void __init runtime_code_page_mkexec(void) { efi_memory_desc_t *md; - unsigned long end; void *p; if (!(__supported_pte_mask & _PAGE_NX)) @@ -392,18 +390,13 @@ static void __init runtime_code_page_mkexec(void) /* Make EFI runtime service code area executable */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); - if (md->type == EFI_RUNTIME_SERVICES_CODE && - (end >> PAGE_SHIFT) <= max_pfn_mapped) { - set_memory_x(md->virt_addr, md->num_pages); - set_memory_uc(md->virt_addr, md->num_pages); - } + + if (md->type != 
EFI_RUNTIME_SERVICES_CODE) + continue; + + set_memory_x(md->virt_addr, md->num_pages << EFI_PAGE_SHIFT); } - __flush_tlb_all(); } -#else -static inline void __init runtime_code_page_mkexec(void) { } -#endif /* * This function will switch the EFI runtime services to virtual mode. @@ -417,30 +410,40 @@ void __init efi_enter_virtual_mode(void) { efi_memory_desc_t *md; efi_status_t status; - unsigned long end; - void *p; + unsigned long size; + u64 end, systab; + void *p, *va; efi.systab = NULL; for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; if (!(md->attribute & EFI_MEMORY_RUNTIME)) continue; - end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); - if ((md->attribute & EFI_MEMORY_WB) && - ((end >> PAGE_SHIFT) <= max_pfn_mapped)) - md->virt_addr = (unsigned long)__va(md->phys_addr); + + size = md->num_pages << EFI_PAGE_SHIFT; + end = md->phys_addr + size; + + if ((end >> PAGE_SHIFT) <= max_pfn_mapped) + va = __va(md->phys_addr); else - md->virt_addr = (unsigned long) - efi_ioremap(md->phys_addr, - md->num_pages << EFI_PAGE_SHIFT); - if (!md->virt_addr) + va = efi_ioremap(md->phys_addr, size); + + if (md->attribute & EFI_MEMORY_WB) + set_memory_uc(md->virt_addr, size); + + md->virt_addr = (u64) (unsigned long) va; + + if (!va) { printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n", (unsigned long long)md->phys_addr); - if ((md->phys_addr <= (unsigned long)efi_phys.systab) && - ((unsigned long)efi_phys.systab < end)) - efi.systab = (efi_system_table_t *)(unsigned long) - (md->virt_addr - md->phys_addr + - (unsigned long)efi_phys.systab); + continue; + } + + systab = (u64) (unsigned long) efi_phys.systab; + if (md->phys_addr <= systab && systab < end) { + systab += md->virt_addr - md->phys_addr; + efi.systab = (efi_system_table_t *) (unsigned long) systab; + } } BUG_ON(!efi.systab); diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 674f2379480..09d5c233093 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c 
@@ -54,10 +54,10 @@ static void __init early_mapping_set_exec(unsigned long start, else set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \ __supported_pte_mask)); - if (level == 4) - start = (start + PMD_SIZE) & PMD_MASK; - else + if (level == PG_LEVEL_4K) start = (start + PAGE_SIZE) & PAGE_MASK; + else + start = (start + PMD_SIZE) & PMD_MASK; } } @@ -109,23 +109,23 @@ void __init efi_reserve_bootmem(void) memmap.nr_map * memmap.desc_size); } -void __iomem * __init efi_ioremap(unsigned long offset, - unsigned long size) +void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) { static unsigned pages_mapped; - unsigned long last_addr; unsigned i, pages; - last_addr = offset + size - 1; - offset &= PAGE_MASK; - pages = (PAGE_ALIGN(last_addr) - offset) >> PAGE_SHIFT; + /* phys_addr and size must be page aligned */ + if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK)) + return NULL; + + pages = size >> PAGE_SHIFT; if (pages_mapped + pages > MAX_EFI_IO_PAGES) return NULL; for (i = 0; i < pages; i++) { __set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped, - offset, PAGE_KERNEL_EXEC_NOCACHE); - offset += PAGE_SIZE; + phys_addr, PAGE_KERNEL); + phys_addr += PAGE_SIZE; pages_mapped++; } -- cgit v1.2.3-18-g5258 From 331e406588dc90331753e6562e5e3757bb907eb8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:06 +0100 Subject: x86: CPA return early when requested feature is not available Mask out the not supported bits (e.g. NX). If the clr/set masks are empty after the mask return without changing anything. 
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index a629cea5e46..f60b93dc2e5 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -405,8 +405,18 @@ static int __change_page_attr_set_clr(unsigned long addr, int numpages, static int change_page_attr_set_clr(unsigned long addr, int numpages, pgprot_t mask_set, pgprot_t mask_clr) { - int ret = __change_page_attr_set_clr(addr, numpages, mask_set, - mask_clr); + int ret; + + /* + * Check, if we are requested to change a not supported + * feature: + */ + mask_set = canon_pgprot(mask_set); + mask_clr = canon_pgprot(mask_clr); + if (!pgprot_val(mask_set) && !pgprot_val(mask_clr)) + return 0; + + ret = __change_page_attr_set_clr(addr, numpages, mask_set, mask_clr); /* * On success we use clflush, when the CPU supports it to -- cgit v1.2.3-18-g5258 From 6bb8383bebc02dae08a17f561401f58005f75c03 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 4 Feb 2008 16:48:06 +0100 Subject: x86: cpa, only flush the cache if the caching attributes have changed We only need to flush the caches in cpa() if the caching attributes have changed. Otherwise only flush the TLBs. This checks the PAT bits too although they are currently not used by the kernel.
Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f60b93dc2e5..456ad0ab9c7 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -52,21 +52,23 @@ void clflush_cache_range(void *vaddr, unsigned int size) static void __cpa_flush_all(void *arg) { + unsigned long cache = (unsigned long)arg; + /* * Flush all to work around Errata in early athlons regarding * large page flushing. */ __flush_tlb_all(); - if (boot_cpu_data.x86_model >= 4) + if (cache && boot_cpu_data.x86_model >= 4) wbinvd(); } -static void cpa_flush_all(void) +static void cpa_flush_all(unsigned long cache) { BUG_ON(irqs_disabled()); - on_each_cpu(__cpa_flush_all, NULL, 1, 1); + on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1); } static void __cpa_flush_range(void *arg) @@ -79,7 +81,7 @@ static void __cpa_flush_range(void *arg) __flush_tlb_all(); } -static void cpa_flush_range(unsigned long start, int numpages) +static void cpa_flush_range(unsigned long start, int numpages, int cache) { unsigned int i, level; unsigned long addr; @@ -89,6 +91,9 @@ static void cpa_flush_range(unsigned long start, int numpages) on_each_cpu(__cpa_flush_range, NULL, 1, 1); + if (!cache) + return; + /* * We only need to flush on one CPU, * clflush is a MESI-coherent instruction that @@ -402,10 +407,16 @@ static int __change_page_attr_set_clr(unsigned long addr, int numpages, return 0; } +static inline int cache_attr(pgprot_t attr) +{ + return pgprot_val(attr) & + (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD); +} + static int change_page_attr_set_clr(unsigned long addr, int numpages, pgprot_t mask_set, pgprot_t mask_clr) { - int ret; + int ret, cache; /* * Check, if we are requested to change a not supported @@ -418,6 +429,12 @@ static int change_page_attr_set_clr(unsigned 
long addr, int numpages, ret = __change_page_attr_set_clr(addr, numpages, mask_set, mask_clr); + /* + * No need to flush, when we did not set any of the caching + * attributes: + */ + cache = cache_attr(mask_set); + /* * On success we use clflush, when the CPU supports it to * avoid the wbindv. If the CPU does not support it and in the @@ -425,9 +442,9 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, * wbindv): */ if (!ret && cpu_has_clflush) - cpa_flush_range(addr, numpages); + cpa_flush_range(addr, numpages, cache); else - cpa_flush_all(); + cpa_flush_all(cache); return ret; } -- cgit v1.2.3-18-g5258 From 72e458dfa63b3db7a46f66b0eb19e9ff4e17fc0e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:07 +0100 Subject: x86: introduce struct cpa_data The number of arguments which need to be transported is increasing and we want to add flush optimizations and large page preserving. Create struct cpa data and pass a pointer instead of increasing the number of arguments further. 
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 75 +++++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 456ad0ab9c7..d1c08308ecb 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -16,6 +16,13 @@ #include #include +struct cpa_data { + unsigned long vaddr; + int numpages; + pgprot_t mask_set; + pgprot_t mask_clr; +}; + static inline int within(unsigned long addr, unsigned long start, unsigned long end) { @@ -284,8 +291,7 @@ out_unlock: return 0; } -static int -__change_page_attr(unsigned long address, pgprot_t mask_set, pgprot_t mask_clr) +static int __change_page_attr(unsigned long address, struct cpa_data *cpa) { struct page *kpte_page; int level, err = 0; @@ -305,12 +311,15 @@ repeat: pgprot_t new_prot = pte_pgprot(old_pte); if(!pte_val(old_pte)) { - WARN_ON_ONCE(1); + printk(KERN_WARNING "CPA: called for zero pte. " + "vaddr = %lx cpa->vaddr = %lx\n", address, + cpa->vaddr); + WARN_ON(1); return -EINVAL; } - pgprot_val(new_prot) &= ~pgprot_val(mask_clr); - pgprot_val(new_prot) |= pgprot_val(mask_set); + pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); + pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); new_prot = static_protections(new_prot, address); @@ -343,12 +352,10 @@ repeat: * Modules and drivers should use the set_memory_* APIs instead. 
*/ - -static int -change_page_attr_addr(unsigned long address, pgprot_t mask_set, - pgprot_t mask_clr) +static int change_page_attr_addr(struct cpa_data *cpa) { int err; + unsigned long address = cpa->vaddr; #ifdef CONFIG_X86_64 unsigned long phys_addr = __pa(address); @@ -362,7 +369,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set, address = (unsigned long) __va(phys_addr); #endif - err = __change_page_attr(address, mask_set, mask_clr); + err = __change_page_attr(address, cpa); if (err) return err; @@ -386,20 +393,19 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set, * everything between 0 and KERNEL_TEXT_SIZE, so do * not propagate lookup failures back to users: */ - __change_page_attr(address, mask_set, mask_clr); + __change_page_attr(address, cpa); } #endif return err; } -static int __change_page_attr_set_clr(unsigned long addr, int numpages, - pgprot_t mask_set, pgprot_t mask_clr) +static int __change_page_attr_set_clr(struct cpa_data *cpa) { unsigned int i; int ret; - for (i = 0; i < numpages ; i++, addr += PAGE_SIZE) { - ret = change_page_attr_addr(addr, mask_set, mask_clr); + for (i = 0; i < cpa->numpages ; i++, cpa->vaddr += PAGE_SIZE) { + ret = change_page_attr_addr(cpa); if (ret) return ret; } @@ -416,6 +422,7 @@ static inline int cache_attr(pgprot_t attr) static int change_page_attr_set_clr(unsigned long addr, int numpages, pgprot_t mask_set, pgprot_t mask_clr) { + struct cpa_data cpa; int ret, cache; /* @@ -427,7 +434,12 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, if (!pgprot_val(mask_set) && !pgprot_val(mask_clr)) return 0; - ret = __change_page_attr_set_clr(addr, numpages, mask_set, mask_clr); + cpa.vaddr = addr; + cpa.numpages = numpages; + cpa.mask_set = mask_set; + cpa.mask_clr = mask_clr; + + ret = __change_page_attr_set_clr(&cpa); /* * No need to flush, when we did not set any of the caching @@ -548,37 +560,26 @@ int set_pages_rw(struct page *page, int numpages) return 
set_memory_rw(addr, numpages); } - -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_CPA_DEBUG) -static inline int __change_page_attr_set(unsigned long addr, int numpages, - pgprot_t mask) -{ - return __change_page_attr_set_clr(addr, numpages, mask, __pgprot(0)); -} - -static inline int __change_page_attr_clear(unsigned long addr, int numpages, - pgprot_t mask) -{ - return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask); -} -#endif - #ifdef CONFIG_DEBUG_PAGEALLOC static int __set_pages_p(struct page *page, int numpages) { - unsigned long addr = (unsigned long)page_address(page); + struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), + .numpages = numpages, + .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), + .mask_clr = __pgprot(0)}; - return __change_page_attr_set(addr, numpages, - __pgprot(_PAGE_PRESENT | _PAGE_RW)); + return __change_page_attr_set_clr(&cpa); } static int __set_pages_np(struct page *page, int numpages) { - unsigned long addr = (unsigned long)page_address(page); + struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), + .numpages = numpages, + .mask_set = __pgprot(0), + .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)}; - return __change_page_attr_clear(addr, numpages, - __pgprot(_PAGE_PRESENT)); + return __change_page_attr_set_clr(&cpa); } void kernel_map_pages(struct page *page, int numpages, int enable) -- cgit v1.2.3-18-g5258 From f4ae5da0e8e92caa168e7c2a7c4a6c4064b082c2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:07 +0100 Subject: x86: cpa, check if we changed anything and tlb flushing is necessary Flush tlbs only when there was a real change. 
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index d1c08308ecb..79a9f1b42dd 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -21,6 +21,7 @@ struct cpa_data { int numpages; pgprot_t mask_set; pgprot_t mask_clr; + int flushtlb; }; static inline int @@ -329,11 +330,19 @@ repeat: * not the memory it points to */ new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); - set_pte_atomic(kpte, new_pte); + + /* + * Do we really change anything ? + */ + if (pte_val(old_pte) != pte_val(new_pte)) { + set_pte_atomic(kpte, new_pte); + cpa->flushtlb = 1; + } } else { err = split_large_page(kpte, address); if (!err) goto repeat; + cpa->flushtlb = 1; } return err; } @@ -438,9 +447,16 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, cpa.numpages = numpages; cpa.mask_set = mask_set; cpa.mask_clr = mask_clr; + cpa.flushtlb = 0; ret = __change_page_attr_set_clr(&cpa); + /* + * Check whether we really changed something: + */ + if (!cpa.flushtlb) + return ret; + /* * No need to flush, when we did not set any of the caching * attributes: -- cgit v1.2.3-18-g5258 From 65e074dffa198978ab0c9976a19b954fbe1183e2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:07 +0100 Subject: x86: cpa, preserve large pages if possible When CPA is called on a range which fits into a large page mapping, avoid to split the page when: 1) There is no change of attributes 2) The range to change is a complete large mapping Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 142 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 130 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 79a9f1b42dd..40b7ac58e67 100644 --- 
a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -18,12 +18,17 @@ struct cpa_data { unsigned long vaddr; - int numpages; pgprot_t mask_set; pgprot_t mask_clr; + int numpages; int flushtlb; }; +enum { + CPA_NO_SPLIT = 0, + CPA_SPLIT, +}; + static inline int within(unsigned long addr, unsigned long start, unsigned long end) { @@ -230,6 +235,86 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) #endif } +static int try_preserve_large_page(pte_t *kpte, unsigned long address, + struct cpa_data *cpa) +{ + unsigned long nextpage_addr, numpages, pmask, psize, flags; + pte_t new_pte, old_pte, *tmp; + pgprot_t old_prot, new_prot; + int level, res = CPA_SPLIT; + + spin_lock_irqsave(&pgd_lock, flags); + /* + * Check for races, another CPU might have split this page + * up already: + */ + tmp = lookup_address(address, &level); + if (tmp != kpte) + goto out_unlock; + + switch (level) { + case PG_LEVEL_2M: + psize = LARGE_PAGE_SIZE; + pmask = LARGE_PAGE_MASK; + break; + case PG_LEVEL_1G: + default: + res = -EINVAL; + goto out_unlock; + } + + /* + * Calculate the number of pages, which fit into this large + * page starting at address: + */ + nextpage_addr = (address + psize) & pmask; + numpages = (nextpage_addr - address) >> PAGE_SHIFT; + if (numpages < cpa->numpages) + cpa->numpages = numpages; + + /* + * We are safe now. Check whether the new pgprot is the same: + */ + old_pte = *kpte; + old_prot = new_prot = pte_pgprot(old_pte); + + pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); + pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); + new_prot = static_protections(new_prot, address); + + /* + * If there are no changes, return. maxpages has been updated + * above: + */ + if (pgprot_val(new_prot) == pgprot_val(old_prot)) { + res = CPA_NO_SPLIT; + goto out_unlock; + } + + /* + * We need to change the attributes. Check, whether we can + * change the large page in one go. 
We request a split, when + * the address is not aligned and the number of pages is + * smaller than the number of pages in the large page. Note + * that we limited the number of possible pages already to + * the number of pages in the large page. + */ + if (address == (nextpage_addr - psize) && cpa->numpages == numpages) { + /* + * The address is aligned and the number of pages + * covers the full page. + */ + new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); + __set_pmd_pte(kpte, address, new_pte); + cpa->flushtlb = 1; + res = CPA_NO_SPLIT; + } + +out_unlock: + spin_unlock_irqrestore(&pgd_lock, flags); + return res; +} + static int split_large_page(pte_t *kpte, unsigned long address) { pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte)); @@ -295,7 +380,7 @@ out_unlock: static int __change_page_attr(unsigned long address, struct cpa_data *cpa) { struct page *kpte_page; - int level, err = 0; + int level, res; pte_t *kpte; repeat: @@ -338,13 +423,34 @@ repeat: set_pte_atomic(kpte, new_pte); cpa->flushtlb = 1; } - } else { - err = split_large_page(kpte, address); - if (!err) - goto repeat; - cpa->flushtlb = 1; + cpa->numpages = 1; + return 0; } - return err; + + /* + * Check, whether we can keep the large page intact + * and just change the pte: + */ + res = try_preserve_large_page(kpte, address, cpa); + if (res < 0) + return res; + + /* + * When the range fits into the existing large page, + * return. 
cp->numpages and cpa->tlbflush have been updated in + * try_large_page: + */ + if (res == CPA_NO_SPLIT) + return 0; + + /* + * We have to split the large page: + */ + res = split_large_page(kpte, address); + if (res) + return res; + cpa->flushtlb = 1; + goto repeat; } /** @@ -410,15 +516,27 @@ static int change_page_attr_addr(struct cpa_data *cpa) static int __change_page_attr_set_clr(struct cpa_data *cpa) { - unsigned int i; - int ret; + int ret, numpages = cpa->numpages; - for (i = 0; i < cpa->numpages ; i++, cpa->vaddr += PAGE_SIZE) { + while (numpages) { + /* + * Store the remaining nr of pages for the large page + * preservation check. + */ + cpa->numpages = numpages; ret = change_page_attr_addr(cpa); if (ret) return ret; - } + /* + * Adjust the number of pages with the result of the + * CPA operation. Either a large page has been + * preserved or a single page update happened. + */ + BUG_ON(cpa->numpages > numpages); + numpages -= cpa->numpages; + cpa->vaddr += cpa->numpages * PAGE_SIZE; + } return 0; } -- cgit v1.2.3-18-g5258 From 34508f66b69ff1708192654f631eb8f1d4c52005 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Feb 2008 16:48:07 +0100 Subject: x86: AMD Athlon X2 hard hang fix An Athlon 64 X2 test system showed hard hangs shortly after marking the kernel text read-only, if we tried to preserve largepages and changed the PSE entry from RW to RO. The pagetable code itself is correct, it's the CPU that locked up hard (and not even the NMI watchdog could punch through that hard hang). So be conservative and always do splitups - like we did in the past. 
Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 40b7ac58e67..3810f7a83b1 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -243,6 +243,17 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, pgprot_t old_prot, new_prot; int level, res = CPA_SPLIT; + /* + * An Athlon 64 X2 showed hard hangs if we tried to preserve + * largepages and changed the PSE entry from RW to RO. + * + * As AMD CPUs have a long series of erratas in this area, + * (and none of the known ones seem to explain this hang), + * disable this code until the hang can be debugged: + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return res; + spin_lock_irqsave(&pgd_lock, flags); /* * Check for races, another CPU might have split this page -- cgit v1.2.3-18-g5258 From 9a14aefc1d28c6037122965ee8c10d92a970ade0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:07 +0100 Subject: x86: cpa, fix lookup_address lookup_address() returns a wrong level and a wrong pointer to a non existing pte, when pmd or pud entries are marked !present. This happens for example due to boot time mapping of GART into the low memory space. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3810f7a83b1..7d21cd658ed 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -188,6 +188,14 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address) return prot; } +/* + * Lookup the page table entry for a virtual address. Return a pointer + * to the entry and the level of the mapping. + * + * Note: We return pud and pmd either when the entry is marked large + * or when the present bit is not set. 
Otherwise we would return a + * pointer to a nonexisting mapping. + */ pte_t *lookup_address(unsigned long address, int *level) { pgd_t *pgd = pgd_offset_k(address); @@ -206,7 +214,7 @@ pte_t *lookup_address(unsigned long address, int *level) return NULL; *level = PG_LEVEL_2M; - if (pmd_large(*pmd)) + if (pmd_large(*pmd) || !pmd_present(*pmd)) return (pte_t *)pmd; *level = PG_LEVEL_4K; -- cgit v1.2.3-18-g5258 From 31422c51e0dc72532d82e80895932d430c3ed307 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 4 Feb 2008 16:48:08 +0100 Subject: x86: rename LARGE_PAGE_SIZE to PMD_PAGE_SIZE Fix up all users. Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/boot/compressed/head_64.S | 8 ++++---- arch/x86/kernel/head_64.S | 4 ++-- arch/x86/kernel/pci-gart_64.c | 2 +- arch/x86/mm/init_64.c | 6 +++--- arch/x86/mm/pageattr.c | 6 +++--- 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 1ccb38a7f0d..e8657b98c90 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -80,8 +80,8 @@ startup_32: #ifdef CONFIG_RELOCATABLE movl %ebp, %ebx - addl $(LARGE_PAGE_SIZE -1), %ebx - andl $LARGE_PAGE_MASK, %ebx + addl $(PMD_PAGE_SIZE -1), %ebx + andl $PMD_PAGE_MASK, %ebx #else movl $CONFIG_PHYSICAL_START, %ebx #endif @@ -220,8 +220,8 @@ ENTRY(startup_64) /* Start with the delta to where the kernel will run at. */ #ifdef CONFIG_RELOCATABLE leaq startup_32(%rip) /* - $startup_32 */, %rbp - addq $(LARGE_PAGE_SIZE - 1), %rbp - andq $LARGE_PAGE_MASK, %rbp + addq $(PMD_PAGE_SIZE - 1), %rbp + andq $PMD_PAGE_MASK, %rbp movq %rbp, %rbx #else movq $CONFIG_PHYSICAL_START, %rbp diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 1d5a7a36120..4f283ad215e 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -63,7 +63,7 @@ startup_64: /* Is the address not 2M aligned? 
*/ movq %rbp, %rax - andl $~LARGE_PAGE_MASK, %eax + andl $~PMD_PAGE_MASK, %eax testl %eax, %eax jnz bad_address @@ -88,7 +88,7 @@ startup_64: /* Add an Identity mapping if I am above 1G */ leaq _text(%rip), %rdi - andq $LARGE_PAGE_MASK, %rdi + andq $PMD_PAGE_MASK, %rdi movq %rdi, %rax shrq $PUD_SHIFT, %rax diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 4d5cc718198..ae1d3d8b384 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -501,7 +501,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) } a = aper + iommu_size; - iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a; + iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; if (iommu_size < 64*1024*1024) { printk(KERN_WARNING diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index eabcaed76c2..b7a7992c28b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -444,10 +444,10 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) { unsigned long end = address + size; - BUG_ON(address & ~LARGE_PAGE_MASK); - BUG_ON(size & ~LARGE_PAGE_MASK); + BUG_ON(address & ~PMD_PAGE_MASK); + BUG_ON(size & ~PMD_PAGE_MASK); - for (; address < end; address += LARGE_PAGE_SIZE) { + for (; address < end; address += PMD_PAGE_SIZE) { pgd_t *pgd = pgd_offset_k(address); pud_t *pud; pmd_t *pmd; diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7d21cd658ed..74446ea23ff 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -273,8 +273,8 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, switch (level) { case PG_LEVEL_2M: - psize = LARGE_PAGE_SIZE; - pmask = LARGE_PAGE_MASK; + psize = PMD_PAGE_SIZE; + pmask = PMD_PAGE_MASK; break; case PG_LEVEL_1G: default: @@ -363,7 +363,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) } address = __pa(address); - addr = address & LARGE_PAGE_MASK; + addr = address & PMD_PAGE_MASK; pbase = (pte_t 
*)page_address(base); #ifdef CONFIG_X86_32 paravirt_alloc_pt(&init_mm, page_to_pfn(base)); -- cgit v1.2.3-18-g5258 From 07cf89c05f2bbafa002401ac4e09ac31678513e4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:08 +0100 Subject: x86: CPA fix pagetable split Move the readout of the large entry into the spinlock section to prevent an unlikely but possible race. Mark the pmd/pud entry present after the split. We preserved the non present bit in the new split mapping. Remove the stale gfp_flags double initialization. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 74446ea23ff..72880993af8 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -336,7 +336,7 @@ out_unlock: static int split_large_page(pte_t *kpte, unsigned long address) { - pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte)); + pgprot_t ref_prot; gfp_t gfp_flags = GFP_KERNEL; unsigned long flags, addr, pfn; pte_t *pbase, *tmp; @@ -344,7 +344,6 @@ static int split_large_page(pte_t *kpte, unsigned long address) unsigned int i, level; #ifdef CONFIG_DEBUG_PAGEALLOC - gfp_flags = __GFP_HIGH | __GFP_NOFAIL | __GFP_NOWARN; gfp_flags = GFP_ATOMIC | __GFP_NOWARN; #endif base = alloc_pages(gfp_flags, 0); @@ -368,6 +367,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) #ifdef CONFIG_X86_32 paravirt_alloc_pt(&init_mm, page_to_pfn(base)); #endif + ref_prot = pte_pgprot(pte_clrhuge(*kpte)); /* * Get the target pfn from the original entry: @@ -377,13 +377,17 @@ static int split_large_page(pte_t *kpte, unsigned long address) set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); /* - * Install the new, split up pagetable. Important detail here: + * Install the new, split up pagetable. Important details here: * * On Intel the NX bit of all levels must be cleared to make a * page executable. 
See section 4.13.2 of Intel 64 and IA-32 * Architectures Software Developer's Manual). + * + * Mark the entry present. The current mapping might be + * set to not present, which we preserved above. */ ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte))); + pgprot_val(ref_prot) |= _PAGE_PRESENT; __set_pmd_pte(kpte, address, mk_pte(base, ref_prot)); base = NULL; -- cgit v1.2.3-18-g5258 From 64f351d197d9ae8ad9624998afa8ee18e696ca44 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:08 +0100 Subject: x86: cpa selftest, skip non present entries pud and pmd entries in the RAM area might be marked as non present. Do not try to modify them in the selftest. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr-test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index 7573e786d2f..398f3a578dd 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -137,7 +137,8 @@ static __init int exercise_pageattr(void) for (k = 0; k < len[i]; k++) { pte = lookup_address(addr[i] + k*PAGE_SIZE, &level); - if (!pte || pgprot_val(pte_pgprot(*pte)) == 0) { + if (!pte || pgprot_val(pte_pgprot(*pte)) == 0 || + !(pte_val(*pte) & _PAGE_PRESENT)) { addr[i] = 0; break; } -- cgit v1.2.3-18-g5258 From 28d6ee41a6ff8139e442af2dc55928bfbb475586 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 4 Feb 2008 16:48:08 +0100 Subject: x86: switch pci-gart over to using set_memory_np() instead of clear_kernel_mapping() pci-gart needs to unmap the IOMMU aperture to prevent cache corruptions. Switch this over to using set_memory_np() instead of clear_kernel_mapping(). 
Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-gart_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index ae1d3d8b384..845cbecd68e 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -731,7 +731,8 @@ void __init gart_iommu_init(void) * the backing memory. The GART address is only used by PCI * devices. */ - clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size); + set_memory_np((unsigned long)__va(iommu_bus_base), + iommu_size >> PAGE_SHIFT); /* * Try to workaround a bug (thanks to BenH) -- cgit v1.2.3-18-g5258 From bde1965ce8c63e17cc284e1af616c85aba483f11 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 4 Feb 2008 16:48:08 +0100 Subject: x86: remove now unused clear_kernel_mapping Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/init_64.c | 43 ------------------------------------------- 1 file changed, 43 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b7a7992c28b..5855449ce7a 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -434,49 +434,6 @@ void __init paging_init(void) } #endif -/* - * Unmap a kernel mapping if it exists. This is useful to avoid - * prefetches from the CPU leading to inconsistent cache lines. - * address and size must be aligned to 2MB boundaries. - * Does nothing when the mapping doesn't exist. 
- */ -void __init clear_kernel_mapping(unsigned long address, unsigned long size) -{ - unsigned long end = address + size; - - BUG_ON(address & ~PMD_PAGE_MASK); - BUG_ON(size & ~PMD_PAGE_MASK); - - for (; address < end; address += PMD_PAGE_SIZE) { - pgd_t *pgd = pgd_offset_k(address); - pud_t *pud; - pmd_t *pmd; - - if (pgd_none(*pgd)) - continue; - - pud = pud_offset(pgd, address); - if (pud_none(*pud)) - continue; - - pmd = pmd_offset(pud, address); - if (!pmd || pmd_none(*pmd)) - continue; - - if (!(pmd_val(*pmd) & _PAGE_PSE)) { - /* - * Could handle this, but it should not happen - * currently: - */ - printk(KERN_ERR "clear_kernel_mapping: " - "mapping has been split. will leak memory\n"); - pmd_ERROR(*pmd); - } - set_pmd(pmd, __pmd(0)); - } - __flush_tlb_all(); -} - /* * Memory hotplug specific functions */ -- cgit v1.2.3-18-g5258 From 6ce9fc17d913ae51f8434d2826f306347820b07d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Feb 2008 16:48:08 +0100 Subject: x86: remove cpa warning this race is legit and can happen on SMP systems. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 72880993af8..0b029c97174 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -356,10 +356,8 @@ static int split_large_page(pte_t *kpte, unsigned long address) * up for us already: */ tmp = lookup_address(address, &level); - if (tmp != kpte) { - WARN_ON_ONCE(1); + if (tmp != kpte) goto out_unlock; - } address = __pa(address); addr = address & PMD_PAGE_MASK; -- cgit v1.2.3-18-g5258 From 7bfb72e847c201fe32271fb13f75d060671d8890 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:08 +0100 Subject: x86: fix page-present check in cpa_flush_range pte_present() might return true for PROT_NONE mappings. Explicitly check the present bit.
Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 0b029c97174..9be684e61dc 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -119,7 +119,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) /* * Only flush present addresses: */ - if (pte && pte_present(*pte)) + if (pte && (pte_val(*pte) & _PAGE_PRESENT)) clflush_cache_range((void *) addr, PAGE_SIZE); } } -- cgit v1.2.3-18-g5258 From d4f71f7969ee2c16e2969185280c13d4f51a9172 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 4 Feb 2008 16:48:09 +0100 Subject: x86: switch direct mapping setup over to set_pte Use set_pte() for setting up the 2MB pages in the direct mapping. Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/init_64.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5855449ce7a..3a98d6f724a 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -273,7 +273,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) int i = pmd_index(address); for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) { - unsigned long entry; pmd_t *pmd = pmd_page + pmd_index(address); if (address >= end) { @@ -287,9 +286,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) if (pmd_val(*pmd)) continue; - entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address; - entry &= __supported_pte_mask; - set_pmd(pmd, __pmd(entry)); + set_pte((pte_t *)pmd, + pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); } } -- cgit v1.2.3-18-g5258 From c2f71ee2140b2a506735ff9fcb7e3b1dfaab8f2b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 4 Feb 2008 16:48:09 +0100 Subject: x86: add gbpages support to lookup_address [ tglx@linutronix.de: fix 
bootup crash on sparse mappings. ] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 9be684e61dc..143fbafc948 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -209,6 +209,11 @@ pte_t *lookup_address(unsigned long address, int *level) pud = pud_offset(pgd, address); if (pud_none(*pud)) return NULL; + + *level = PG_LEVEL_1G; + if (pud_large(*pud) || !pud_present(*pud)) + return (pte_t *)pud; + pmd = pmd_offset(pud, address); if (pmd_none(*pmd)) return NULL; -- cgit v1.2.3-18-g5258 From b5360222273cb3e57a119c18eef42f59da4da87b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 4 Feb 2008 16:48:09 +0100 Subject: x86: support gbpages in pagetable dump Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/fault.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 3fff490254a..ad8b9733d6b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -240,7 +240,8 @@ void dump_pagetable(unsigned long address) pud = pud_offset(pgd, address); if (bad_address(pud)) goto bad; printk("PUD %lx ", pud_val(*pud)); - if (!pud_present(*pud)) goto ret; + if (!pud_present(*pud) || pud_large(*pud)) + goto ret; pmd = pmd_offset(pud, address); if (bad_address(pmd)) goto bad; -- cgit v1.2.3-18-g5258 From f07333fd149eb6826da26a89c3aff90324f270b0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 4 Feb 2008 16:48:09 +0100 Subject: x86: implement gbpages support in change_page_attr() Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 
143fbafc948..42ca3d8effa 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -281,7 +281,12 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, psize = PMD_PAGE_SIZE; pmask = PMD_PAGE_MASK; break; +#ifdef CONFIG_X86_64 case PG_LEVEL_1G: + psize = PMD_PAGE_SIZE; + pmask = PMD_PAGE_MASK; + break; +#endif default: res = -EINVAL; goto out_unlock; @@ -343,7 +348,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) { pgprot_t ref_prot; gfp_t gfp_flags = GFP_KERNEL; - unsigned long flags, addr, pfn; + unsigned long flags, addr, pfn, pfninc = 1; pte_t *pbase, *tmp; struct page *base; unsigned int i, level; @@ -372,11 +377,19 @@ static int split_large_page(pte_t *kpte, unsigned long address) #endif ref_prot = pte_pgprot(pte_clrhuge(*kpte)); +#ifdef CONFIG_X86_64 + if (level == PG_LEVEL_1G) { + pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; + pgprot_val(ref_prot) |= _PAGE_PSE; + addr &= PUD_PAGE_MASK; + } +#endif + /* * Get the target pfn from the original entry: */ pfn = pte_pfn(*kpte); - for (i = 0; i < PTRS_PER_PTE; i++, pfn++) + for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); /* -- cgit v1.2.3-18-g5258 From 9df84993cb3d71669894654ab257f01f6e4ed48e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Feb 2008 16:48:09 +0100 Subject: x86: cpa, cleanups Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 42ca3d8effa..029fb07b3f0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -16,6 +16,9 @@ #include #include +/* + * The current flushing context - we pass it instead of 5 arguments: + */ struct cpa_data { unsigned long vaddr; pgprot_t mask_set; @@ -206,6 +209,7 @@ pte_t *lookup_address(unsigned long address, int *level) if (pgd_none(*pgd)) return NULL; + pud = 
pud_offset(pgd, address); if (pud_none(*pud)) return NULL; @@ -223,9 +227,13 @@ pte_t *lookup_address(unsigned long address, int *level) return (pte_t *)pmd; *level = PG_LEVEL_4K; + return pte_offset_kernel(pmd, address); } +/* + * Set the new pmd in all the pgds we know about: + */ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) { /* change init_mm */ @@ -248,8 +256,9 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) #endif } -static int try_preserve_large_page(pte_t *kpte, unsigned long address, - struct cpa_data *cpa) +static int +try_preserve_large_page(pte_t *kpte, unsigned long address, + struct cpa_data *cpa) { unsigned long nextpage_addr, numpages, pmask, psize, flags; pte_t new_pte, old_pte, *tmp; @@ -341,17 +350,18 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, out_unlock: spin_unlock_irqrestore(&pgd_lock, flags); + return res; } static int split_large_page(pte_t *kpte, unsigned long address) { - pgprot_t ref_prot; - gfp_t gfp_flags = GFP_KERNEL; unsigned long flags, addr, pfn, pfninc = 1; + gfp_t gfp_flags = GFP_KERNEL; + unsigned int i, level; pte_t *pbase, *tmp; + pgprot_t ref_prot; struct page *base; - unsigned int i, level; #ifdef CONFIG_DEBUG_PAGEALLOC gfp_flags = GFP_ATOMIC | __GFP_NOWARN; @@ -505,7 +515,6 @@ repeat: * * Modules and drivers should use the set_memory_* APIs instead. 
*/ - static int change_page_attr_addr(struct cpa_data *cpa) { int err; -- cgit v1.2.3-18-g5258 From beaff6333b4a21e8f3b7f9a7c3c8f8716b2334bc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Feb 2008 16:48:09 +0100 Subject: x86: cpa, eliminate CPA_ enum Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 029fb07b3f0..fb2eedba76a 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -27,11 +27,6 @@ struct cpa_data { int flushtlb; }; -enum { - CPA_NO_SPLIT = 0, - CPA_SPLIT, -}; - static inline int within(unsigned long addr, unsigned long start, unsigned long end) { @@ -263,7 +258,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, unsigned long nextpage_addr, numpages, pmask, psize, flags; pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot; - int level, res = CPA_SPLIT; + int level, do_split = 1; /* * An Athlon 64 X2 showed hard hangs if we tried to preserve @@ -274,7 +269,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * disable this code until the hang can be debugged: */ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return res; + return 1; spin_lock_irqsave(&pgd_lock, flags); /* @@ -297,7 +292,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, break; #endif default: - res = -EINVAL; + do_split = -EINVAL; goto out_unlock; } @@ -325,7 +320,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * above: */ if (pgprot_val(new_prot) == pgprot_val(old_prot)) { - res = CPA_NO_SPLIT; + do_split = 0; goto out_unlock; } @@ -345,13 +340,13 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); __set_pmd_pte(kpte, address, new_pte); cpa->flushtlb = 1; - res = CPA_NO_SPLIT; + do_split = 0; } out_unlock: 
spin_unlock_irqrestore(&pgd_lock, flags); - return res; + return do_split; } static int split_large_page(pte_t *kpte, unsigned long address) @@ -429,7 +424,7 @@ out_unlock: static int __change_page_attr(unsigned long address, struct cpa_data *cpa) { struct page *kpte_page; - int level, res; + int level, do_split; pte_t *kpte; repeat: @@ -480,25 +475,26 @@ repeat: * Check, whether we can keep the large page intact * and just change the pte: */ - res = try_preserve_large_page(kpte, address, cpa); - if (res < 0) - return res; + do_split = try_preserve_large_page(kpte, address, cpa); + if (do_split < 0) + return do_split; /* * When the range fits into the existing large page, * return. cp->numpages and cpa->tlbflush have been updated in * try_large_page: */ - if (res == CPA_NO_SPLIT) + if (do_split == 0) return 0; /* * We have to split the large page: */ - res = split_large_page(kpte, address); - if (res) - return res; + do_split = split_large_page(kpte, address); + if (do_split) + return do_split; cpa->flushtlb = 1; + goto repeat; } -- cgit v1.2.3-18-g5258 From 87f7f8fe328388a1430a4c27cbe684f3925fd8a5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Feb 2008 16:48:10 +0100 Subject: x86: cpa, clean up code flow Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index fb2eedba76a..4f033505127 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -423,8 +423,8 @@ out_unlock: static int __change_page_attr(unsigned long address, struct cpa_data *cpa) { + int level, do_split, err; struct page *kpte_page; - int level, do_split; pte_t *kpte; repeat: @@ -476,26 +476,24 @@ repeat: * and just change the pte: */ do_split = try_preserve_large_page(kpte, address, cpa); - if (do_split < 0) - return do_split; - /* * When the range fits into the existing large page, * return. 
cp->numpages and cpa->tlbflush have been updated in * try_large_page: */ - if (do_split == 0) - return 0; + if (do_split <= 0) + return do_split; /* * We have to split the large page: */ - do_split = split_large_page(kpte, address); - if (do_split) - return do_split; - cpa->flushtlb = 1; + err = split_large_page(kpte, address); + if (!err) { + cpa->flushtlb = 1; + goto repeat; + } - goto repeat; + return err; } /** -- cgit v1.2.3-18-g5258 From 7b610eec7a06ede64f71459e7f412dfd96f4cc5e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Feb 2008 16:48:10 +0100 Subject: x86: cpa, micro-optimization Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 4f033505127..bb55a78dcd6 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -237,6 +237,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) if (!SHARED_KERNEL_PMD) { struct page *page; + address = __pa(address); list_for_each_entry(page, &pgd_list, lru) { pgd_t *pgd; pud_t *pud; @@ -351,7 +352,7 @@ out_unlock: static int split_large_page(pte_t *kpte, unsigned long address) { - unsigned long flags, addr, pfn, pfninc = 1; + unsigned long flags, pfn, pfninc = 1; gfp_t gfp_flags = GFP_KERNEL; unsigned int i, level; pte_t *pbase, *tmp; @@ -374,8 +375,6 @@ static int split_large_page(pte_t *kpte, unsigned long address) if (tmp != kpte) goto out_unlock; - address = __pa(address); - addr = address & PMD_PAGE_MASK; pbase = (pte_t *)page_address(base); #ifdef CONFIG_X86_32 paravirt_alloc_pt(&init_mm, page_to_pfn(base)); @@ -386,7 +385,6 @@ static int split_large_page(pte_t *kpte, unsigned long address) if (level == PG_LEVEL_1G) { pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; pgprot_val(ref_prot) |= _PAGE_PSE; - addr &= PUD_PAGE_MASK; } #endif -- cgit v1.2.3-18-g5258