Merge git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86

* git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86: (78 commits) x86: fix RTC lockdep warning: potential hardirq recursion x86: cpa, micro-optimization x86: cpa, clean up code flow x86: cpa, eliminate CPA_ enum x86: cpa, cleanups x86: implement gbpages support in change_page_attr() x86: support gbpages in pagetable dump x86: add gbpages support to lookup_address x86: add pgtable accessor functions for gbpages x86: add PUD_PAGE_SIZE x86: add feature macros for the gbpages cpuid bit x86: switch direct mapping setup over to set_pte x86: fix page-present check in cpa_flush_range x86: remove cpa warning x86: remove now unused clear_kernel_mapping x86: switch pci-gart over to using set_memory_np() instead of clear_kernel_mapping() x86: cpa selftest, skip non present entries x86: CPA fix pagetable split x86: rename LARGE_PAGE_SIZE to PMD_PAGE_SIZE x86: cpa, fix lookup_address ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-02-04 09:16:03 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-02-04 09:16:03 -0800
commit: d2fc0bacd5c438cb459fdf531eff00ab18422a00 (patch)
tree: d0ea52e4d2ad2fac12e19eaf6891c6af98353cfc /arch
parent: 93890b71a34f9490673a6edd56b61c2124215e46 (diff)
parent: 795d45b22c079946332bf3825afefe5a981a97b6 (diff)
42 files changed, 707 insertions, 679 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 99af1272ab1..59eef1c7fda 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -306,6 +306,7 @@ config X86_RDC321X
 	select M486
 	select X86_REBOOTFIXUPS
 	select GENERIC_GPIO
+	select LEDS_CLASS
 	select LEDS_GPIO
 	help
 	  This option is needed for RDC R-321x system-on-chip, also known
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8978e98bed5..364865b1b08 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -92,7 +92,6 @@ KBUILD_AFLAGS += $(cfi) $(cfi-sigframe)
 KBUILD_CFLAGS += $(cfi) $(cfi-sigframe)
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
-OBJCOPYFLAGS := -O binary -R .note -R .comment -S
 
 # Speed up the build
 KBUILD_CFLAGS += -pipe
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 349b81a39c4..f88458e83ef 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -26,7 +26,7 @@ SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
 #RAMDISK := -DRAMDISK=512
 
 targets		:= vmlinux.bin setup.bin setup.elf zImage bzImage
-subdir- 	:= compressed
+subdir-		:= compressed
 
 setup-y		+= a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
 setup-y		+= header.o main.o mca.o memory.o pm.o pmjump.o
@@ -43,9 +43,17 @@ setup-y		+= video-vesa.o
 setup-y		+= video-bios.o
 
 targets		+= $(setup-y)
-hostprogs-y	:= tools/build
+hostprogs-y	:= mkcpustr tools/build
 
-HOSTCFLAGS_build.o := $(LINUXINCLUDE)
+HOST_EXTRACFLAGS += $(LINUXINCLUDE)
+
+$(obj)/cpu.o: $(obj)/cpustr.h
+
+quiet_cmd_cpustr = CPUSTR  $@
+      cmd_cpustr = $(obj)/mkcpustr > $@
+targets		+= cpustr.h
+$(obj)/cpustr.h: $(obj)/mkcpustr FORCE
+	$(call if_changed,cpustr)
 
 # ---------------------------------------------------------------------------
 
@@ -80,6 +88,7 @@ $(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \
 	$(call if_changed,image)
 	@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
 
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
 $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
 	$(call if_changed,objcopy)
 
@@ -90,7 +99,6 @@ $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
 	$(call if_changed,ld)
 
 OBJCOPYFLAGS_setup.bin	:= -O binary
-
 $(obj)/setup.bin: $(obj)/setup.elf FORCE
 	$(call if_changed,objcopy)
 
@@ -98,7 +106,7 @@ $(obj)/compressed/vmlinux: FORCE
 	$(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
 
 # Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
-FDARGS = 
+FDARGS =
 # Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel
 FDINITRD =
 
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fe24ceabd90..d2b9f3bb87c 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -22,6 +22,7 @@ $(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $
 	$(call if_changed,ld)
 	@:
 
+OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
 $(obj)/vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
 
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 1ccb38a7f0d..e8657b98c90 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -80,8 +80,8 @@ startup_32:
 
 #ifdef CONFIG_RELOCATABLE
 	movl	%ebp, %ebx
-	addl	$(LARGE_PAGE_SIZE -1), %ebx
-	andl	$LARGE_PAGE_MASK, %ebx
+	addl	$(PMD_PAGE_SIZE -1), %ebx
+	andl	$PMD_PAGE_MASK, %ebx
 #else
 	movl	$CONFIG_PHYSICAL_START, %ebx
 #endif
@@ -220,8 +220,8 @@ ENTRY(startup_64)
 	/* Start with the delta to where the kernel will run at. */
 #ifdef CONFIG_RELOCATABLE
 	leaq	startup_32(%rip) /* - $startup_32 */, %rbp
-	addq	$(LARGE_PAGE_SIZE - 1), %rbp
-	andq	$LARGE_PAGE_MASK, %rbp
+	addq	$(PMD_PAGE_SIZE - 1), %rbp
+	andq	$PMD_PAGE_MASK, %rbp
 	movq	%rbp, %rbx
 #else
 	movq	$CONFIG_PHYSICAL_START, %rbp
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 2a5c32da585..00e19edd852 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -1,7 +1,7 @@
 /* -*- linux-c -*- ------------------------------------------------------- *
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
- *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2007-2008 rPath, Inc. - All Rights Reserved
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -9,7 +9,7 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * arch/i386/boot/cpu.c
+ * arch/x86/boot/cpu.c
  *
  * Check for obligatory CPU features and abort if the features are not
  * present.
@@ -19,6 +19,8 @@
 #include "bitops.h"
 #include <asm/cpufeature.h>
 
+#include "cpustr.h"
+
 static char *cpu_name(int level)
 {
 	static char buf[6];
@@ -35,6 +37,7 @@ int validate_cpu(void)
 {
 	u32 *err_flags;
 	int cpu_level, req_level;
+	const unsigned char *msg_strs;
 
 	check_cpu(&cpu_level, &req_level, &err_flags);
 
@@ -51,13 +54,26 @@ int validate_cpu(void)
 		puts("This kernel requires the following features "
 		     "not present on the CPU:\n");
 
+		msg_strs = (const unsigned char *)x86_cap_strs;
+
 		for (i = 0; i < NCAPINTS; i++) {
 			u32 e = err_flags[i];
 
 			for (j = 0; j < 32; j++) {
-				if (e & 1)
-					printf("%d:%d ", i, j);
-
+				int n = (i << 5)+j;
+				if (*msg_strs < n) {
+					/* Skip to the next string */
+					do {
+						msg_strs++;
+					} while (*msg_strs);
+					msg_strs++;
+				}
+				if (e & 1) {
+					if (*msg_strs == n && msg_strs[1])
+						printf("%s ", msg_strs+1);
+					else
+						printf("%d:%d ", i, j);
+				}
 				e >>= 1;
 			}
 		}
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
new file mode 100644
index 00000000000..bbe76953bae
--- /dev/null
+++ b/arch/x86/boot/mkcpustr.c
@@ -0,0 +1,49 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2008 rPath, Inc. - All Rights Reserved
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * This is a host program to preprocess the CPU strings into a
+ * compact format suitable for the setup code.
+ */
+
+#include <stdio.h>
+
+#include "../kernel/cpu/feature_names.c"
+
+#if NCAPFLAGS > 8
+# error "Need to adjust the boot code handling of CPUID strings"
+#endif
+
+int main(void)
+{
+	int i;
+	const char *str;
+
+	printf("static const char x86_cap_strs[] = \n");
+
+	for (i = 0; i < NCAPINTS*32; i++) {
+		str = x86_cap_flags[i];
+
+		if (i == NCAPINTS*32-1) {
+			/* The last entry must be unconditional; this
+			   also consumes the compiler-added null character */
+			if (!str)
+				str = "";
+			printf("\t\"\\x%02x\"\"%s\"\n", i, str);
+		} else if (str) {
+			printf("#if REQUIRED_MASK%d & (1 << %d)\n"
+			       "\t\"\\x%02x\"\"%s\\0\"\n"
+			       "#endif\n",
+			       i >> 5, i & 31, i, str);
+		}
+	}
+	printf("\t;\n");
+	return 0;
+}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6f813009d44..21dc1a061bf 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -37,7 +37,8 @@ obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
 obj-$(CONFIG_MICROCODE)		+= microcode.o
 obj-$(CONFIG_PCI)		+= early-quirks.o
-obj-$(CONFIG_APM)		+= apm_32.o
+apm-y				:= apm_32.o
+obj-$(CONFIG_APM)		+= apm.o
 obj-$(CONFIG_X86_SMP)		+= smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o
 obj-$(CONFIG_X86_32_SMP)	+= smpcommon_32.o
 obj-$(CONFIG_X86_64_SMP)	+= smp_64.o smpboot_64.o tsc_sync.o
@@ -74,7 +75,8 @@ ifdef CONFIG_INPUT_PCSPKR
 obj-y				+= pcspeaker.o
 endif
 
-obj-$(CONFIG_SCx200)		+= scx200_32.o
+obj-$(CONFIG_SCx200)		+= scx200.o
+scx200-y			+= scx200_32.o
 
 ###
 # 64 bit specific files
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index cfdb2f3bd76..a0c4d7c5dbd 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -3,6 +3,7 @@
 #
 
 obj-y			:= intel_cacheinfo.o addon_cpuid_features.o
+obj-y			+= feature_names.o
 
 obj-$(CONFIG_X86_32)	+= common.o proc.o bugs.o
 obj-$(CONFIG_X86_32)	+= amd.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b7b2142b58e..d9313d9adce 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -623,16 +623,6 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
  * They will insert themselves into the cpu_devs structure.
  * Then, when cpu_init() is called, we can just iterate over that array.
  */
-
-extern int intel_cpu_init(void);
-extern int cyrix_init_cpu(void);
-extern int nsc_init_cpu(void);
-extern int amd_init_cpu(void);
-extern int centaur_init_cpu(void);
-extern int transmeta_init_cpu(void);
-extern int nexgen_init_cpu(void);
-extern int umc_init_cpu(void);
-
 void __init early_cpu_init(void)
 {
 	intel_cpu_init();
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index ad6527a5beb..e0b38c33d84 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -27,3 +27,12 @@ extern void display_cacheinfo(struct cpuinfo_x86 *c);
 extern void early_init_intel(struct cpuinfo_x86 *c);
 extern void early_init_amd(struct cpuinfo_x86 *c);
 
+/* Specific CPU type init functions */
+int intel_cpu_init(void);
+int amd_init_cpu(void);
+int cyrix_init_cpu(void);
+int nsc_init_cpu(void);
+int centaur_init_cpu(void);
+int transmeta_init_cpu(void);
+int nexgen_init_cpu(void);
+int umc_init_cpu(void);
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
new file mode 100644
index 00000000000..ee975ac6bbc
--- /dev/null
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -0,0 +1,83 @@
+/*
+ * Strings for the various x86 capability flags.
+ *
+ * This file must not contain any executable code.
+ */
+
+#include "asm/cpufeature.h"
+
+/*
+ * These flag bits must match the definitions in <asm/cpufeature.h>.
+ * NULL means this bit is undefined or reserved; either way it doesn't
+ * have meaning as far as Linux is concerned.  Note that it's important
+ * to realize there is a difference between this table and CPUID -- if
+ * applications want to get the raw CPUID data, they should access
+ * /dev/cpu/<cpu_nr>/cpuid instead.
+ */
+const char * const x86_cap_flags[NCAPINTS*32] = {
+	/* Intel-defined */
+	"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+	"cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+	"pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+	"fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
+
+	/* AMD-defined */
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
+	NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
+	"3dnowext", "3dnow",
+
+	/* Transmeta-defined */
+	"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+	/* Other (Linux-defined) */
+	"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
+	NULL, NULL, NULL, NULL,
+	"constant_tsc", "up", NULL, "arch_perfmon",
+	"pebs", "bts", NULL, NULL,
+	"rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+	/* Intel-defined (#2) */
+	"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
+	"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
+	NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+	/* VIA/Cyrix/Centaur-defined */
+	NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
+	"ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+	/* AMD-defined (#2) */
+	"lahf_lm", "cmp_legacy", "svm", "extapic",
+	"cr8_legacy", "abm", "sse4a", "misalignsse",
+	"3dnowprefetch", "osvw", "ibs", "sse5",
+	"skinit", "wdt", NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+	/* Auxiliary (Linux-defined) */
+	"ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+};
+
+const char *const x86_power_flags[32] = {
+	"ts",	/* temperature sensor */
+	"fid",  /* frequency id control */
+	"vid",  /* voltage id control */
+	"ttp",  /* thermal trip */
+	"tm",
+	"stc",
+	"100mhzsteps",
+	"hwpstate",
+	"",	/* tsc invariant mapped to constant_tsc */
+		/* nothing */
+};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d1c372b018d..fae31ce747b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -13,6 +13,7 @@
 #include <asm/uaccess.h>
 #include <asm/ptrace.h>
 #include <asm/ds.h>
+#include <asm/bugs.h>
 
 #include "cpu.h"
 
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 8e139c70f88..ff14c320040 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -7,8 +7,6 @@
 #include <asm/processor-flags.h>
 #include "mtrr.h"
 
-int arr3_protected;
-
 static void
 cyrix_get_arr(unsigned int reg, unsigned long *base,
 	      unsigned long *size, mtrr_type * type)
@@ -99,8 +97,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
 	case 4:
 		return replace_reg;
 	case 3:
-		if (arr3_protected)
-			break;
 	case 2:
 	case 1:
 	case 0:
@@ -115,8 +111,6 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
 	} else {
 		for (i = 0; i < 7; i++) {
 			cyrix_get_arr(i, &lbase, &lsize, &ltype);
-			if ((i == 3) && arr3_protected)
-				continue;
 			if (lsize == 0)
 				return i;
 		}
@@ -260,107 +254,6 @@ static void cyrix_set_all(void)
 	post_set();
 }
 
-#if 0
-/*
- * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection
- * with the SMM (System Management Mode) mode. So we need the following:
- * Check whether SMI_LOCK (CCR3 bit 0) is set
- *   if it is set, write a warning message: ARR3 cannot be changed!
- *     (it cannot be changed until the next processor reset)
- *   if it is reset, then we can change it, set all the needed bits:
- *   - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset)
- *   - disable access to SMM memory (CCR1 bit 2 reset)
- *   - disable SMM mode (CCR1 bit 1 reset)
- *   - disable write protection of ARR3 (CCR6 bit 1 reset)
- *   - (maybe) disable ARR3
- * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
- */
-static void __init
-cyrix_arr_init(void)
-{
-	struct set_mtrr_context ctxt;
-	unsigned char ccr[7];
-	int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 };
-#ifdef CONFIG_SMP
-	int i;
-#endif
-
-	/* flush cache and enable MAPEN */
-	set_mtrr_prepare_save(&ctxt);
-	set_mtrr_cache_disable(&ctxt);
-
-	/* Save all CCRs locally */
-	ccr[0] = getCx86(CX86_CCR0);
-	ccr[1] = getCx86(CX86_CCR1);
-	ccr[2] = getCx86(CX86_CCR2);
-	ccr[3] = ctxt.ccr3;
-	ccr[4] = getCx86(CX86_CCR4);
-	ccr[5] = getCx86(CX86_CCR5);
-	ccr[6] = getCx86(CX86_CCR6);
-
-	if (ccr[3] & 1) {
-		ccrc[3] = 1;
-		arr3_protected = 1;
-	} else {
-		/* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
-		 * access to SMM memory through ARR3 (bit 7).
-		 */
-		if (ccr[1] & 0x80) {
-			ccr[1] &= 0x7f;
-			ccrc[1] |= 0x80;
-		}
-		if (ccr[1] & 0x04) {
-			ccr[1] &= 0xfb;
-			ccrc[1] |= 0x04;
-		}
-		if (ccr[1] & 0x02) {
-			ccr[1] &= 0xfd;
-			ccrc[1] |= 0x02;
-		}
-		arr3_protected = 0;
-		if (ccr[6] & 0x02) {
-			ccr[6] &= 0xfd;
-			ccrc[6] = 1;	/* Disable write protection of ARR3 */
-			setCx86(CX86_CCR6, ccr[6]);
-		}
-		/* Disable ARR3. This is safe now that we disabled SMM. */
-		/* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
-	}
-	/* If we changed CCR1 in memory, change it in the processor, too. */
-	if (ccrc[1])
-		setCx86(CX86_CCR1, ccr[1]);
-
-	/* Enable ARR usage by the processor */
-	if (!(ccr[5] & 0x20)) {
-		ccr[5] |= 0x20;
-		ccrc[5] = 1;
-		setCx86(CX86_CCR5, ccr[5]);
-	}
-#ifdef CONFIG_SMP
-	for (i = 0; i < 7; i++)
-		ccr_state[i] = ccr[i];
-	for (i = 0; i < 8; i++)
-		cyrix_get_arr(i,
-			      &arr_state[i].base, &arr_state[i].size,
-			      &arr_state[i].type);
-#endif
-
-	set_mtrr_done(&ctxt);	/* flush cache and disable MAPEN */
-
-	if (ccrc[5])
-		printk(KERN_INFO "mtrr: ARR usage was not enabled, enabled manually\n");
-	if (ccrc[3])
-		printk(KERN_INFO "mtrr: ARR3 cannot be changed\n");
-/*
-    if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n");
-    if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n");
-    if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n");
-*/
-	if (ccrc[6])
-		printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n");
-}
-#endif
-
 static struct mtrr_ops cyrix_mtrr_ops = {
 	.vendor            = X86_VENDOR_CYRIX,
 //	.init              = cyrix_arr_init,
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 71591958265..1e27b69a7a0 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -59,12 +59,6 @@ struct mtrr_ops * mtrr_if = NULL;
 static void set_mtrr(unsigned int reg, unsigned long base,
 		     unsigned long size, mtrr_type type);
 
-#ifndef CONFIG_X86_64
-extern int arr3_protected;
-#else
-#define arr3_protected 0
-#endif
-
 void set_mtrr_ops(struct mtrr_ops * ops)
 {
 	if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
@@ -513,12 +507,6 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
 		printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
 		goto out;
 	}
-	if (is_cpu(CYRIX) && !use_intel()) {
-		if ((reg == 3) && arr3_protected) {
-			printk(KERN_WARNING "mtrr: ARR3 cannot be changed\n");
-			goto out;
-		}
-	}
 	mtrr_if->get(reg, &lbase, &lsize, &ltype);
 	if (lsize < 1) {
 		printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
@@ -566,10 +554,6 @@ EXPORT_SYMBOL(mtrr_del);
  * These should be called implicitly, but we can't yet until all the initcall
  * stuff is done...
  */
-extern void amd_init_mtrr(void);
-extern void cyrix_init_mtrr(void);
-extern void centaur_init_mtrr(void);
-
 static void __init init_ifs(void)
 {
 #ifndef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index fb74a2c2081..2cc77eb6fea 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -97,3 +97,7 @@ void mtrr_state_warn(void);
 const char *mtrr_attrib_to_str(int x);
 void mtrr_wrmsr(unsigned, unsigned, unsigned);
 
+/* CPU specific mtrr init functions */
+int amd_init_mtrr(void);
+int cyrix_init_mtrr(void);
+int centaur_init_mtrr(void);
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 02821326014..af11d31dce0 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -10,80 +10,6 @@
  */
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
-	/* 
-	 * These flag bits must match the definitions in <asm/cpufeature.h>.
-	 * NULL means this bit is undefined or reserved; either way it doesn't
-	 * have meaning as far as Linux is concerned.  Note that it's important
-	 * to realize there is a difference between this table and CPUID -- if
-	 * applications want to get the raw CPUID data, they should access
-	 * /dev/cpu/<cpu_nr>/cpuid instead.
-	 */
-	static const char * const x86_cap_flags[] = {
-		/* Intel-defined */
-	        "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
-	        "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
-	        "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
-	        "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
-		/* AMD-defined */
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
-		NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
-		"3dnowext", "3dnow",
-
-		/* Transmeta-defined */
-		"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* Other (Linux-defined) */
-		"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
-		NULL, NULL, NULL, NULL,
-		"constant_tsc", "up", NULL, "arch_perfmon",
-		"pebs", "bts", NULL, "sync_rdtsc",
-		"rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* Intel-defined (#2) */
-		"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
-		"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
-		NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* VIA/Cyrix/Centaur-defined */
-		NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
-		"ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* AMD-defined (#2) */
-		"lahf_lm", "cmp_legacy", "svm", "extapic",
-		"cr8_legacy", "abm", "sse4a", "misalignsse",
-		"3dnowprefetch", "osvw", "ibs", "sse5",
-		"skinit", "wdt", NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* Auxiliary (Linux-defined) */
-		"ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	};
-	static const char * const x86_power_flags[] = {
-		"ts",	/* temperature sensor */
-		"fid",  /* frequency id control */
-		"vid",  /* voltage id control */
-		"ttp",  /* thermal trip */
-		"tm",
-		"stc",
-		"100mhzsteps",
-		"hwpstate",
-		"",	/* constant_tsc - moved to flags */
-		/* nothing */
-	};
 	struct cpuinfo_x86 *c = v;
 	int i, n = 0;
 	int fpu_exception;
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index a63432d800f..288e7a6598a 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
- *   
- *   Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ *   Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -17,6 +17,10 @@
  * and then read in chunks of 16 bytes.  A larger size means multiple
  * reads of consecutive levels.
  *
+ * The lower 32 bits of the file position is used as the incoming %eax,
+ * and the upper 32 bits of the file position as the incoming %ecx,
+ * the latter intended for "counting" eax levels like eax=4.
+ *
  * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on
  * an SMP box will direct the access to CPU %d.
  */
@@ -43,35 +47,24 @@
 
 static struct class *cpuid_class;
 
-struct cpuid_command {
-	u32 reg;
-	u32 *data;
+struct cpuid_regs {
+	u32 eax, ebx, ecx, edx;
 };
 
 static void cpuid_smp_cpuid(void *cmd_block)
 {
-	struct cpuid_command *cmd = cmd_block;
-
-	cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2],
-		      &cmd->data[3]);
-}
-
-static inline void do_cpuid(int cpu, u32 reg, u32 * data)
-{
-	struct cpuid_command cmd;
-
-	cmd.reg = reg;
-	cmd.data = data;
+	struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
 
-	smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+	cpuid_count(cmd->eax, cmd->ecx,
+		    &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx);
 }
 
 static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
 {
 	loff_t ret;
+	struct inode *inode = file->f_mapping->host;
 
-	lock_kernel();
-
+	mutex_lock(&inode->i_mutex);
 	switch (orig) {
 	case 0:
 		file->f_pos = offset;
@@ -84,8 +77,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
 	default:
 		ret = -EINVAL;
 	}
-
-	unlock_kernel();
+	mutex_unlock(&inode->i_mutex);
 	return ret;
 }
 
@@ -93,19 +85,21 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
 			  size_t count, loff_t * ppos)
 {
 	char __user *tmp = buf;
-	u32 data[4];
-	u32 reg = *ppos;
+	struct cpuid_regs cmd;
 	int cpu = iminor(file->f_path.dentry->d_inode);
+	u64 pos = *ppos;
 
 	if (count % 16)
 		return -EINVAL;	/* Invalid chunk size */
 
 	for (; count; count -= 16) {
-		do_cpuid(cpu, reg, data);
-		if (copy_to_user(tmp, &data, 16))
+		cmd.eax = pos;
+		cmd.ecx = pos >> 32;
+		smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+		if (copy_to_user(tmp, &cmd, 16))
 			return -EFAULT;
 		tmp += 16;
-		*ppos = reg++;
+		*ppos = ++pos;
 	}
 
 	return tmp - buf;
@@ -193,7 +187,7 @@ static int __init cpuid_init(void)
 	}
 	for_each_online_cpu(i) {
 		err = cpuid_device_create(i);
-		if (err != 0) 
+		if (err != 0)
 			goto out_class;
 	}
 	register_hotcpu_notifier(&cpuid_class_cpu_notifier);
@@ -208,7 +202,7 @@ out_class:
 	}
 	class_destroy(cpuid_class);
 out_chrdev:
-	unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");	
+	unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
 out:
 	return err;
 }
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1411324a625..32dd62b36ff 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -379,11 +379,9 @@ void __init efi_init(void)
 #endif
 }
 
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 static void __init runtime_code_page_mkexec(void)
 {
 	efi_memory_desc_t *md;
-	unsigned long end;
 	void *p;
 
 	if (!(__supported_pte_mask & _PAGE_NX))
@@ -392,18 +390,13 @@ static void __init runtime_code_page_mkexec(void)
 	/* Make EFI runtime service code area executable */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
-		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
-		if (md->type == EFI_RUNTIME_SERVICES_CODE &&
-		    (end >> PAGE_SHIFT) <= max_pfn_mapped) {
-			set_memory_x(md->virt_addr, md->num_pages);
-			set_memory_uc(md->virt_addr, md->num_pages);
-		}
+
+		if (md->type != EFI_RUNTIME_SERVICES_CODE)
+			continue;
+
+		set_memory_x(md->virt_addr, md->num_pages << EFI_PAGE_SHIFT);
 	}
-	__flush_tlb_all();
 }
-#else
-static inline void __init runtime_code_page_mkexec(void) { }
-#endif
 
 /*
  * This function will switch the EFI runtime services to virtual mode.
@@ -417,30 +410,40 @@ void __init efi_enter_virtual_mode(void)
 {
 	efi_memory_desc_t *md;
 	efi_status_t status;
-	unsigned long end;
-	void *p;
+	unsigned long size;
+	u64 end, systab;
+	void *p, *va;
 
 	efi.systab = NULL;
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		md = p;
 		if (!(md->attribute & EFI_MEMORY_RUNTIME))
 			continue;
-		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
-		if ((md->attribute & EFI_MEMORY_WB) &&
-		    ((end >> PAGE_SHIFT) <= max_pfn_mapped))
-			md->virt_addr = (unsigned long)__va(md->phys_addr);
+
+		size = md->num_pages << EFI_PAGE_SHIFT;
+		end = md->phys_addr + size;
+
+		if ((end >> PAGE_SHIFT) <= max_pfn_mapped)
+			va = __va(md->phys_addr);
 		else
-			md->virt_addr = (unsigned long)
-				efi_ioremap(md->phys_addr,
-					    md->num_pages << EFI_PAGE_SHIFT);
-		if (!md->virt_addr)
+			va = efi_ioremap(md->phys_addr, size);
+
+		if (md->attribute & EFI_MEMORY_WB)
+			set_memory_uc(md->virt_addr, size);
+
+		md->virt_addr = (u64) (unsigned long) va;
+
+		if (!va) {
 			printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
 			       (unsigned long long)md->phys_addr);
-		if ((md->phys_addr <= (unsigned long)efi_phys.systab) &&
-		    ((unsigned long)efi_phys.systab < end))
-			efi.systab = (efi_system_table_t *)(unsigned long)
-				(md->virt_addr - md->phys_addr +
-				 (unsigned long)efi_phys.systab);
+			continue;
+		}
+
+		systab = (u64) (unsigned long) efi_phys.systab;
+		if (md->phys_addr <= systab && systab < end) {
+			systab += md->virt_addr - md->phys_addr;
+			efi.systab = (efi_system_table_t *) (unsigned long) systab;
+		}
 	}
 
 	BUG_ON(!efi.systab);
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 674f2379480..09d5c233093 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -54,10 +54,10 @@ static void __init early_mapping_set_exec(unsigned long start,
 		else
 			set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
 					    __supported_pte_mask));
-		if (level == 4)
-			start = (start + PMD_SIZE) & PMD_MASK;
-		else
+		if (level == PG_LEVEL_4K)
 			start = (start + PAGE_SIZE) & PAGE_MASK;
+		else
+			start = (start + PMD_SIZE) & PMD_MASK;
 	}
 }
 
@@ -109,23 +109,23 @@ void __init efi_reserve_bootmem(void)
 				memmap.nr_map * memmap.desc_size);
 }
 
-void __iomem * __init efi_ioremap(unsigned long offset,
-				  unsigned long size)
+void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
 {
 	static unsigned pages_mapped;
-	unsigned long last_addr;
 	unsigned i, pages;
 
-	last_addr = offset + size - 1;
-	offset &= PAGE_MASK;
-	pages = (PAGE_ALIGN(last_addr) - offset) >> PAGE_SHIFT;
+	/* phys_addr and size must be page aligned */
+	if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK))
+		return NULL;
+
+	pages = size >> PAGE_SHIFT;
 	if (pages_mapped + pages > MAX_EFI_IO_PAGES)
 		return NULL;
 
 	for (i = 0; i < pages; i++) {
 		__set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped,
-			     offset, PAGE_KERNEL_EXEC_NOCACHE);
-		offset += PAGE_SIZE;
+			     phys_addr, PAGE_KERNEL);
+		phys_addr += PAGE_SIZE;
 		pages_mapped++;
 	}
 
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 1d5a7a36120..4f283ad215e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -63,7 +63,7 @@ startup_64:
 
 	/* Is the address not 2M aligned? */
 	movq	%rbp, %rax
-	andl	$~LARGE_PAGE_MASK, %eax
+	andl	$~PMD_PAGE_MASK, %eax
 	testl	%eax, %eax
 	jnz	bad_address
 
@@ -88,7 +88,7 @@ startup_64:
 
 	/* Add an Identity mapping if I am above 1G */
 	leaq	_text(%rip), %rdi
-	andq	$LARGE_PAGE_MASK, %rdi
+	andq	$PMD_PAGE_MASK, %rdi
 
 	movq	%rdi, %rax
 	shrq	$PUD_SHIFT, %rax
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 8a7660c8394..0224c3637c7 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -35,7 +35,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
 	if (mincount <= pc->size)
 		return 0;
 	oldsize = pc->size;
-	mincount = (mincount + 511) & (~511);
+	mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
+			(~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
 	if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
 		newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
 	else
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index bd82850e651..af51ea8400b 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
- *   
- *   Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ *   Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
  *
  *   This program is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU General Public License as published by
@@ -45,9 +45,10 @@ static struct class *msr_class;
 
 static loff_t msr_seek(struct file *file, loff_t offset, int orig)
 {
-	loff_t ret = -EINVAL;
+	loff_t ret;
+	struct inode *inode = file->f_mapping->host;
 
-	lock_kernel();
+	mutex_lock(&inode->i_mutex);
 	switch (orig) {
 	case 0:
 		file->f_pos = offset;
@@ -56,8 +57,11 @@ static loff_t msr_seek(struct file *file, loff_t offset, int orig)
 	case 1:
 		file->f_pos += offset;
 		ret = file->f_pos;
+		break;
+	default:
+		ret = -EINVAL;
 	}
-	unlock_kernel();
+	mutex_unlock(&inode->i_mutex);
 	return ret;
 }
 
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 4d5cc718198..845cbecd68e 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -501,7 +501,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
 	}
 
 	a = aper + iommu_size;
-	iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;
+	iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
 
 	if (iommu_size < 64*1024*1024) {
 		printk(KERN_WARNING
@@ -731,7 +731,8 @@ void __init gart_iommu_init(void)
 	 * the backing memory. The GART address is only used by PCI
 	 * devices.
 	 */
-	clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);
+	set_memory_np((unsigned long)__va(iommu_bus_base),
+				iommu_size >> PAGE_SHIFT);
 
 	/*
 	 * Try to workaround a bug (thanks to BenH)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 968371ab223..dabdbeff1f7 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -251,7 +251,7 @@ void cpu_idle_wait(void)
 		 * because it has nothing to do.
 		 * Give all the remaining CPUS a kick.
 		 */
-		smp_call_function_mask(map, do_nothing, 0, 0);
+		smp_call_function_mask(map, do_nothing, NULL, 0);
 	} while (!cpus_empty(map));
 
 	set_cpus_allowed(current, tmp);
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 18df70c534b..c8939dfddfb 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -1068,82 +1068,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	struct cpuinfo_x86 *c = v;
 	int cpu = 0, i;
 
-	/*
-	 * These flag bits must match the definitions in <asm/cpufeature.h>.
-	 * NULL means this bit is undefined or reserved; either way it doesn't
-	 * have meaning as far as Linux is concerned.  Note that it's important
-	 * to realize there is a difference between this table and CPUID -- if
-	 * applications want to get the raw CPUID data, they should access
-	 * /dev/cpu/<cpu_nr>/cpuid instead.
-	 */
-	static const char *const x86_cap_flags[] = {
-		/* Intel-defined */
-		"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
-		"cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
-		"pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
-		"fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
-		/* AMD-defined */
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
-		NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
-		"3dnowext", "3dnow",
-
-		/* Transmeta-defined */
-		"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* Other (Linux-defined) */
-		"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
-		NULL, NULL, NULL, NULL,
-		"constant_tsc", "up", NULL, "arch_perfmon",
-		"pebs", "bts", NULL, "sync_rdtsc",
-		"rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* Intel-defined (#2) */
-		"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
-		"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
-		NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* VIA/Cyrix/Centaur-defined */
-		NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
-		"ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* AMD-defined (#2) */
-		"lahf_lm", "cmp_legacy", "svm", "extapic",
-		"cr8_legacy", "abm", "sse4a", "misalignsse",
-		"3dnowprefetch", "osvw", "ibs", "sse5",
-		"skinit", "wdt", NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-		/* Auxiliary (Linux-defined) */
-		"ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	};
-	static const char *const x86_power_flags[] = {
-		"ts",	/* temperature sensor */
-		"fid",  /* frequency id control */
-		"vid",  /* voltage id control */
-		"ttp",  /* thermal trip */
-		"tm",
-		"stc",
-		"100mhzsteps",
-		"hwpstate",
-		"",	/* tsc invariant mapped to constant_tsc */
-		/* nothing */
-	};
-
-
 #ifdef CONFIG_SMP
 	cpu = c->cpu_index;
 #endif
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index ae0ef2e304c..36c100c323a 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/sort.h>
 #include <asm/uaccess.h>
+#include <asm/asm.h>
 
 extern int rodata_test_data;
 
@@ -89,16 +90,7 @@ static noinline int test_address(void *address)
 		"2:	mov %[zero], %[rslt]\n"
 		"	ret\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"       .align 8\n"
-#ifdef CONFIG_X86_32
-		"	.long 0b\n"
-		"	.long 2b\n"
-#else
-		"	.quad 0b\n"
-		"	.quad 2b\n"
-#endif
-		".previous\n"
+		_ASM_EXTABLE(0b,2b)
 		: [rslt] "=r" (result)
 		: [fake_code] "r" (address), [zero] "r" (0UL), "0" (result)
 	);
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 9bcc1c6aca3..64580679861 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -11,12 +11,7 @@
  *	trampoline page to make our stack and everything else
  *	is a mystery.
  *
- *	In fact we don't actually need a stack so we don't
- *	set one up.
- *
- *	We jump into the boot/compressed/head.S code. So you'd
- *	better be running a compressed kernel image or you
- *	won't get very far.
+ *	We jump into arch/x86/kernel/head_32.S.
  *
  *	On entry to trampoline_data, the processor is in real mode
  *	with 16-bit addressing and 16-bit data.  CS has some value
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index e30b67c6a9f..4aedd0bcee4 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -10,9 +10,6 @@
  *	trampoline page to make our stack and everything else
  *	is a mystery.
  *
- *	In fact we don't actually need a stack so we don't
- *	set one up.
- *
  *	On entry to trampoline_data, the processor is in real mode
  *	with 16-bit addressing and 16-bit data.  CS has some value
  *	and IP is zero.  Thus, data addresses need to be absolute
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 4525bc2c2e1..12affe1f9bc 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -220,21 +220,21 @@ static void vmi_set_tr(void)
 static void vmi_write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
 {
 	u32 *idt_entry = (u32 *)g;
-	vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[2]);
+	vmi_ops.write_idt_entry(dt, entry, idt_entry[0], idt_entry[1]);
 }
 
 static void vmi_write_gdt_entry(struct desc_struct *dt, int entry,
 				const void *desc, int type)
 {
 	u32 *gdt_entry = (u32 *)desc;
-	vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[2]);
+	vmi_ops.write_gdt_entry(dt, entry, gdt_entry[0], gdt_entry[1]);
 }
 
 static void vmi_write_ldt_entry(struct desc_struct *dt, int entry,
 				const void *desc)
 {
 	u32 *ldt_entry = (u32 *)desc;
-	vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[2]);
+	vmi_ops.write_idt_entry(dt, entry, ldt_entry[0], ldt_entry[1]);
 }
 
 static void vmi_load_sp0(struct tss_struct *tss,
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index 28084d2e8dd..cc9b4a4450f 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -4,6 +4,7 @@
 #include <linux/hardirq.h>
 #include <linux/module.h>
 
+#include <asm/asm.h>
 #include <asm/i387.h>
 
 
@@ -50,10 +51,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
 		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
 		"   jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 1b, 3b\n"
-		".previous"
+		_ASM_EXTABLE(1b,3b)
 		: : "r" (from) );
 		
 	
@@ -81,10 +79,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
 		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
 		"   jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 1b, 3b\n"
-		".previous"
+		_ASM_EXTABLE(1b,3b)
 		: : "r" (from), "r" (to) : "memory");
 		from+=64;
 		to+=64;
@@ -181,10 +176,7 @@ static void fast_copy_page(void *to, void *from)
 		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
 		"   jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 1b, 3b\n"
-		".previous"
+		_ASM_EXTABLE(1b,3b)
 		: : "r" (from) );
 
 	for(i=0; i<(4096-320)/64; i++)
@@ -211,10 +203,7 @@ static void fast_copy_page(void *to, void *from)
 		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
 		"   jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 1b, 3b\n"
-		".previous"
+		_ASM_EXTABLE(1b,3b)
 		: : "r" (from), "r" (to) : "memory");
 		from+=64;
 		to+=64;
@@ -311,10 +300,7 @@ static void fast_copy_page(void *to, void *from)
 		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
 		"   jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 1b, 3b\n"
-		".previous"
+		_ASM_EXTABLE(1b,3b)
 		: : "r" (from) );
 
 	for(i=0; i<4096/64; i++)
@@ -341,10 +327,7 @@ static void fast_copy_page(void *to, void *from)
 		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
 		"   jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 1b, 3b\n"
-		".previous"
+		_ASM_EXTABLE(1b,3b)
 		: : "r" (from), "r" (to) : "memory");
 		from+=64;
 		to+=64;
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9c4ffd5bedb..e849b9998b0 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -48,10 +48,7 @@ do {									   \
 		"3:	movl %5,%0\n"					   \
 		"	jmp 2b\n"					   \
 		".previous\n"						   \
-		".section __ex_table,\"a\"\n"				   \
-		"	.align 4\n"					   \
-		"	.long 0b,3b\n"					   \
-		".previous"						   \
+		_ASM_EXTABLE(0b,3b)					   \
 		: "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1),	   \
 		  "=&D" (__d2)						   \
 		: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
@@ -132,11 +129,8 @@ do {									\
 		"3:	lea 0(%2,%0,4),%0\n"				\
 		"	jmp 2b\n"					\
 		".previous\n"						\
-		".section __ex_table,\"a\"\n"				\
-		"	.align 4\n"					\
-		"	.long 0b,3b\n"					\
-		"	.long 1b,2b\n"					\
-		".previous"						\
+		_ASM_EXTABLE(0b,3b)					\
+		_ASM_EXTABLE(1b,2b)					\
 		: "=&c"(size), "=&D" (__d0)				\
 		: "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0));	\
 } while (0)
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 893d43f838c..0c89d1bb028 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -31,10 +31,7 @@ do {									   \
 		"3:	movq %5,%0\n"					   \
 		"	jmp 2b\n"					   \
 		".previous\n"						   \
-		".section __ex_table,\"a\"\n"				   \
-		"	.align 8\n"					   \
-		"	.quad 0b,3b\n"					   \
-		".previous"						   \
+		_ASM_EXTABLE(0b,3b)					   \
 		: "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1),	   \
 		  "=&D" (__d2)						   \
 		: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
@@ -87,11 +84,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
 		"3:	lea 0(%[size1],%[size8],8),%[size8]\n"
 		"	jmp 2b\n"
 		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"       .align 8\n"
-		"	.quad 0b,3b\n"
-		"	.quad 1b,2b\n"
-		".previous"
+		_ASM_EXTABLE(0b,3b)
+		_ASM_EXTABLE(1b,2b)
 		: [size8] "=c"(size), [dst] "=&D" (__d0)
 		: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
 		  [zero] "r" (0UL), [eight] "r" (8UL));
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e4440d0abf8..ad8b9733d6b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -240,7 +240,8 @@ void dump_pagetable(unsigned long address)
 	pud = pud_offset(pgd, address);
 	if (bad_address(pud)) goto bad;
 	printk("PUD %lx ", pud_val(*pud));
-	if (!pud_present(*pud))	goto ret;
+	if (!pud_present(*pud) || pud_large(*pud))
+		goto ret;
 
 	pmd = pmd_offset(pud, address);
 	if (bad_address(pmd)) goto bad;
@@ -508,6 +509,10 @@ static int vmalloc_fault(unsigned long address)
 	pmd_t *pmd, *pmd_ref;
 	pte_t *pte, *pte_ref;
 
+	/* Make sure we are in vmalloc area */
+	if (!(address >= VMALLOC_START && address < VMALLOC_END))
+		return -1;
+
 	/* Copy kernel mappings over when needed. This can also
 	   happen within a race in page table update. In the later
 	   case just flush. */
@@ -603,6 +608,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	 */
 #ifdef CONFIG_X86_32
 	if (unlikely(address >= TASK_SIZE)) {
+#else
+	if (unlikely(address >= TASK_SIZE64)) {
+#endif
 		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
 		    vmalloc_fault(address) >= 0)
 			return;
@@ -618,6 +626,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 		goto bad_area_nosemaphore;
 	}
 
+
+#ifdef CONFIG_X86_32
 	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
 	   fault has been handled. */
 	if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
@@ -630,28 +640,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (in_atomic() || !mm)
 		goto bad_area_nosemaphore;
 #else /* CONFIG_X86_64 */
-	if (unlikely(address >= TASK_SIZE64)) {
-		/*
-		 * Don't check for the module range here: its PML4
-		 * is always initialized because it's shared with the main
-		 * kernel text. Only vmalloc may need PML4 syncups.
-		 */
-		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
-		      ((address >= VMALLOC_START && address < VMALLOC_END))) {
-			if (vmalloc_fault(address) >= 0)
-				return;
-		}
-
-		/* Can handle a stale RO->RW TLB */
-		if (spurious_fault(address, error_code))
-			return;
-
-		/*
-		 * Don't take the mm semaphore here. If we fixup a prefetch
-		 * fault we could otherwise deadlock.
-		 */
-		goto bad_area_nosemaphore;
-	}
 	if (likely(regs->flags & X86_EFLAGS_IF))
 		local_irq_enable();
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index f2f36f8dae5..d1bc04006d1 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -31,6 +31,7 @@
 #include <linux/initrd.h>
 #include <linux/cpumask.h>
 
+#include <asm/asm.h>
 #include <asm/processor.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -718,10 +719,7 @@ static noinline int do_test_wp_bit(void)
 		"1:	movb %1, %0	\n"
 		"	xorl %2, %2	\n"
 		"2:			\n"
-		".section __ex_table, \"a\"\n"
-		"	.align 4	\n"
-		"	.long 1b, 2b	\n"
-		".previous		\n"
+		_ASM_EXTABLE(1b,2b)
 		:"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
 		 "=q" (tmp_reg),
 		 "=r" (flag)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index eabcaed76c2..3a98d6f724a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -273,7 +273,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
 	int i = pmd_index(address);
 
 	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
-		unsigned long entry;
 		pmd_t *pmd = pmd_page + pmd_index(address);
 
 		if (address >= end) {
@@ -287,9 +286,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
 		if (pmd_val(*pmd))
 			continue;
 
-		entry = __PAGE_KERNEL_LARGE|_PAGE_GLOBAL|address;
-		entry &= __supported_pte_mask;
-		set_pmd(pmd, __pmd(entry));
+		set_pte((pte_t *)pmd,
+			pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
 	}
 }
 
@@ -435,49 +433,6 @@ void __init paging_init(void)
 #endif
 
 /*
- * Unmap a kernel mapping if it exists. This is useful to avoid
- * prefetches from the CPU leading to inconsistent cache lines.
- * address and size must be aligned to 2MB boundaries.
- * Does nothing when the mapping doesn't exist.
- */
-void __init clear_kernel_mapping(unsigned long address, unsigned long size)
-{
-	unsigned long end = address + size;
-
-	BUG_ON(address & ~LARGE_PAGE_MASK);
-	BUG_ON(size & ~LARGE_PAGE_MASK);
-
-	for (; address < end; address += LARGE_PAGE_SIZE) {
-		pgd_t *pgd = pgd_offset_k(address);
-		pud_t *pud;
-		pmd_t *pmd;
-
-		if (pgd_none(*pgd))
-			continue;
-
-		pud = pud_offset(pgd, address);
-		if (pud_none(*pud))
-			continue;
-
-		pmd = pmd_offset(pud, address);
-		if (!pmd || pmd_none(*pmd))
-			continue;
-
-		if (!(pmd_val(*pmd) & _PAGE_PSE)) {
-			/*
-			 * Could handle this, but it should not happen
-			 * currently:
-			 */
-			printk(KERN_ERR "clear_kernel_mapping: "
-				"mapping has been split. will leak memory\n");
-			pmd_ERROR(*pmd);
-		}
-		set_pmd(pmd, __pmd(0));
-	}
-	__flush_tlb_all();
-}
-
-/*
  * Memory hotplug specific functions
  */
 void online_page(struct page *page)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c004d94608f..ee6648fe6b1 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -70,25 +70,12 @@ int page_is_ram(unsigned long pagenr)
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
  */
-static int ioremap_change_attr(unsigned long paddr, unsigned long size,
+static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
 			       enum ioremap_mode mode)
 {
-	unsigned long vaddr = (unsigned long)__va(paddr);
 	unsigned long nrpages = size >> PAGE_SHIFT;
-	unsigned int level;
 	int err;
 
-	/* No change for pages after the last mapping */
-	if ((paddr + size - 1) >= (max_pfn_mapped << PAGE_SHIFT))
-		return 0;
-
-	/*
-	 * If there is no identity map for this address,
-	 * change_page_attr_addr is unnecessary
-	 */
-	if (!lookup_address(vaddr, &level))
-		return 0;
-
 	switch (mode) {
 	case IOR_MODE_UNCACHED:
 	default:
@@ -114,9 +101,8 @@ static int ioremap_change_attr(unsigned long paddr, unsigned long size,
 static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 			       enum ioremap_mode mode)
 {
-	void __iomem *addr;
+	unsigned long pfn, offset, last_addr, vaddr;
 	struct vm_struct *area;
-	unsigned long offset, last_addr;
 	pgprot_t prot;
 
 	/* Don't allow wraparound or zero size */
@@ -133,9 +119,10 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
 	 */
-	for (offset = phys_addr >> PAGE_SHIFT; offset < max_pfn_mapped &&
-	     (offset << PAGE_SHIFT) < last_addr; offset++) {
-		if (page_is_ram(offset))
+	for (pfn = phys_addr >> PAGE_SHIFT; pfn < max_pfn_mapped &&
+	     (pfn << PAGE_SHIFT) < last_addr; pfn++) {
+		if (page_is_ram(pfn) && pfn_valid(pfn) &&
+		    !PageReserved(pfn_to_page(pfn)))
 			return NULL;
 	}
 
@@ -163,19 +150,18 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
 	if (!area)
 		return NULL;
 	area->phys_addr = phys_addr;
-	addr = (void __iomem *) area->addr;
-	if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
-			       phys_addr, prot)) {
-		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
+	vaddr = (unsigned long) area->addr;
+	if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
+		remove_vm_area((void *)(vaddr & PAGE_MASK));
 		return NULL;
 	}
 
-	if (ioremap_change_attr(phys_addr, size, mode) < 0) {
-		vunmap(addr);
+	if (ioremap_change_attr(vaddr, size, mode) < 0) {
+		vunmap(area->addr);
 		return NULL;
 	}
 
-	return (void __iomem *) (offset + (char __iomem *)addr);
+	return (void __iomem *) (vaddr + offset);
 }
 
 /**
@@ -254,9 +240,6 @@ void iounmap(volatile void __iomem *addr)
 		return;
 	}
 
-	/* Reset the direct mapping. Can block */
-	ioremap_change_attr(p->phys_addr, p->size, IOR_MODE_CACHED);
-
 	/* Finally remove it */
 	o = remove_vm_area((void *)addr);
 	BUG_ON(p != o || o == NULL);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a920d09b919..5a02bf4c91e 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -202,6 +202,8 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	if (node_data[nodeid] == NULL)
 		return;
 	nodedata_phys = __pa(node_data[nodeid]);
+	printk(KERN_INFO "  NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
+		nodedata_phys + pgdat_size - 1);
 
 	memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
 	NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
@@ -225,12 +227,15 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 		return;
 	}
 	bootmap_start = __pa(bootmap);
-	Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
 
 	bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
 					 bootmap_start >> PAGE_SHIFT,
 					 start_pfn, end_pfn);
 
+	printk(KERN_INFO "  bootmap [%016lx -  %016lx] pages %lx\n",
+		 bootmap_start, bootmap_start + bootmap_size - 1,
+		 bootmap_pages);
+
 	free_bootmem_with_active_regions(nodeid, end);
 
 	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 7573e786d2f..398f3a578dd 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -137,7 +137,8 @@ static __init int exercise_pageattr(void)
 
 		for (k = 0; k < len[i]; k++) {
 			pte = lookup_address(addr[i] + k*PAGE_SIZE, &level);
-			if (!pte || pgprot_val(pte_pgprot(*pte)) == 0) {
+			if (!pte || pgprot_val(pte_pgprot(*pte)) == 0 ||
+			    !(pte_val(*pte) & _PAGE_PRESENT)) {
 				addr[i] = 0;
 				break;
 			}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e297bd65e51..bb55a78dcd6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,17 @@
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 
+/*
+ * The current flushing context - we pass it instead of 5 arguments:
+ */
+struct cpa_data {
+	unsigned long	vaddr;
+	pgprot_t	mask_set;
+	pgprot_t	mask_clr;
+	int		numpages;
+	int		flushtlb;
+};
+
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
@@ -52,21 +63,23 @@ void clflush_cache_range(void *vaddr, unsigned int size)
 
 static void __cpa_flush_all(void *arg)
 {
+	unsigned long cache = (unsigned long)arg;
+
 	/*
 	 * Flush all to work around Errata in early athlons regarding
 	 * large page flushing.
 	 */
 	__flush_tlb_all();
 
-	if (boot_cpu_data.x86_model >= 4)
+	if (cache && boot_cpu_data.x86_model >= 4)
 		wbinvd();
 }
 
-static void cpa_flush_all(void)
+static void cpa_flush_all(unsigned long cache)
 {
 	BUG_ON(irqs_disabled());
 
-	on_each_cpu(__cpa_flush_all, NULL, 1, 1);
+	on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
 }
 
 static void __cpa_flush_range(void *arg)
@@ -79,7 +92,7 @@ static void __cpa_flush_range(void *arg)
 	__flush_tlb_all();
 }
 
-static void cpa_flush_range(unsigned long start, int numpages)
+static void cpa_flush_range(unsigned long start, int numpages, int cache)
 {
 	unsigned int i, level;
 	unsigned long addr;
@@ -89,6 +102,9 @@ static void cpa_flush_range(unsigned long start, int numpages)
 
 	on_each_cpu(__cpa_flush_range, NULL, 1, 1);
 
+	if (!cache)
+		return;
+
 	/*
 	 * We only need to flush on one CPU,
 	 * clflush is a MESI-coherent instruction that
@@ -101,11 +117,27 @@ static void cpa_flush_range(unsigned long start, int numpages)
 		/*
 		 * Only flush present addresses:
 		 */
-		if (pte && pte_present(*pte))
+		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
 			clflush_cache_range((void *) addr, PAGE_SIZE);
 	}
 }
 
+#define HIGH_MAP_START	__START_KERNEL_map
+#define HIGH_MAP_END	(__START_KERNEL_map + KERNEL_TEXT_SIZE)
+
+
+/*
+ * Converts a virtual address to a X86-64 highmap address
+ */
+static unsigned long virt_to_highmap(void *address)
+{
+#ifdef CONFIG_X86_64
+	return __pa((unsigned long)address) + HIGH_MAP_START - phys_base;
+#else
+	return (unsigned long)address;
+#endif
+}
+
 /*
  * Certain areas of memory on x86 require very specific protection flags,
  * for example the BIOS area or kernel text. Callers don't always get this
@@ -129,12 +161,24 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
 	 */
 	if (within(address, (unsigned long)_text, (unsigned long)_etext))
 		pgprot_val(forbidden) |= _PAGE_NX;
+	/*
+	 * Do the same for the x86-64 high kernel mapping
+	 */
+	if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
+		pgprot_val(forbidden) |= _PAGE_NX;
+
 
 #ifdef CONFIG_DEBUG_RODATA
 	/* The .rodata section needs to be read-only */
 	if (within(address, (unsigned long)__start_rodata,
 				(unsigned long)__end_rodata))
 		pgprot_val(forbidden) |= _PAGE_RW;
+	/*
+	 * Do the same for the x86-64 high kernel mapping
+	 */
+	if (within(address, virt_to_highmap(__start_rodata),
+				virt_to_highmap(__end_rodata)))
+		pgprot_val(forbidden) |= _PAGE_RW;
 #endif
 
 	prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
@@ -142,6 +186,14 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
 	return prot;
 }
 
+/*
+ * Lookup the page table entry for a virtual address. Return a pointer
+ * to the entry and the level of the mapping.
+ *
+ * Note: We return pud and pmd either when the entry is marked large
+ * or when the present bit is not set. Otherwise we would return a
+ * pointer to a nonexisting mapping.
+ */
 pte_t *lookup_address(unsigned long address, int *level)
 {
 	pgd_t *pgd = pgd_offset_k(address);
@@ -152,21 +204,31 @@ pte_t *lookup_address(unsigned long address, int *level)
 
 	if (pgd_none(*pgd))
 		return NULL;
+
 	pud = pud_offset(pgd, address);
 	if (pud_none(*pud))
 		return NULL;
+
+	*level = PG_LEVEL_1G;
+	if (pud_large(*pud) || !pud_present(*pud))
+		return (pte_t *)pud;
+
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd))
 		return NULL;
 
 	*level = PG_LEVEL_2M;
-	if (pmd_large(*pmd))
+	if (pmd_large(*pmd) || !pmd_present(*pmd))
 		return (pte_t *)pmd;
 
 	*level = PG_LEVEL_4K;
+
 	return pte_offset_kernel(pmd, address);
 }
 
+/*
+ * Set the new pmd in all the pgds we know about:
+ */
 static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
 	/* change init_mm */
@@ -175,6 +237,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	if (!SHARED_KERNEL_PMD) {
 		struct page *page;
 
+		address = __pa(address);
 		list_for_each_entry(page, &pgd_list, lru) {
 			pgd_t *pgd;
 			pud_t *pud;
@@ -189,18 +252,114 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 #endif
 }
 
+static int
+try_preserve_large_page(pte_t *kpte, unsigned long address,
+			struct cpa_data *cpa)
+{
+	unsigned long nextpage_addr, numpages, pmask, psize, flags;
+	pte_t new_pte, old_pte, *tmp;
+	pgprot_t old_prot, new_prot;
+	int level, do_split = 1;
+
+	/*
+	 * An Athlon 64 X2 showed hard hangs if we tried to preserve
+	 * largepages and changed the PSE entry from RW to RO.
+	 *
+	 * As AMD CPUs have a long series of erratas in this area,
+	 * (and none of the known ones seem to explain this hang),
+	 * disable this code until the hang can be debugged:
+	 */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		return 1;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	/*
+	 * Check for races, another CPU might have split this page
+	 * up already:
+	 */
+	tmp = lookup_address(address, &level);
+	if (tmp != kpte)
+		goto out_unlock;
+
+	switch (level) {
+	case PG_LEVEL_2M:
+		psize = PMD_PAGE_SIZE;
+		pmask = PMD_PAGE_MASK;
+		break;
+#ifdef CONFIG_X86_64
+	case PG_LEVEL_1G:
+		psize = PMD_PAGE_SIZE;
+		pmask = PMD_PAGE_MASK;
+		break;
+#endif
+	default:
+		do_split = -EINVAL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Calculate the number of pages, which fit into this large
+	 * page starting at address:
+	 */
+	nextpage_addr = (address + psize) & pmask;
+	numpages = (nextpage_addr - address) >> PAGE_SHIFT;
+	if (numpages < cpa->numpages)
+		cpa->numpages = numpages;
+
+	/*
+	 * We are safe now. Check whether the new pgprot is the same:
+	 */
+	old_pte = *kpte;
+	old_prot = new_prot = pte_pgprot(old_pte);
+
+	pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+	pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
+	new_prot = static_protections(new_prot, address);
+
+	/*
+	 * If there are no changes, return. maxpages has been updated
+	 * above:
+	 */
+	if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
+		do_split = 0;
+		goto out_unlock;
+	}
+
+	/*
+	 * We need to change the attributes. Check, whether we can
+	 * change the large page in one go. We request a split, when
+	 * the address is not aligned and the number of pages is
+	 * smaller than the number of pages in the large page. Note
+	 * that we limited the number of possible pages already to
+	 * the number of pages in the large page.
+	 */
+	if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+		/*
+		 * The address is aligned and the number of pages
+		 * covers the full page.
+		 */
+		new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
+		__set_pmd_pte(kpte, address, new_pte);
+		cpa->flushtlb = 1;
+		do_split = 0;
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&pgd_lock, flags);
+
+	return do_split;
+}
+
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
-	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+	unsigned long flags, pfn, pfninc = 1;
 	gfp_t gfp_flags = GFP_KERNEL;
-	unsigned long flags;
-	unsigned long addr;
+	unsigned int i, level;
 	pte_t *pbase, *tmp;
+	pgprot_t ref_prot;
 	struct page *base;
-	unsigned int i, level;
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
-	gfp_flags = __GFP_HIGH | __GFP_NOFAIL | __GFP_NOWARN;
 	gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
 #endif
 	base = alloc_pages(gfp_flags, 0);
@@ -213,30 +372,41 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	 * up for us already:
 	 */
 	tmp = lookup_address(address, &level);
-	if (tmp != kpte) {
-		WARN_ON_ONCE(1);
+	if (tmp != kpte)
 		goto out_unlock;
-	}
 
-	address = __pa(address);
-	addr = address & LARGE_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
 #ifdef CONFIG_X86_32
 	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
 #endif
+	ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+
+#ifdef CONFIG_X86_64
+	if (level == PG_LEVEL_1G) {
+		pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+		pgprot_val(ref_prot) |= _PAGE_PSE;
+	}
+#endif
 
-	pgprot_val(ref_prot) &= ~_PAGE_NX;
-	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
-		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+	/*
+	 * Get the target pfn from the original entry:
+	 */
+	pfn = pte_pfn(*kpte);
+	for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
+		set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
 	/*
-	 * Install the new, split up pagetable. Important detail here:
+	 * Install the new, split up pagetable. Important details here:
 	 *
 	 * On Intel the NX bit of all levels must be cleared to make a
 	 * page executable. See section 4.13.2 of Intel 64 and IA-32
 	 * Architectures Software Developer's Manual).
+	 *
+	 * Mark the entry present. The current mapping might be
+	 * set to not present, which we preserved above.
 	 */
 	ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
+	pgprot_val(ref_prot) |= _PAGE_PRESENT;
 	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
 	base = NULL;
 
@@ -249,18 +419,12 @@ out_unlock:
 	return 0;
 }
 
-static int
-__change_page_attr(unsigned long address, unsigned long pfn,
-		   pgprot_t mask_set, pgprot_t mask_clr)
+static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
 {
+	int level, do_split, err;
 	struct page *kpte_page;
-	int level, err = 0;
 	pte_t *kpte;
 
-#ifdef CONFIG_X86_32
-	BUG_ON(pfn > max_low_pfn);
-#endif
-
 repeat:
 	kpte = lookup_address(address, &level);
 	if (!kpte)
@@ -271,23 +435,62 @@ repeat:
 	BUG_ON(PageCompound(kpte_page));
 
 	if (level == PG_LEVEL_4K) {
-		pgprot_t new_prot = pte_pgprot(*kpte);
 		pte_t new_pte, old_pte = *kpte;
+		pgprot_t new_prot = pte_pgprot(old_pte);
+
+		if(!pte_val(old_pte)) {
+			printk(KERN_WARNING "CPA: called for zero pte. "
+			       "vaddr = %lx cpa->vaddr = %lx\n", address,
+				cpa->vaddr);
+			WARN_ON(1);
+			return -EINVAL;
+		}
 
-		pgprot_val(new_prot) &= ~pgprot_val(mask_clr);
-		pgprot_val(new_prot) |= pgprot_val(mask_set);
+		pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+		pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
 
 		new_prot = static_protections(new_prot, address);
 
-		new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
-		BUG_ON(pte_pfn(new_pte) != pte_pfn(old_pte));
+		/*
+		 * We need to keep the pfn from the existing PTE,
+		 * after all we're only going to change it's attributes
+		 * not the memory it points to
+		 */
+		new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
+
+		/*
+		 * Do we really change anything ?
+		 */
+		if (pte_val(old_pte) != pte_val(new_pte)) {
+			set_pte_atomic(kpte, new_pte);
+			cpa->flushtlb = 1;
+		}
+		cpa->numpages = 1;
+		return 0;
+	}
+
+	/*
+	 * Check, whether we can keep the large page intact
+	 * and just change the pte:
+	 */
+	do_split = try_preserve_large_page(kpte, address, cpa);
+	/*
+	 * When the range fits into the existing large page,
+	 * return. cp->numpages and cpa->tlbflush have been updated in
+	 * try_large_page:
+	 */
+	if (do_split <= 0)
+		return do_split;
 
-		set_pte_atomic(kpte, new_pte);
-	} else {
-		err = split_large_page(kpte, address);
-		if (!err)
-			goto repeat;
+	/*
+	 * We have to split the large page:
+	 */
+	err = split_large_page(kpte, address);
+	if (!err) {
+		cpa->flushtlb = 1;
+		goto repeat;
 	}
+
 	return err;
 }
 
@@ -304,19 +507,14 @@ repeat:
  *
  * Modules and drivers should use the set_memory_* APIs instead.
  */
-
-#define HIGH_MAP_START	__START_KERNEL_map
-#define HIGH_MAP_END	(__START_KERNEL_map + KERNEL_TEXT_SIZE)
-
-static int
-change_page_attr_addr(unsigned long address, pgprot_t mask_set,
-		      pgprot_t mask_clr)
+static int change_page_attr_addr(struct cpa_data *cpa)
 {
-	unsigned long phys_addr = __pa(address);
-	unsigned long pfn = phys_addr >> PAGE_SHIFT;
 	int err;
+	unsigned long address = cpa->vaddr;
 
 #ifdef CONFIG_X86_64
+	unsigned long phys_addr = __pa(address);
+
 	/*
 	 * If we are inside the high mapped kernel range, then we
 	 * fixup the low mapping first. __va() returns the virtual
@@ -326,7 +524,7 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
 		address = (unsigned long) __va(phys_addr);
 #endif
 
-	err = __change_page_attr(address, pfn, mask_set, mask_clr);
+	err = __change_page_attr(address, cpa);
 	if (err)
 		return err;
 
@@ -339,42 +537,89 @@ change_page_attr_addr(unsigned long address, pgprot_t mask_set,
 		/*
 		 * Calc the high mapping address. See __phys_addr()
 		 * for the non obvious details.
+		 *
+		 * Note that NX and other required permissions are
+		 * checked in static_protections().
 		 */
 		address = phys_addr + HIGH_MAP_START - phys_base;
-		/* Make sure the kernel mappings stay executable */
-		pgprot_val(mask_clr) |= _PAGE_NX;
 
 		/*
 		 * Our high aliases are imprecise, because we check
 		 * everything between 0 and KERNEL_TEXT_SIZE, so do
 		 * not propagate lookup failures back to users:
 		 */
-		__change_page_attr(address, pfn, mask_set, mask_clr);
+		__change_page_attr(address, cpa);
 	}
 #endif
 	return err;
 }
 
-static int __change_page_attr_set_clr(unsigned long addr, int numpages,
-				      pgprot_t mask_set, pgprot_t mask_clr)
+static int __change_page_attr_set_clr(struct cpa_data *cpa)
 {
-	unsigned int i;
-	int ret;
+	int ret, numpages = cpa->numpages;
 
-	for (i = 0; i < numpages ; i++, addr += PAGE_SIZE) {
-		ret = change_page_attr_addr(addr, mask_set, mask_clr);
+	while (numpages) {
+		/*
+		 * Store the remaining nr of pages for the large page
+		 * preservation check.
+		 */
+		cpa->numpages = numpages;
+		ret = change_page_attr_addr(cpa);
 		if (ret)
 			return ret;
-	}
 
+		/*
+		 * Adjust the number of pages with the result of the
+		 * CPA operation. Either a large page has been
+		 * preserved or a single page update happened.
+		 */
+		BUG_ON(cpa->numpages > numpages);
+		numpages -= cpa->numpages;
+		cpa->vaddr += cpa->numpages * PAGE_SIZE;
+	}
 	return 0;
 }
 
+static inline int cache_attr(pgprot_t attr)
+{
+	return pgprot_val(attr) &
+		(_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
+}
+
 static int change_page_attr_set_clr(unsigned long addr, int numpages,
 				    pgprot_t mask_set, pgprot_t mask_clr)
 {
-	int ret = __change_page_attr_set_clr(addr, numpages, mask_set,
-					     mask_clr);
+	struct cpa_data cpa;
+	int ret, cache;
+
+	/*
+	 * Check, if we are requested to change a not supported
+	 * feature:
+	 */
+	mask_set = canon_pgprot(mask_set);
+	mask_clr = canon_pgprot(mask_clr);
+	if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
+		return 0;
+
+	cpa.vaddr = addr;
+	cpa.numpages = numpages;
+	cpa.mask_set = mask_set;
+	cpa.mask_clr = mask_clr;
+	cpa.flushtlb = 0;
+
+	ret = __change_page_attr_set_clr(&cpa);
+
+	/*
+	 * Check whether we really changed something:
+	 */
+	if (!cpa.flushtlb)
+		return ret;
+
+	/*
+	 * No need to flush, when we did not set any of the caching
+	 * attributes:
+	 */
+	cache = cache_attr(mask_set);
 
 	/*
 	 * On success we use clflush, when the CPU supports it to
@@ -383,9 +628,9 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	 * wbindv):
 	 */
 	if (!ret && cpu_has_clflush)
-		cpa_flush_range(addr, numpages);
+		cpa_flush_range(addr, numpages, cache);
 	else
-		cpa_flush_all();
+		cpa_flush_all(cache);
 
 	return ret;
 }
@@ -489,37 +734,26 @@ int set_pages_rw(struct page *page, int numpages)
 	return set_memory_rw(addr, numpages);
 }
 
-
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_CPA_DEBUG)
-static inline int __change_page_attr_set(unsigned long addr, int numpages,
-					 pgprot_t mask)
-{
-	return __change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
-}
-
-static inline int __change_page_attr_clear(unsigned long addr, int numpages,
-					   pgprot_t mask)
-{
-	return __change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
-}
-#endif
-
 #ifdef CONFIG_DEBUG_PAGEALLOC
 
 static int __set_pages_p(struct page *page, int numpages)
 {
-	unsigned long addr = (unsigned long)page_address(page);
+	struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+				.numpages = numpages,
+				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+				.mask_clr = __pgprot(0)};
 
-	return __change_page_attr_set(addr, numpages,
-				      __pgprot(_PAGE_PRESENT | _PAGE_RW));
+	return __change_page_attr_set_clr(&cpa);
 }
 
 static int __set_pages_np(struct page *page, int numpages)
 {
-	unsigned long addr = (unsigned long)page_address(page);
+	struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+				.numpages = numpages,
+				.mask_set = __pgprot(0),
+				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
 
-	return __change_page_attr_clear(addr, numpages,
-					__pgprot(_PAGE_PRESENT));
+	return __change_page_attr_set_clr(&cpa);
 }
 
 void kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index cb3aa470249..c7db504be1e 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -219,50 +219,39 @@ static inline void pgd_list_del(pgd_t *pgd)
 	list_del(&page->lru);
 }
 
+#define UNSHARED_PTRS_PER_PGD				\
+	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
 
-
-#if (PTRS_PER_PMD == 1)
-/* Non-PAE pgd constructor */
-static void pgd_ctor(void *pgd)
+static void pgd_ctor(void *p)
 {
+	pgd_t *pgd = p;
 	unsigned long flags;
 
-	/* !PAE, no pagetable sharing */
+	/* Clear usermode parts of PGD */
 	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
 
 	spin_lock_irqsave(&pgd_lock, flags);
 
-	/* must happen under lock */
-	clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
-			swapper_pg_dir + USER_PTRS_PER_PGD,
-			KERNEL_PGD_PTRS);
-	paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
-				__pa(swapper_pg_dir) >> PAGE_SHIFT,
-				USER_PTRS_PER_PGD,
-				KERNEL_PGD_PTRS);
-	pgd_list_add(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
-}
-#else  /* PTRS_PER_PMD > 1 */
-/* PAE pgd constructor */
-static void pgd_ctor(void *pgd)
-{
-	/* PAE, kernel PMD may be shared */
-
-	if (SHARED_KERNEL_PMD) {
-		clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+	/* If the pgd points to a shared pagetable level (either the
+	   ptes in non-PAE, or shared PMD in PAE), then just copy the
+	   references from swapper_pg_dir. */
+	if (PAGETABLE_LEVELS == 2 ||
+	    (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
+		clone_pgd_range(pgd + USER_PTRS_PER_PGD,
 				swapper_pg_dir + USER_PTRS_PER_PGD,
 				KERNEL_PGD_PTRS);
-	} else {
-		unsigned long flags;
+		paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
+					__pa(swapper_pg_dir) >> PAGE_SHIFT,
+					USER_PTRS_PER_PGD,
+					KERNEL_PGD_PTRS);
+	}
 
-		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
-		spin_lock_irqsave(&pgd_lock, flags);
+	/* list required to sync kernel mapping updates */
+	if (!SHARED_KERNEL_PMD)
 		pgd_list_add(pgd);
-		spin_unlock_irqrestore(&pgd_lock, flags);
-	}
+
+	spin_unlock_irqrestore(&pgd_lock, flags);
 }
-#endif	/* PTRS_PER_PMD */
 
 static void pgd_dtor(void *pgd)
 {
@@ -276,9 +265,6 @@ static void pgd_dtor(void *pgd)
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
-#define UNSHARED_PTRS_PER_PGD				\
-	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
-
 #ifdef CONFIG_X86_PAE
 /*
  * Mop up any pmd pages which may still be attached to the pgd.
@@ -387,13 +373,6 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 
 void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 {
-	/* This is called just after the pmd has been detached from
-	   the pgd, which requires a full tlb flush to be recognized
-	   by the CPU.  Rather than incurring multiple tlb flushes
-	   while the address space is being pulled down, make the tlb
-	   gathering machinery do a full flush when we're done. */
-	tlb->fullmm = 1;
-
 	paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
 	tlb_remove_page(tlb, virt_to_page(pmd));
 }
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c
index f5f165f69e0..55270c26237 100644
--- a/arch/x86/pci/numa.c
+++ b/arch/x86/pci/numa.c
@@ -5,36 +5,62 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/nodemask.h>
+#include <mach_apic.h>
 #include "pci.h"
 
+#define XQUAD_PORTIO_BASE 0xfe400000
+#define XQUAD_PORTIO_QUAD 0x40000  /* 256k per quad. */
+
 #define BUS2QUAD(global) (mp_bus_id_to_node[global])
 #define BUS2LOCAL(global) (mp_bus_id_to_local[global])
 #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
 
+extern void *xquad_portio;    /* Where the IO area was mapped */
+#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
+
 #define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
 	(0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3))
 
+static void write_cf8(unsigned bus, unsigned devfn, unsigned reg)
+{
+	unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg);
+	if (xquad_portio)
+		writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus)));
+	else
+		outl(val, 0xCF8);
+}
+
 static int pci_conf1_mq_read(unsigned int seg, unsigned int bus,
 			     unsigned int devfn, int reg, int len, u32 *value)
 {
 	unsigned long flags;
+	void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
 
 	if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255))
 		return -EINVAL;
 
 	spin_lock_irqsave(&pci_config_lock, flags);
 
-	outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
+	write_cf8(bus, devfn, reg);
 
 	switch (len) {
 	case 1:
-		*value = inb_quad(0xCFC + (reg & 3), BUS2QUAD(bus));
+		if (xquad_portio)
+			*value = readb(adr + (reg & 3));
+		else
+			*value = inb(0xCFC + (reg & 3));
 		break;
 	case 2:
-		*value = inw_quad(0xCFC + (reg & 2), BUS2QUAD(bus));
+		if (xquad_portio)
+			*value = readw(adr + (reg & 2));
+		else
+			*value = inw(0xCFC + (reg & 2));
 		break;
 	case 4:
-		*value = inl_quad(0xCFC, BUS2QUAD(bus));
+		if (xquad_portio)
+			*value = readl(adr);
+		else
+			*value = inl(0xCFC);
 		break;
 	}
 
@@ -47,23 +73,33 @@ static int pci_conf1_mq_write(unsigned int seg, unsigned int bus,
 			      unsigned int devfn, int reg, int len, u32 value)
 {
 	unsigned long flags;
+	void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus));
 
 	if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) 
 		return -EINVAL;
 
 	spin_lock_irqsave(&pci_config_lock, flags);
 
-	outl_quad(PCI_CONF1_MQ_ADDRESS(bus, devfn, reg), 0xCF8, BUS2QUAD(bus));
+	write_cf8(bus, devfn, reg);
 
 	switch (len) {
 	case 1:
-		outb_quad((u8)value, 0xCFC + (reg & 3), BUS2QUAD(bus));
+		if (xquad_portio)
+			writeb(value, adr + (reg & 3));
+		else
+			outb((u8)value, 0xCFC + (reg & 3));
 		break;
 	case 2:
-		outw_quad((u16)value, 0xCFC + (reg & 2), BUS2QUAD(bus));
+		if (xquad_portio)
+			writew(value, adr + (reg & 2));
+		else
+			outw((u16)value, 0xCFC + (reg & 2));
 		break;
 	case 4:
-		outl_quad((u32)value, 0xCFC, BUS2QUAD(bus));
+		if (xquad_portio)
+			writel(value, adr + reg);
+		else
+			outl((u32)value, 0xCFC);
 		break;
 	}
author	Linus Torvalds <torvalds@linux-foundation.org>	2008-02-04 09:16:03 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-02-04 09:16:03 -0800
commit	d2fc0bacd5c438cb459fdf531eff00ab18422a00 (patch)
tree	d0ea52e4d2ad2fac12e19eaf6891c6af98353cfc /arch
parent	93890b71a34f9490673a6edd56b61c2124215e46 (diff)
parent	795d45b22c079946332bf3825afefe5a981a97b6 (diff)