diff options
82 files changed, 1481 insertions, 1097 deletions
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index ffe1c062088..e566affeed7 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -7,10 +7,12 @@ Machine check mce=off disable machine check mce=bootlog Enable logging of machine checks left over from booting. - Disabled by default because some BIOS leave bogus ones. + Disabled by default on AMD because some BIOS leave bogus ones. If your BIOS doesn't do that it's a good idea to enable though to make sure you log even machine check events that result - in a reboot. + in a reboot. On Intel systems it is enabled by default. + mce=nobootlog + Disable boot machine check logging. mce=tolerancelevel (number) 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic, 3: never panic or exit (for testing) @@ -122,6 +124,9 @@ SMP cpumask=MASK only use cpus with bits set in mask + additional_cpus=NUM Allow NUM more CPUs for hotplug + (defaults are specified by the BIOS or half the available CPUs) + NUMA numa=off Only set up a single NUMA node spanning all memory. @@ -188,6 +193,9 @@ Debugging kstack=N Print that many words from the kernel stack in oops dumps. + pagefaulttrace Dump all page faults. Only useful for extreme debugging + and will create a lot of output. + Misc noreplacement Don't replace instructions with more appropiate ones diff --git a/Documentation/x86_64/mm.txt b/Documentation/x86_64/mm.txt index 662b73971a6..133561b9cb0 100644 --- a/Documentation/x86_64/mm.txt +++ b/Documentation/x86_64/mm.txt @@ -6,7 +6,7 @@ Virtual memory map with 4 level page tables: 0000000000000000 - 00007fffffffffff (=47bits) user space, different per mm hole caused by [48:63] sign extension ffff800000000000 - ffff80ffffffffff (=40bits) guard hole -ffff810000000000 - ffffc0ffffffffff (=46bits) direct mapping of phys. memory +ffff810000000000 - ffffc0ffffffffff (=46bits) direct mapping of all phys. memory ffffc10000000000 - ffffc1ffffffffff (=40bits) hole ffffc20000000000 - ffffe1ffffffffff (=45bits) vmalloc/ioremap space ... unused hole ... @@ -14,6 +14,10 @@ ffffffff80000000 - ffffffff82800000 (=40MB) kernel text mapping, from phys 0 ... unused hole ... ffffffff88000000 - fffffffffff00000 (=1919MB) module mapping space +The direct mapping covers all memory in the system upto the highest +memory address (this means in some cases it can also include PCI memory +holes) + vmalloc space is lazily synchronized into the different PML4 pages of the processes using the page fault handler, with init_level4_pgt as reference. diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index b66c13c0cc0..f36677241ec 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -39,17 +39,14 @@ #ifdef CONFIG_X86_64 -static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ -} extern void __init clustered_apic_check(void); -static inline int ioapic_setup_disabled(void) -{ - return 0; -} +extern int gsi_irq_sharing(int gsi); #include <asm/proto.h> +static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } + + #else /* X86 */ #ifdef CONFIG_X86_LOCAL_APIC @@ -57,6 +54,8 @@ static inline int ioapic_setup_disabled(void) #include <mach_mpparse.h> #endif /* CONFIG_X86_LOCAL_APIC */ +static inline int gsi_irq_sharing(int gsi) { return gsi; } + #endif /* X86 */ #define BAD_MADT_ENTRY(entry, end) ( \ @@ -459,7 +458,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) *irq = IO_APIC_VECTOR(gsi); else #endif - *irq = gsi; + *irq = gsi_irq_sharing(gsi); return 0; } @@ -543,7 +542,7 @@ acpi_scan_rsdp(unsigned long start, unsigned long length) * RSDP signature. */ for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *)(start + offset), "RSD PTR ", sig_len)) + if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len)) continue; return (start + offset); } diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 53a1681cd96..e344ef88cfc 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -206,9 +206,9 @@ static void __init init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); if (cpuid_eax(0x80000000) >= 0x80000008) { - c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - if (c->x86_num_cores & (c->x86_num_cores - 1)) - c->x86_num_cores = 1; + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + if (c->x86_max_cores & (c->x86_max_cores - 1)) + c->x86_max_cores = 1; } #ifdef CONFIG_X86_HT @@ -217,15 +217,15 @@ static void __init init_amd(struct cpuinfo_x86 *c) * distingush the cores. Assumes number of cores is a power * of two. */ - if (c->x86_num_cores > 1) { + if (c->x86_max_cores > 1) { int cpu = smp_processor_id(); unsigned bits = 0; - while ((1 << bits) < c->x86_num_cores) + while ((1 << bits) < c->x86_max_cores) bits++; cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); phys_proc_id[cpu] >>= bits; printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_num_cores, cpu_core_id[cpu]); + cpu, c->x86_max_cores, cpu_core_id[cpu]); } #endif } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index c145fb30002..31e344b26ba 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -231,10 +231,10 @@ static void __init early_cpu_detect(void) cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = (tfms >> 8) & 15; c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) { + if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - } c->x86_mask = tfms & 15; if (cap0 & (1<<19)) c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; @@ -333,7 +333,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_num_cores = 1; + c->x86_max_cores = 1; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -443,52 +443,44 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) void __devinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; - int index_msb, tmp; + int index_msb, core_bits; int cpu = smp_processor_id(); + cpuid(1, &eax, &ebx, &ecx, &edx); + + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; - cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); } else if (smp_num_siblings > 1 ) { - index_msb = 31; if (smp_num_siblings > NR_CPUS) { printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); smp_num_siblings = 1; return; } - tmp = smp_num_siblings; - while ((tmp & 0x80000000 ) == 0) { - tmp <<=1 ; - index_msb--; - } - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + + index_msb = get_count_order(smp_num_siblings); phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); printk(KERN_INFO "CPU: Physical Processor ID: %d\n", phys_proc_id[cpu]); - smp_num_siblings = smp_num_siblings / c->x86_num_cores; + smp_num_siblings = smp_num_siblings / c->x86_max_cores; - tmp = smp_num_siblings; - index_msb = 31; - while ((tmp & 0x80000000) == 0) { - tmp <<=1 ; - index_msb--; - } + index_msb = get_count_order(smp_num_siblings) ; - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + core_bits = get_count_order(c->x86_max_cores); - cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & + ((1 << core_bits) - 1); - if (c->x86_num_cores > 1) + if (c->x86_max_cores > 1) printk(KERN_INFO "CPU: Processor Core ID: %d\n", cpu_core_id[cpu]); } diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index c28d26fb5f2..5e2da704f0f 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -158,7 +158,7 @@ static void __devinit init_intel(struct cpuinfo_x86 *c) if ( p ) strcpy(c->x86_model_id, p); - c->x86_num_cores = num_cpu_cores(c); + c->x86_max_cores = num_cpu_cores(c); detect_ht(c); diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 4dc42a189ae..fbfd374aa33 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -293,29 +293,45 @@ static struct _cpuid4_info *cpuid4_info[NR_CPUS]; #ifdef CONFIG_SMP static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) { - struct _cpuid4_info *this_leaf; + struct _cpuid4_info *this_leaf, *sibling_leaf; unsigned long num_threads_sharing; -#ifdef CONFIG_X86_HT - struct cpuinfo_x86 *c = cpu_data + cpu; -#endif + int index_msb, i; + struct cpuinfo_x86 *c = cpu_data; this_leaf = CPUID4_INFO_IDX(cpu, index); num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; if (num_threads_sharing == 1) cpu_set(cpu, this_leaf->shared_cpu_map); -#ifdef CONFIG_X86_HT - else if (num_threads_sharing == smp_num_siblings) - this_leaf->shared_cpu_map = cpu_sibling_map[cpu]; - else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings)) - this_leaf->shared_cpu_map = cpu_core_map[cpu]; - else - printk(KERN_DEBUG "Number of CPUs sharing cache didn't match " - "any known set of CPUs\n"); -#endif + else { + index_msb = get_count_order(num_threads_sharing); + + for_each_online_cpu(i) { + if (c[i].apicid >> index_msb == + c[cpu].apicid >> index_msb) { + cpu_set(i, this_leaf->shared_cpu_map); + if (i != cpu && cpuid4_info[i]) { + sibling_leaf = CPUID4_INFO_IDX(i, index); + cpu_set(cpu, sibling_leaf->shared_cpu_map); + } + } + } + } +} +static void __devinit cache_remove_shared_cpu_map(unsigned int cpu, int index) +{ + struct _cpuid4_info *this_leaf, *sibling_leaf; + int sibling; + + this_leaf = CPUID4_INFO_IDX(cpu, index); + for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { + sibling_leaf = CPUID4_INFO_IDX(sibling, index); + cpu_clear(cpu, sibling_leaf->shared_cpu_map); + } } #else static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} +static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {} #endif static void free_cache_attributes(unsigned int cpu) @@ -574,8 +590,10 @@ static void __cpuexit cache_remove_dev(struct sys_device * sys_dev) unsigned int cpu = sys_dev->id; unsigned long i; - for (i = 0; i < num_cache_leaves; i++) + for (i = 0; i < num_cache_leaves; i++) { + cache_remove_shared_cpu_map(cpu, i); kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); + } kobject_unregister(cache_kobject[cpu]); cpuid4_cache_sysfs_exit(cpu); return; diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index dd4ebd6af7e..1e9db198c44 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -626,6 +626,14 @@ void __init mtrr_bp_init(void) if (cpuid_eax(0x80000000) >= 0x80000008) { u32 phys_addr; phys_addr = cpuid_eax(0x80000008) & 0xff; + /* CPUID workaround for Intel 0F33/0F34 CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 0xF && + boot_cpu_data.x86_model == 0x3 && + (boot_cpu_data.x86_mask == 0x3 || + boot_cpu_data.x86_mask == 0x4)) + phys_addr = 36; + size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); size_and_mask = ~size_or_mask & 0xfff00000; } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 41b871ecf4b..e7921315ae9 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -94,12 +94,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (c->x86_cache_size >= 0) seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); #ifdef CONFIG_X86_HT - if (c->x86_num_cores * smp_num_siblings > 1) { + if (c->x86_max_cores * smp_num_siblings > 1) { seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]); - seq_printf(m, "siblings\t: %d\n", - c->x86_num_cores * smp_num_siblings); + seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]); - seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } #endif diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index bc5a9d97466..d16520da455 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -72,9 +72,11 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; /* Core ID of each logical CPU */ int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; +/* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); +/* representing HT and core siblings of each logical CPU */ cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); @@ -442,35 +444,60 @@ static void __devinit smp_callin(void) static int cpucount; +/* representing cpus for which sibling maps can be computed */ +static cpumask_t cpu_sibling_setup_map; + static inline void set_cpu_sibling_map(int cpu) { int i; + struct cpuinfo_x86 *c = cpu_data; + + cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (cpu_core_id[cpu] == cpu_core_id[i]) { + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (phys_proc_id[cpu] == phys_proc_id[i] && + cpu_core_id[cpu] == cpu_core_id[i]) { cpu_set(i, cpu_sibling_map[cpu]); cpu_set(cpu, cpu_sibling_map[i]); + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } - if (current_cpu_data.x86_num_cores > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (phys_proc_id[cpu] == ph |