diff options
52 files changed, 2767 insertions, 490 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 09ad7450647..556b4187d01 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1388,6 +1388,12 @@ and is between 256 and 4096 characters. It is defined in the file nolapic_timer [X86-32,APIC] Do not use the local APIC timer. + nox2apic [X86-64,APIC] Do not enable x2APIC mode. + + x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of + default x2apic cluster mode on platforms + supporting x2apic. + noltlbs [PPC] Do not use large page/tlb entries for kernel lowmem mapping on PPC40x. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 96e0c2ebc38..baca5545500 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1650,6 +1650,14 @@ config DMAR_FLOPPY_WA workaround will setup a 1:1 mapping for the first 16M to make floppy (an ISA device) work. +config INTR_REMAP + bool "Support for Interrupt Remapping (EXPERIMENTAL)" + depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL + help + Supports Interrupt remapping for IO-APIC and MSI devices. + To use x2apic mode in the CPU's which support x2APIC enhancements or + to support platforms with CPU's having > 8 bit APIC ID, say Y. + source "drivers/pci/pcie/Kconfig" source "drivers/pci/Kconfig" diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index da140611bb5..673f1d12b42 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -102,6 +102,8 @@ obj-$(CONFIG_OLPC) += olpc.o # 64 bit specific files ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o + obj-y += genx2apic_cluster.o + obj-y += genx2apic_phys.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f489d7a9be9..b41b27af33e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -761,7 +761,7 @@ static void __init acpi_register_lapic_address(unsigned long address) set_fixmap_nocache(FIX_APIC_BASE, address); if (boot_cpu_physical_apicid == -1U) { - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); #ifdef CONFIG_X86_32 apic_version[boot_cpu_physical_apicid] = GET_APIC_VERSION(apic_read(APIC_LVR)); @@ -1337,7 +1337,9 @@ static void __init acpi_process_madt(void) acpi_ioapic = 1; smp_found_config = 1; +#ifdef CONFIG_X86_32 setup_apic_routing(); +#endif } } if (error == -EINVAL) { diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a437d027f20..34101962fb0 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -145,13 +145,18 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -void apic_wait_icr_idle(void) +/* + * Paravirt kernels also might be using these below ops. So we still + * use generic apic_read()/apic_write(), which might be pointing to different + * ops in PARAVIRT case. + */ +void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_apic_wait_icr_idle(void) +u32 safe_xapic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -167,6 +172,35 @@ u32 safe_apic_wait_icr_idle(void) return send_status; } +void xapic_icr_write(u32 low, u32 id) +{ + apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write_around(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return icr1 | ((u64)icr2 << 32); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .write_atomic = native_apic_mem_write_atomic, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; +EXPORT_SYMBOL_GPL(apic_ops); + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ @@ -1201,7 +1235,7 @@ void __init init_apic_mappings(void) * default configuration (or the MP table is broken). */ if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } @@ -1241,7 +1275,7 @@ int __init APIC_init_uniprocessor(void) * might be zero if read from MP tables. Get it from LAPIC. */ #ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); #endif physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1e3d32e27c1..c75f58a66d8 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -27,6 +27,7 @@ #include <linux/clockchips.h> #include <linux/acpi_pmtmr.h> #include <linux/module.h> +#include <linux/dmar.h> #include <asm/atomic.h> #include <asm/smp.h> @@ -39,6 +40,7 @@ #include <asm/proto.h> #include <asm/timex.h> #include <asm/apic.h> +#include <asm/i8259.h> #include <mach_ipi.h> #include <mach_apic.h> @@ -46,6 +48,11 @@ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; int disable_apic; +int disable_x2apic; +int x2apic; + +/* x2apic enabled before OS handover */ +int x2apic_preenabled; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; @@ -119,13 +126,13 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } -void apic_wait_icr_idle(void) +void xapic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) cpu_relax(); } -u32 safe_apic_wait_icr_idle(void) +u32 safe_xapic_wait_icr_idle(void) { u32 send_status; int timeout; @@ -141,6 +148,71 @@ u32 safe_apic_wait_icr_idle(void) return send_status; } +void xapic_icr_write(u32 low, u32 id) +{ + apic_write(APIC_ICR2, id << 24); + apic_write(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return (icr1 | ((u64)icr2 << 32)); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .write_atomic = native_apic_mem_write_atomic, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; + +EXPORT_SYMBOL_GPL(apic_ops); + +static void x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static u32 safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +void x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +u64 x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +static struct apic_ops x2apic_ops = { + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .write_atomic = native_apic_msr_write, + .icr_read = x2apic_icr_read, + .icr_write = x2apic_icr_write, + .wait_icr_idle = x2apic_wait_icr_idle, + .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, +}; + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ @@ -626,10 +698,10 @@ int __init verify_local_APIC(void) /* * The ID register is read/write in a real APIC. */ - reg0 = read_apic_id(); + reg0 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = read_apic_id(); + reg1 = apic_read(APIC_ID); apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); apic_write(APIC_ID, reg0); if (reg1 != (reg0 ^ APIC_ID_MASK)) @@ -830,6 +902,125 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } +void check_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + + if (msr & X2APIC_ENABLE) { + printk("x2apic enabled by BIOS, switching to x2apic ops\n"); + x2apic_preenabled = x2apic = 1; + apic_ops = &x2apic_ops; + } +} + +void enable_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + if (!(msr & X2APIC_ENABLE)) { + printk("Enabling x2apic\n"); + wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); + } +} + +void enable_IR_x2apic(void) +{ +#ifdef CONFIG_INTR_REMAP + int ret; + unsigned long flags; + + if (!cpu_has_x2apic) + return; + + if (!x2apic_preenabled && disable_x2apic) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of nox2apic\n"); + return; + } + + if (x2apic_preenabled && disable_x2apic) + panic("Bios already enabled x2apic, can't enforce nox2apic"); + + if (!x2apic_preenabled && skip_ioapic_setup) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of skipping io-apic setup\n"); + return; + } + + ret = dmar_table_init(); + if (ret) { + printk(KERN_INFO + "dmar_table_init() failed with %d:\n", ret); + + if (x2apic_preenabled) + panic("x2apic enabled by bios. But IR enabling failed"); + else + printk(KERN_INFO + "Not enabling x2apic,Intr-remapping\n"); + return; + } + + local_irq_save(flags); + mask_8259A(); + save_mask_IO_APIC_setup(); + + ret = enable_intr_remapping(1); + + if (ret && x2apic_preenabled) { + local_irq_restore(flags); + panic("x2apic enabled by bios. But IR enabling failed"); + } + + if (ret) + goto end; + + if (!x2apic) { + x2apic = 1; + apic_ops = &x2apic_ops; + enable_x2apic(); + } +end: + if (ret) + /* + * IR enabling failed + */ + restore_IO_APIC_setup(); + else + reinit_intr_remapped_IO_APIC(x2apic_preenabled); + + unmask_8259A(); + local_irq_restore(flags); + + if (!ret) { + if (!x2apic_preenabled) + printk(KERN_INFO + "Enabled x2apic and interrupt-remapping\n"); + else + printk(KERN_INFO + "Enabled Interrupt-remapping\n"); + } else + printk(KERN_ERR + "Failed to enable Interrupt-remapping and x2apic\n"); +#else + if (!cpu_has_x2apic) + return; + + if (x2apic_preenabled) + panic("x2apic enabled prior OS handover," + " enable CONFIG_INTR_REMAP"); + + printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " + " and x2apic\n"); +#endif + + return; +} + /* * Detect and enable local APICs on non-SMP boards. * Original code written by Keir Fraser. @@ -869,7 +1060,7 @@ void __init early_init_lapic_mapping(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } /** @@ -877,6 +1068,11 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { + if (x2apic) { + boot_cpu_physical_apicid = read_apic_id(); + return; + } + /* * If no local APIC can be found then set up a fake all * zeroes page to simulate the local APIC and another @@ -896,7 +1092,7 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); + boot_cpu_physical_apicid = read_apic_id(); } /* @@ -915,6 +1111,9 @@ int __init APIC_init_uniprocessor(void) return -1; } + enable_IR_x2apic(); + setup_apic_routing(); + verify_local_APIC(); connect_bsp_APIC(); @@ -1096,6 +1295,11 @@ void __cpuinit generic_processor_info(int apicid, int version) cpu_set(cpu, cpu_present_map); } +int hard_smp_processor_id(void) +{ + return read_apic_id(); +} + /* * Power management */ @@ -1132,7 +1336,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) maxlvt = lapic_get_maxlvt(); - apic_pm_state.apic_id = read_apic_id(); + apic_pm_state.apic_id = apic_read(APIC_ID); apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); apic_pm_state.apic_ldr = apic_read(APIC_LDR); apic_pm_state.apic_dfr = apic_read(APIC_DFR); @@ -1167,10 +1371,14 @@ static int lapic_resume(struct sys_device *dev) maxlvt = lapic_get_maxlvt(); local_irq_save(flags); - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); + if (!x2apic) { + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + } else + enable_x2apic(); + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); apic_write(APIC_DFR, apic_pm_state.apic_dfr); @@ -1310,6 +1518,15 @@ __cpuinit int apic_is_clustered_box(void) return (clusters > 2); } +static __init int setup_nox2apic(char *str) +{ + disable_x2apic = 1; + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC); + return 0; +} +early_param("nox2apic", setup_nox2apic); + + /* * APIC command line parameters */ diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 7b8cc72feb4..c6bee77ca9e 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -608,6 +608,8 @@ void __cpuinit cpu_init(void) barrier(); check_efer(); + if (cpu != 0 && x2apic) + enable_x2apic(); /* * set up and load the per-CPU TSS diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index e43ad4ad4cb..0bf4d37a048 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c @@ -45,7 +45,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = { /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", + NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1fa8be5bd21..3940d8161f8 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -16,6 +16,7 @@ #include <linux/ctype.h> #include <linux/init.h> #include <linux/hardirq.h> +#include <linux/dmar.h> #include <asm/smp.h> #include <asm/ipi.h> @@ -29,6 +30,15 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); struct genapic __read_mostly *genapic = &apic_flat; +static int x2apic_phys = 0; + +static int set_x2apic_phys_mode(char *arg) +{ + x2apic_phys = 1; + return 0; +} +early_param("x2apic_phys", set_x2apic_phys_mode); + static enum uv_system_type uv_system_type; /* @@ -38,7 +48,12 @@ void __init setup_apic_routing(void) { if (uv_system_type == UV_NON_UNIQUE_APIC) genapic = &apic_x2apic_uv_x; - else + else if (cpu_has_x2apic && intr_remapping_enabled) { + if (x2apic_phys) + genapic = &apic_x2apic_phys; + else + genapic = &apic_x2apic_cluster; + } else #ifdef CONFIG_ACPI /* * Quirk: some x86_64 machines can only use physical APIC mode @@ -61,7 +76,7 @@ void __init setup_apic_routing(void) /* Same for both flat and physical. */ -void send_IPI_self(int vector) +void apic_send_IPI_self(int vector) { __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); } @@ -79,17 +94,6 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -unsigned int read_apic_id(void) -{ - unsigned int id; - - WARN_ON(preemptible() && num_online_cpus() > 1); - id = apic_read(APIC_ID); - if (uv_system_type >= UV_X2APIC) - id |= __get_cpu_var(x2apic_extra_bits); - return id; -} - enum uv_system_type get_uv_system_type(void) { return uv_system_type; diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 1a9c68845ee..2c973cbf054 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -15,9 +15,11 @@ #include <linux/kernel.h> #include <linux/ctype.h> #include <linux/init.h> +#include <linux/hardirq.h> #include <asm/smp.h> #include <asm/ipi.h> #include <asm/genapic.h> +#include <mach_apicdef.h> static cpumask_t flat_target_cpus(void) { @@ -95,9 +97,33 @@ static void flat_send_IPI_all(int vector) __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } +static unsigned int get_apic_id(unsigned long x) +{ + unsigned int id; + + id = (((x)>>24) & 0xFFu); + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = ((id & 0xFFu)<<24); + return x; +} + +static unsigned int read_xapic_id(void) +{ + unsigned int id; + + id = get_apic_id(apic_read(APIC_ID)); + return id; +} + static int flat_apic_id_registered(void) { - return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); + return physid_isset(read_xapic_id(), phys_cpu_present_map); } static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) @@ -121,8 +147,12 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, + .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFu<<24), }; /* @@ -185,6 +215,10 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, + .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, .phys_pkg_id = phys_pkg_id, + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = (0xFFu<<24), }; diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c new file mode 100644 index 00000000000..40bc0140d89 --- /dev/null +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -0,0 +1,153 @@ +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/init.h> +#include <asm/smp.h> +#include <asm/ipi.h> +#include <asm/genapic.h> + +DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); + +/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ + +static cpumask_t x2apic_targe |