diff options
Diffstat (limited to 'arch/x86/kernel/apic')
| -rw-r--r-- | arch/x86/kernel/apic/Makefile | 22 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/apic.c | 981 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/apic_flat_64.c | 120 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/apic_noop.c | 32 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/apic_numachip.c | 264 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/bigsmp_32.c | 96 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/es7000_32.c | 761 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/hw_nmi.c | 93 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 2378 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/ipi.c | 15 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/nmi.c | 567 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/numaq_32.c | 550 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/probe_32.c | 152 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/probe_64.c | 81 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/summit_32.c | 568 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/x2apic_cluster.c | 301 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/x2apic_phys.c | 170 | ||||
| -rw-r--r-- | arch/x86/kernel/apic/x2apic_uv_x.c | 507 |
18 files changed, 2729 insertions, 4929 deletions
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 910f20b457c..dcb5b15401c 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,23 +2,23 @@ # Makefile for local APIC drivers and for the IO-APIC code # -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o -ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) -obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o -endif -obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o +obj-y += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o ifeq ($(CONFIG_X86_64),y) -obj-y += apic_flat_64.o -obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o -obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o +# APIC probe will depend on the listing order here +obj-$(CONFIG_X86_NUMACHIP) += apic_numachip.o obj-$(CONFIG_X86_UV) += x2apic_uv_x.o +obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o +obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o +obj-y += apic_flat_64.o endif +# APIC probe will depend on the listing order here obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o -obj-$(CONFIG_X86_NUMAQ) += numaq_32.o -obj-$(CONFIG_X86_ES7000) += es7000_32.o -obj-$(CONFIG_X86_SUMMIT) += summit_32.o + +# For 32bit, probe_32 need to be listed last +obj-$(CONFIG_X86_LOCAL_APIC) += probe_$(BITS).o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3f838d53739..ad28db7e6bd 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -24,41 +24,45 @@ #include <linux/ftrace.h> #include <linux/ioport.h> #include <linux/module.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/delay.h> #include <linux/timex.h> +#include <linux/i8253.h> #include <linux/dmar.h> #include <linux/init.h> #include <linux/cpu.h> #include <linux/dmi.h> -#include <linux/nmi.h> #include <linux/smp.h> #include <linux/mm.h> +#include <asm/trace/irq_vectors.h> +#include <asm/irq_remapping.h> #include <asm/perf_event.h> #include <asm/x86_init.h> #include <asm/pgalloc.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/mpspec.h> -#include <asm/i8253.h> #include <asm/i8259.h> #include <asm/proto.h> #include <asm/apic.h> +#include <asm/io_apic.h> #include <asm/desc.h> #include <asm/hpet.h> #include <asm/idle.h> #include <asm/mtrr.h> +#include <asm/time.h> #include <asm/smp.h> #include <asm/mce.h> -#include <asm/kvm_para.h> #include <asm/tsc.h> +#include <asm/hypervisor.h> unsigned int num_processors; -unsigned disabled_cpus __cpuinitdata; +unsigned disabled_cpus; /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; +EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); /* * The highest APIC ID seen during enumeration. @@ -71,29 +75,30 @@ unsigned int max_physical_apicid; physid_mask_t phys_cpu_present_map; /* + * Processor to be disabled specified by kernel parameter + * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to + * avoid undefined behaviour caused by sending INIT from AP to BSP. + */ +static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID; + +/* * Map cpu index to physical APIC ID */ -DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); #ifdef CONFIG_X86_32 + /* - * Knob to control our willingness to enable the local APIC. - * - * +1=force-enable - */ -static int force_enable_local_apic; -/* - * APIC command line parameters + * On x86_32, the mapping between cpu and logical apicid may vary + * depending on apic in use. The following early percpu variable is + * used for the mapping. This is where the behaviors of x86_64 and 32 + * actually diverge. Let's keep it ugly for now. */ -static int __init parse_lapic(char *arg) -{ - force_enable_local_apic = 1; - return 0; -} -early_param("lapic", parse_lapic); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID); + /* Local APIC was disabled by the BIOS and enabled by the kernel */ static int enabled_via_apicbase; @@ -122,6 +127,29 @@ static inline void imcr_apic_to_pic(void) } #endif +/* + * Knob to control our willingness to enable the local APIC. + * + * +1=force-enable + */ +static int force_enable_local_apic __initdata; + +/* Control whether x2APIC mode is enabled or not */ +static bool nox2apic __initdata; + +/* + * APIC command line parameters + */ +static int __init parse_lapic(char *arg) +{ + if (config_enabled(CONFIG_X86_32) && !arg) + force_enable_local_apic = 1; + else if (arg && !strncmp(arg, "notscdeadline", 13)) + setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); + return 0; +} +early_param("lapic", parse_lapic); + #ifdef CONFIG_X86_64 static int apic_calibrate_pmtmr __initdata; static __init int setup_apicpmtimer(char *s) @@ -136,16 +164,25 @@ __setup("apicpmtimer", setup_apicpmtimer); int x2apic_mode; #ifdef CONFIG_X86_X2APIC /* x2apic enabled before OS handover */ -static int x2apic_preenabled; -static __init int setup_nox2apic(char *str) +int x2apic_preenabled; +static int x2apic_disabled; +static int __init setup_nox2apic(char *str) { if (x2apic_enabled()) { - pr_warning("Bios already enabled x2apic, " - "can't enforce nox2apic"); - return 0; - } + int apicid = native_apic_msr_read(APIC_ID); + + if (apicid >= 255) { + pr_warning("Apicid: %08x, cannot enforce nox2apic\n", + apicid); + return 0; + } + + pr_warning("x2apic already enabled. will disable it\n"); + } else + setup_clear_cpu_cap(X86_FEATURE_X2APIC); + + nox2apic = true; - setup_clear_cpu_cap(X86_FEATURE_X2APIC); return 0; } early_param("nox2apic", setup_nox2apic); @@ -154,7 +191,7 @@ early_param("nox2apic", setup_nox2apic); unsigned long mp_lapic_addr; int disable_apic; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int disable_apic_timer __cpuinitdata; +static int disable_apic_timer __initdata; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -176,31 +213,10 @@ static struct resource lapic_resource = { .flags = IORESOURCE_MEM | IORESOURCE_BUSY, }; -static unsigned int calibration_result; +unsigned int lapic_timer_frequency = 0; -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt); -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt); -static void lapic_timer_broadcast(const struct cpumask *mask); static void apic_pm_activate(void); -/* - * The local apic timer can be used for any function which is CPU local. - */ -static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT - | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_mode = lapic_timer_setup, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, -}; -static DEFINE_PER_CPU(struct clock_event_device, lapic_events); - static unsigned long apic_phys; /* @@ -239,7 +255,7 @@ static int modern_apic(void) * right after this call apic become NOOP driven * so apic->write/read doesn't do anything */ -void apic_disable(void) +static void __init apic_disable(void) { pr_info("APIC: switched to apic NOOP\n"); apic = &apic_noop; @@ -261,6 +277,7 @@ u32 native_safe_apic_wait_icr_idle(void) send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; if (!send_status) break; + inc_irq_stat(icr_read_retry_count); udelay(100); } while (timeout++ < 1000); @@ -269,8 +286,12 @@ u32 native_safe_apic_wait_icr_idle(void) void native_apic_icr_write(u32 low, u32 id) { + unsigned long flags; + + local_irq_save(flags); apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); apic_write(APIC_ICR, low); + local_irq_restore(flags); } u64 native_apic_icr_read(void) @@ -283,23 +304,6 @@ u64 native_apic_icr_read(void) return icr1 | ((u64)icr2 << 32); } -/** - * enable_NMI_through_LVT0 - enable NMI through local vector table 0 - */ -void __cpuinit enable_NMI_through_LVT0(void) -{ - unsigned int v; - - /* unmask and set to NMI */ - v = APIC_DM_NMI; - - /* Level triggered for 82489DX (32bit mode) */ - if (!lapic_is_integrated()) - v |= APIC_LVT_LEVEL_TRIGGER; - - apic_write(APIC_LVT0, v); -} - #ifdef CONFIG_X86_32 /** * get_physical_broadcast - Get number of physical broadcast IDs @@ -331,6 +335,7 @@ int lapic_get_maxlvt(void) /* Clock divisor */ #define APIC_DIVISOR 16 +#define TSC_DIVISOR 32 /* * This function sets up the local APIC timer, with a timeout of @@ -349,6 +354,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) lvtt_value = LOCAL_TIMER_VECTOR; if (!oneshot) lvtt_value |= APIC_LVT_TIMER_PERIODIC; + else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) + lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE; + if (!lapic_is_integrated()) lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); @@ -357,6 +365,11 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_LVTT, lvtt_value); + if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { + printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); + return; + } + /* * Divide PICLK by 16 */ @@ -400,26 +413,32 @@ static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new) static unsigned int reserve_eilvt_offset(int offset, unsigned int new) { - unsigned int rsvd; /* 0: uninitialized */ + unsigned int rsvd, vector; if (offset >= APIC_EILVT_NR_MAX) return ~0; - rsvd = atomic_read(&eilvt_offsets[offset]) & ~APIC_EILVT_MASKED; + rsvd = atomic_read(&eilvt_offsets[offset]); do { - if (rsvd && - !eilvt_entry_is_changeable(rsvd, new)) + vector = rsvd & ~APIC_EILVT_MASKED; /* 0: unassigned */ + if (vector && !eilvt_entry_is_changeable(vector, new)) /* may not change if vectors are different */ return rsvd; rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new); } while (rsvd != new); + rsvd &= ~APIC_EILVT_MASKED; + if (rsvd && rsvd != vector) + pr_info("LVT offset %d assigned for vector 0x%02x\n", + offset, rsvd); + return new; } /* * If mask=1, the LVT entry does not generate interrupts while mask=0 - * enables the vector. See also the BKDGs. + * enables the vector. See also the BKDGs. Must be called with + * preemption disabled. */ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) @@ -432,17 +451,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask) reserved = reserve_eilvt_offset(offset, new); if (reserved != new) { - pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but " - "vector 0x%x was already reserved by another core, " - "APIC%lX=0x%x\n", - smp_processor_id(), new, reserved, reg, old); + pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " + "vector 0x%x, but the register is already in use for " + "vector 0x%x on another cpu\n", + smp_processor_id(), reg, offset, new, reserved); return -EINVAL; } if (!eilvt_entry_is_changeable(old, new)) { - pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but " - "register already in use, APIC%lX=0x%x\n", - smp_processor_id(), new, reg, old); + pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for " + "vector 0x%x, but the register is already in use for " + "vector 0x%x on this cpu\n", + smp_processor_id(), reg, offset, new, old); return -EBUSY; } @@ -462,6 +482,16 @@ static int lapic_next_event(unsigned long delta, return 0; } +static int lapic_next_deadline(unsigned long delta, + struct clock_event_device *evt) +{ + u64 tsc; + + rdtscll(tsc); + wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); + return 0; +} + /* * Setup the lapic timer in periodic or oneshot mode */ @@ -480,7 +510,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, switch (mode) { case CLOCK_EVT_MODE_PERIODIC: case CLOCK_EVT_MODE_ONESHOT: - __setup_APIC_LVTT(calibration_result, + __setup_APIC_LVTT(lapic_timer_frequency, mode != CLOCK_EVT_MODE_PERIODIC, 1); break; case CLOCK_EVT_MODE_UNUSED: @@ -508,15 +538,32 @@ static void lapic_timer_broadcast(const struct cpumask *mask) #endif } + +/* + * The local apic timer can be used for any function which is CPU local. + */ +static struct clock_event_device lapic_clockevent = { + .name = "lapic", + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT + | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, + .shift = 32, + .set_mode = lapic_timer_setup, + .set_next_event = lapic_next_event, + .broadcast = lapic_timer_broadcast, + .rating = 100, + .irq = -1, +}; +static DEFINE_PER_CPU(struct clock_event_device, lapic_events); + /* * Setup the local APIC timer for this CPU. Copy the initialized values * of the boot CPU and register the clock event in the framework. */ -static void __cpuinit setup_APIC_timer(void) +static void setup_APIC_timer(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); - if (cpu_has(¤t_cpu_data, X86_FEATURE_ARAT)) { + if (this_cpu_has(X86_FEATURE_ARAT)) { lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; /* Make LAPIC timer preferrable over percpu HPET */ lapic_clockevent.rating = 150; @@ -525,7 +572,15 @@ static void __cpuinit setup_APIC_timer(void) memcpy(levt, &lapic_clockevent, sizeof(*levt)); levt->cpumask = cpumask_of(smp_processor_id()); - clockevents_register_device(levt); + if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) { + levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_DUMMY); + levt->set_next_event = lapic_next_deadline; + clockevents_config_and_register(levt, + (tsc_khz / TSC_DIVISOR) * 1000, + 0xF, ~0UL); + } else + clockevents_register_device(levt); } /* @@ -647,6 +702,30 @@ static int __init calibrate_APIC_clock(void) long delta, deltatsc; int pm_referenced = 0; + /** + * check if lapic timer has already been calibrated by platform + * specific routine, such as tsc calibration code. if so, we just fill + * in the clockevent structure and return. + */ + + if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) { + return 0; + } else if (lapic_timer_frequency) { + apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", + lapic_timer_frequency); + lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR, + TICK_NSEC, lapic_clockevent.shift); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + return 0; + } + + apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" + "calibrating APIC timer ...\n"); + local_irq_disable(); /* Replace the global interrupt handler */ @@ -684,16 +763,16 @@ static int __init calibrate_APIC_clock(void) lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, lapic_clockevent.shift); lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent); lapic_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &lapic_clockevent); - calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; + lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", - calibration_result); + lapic_timer_frequency); if (cpu_has_tsc) { apic_printk(APIC_VERBOSE, "..... CPU clock speed is " @@ -704,13 +783,13 @@ static int __init calibrate_APIC_clock(void) apic_printk(APIC_VERBOSE, "..... host bus clock speed is " "%u.%04u MHz.\n", - calibration_result / (1000000 / HZ), - calibration_result % (1000000 / HZ)); + lapic_timer_frequency / (1000000 / HZ), + lapic_timer_frequency % (1000000 / HZ)); /* * Do a sanity check on the APIC calibration result */ - if (calibration_result < (1000000 / HZ)) { + if (lapic_timer_frequency < (1000000 / HZ)) { local_irq_enable(); pr_warning("APIC frequency too slow, disabling apic timer\n"); return -1; @@ -784,9 +863,6 @@ void __init setup_boot_APIC_clock(void) return; } - apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" - "calibrating APIC timer ...\n"); - if (calibrate_APIC_clock()) { /* No broadcast on UP ! */ if (num_possible_cpus() > 1) @@ -799,17 +875,13 @@ void __init setup_boot_APIC_clock(void) * PIT/HPET going. Otherwise register lapic as a dummy * device. */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - pr_warning("APIC timer registered as dummy," - " due to nmi_watchdog=%d!\n", nmi_watchdog); + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; /* Setup the lapic or request the broadcast */ setup_APIC_timer(); } -void __cpuinit setup_secondary_APIC_clock(void) +void setup_secondary_APIC_clock(void) { setup_APIC_timer(); } @@ -856,24 +928,42 @@ static void local_apic_timer_interrupt(void) * [ if a single-CPU system runs an SMP kernel then we call the local * interrupt as well. Thus we cannot inline the local irq ... ] */ -void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. + * + * update_process_times() expects us to have done irq_enter(). + * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. */ - ack_APIC_irq(); + entering_ack_irq(); + local_apic_timer_interrupt(); + exiting_irq(); + + set_irq_regs(old_regs); +} + +__visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + /* + * NOTE! We'd better ACK the irq immediately, + * because timer handling can be slow. + * * update_process_times() expects us to have done irq_enter(). * Besides, if we don't timer interrupts ignore the global * interrupt lock, which is the WrongThing (tm) to do. */ - exit_idle(); - irq_enter(); + entering_ack_irq(); + trace_local_timer_entry(LOCAL_TIMER_VECTOR); local_apic_timer_interrupt(); - irq_exit(); + trace_local_timer_exit(LOCAL_TIMER_VECTOR); + exiting_irq(); set_irq_regs(old_regs); } @@ -1154,7 +1244,7 @@ void __init init_bsp_APIC(void) apic_write(APIC_LVT1, value); } -static void __cpuinit lapic_setup_esr(void) +static void lapic_setup_esr(void) { unsigned int oldvalue, value, maxlvt; @@ -1195,12 +1285,15 @@ static void __cpuinit lapic_setup_esr(void) oldvalue, value); } - /** * setup_local_APIC - setup the local APIC + * + * Used to setup local APIC while initializing BSP or bringin up APs. + * Always called with preemption disabled. */ -void __cpuinit setup_local_APIC(void) +void setup_local_APIC(void) { + int cpu = smp_processor_id(); unsigned int value, queued; int i, j, acked = 0; unsigned long long tsc = 0, ntsc; @@ -1210,7 +1303,7 @@ void __cpuinit setup_local_APIC(void) rdtscll(tsc); if (disable_apic) { - arch_disable_smp_support(); + disable_ioapic_support(); return; } @@ -1225,8 +1318,6 @@ void __cpuinit setup_local_APIC(void) #endif perf_events_lapic_init(); - preempt_disable(); - /* * Double-check whether this APIC is really registered. * This is meaningless in clustered apic mode, so we skip it. @@ -1240,6 +1331,30 @@ void __cpuinit setup_local_APIC(void) */ apic->init_apic_ldr(); +#ifdef CONFIG_X86_32 + /* + * APIC LDR is initialized. If logical_apicid mapping was + * initialized during get_smp_config(), make sure it matches the + * actual value. + */ + i = early_per_cpu(x86_cpu_to_logical_apicid, cpu); + WARN_ON(i != BAD_APICID && i != logical_smp_processor_id()); + /* always use the value from LDR */ + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = + logical_smp_processor_id(); + + /* + * Some NUMA implementations (NUMAQ) don't initialize apicid to + * node mapping during NUMA init. Now that logical apicid is + * guaranteed to be known, give it another chance. This is already + * a bit too late - percpu allocation has already happened without + * proper NUMA affinity. + */ + if (apic->x86_32_numa_cpu_node) + set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu), + apic->x86_32_numa_cpu_node(cpu)); +#endif + /* * Set Task Priority to 'accept all'. We never change this * later on. @@ -1278,11 +1393,13 @@ void __cpuinit setup_local_APIC(void) acked); break; } - if (cpu_has_tsc) { - rdtscll(ntsc); - max_loops = (cpu_khz << 10) - (ntsc - tsc); - } else - max_loops--; + if (queued) { + if (cpu_has_tsc) { + rdtscll(ntsc); + max_loops = (cpu_khz << 10) - (ntsc - tsc); + } else + max_loops--; + } } while (queued && max_loops > 0); WARN_ON(max_loops <= 0); @@ -1342,21 +1459,19 @@ void __cpuinit setup_local_APIC(void) * TODO: set up through-local-APIC from through-I/O-APIC? --macro */ value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; - if (!smp_processor_id() && (pic_mode || !value)) { + if (!cpu && (pic_mode || !value)) { value = APIC_DM_EXTINT; - apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", - smp_processor_id()); + apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu); } else { value = APIC_DM_EXTINT | APIC_LVT_MASKED; - apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", - smp_processor_id()); + apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu); } apic_write(APIC_LVT0, value); /* * only the BP should see the LINT1 NMI signal, obviously. */ - if (!smp_processor_id()) + if (!cpu) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; @@ -1364,16 +1479,14 @@ void __cpuinit setup_local_APIC(void) value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); - preempt_enable(); - #ifdef CONFIG_X86_MCE_INTEL /* Recheck CMCI information after local APIC is up on CPU #0 */ - if (smp_processor_id() == 0) + if (!cpu) cmci_recheck(); #endif } -void __cpuinit end_local_APIC_setup(void) +void end_local_APIC_setup(void) { lapic_setup_esr(); @@ -1387,11 +1500,61 @@ void __cpuinit end_local_APIC_setup(void) } #endif - setup_apic_nmi_watchdog(NULL); apic_pm_activate(); } +void __init bsp_end_local_APIC_setup(void) +{ + end_local_APIC_setup(); + + /* + * Now that local APIC setup is completed for BP, configure the fault + * handling for interrupt remapping. + */ + irq_remap_enable_fault_handling(); + +} + #ifdef CONFIG_X86_X2APIC +/* + * Need to disable xapic and x2apic at the same time and then enable xapic mode + */ +static inline void __disable_x2apic(u64 msr) +{ + wrmsrl(MSR_IA32_APICBASE, + msr & ~(X2APIC_ENABLE | XAPIC_ENABLE)); + wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE); +} + +static __init void disable_x2apic(void) +{ + u64 msr; + + if (!cpu_has_x2apic) + return; + + rdmsrl(MSR_IA32_APICBASE, msr); + if (msr & X2APIC_ENABLE) { + u32 x2apic_id = read_apic_id(); + + if (x2apic_id >= 255) + panic("Cannot disable x2apic, id: %08x\n", x2apic_id); + + pr_info("Disabling x2apic\n"); + __disable_x2apic(msr); + + if (nox2apic) { + clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC); + setup_clear_cpu_cap(X86_FEATURE_X2APIC); + } + + x2apic_disabled = 1; + x2apic_mode = 0; + + register_lapic_address(mp_lapic_addr); + } +} + void check_x2apic(void) { if (x2apic_enabled()) { @@ -1402,82 +1565,87 @@ void check_x2apic(void) void enable_x2apic(void) { - int msr, msr2; + u64 msr; + + rdmsrl(MSR_IA32_APICBASE, msr); + if (x2apic_disabled) { + __disable_x2apic(msr); + return; + } if (!x2apic_mode) return; - rdmsr(MSR_IA32_APICBASE, msr, msr2); if (!(msr & X2APIC_ENABLE)) { printk_once(KERN_INFO "Enabling x2apic\n"); - wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); + wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE); } } #endif /* CONFIG_X86_X2APIC */ int __init enable_IR(void) { -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_supported()) { +#ifdef CONFIG_IRQ_REMAP + if (!irq_remapping_supported()) { pr_debug("intr-remapping not supported\n"); - return 0; + return -1; } if (!x2apic_preenabled && skip_ioapic_setup) { pr_info("Skipped enabling intr-remap because of skipping " "io-apic setup\n"); - return 0; + return -1; } - if (enable_intr_remapping(x2apic_supported())) - return 0; - - pr_info("Enabled Interrupt-remapping\n"); - - return 1; - + return irq_remapping_enable(); #endif - return 0; + return -1; } void __init enable_IR_x2apic(void) { unsigned long flags; - struct IO_APIC_route_entry **ioapic_entries = NULL; int ret, x2apic_enabled = 0; - int dmar_table_init_ret; + int hardware_init_ret; - dmar_table_init_ret = dmar_table_init(); - if (dmar_table_init_ret && !x2apic_supported()) - return; + /* Make sure irq_remap_ops are initialized */ + setup_irq_remapping_ops(); - ioapic_entries = alloc_ioapic_entries(); - if (!ioapic_entries) { - pr_err("Allocate ioapic_entries failed\n"); - goto out; - } + hardware_init_ret = irq_remapping_prepare(); + if (hardware_init_ret && !x2apic_supported()) + return; - ret = save_IO_APIC_setup(ioapic_entries); + ret = save_ioapic_entries(); if (ret) { pr_info("Saving IO-APIC state failed: %d\n", ret); - goto out; + return; } local_irq_save(flags); legacy_pic->mask_all(); - mask_IO_APIC_setup(ioapic_entries); + mask_ioapic_entries(); - if (dmar_table_init_ret) - ret = 0; + if (x2apic_preenabled && nox2apic) + disable_x2apic(); + + if (hardware_init_ret) + ret = -1; else ret = enable_IR(); - if (!ret) { + if (!x2apic_supported()) + goto skip_x2apic; + + if (ret < 0) { /* IR is required if there is APIC ID > 255 even when running * under KVM */ - if (max_physical_apicid > 255 || !kvm_para_available()) - goto nox2apic; + if (max_physical_apicid > 255 || + !hypervisor_x2apic_available()) { + if (x2apic_preenabled) + disable_x2apic(); + goto skip_x2apic; + } /* * without IR all CPUs can be addressed by IOAPIC/MSI * only in physical mode @@ -1485,6 +1653,11 @@ void __init enable_IR_x2apic(void) x2apic_force_phys(); } + if (ret == IRQ_REMAP_XAPIC_MODE) { + pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n"); + goto skip_x2apic; + } + x2apic_enabled = 1; if (x2apic_supported() && !x2apic_mode) { @@ -1493,23 +1666,11 @@ void __init enable_IR_x2apic(void) pr_info("Enabled x2apic\n"); } -nox2apic: - if (!ret) /* IR enabling failed */ - restore_IO_APIC_setup(ioapic_entries); +skip_x2apic: + if (ret < 0) /* IR enabling failed */ + restore_ioapic_entries(); legacy_pic->restore_mask(); local_irq_restore(flags); - -out: - if (ioapic_entries) - free_ioapic_entries(ioapic_entries); - - if (x2apic_enabled) - return; - - if (x2apic_preenabled) - panic("x2apic: enabled by BIOS but kernel init failed."); - else if (cpu_has_x2apic) - pr_info("Not enabling x2apic, Intr-remapping init failed.\n"); } #ifdef CONFIG_X86_64 @@ -1530,13 +1691,64 @@ static int __init detect_init_APIC(void) return 0; } #else + +static int __init apic_verify(void) +{ + u32 features, h, l; + + /* + * The APIC feature bit should now be enabled + * in `cpuid' + */ + features = cpuid_edx(1); + if (!(features & (1 << X86_FEATURE_APIC))) { + pr_warning("Could not enable APIC!\n"); + return -1; + } + set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* The BIOS may have set up the APIC at some other address */ + if (boot_cpu_data.x86 >= 6) { + rdmsr(MSR_IA32_APICBASE, l, h); + if (l & MSR_IA32_APICBASE_ENABLE) + mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + } + + pr_info("Found and enabled local APIC!\n"); + return 0; +} + +int __init apic_force_enable(unsigned long addr) +{ + u32 h, l; + + if (disable_apic) + return -1; + + /* + * Some BIOSes disable the local APIC in the APIC_BASE + * MSR. This can only be done in software for Intel P6 or later + * and AMD K7 (Model > 1) or later. + */ + if (boot_cpu_data.x86 >= 6) { + rdmsr(MSR_IA32_APICBASE, l, h); + if (!(l & MSR_IA32_APICBASE_ENABLE)) { + pr_info("Local APIC disabled by BIOS -- reenabling.\n"); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | addr; + wrmsr(MSR_IA32_APICBASE, l, h); + enabled_via_apicbase = 1; + } + } + return apic_verify(); +} + /* * Detect and initialize APIC */ static int __init detect_init_APIC(void) { - u32 h, l, features; - /* Disabled by kernel option? */ if (disable_apic) return -1; @@ -1566,38 +1778,12 @@ static int __init detect_init_APIC(void) "you can enable it with \"lapic\"\n"); return -1; } - /* - * Some BIOSes disable the local APIC in the APIC_BASE - * MSR. This can only be done in software for Intel P6 or later - * and AMD K7 (Model > 1) or later. - */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - pr_info("Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; - } - } - /* - * The APIC feature bit should now be enabled - * in `cpuid' - */ - features = cpuid_edx(1); - if (!(features & (1 << X86_FEATURE_APIC))) { - pr_warning("Could not enable APIC!\n"); - return -1; + if (apic_force_enable(APIC_DEFAULT_PHYS_BASE)) + return -1; + } else { + if (apic_verify()) + return -1; } - set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* The BIOS may have set up the APIC at some other address */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - - pr_info("Found and enabled local APIC!\n"); apic_pm_activate(); @@ -1609,28 +1795,6 @@ no_apic: } #endif -#ifdef CONFIG_X86_64 -void __init early_init_lapic_mapping(void) -{ - /* - * If no local APIC can be found then go out - * : it means there is no mpatable and MADT - */ - if (!smp_found_config) - return; - - set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, mp_lapic_addr); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). - */ - boot_cpu_physical_apicid = read_apic_id(); -} -#endif - /** * init_apic_mappings - initialize APIC mappings */ @@ -1656,10 +1820,7 @@ void __init init_apic_mappings(void) * acpi_register_lapic_address() */ if (!acpi_lapic && !smp_found_config) - set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - - apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", - APIC_BASE, apic_phys); + register_lapic_address(apic_phys); } /* @@ -1681,11 +1842,27 @@ void __init init_apic_mappings(void) } } +void __init register_lapic_address(unsigned long address) +{ + mp_lapic_addr = address; + + if (!x2apic_mode) { + set_fixmap_nocache(FIX_APIC_BASE, address); + apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + APIC_BASE, mp_lapic_addr); + } + if (boot_cpu_physical_apicid == -1U) { + boot_cpu_physical_apicid = read_apic_id(); + apic_version[boot_cpu_physical_apicid] = + GET_APIC_VERSION(apic_read(APIC_LVR)); + } +} + /* * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ -int apic_version[MAX_APICS]; +int apic_version[MAX_LOCAL_APIC]; int __init APIC_init_uniprocessor(void) { @@ -1743,24 +1920,17 @@ int __init APIC_init_uniprocessor(void) enable_IO_APIC(); #endif - end_local_APIC_setup(); + bsp_end_local_APIC_setup(); #ifdef CONFIG_X86_IO_APIC if (smp_found_config && !skip_ioapic_setup && nr_ioapics) setup_IO_APIC(); else { nr_ioapics = 0; - localise_nmi_watchdog(); } -#else - localise_nmi_watchdog(); #endif x86_init.timers.setup_percpu_clockev(); -#ifdef CONFIG_X86_64 - check_nmi_watchdog(); -#endif - return 0; } @@ -1771,12 +1941,10 @@ int __init APIC_init_uniprocessor(void) /* * This interrupt should _never_ happen with our APIC/SMP architecture */ -void smp_spurious_interrupt(struct pt_regs *regs) +static inline void __smp_spurious_interrupt(void) { u32 v; - exit_idle(); - irq_enter(); /* * Check if this really is a spurious interrupt and ACK it * if it is a vectored one. Just in case... @@ -1791,39 +1959,78 @@ void smp_spurious_interrupt(struct pt_regs *regs) /* see sw-dev-man vol 3, chapter 7.4.13.5 */ pr_info("spurious APIC interrupt on CPU#%d, " "should never happen.\n", smp_processor_id()); - irq_exit(); +} + +__visible void smp_spurious_interrupt(struct pt_regs *regs) +{ + entering_irq(); + __smp_spurious_interrupt(); + exiting_irq(); +} + +__visible void smp_trace_spurious_interrupt(struct pt_regs *regs) +{ + entering_irq(); + trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR); + __smp_spurious_interrupt(); + trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR); + exiting_irq(); } /* * This interrupt should never happen with our APIC/SMP architecture */ -void smp_error_interrupt(struct pt_regs *regs) +static inline void __smp_error_interrupt(struct pt_regs *regs) { - u32 v, v1; + u32 v; + u32 i = 0; + static const char * const error_interrupt_reason[] = { + "Send CS error", /* APIC Error Bit 0 */ + "Receive CS error", /* APIC Error Bit 1 */ + "Send accept error", /* APIC Error Bit 2 */ + "Receive accept error", /* APIC Error Bit 3 */ + "Redirectable IPI", /* APIC Error Bit 4 */ + "Send illegal vector", /* APIC Error Bit 5 */ + "Received illegal vector", /* APIC Error Bit 6 */ + "Illegal register address", /* APIC Error Bit 7 */ + }; - exit_idle(); - irq_enter(); /* First tickle the hardware, only then report what went on. -- REW */ + if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); v = apic_read(APIC_ESR); - apic_write(APIC_ESR, 0); - v1 = apic_read(APIC_ESR); ack_APIC_irq(); atomic_inc(&irq_err_count); - /* - * Here is what the APIC error bits mean: - * 0: Send CS error - * 1: Receive CS error - * 2: Send accept error - * 3: Receive accept error - * 4: Reserved - * 5: Send illegal vector - * 6: Received illegal vector - * 7: Illegal register address - */ - pr_debug("APIC error on CPU%d: %02x(%02x)\n", - smp_processor_id(), v , v1); - irq_exit(); + apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x", + smp_processor_id(), v); + + v &= 0xff; + while (v) { + if (v & 0x1) + apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); + i++; + v >>= 1; + } + + apic_printk(APIC_DEBUG, KERN_CONT "\n"); + +} + +__visible void smp_error_interrupt(struct pt_regs *regs) +{ + entering_irq(); + __smp_error_interrupt(regs); + exiting_irq(); +} + +__visible void smp_trace_error_interrupt(struct pt_regs *regs) +{ + entering_irq(); + trace_error_apic_entry(ERROR_APIC_VECTOR); + __smp_error_interrupt(regs); + trace_error_apic_exit(ERROR_APIC_VECTOR); + exiting_irq(); } /** @@ -1915,23 +2122,62 @@ void disconnect_bsp_APIC(int virt_wire_setup) apic_write(APIC_LVT1, value); } -void __cpuinit generic_processor_info(int apicid, int version) +int generic_processor_info(int apicid, int version) { - int cpu; + int cpu, max = nr_cpu_ids; + bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, + phys_cpu_present_map); /* - * Validate version + * boot_cpu_physical_apicid is designed to have the apicid + * returned by read_apic_id(), i.e, the apicid of the + * currently booting-up processor. However, on some platforms, + * it is temporarily modified by the apicid reported as BSP + * through MP table. Concretely: + * + * - arch/x86/kernel/mpparse.c: MP_processor_info() + * - arch/x86/mm/amdtopology.c: amd_numa_init() + * + * This function is executed with the modified + * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel + * parameter doesn't work to disable APs on kdump 2nd kernel. + * + * Since fixing handling of boot_cpu_physical_apicid requires + * another discussion and tests on each platform, we leave it + * for now and here we use read_apic_id() directly in this + * function, generic_processor_info(). */ - if (version == 0x0) { - pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); - version = 0x10; + if (disabled_cpu_apicid != BAD_APICID && + disabled_cpu_apicid != read_apic_id() && + disabled_cpu_apicid == apicid) { + int thiscpu = num_processors + disabled_cpus; + + pr_warning("APIC: Disabling requested cpu." + " Processor %d/0x%x ignored.\n", + thiscpu, apicid); + + disabled_cpus++; + return -ENODEV; + } + + /* + * If boot cpu has not been detected yet, then only allow upto + * nr_cpu_ids - 1 processors and keep one slot free for boot cpu + */ + if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 && + apicid != boot_cpu_physical_apicid) { + int thiscpu = max + disabled_cpus - 1; + + pr_warning( + "ACPI: NR_CPUS/possible_cpus limit of %i almost" + " reached. Keeping one slot for boot cpu." + " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); + + disabled_cpus++; + return -ENODEV; } - apic_version[apicid] = version; if (num_processors >= nr_cpu_ids) { - int max = nr_cpu_ids; int thiscpu = max + disabled_cpus; pr_warning( @@ -1939,26 +2185,38 @@ void __cpuinit generic_processor_info(int apicid, int version) " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); disabled_cpus++; - return; + return -EINVAL; } num_processors++; - cpu = cpumask_next_zero(-1, cpu_present_mask); - - if (version != apic_version[boot_cpu_physical_apicid]) - WARN_ONCE(1, - "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n", - apic_version[boot_cpu_physical_apicid], cpu, version); - - physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed * in same order as logical cpu numbers. Hence the first * entry is BSP, and so on. + * boot_cpu_init() already hold bit 0 in cpu_present_mask + * for BSP. */ cpu = 0; + } else + cpu = cpumask_next_zero(-1, cpu_present_mask); + + /* + * Validate version + */ + if (version == 0x0) { + pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", + cpu, apicid); + version = 0x10; + } + apic_version[apicid] = version; + + if (version != apic_version[boot_cpu_physical_apicid]) { + pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", + apic_version[boot_cpu_physical_apicid], cpu, version); } + + physid_set(apicid, phys_cpu_present_map); if (apicid > max_physical_apicid) max_physical_apicid = apicid; @@ -1966,9 +2224,14 @@ void __cpuinit generic_processor_info(int apicid, int version) early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; #endif - +#ifdef CONFIG_X86_32 + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = + apic->x86_32_early_logical_apicid(cpu); +#endif set_cpu_possible(cpu, true); set_cpu_present(cpu, true); + + return cpu; } int hard_smp_processor_id(void) @@ -1986,16 +2249,41 @@ void default_init_apic_ldr(void) apic_write(APIC_LDR, val); } -#ifdef CONFIG_X86_32 -int default_apicid_to_node(int logical_apicid) +int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask, + unsigned int *apicid) { -#ifdef CONFIG_SMP - return apicid_2_node[hard_smp_processor_id()]; -#else - return 0; -#endif + unsigned int cpu; + + for_each_cpu_and(cpu, cpumask, andmask) { + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + } + + if (likely(cpu < nr_cpu_ids)) { + *apicid = per_cpu(x86_cpu_to_apicid, cpu); + return 0; + } + + return -EINVAL; +} + +/* + * Override the generic EOI implementation with an optimized version. + * Only called during early boot when only one CPU is active and with + * interrupts disabled, so we know this does not race with actual APIC driver + * use. + */ +void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) +{ + struct apic **drv; + + for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { + /* Should happen once for each apic */ + WARN_ON((*drv)->eoi_write == eoi_write); + (*drv)->eoi_write = eoi_write; + } } -#endif /* * Power management @@ -2025,7 +2313,7 @@ static struct { unsigned int apic_thmr; } apic_pm_state; -static int lapic_suspend(struct sys_device *dev, pm_message_t state) +static int lapic_suspend(void) { unsigned long flags; int maxlvt; @@ -2056,43 +2344,31 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) local_irq_save(flags); disable_local_APIC(); - if (intr_remapping_enabled) - disable_intr_remapping(); + irq_remapping_disable(); local_irq_restore(flags); return 0; } -static int lapic_resume(struct sys_device *dev) +static void lapic_resume(void) { unsigned int l, h; unsigned long flags; int maxlvt; - int ret = 0; - struct IO_APIC_route_entry **ioapic_entries = NULL; if (!apic_pm_state.active) - return 0; + return; local_irq_save(flags); - if (intr_remapping_enabled) { - ioapic_entries = alloc_ioapic_entries(); - if (!ioapic_entries) { - WARN(1, "Alloc ioapic_entries in lapic resume failed."); - ret = -ENOMEM; - goto restore; - } - - ret = save_IO_APIC_setup(ioapic_entries); - if (ret) { - WARN(1, "Saving IO-APIC state failed: %d\n", ret); - free_ioapic_entries(ioapic_entries); - goto restore; - } - mask_IO_APIC_setup(ioapic_entries); - legacy_pic->mask_all(); - } + /* + * IO-APIC and PIC have their own resume routines. + * We just mask them here to make sure the interrupt + * subsystem is completely quiet while we enable x2apic + * and interrupt-remapping. + */ + mask_ioapic_entries(); + legacy_pic->mask_all(); if (x2apic_mode) enable_x2apic(); @@ -2103,10 +2379,12 @@ static int lapic_resume(struct sys_device *dev) * FIXME! This will be wrong if we ever support suspend on * SMP! We'll need to do this as part of the CPU restore! */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); + if (boot_cpu_data.x86 >= 6) { + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + } } maxlvt = lapic_get_maxlvt(); @@ -2118,7 +2396,7 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) +#if defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif @@ -2133,16 +2411,9 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - if (intr_remapping_enabled) { - reenable_intr_remapping(x2apic_mode); - legacy_pic->restore_mask(); - restore_IO_APIC_setup(ioapic_entries); - free_ioapic_entries(ioapic_entries); - } -restore: - local_irq_restore(flags); + irq_remapping_reenable(x2apic_mode); - return ret; + local_irq_restore(flags); } /* @@ -2150,34 +2421,23 @@ restore: * are needed on every CPU up until machine_halt/restart/poweroff. */ -static struct sysdev_class lapic_sysclass = { - .name = "lapic", +static struct syscore_ops lapic_syscore_ops = { .resume = lapic_resume, .suspend = lapic_suspend, }; -static struct sys_device device_lapic = { - .id = 0, - .cls = &lapic_sysclass, -}; - -static void __cpuinit apic_pm_activate(void) +static void apic_pm_activate(void) { apic_pm_state.active = 1; } static int __init init_lapic_sysfs(void) { - int error; - - if (!cpu_has_apic) - return 0; /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ + if (cpu_has_apic) + register_syscore_ops(&lapic_syscore_ops); - error = sysdev_class_register(&lapic_sysclass); - if (!error) - error = sysdev_register(&device_lapic); - return error; + return 0; } /* local apic needs to resume before other devices access its registers. */ @@ -2191,7 +2451,7 @@ static void apic_pm_activate(void) { } #ifdef CONFIG_X86_64 -static int __cpuinit apic_cluster_num(void) +static int apic_cluster_num(void) { int i, clusters, zeros; unsigned id; @@ -2236,10 +2496,10 @@ static int __cpuinit apic_cluster_num(void) return clusters; } -static int __cpuinitdata multi_checked; -static int __cpuinitdata multi; +static int multi_checked; +static int multi; -static int __cpuinit set_multi(const struct dmi_system_id *d) +static int set_multi(const struct dmi_system_id *d) { if (multi) return 0; @@ -2248,7 +2508,7 @@ static int __cpuinit set_multi(const struct dmi_system_id *d) return 0; } -static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = { +static const struct dmi_system_id multi_dmi_table[] = { { .callback = set_multi, .ident = "IBM System Summit2", @@ -2260,7 +2520,7 @@ static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = { {} }; -static void __cpuinit dmi_check_multi(void) +static void dmi_check_multi(void) { if (multi_checked) return; @@ -2277,7 +2537,7 @@ static void __cpuinit dmi_check_multi(void) * multi-chassis. * Use DMI to check them */ -__cpuinit int apic_is_clustered_box(void) +int apic_is_clustered_box(void) { dmi_check_multi(); if (multi) @@ -2378,3 +2638,12 @@ static int __init lapic_insert_resource(void) * that is using request_resource */ late_initcall(lapic_insert_resource); + +static int __init apic_set_disabled_cpu_apicid(char *arg) +{ + if (!arg || !get_option(&arg, &disabled_cpu_apicid)) + return -EINVAL; + + return 0; +} +early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 09d3b17ce0c..7c1b2947951 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -14,38 +14,23 @@ #include <linux/string.h> #include <linux/kernel.h> #include <linux/ctype.h> -#include <linux/init.h> #include <linux/hardirq.h> +#include <linux/module.h> #include <asm/smp.h> #include <asm/apic.h> #include <asm/ipi.h> -#ifdef CONFIG_ACPI -#include <acpi/acpi_bus.h> -#endif +#include <linux/acpi.h> -static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 1; -} +static struct apic apic_physflat; +static struct apic apic_flat; -static const struct cpumask *flat_target_cpus(void) -{ - return cpu_online_mask; -} +struct apic __read_mostly *apic = &apic_flat; +EXPORT_SYMBOL_GPL(apic); -static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) +static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; + return 1; } /* @@ -55,7 +40,7 @@ static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ -static void flat_init_apic_ldr(void) +void flat_init_apic_ldr(void) { unsigned long val; unsigned long num, id; @@ -85,7 +70,7 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) } static void - flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) +flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { unsigned long mask = cpumask_bits(cpumask)[0]; int cpu = smp_processor_id(); @@ -164,16 +149,22 @@ static int flat_phys_pkg_id(int initial_apic_id, int index_msb) return initial_apic_id >> index_msb; } -struct apic apic_flat = { +static int flat_probe(void) +{ + return 1; +} + +static struct apic apic_flat = { .name = "flat", - .probe = NULL, + .probe = flat_probe, .acpi_madt_oem_check = flat_acpi_madt_oem_check, + .apic_id_valid = default_apic_id_valid, .apic_id_registered = flat_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = flat_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, @@ -185,8 +176,6 @@ struct apic apic_flat = { .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, .multi_timer_check = NULL, - .apicid_to_node = NULL, - .cpu_to_logical_apicid = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, @@ -199,8 +188,7 @@ struct apic apic_flat = { .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu << 24, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .send_IPI_mask = flat_send_IPI_mask, .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, @@ -210,12 +198,13 @@ struct apic apic_flat = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, .write = native_apic_mem_write, + .eoi_write = native_apic_mem_write, .icr_read = native_apic_icr_read, .icr_write = native_apic_icr_write, .wait_icr_idle = native_apic_wait_icr_idle, @@ -250,17 +239,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static const struct cpumask *physflat_target_cpus(void) -{ - return cpu_online_mask; -} - -static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) { default_send_IPI_mask_sequence_phys(cpumask, vector); @@ -282,63 +260,38 @@ static void physflat_send_IPI_all(int vector) physflat_send_IPI_mask(cpu_online_mask, vector); } -static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - cpu = cpumask_first(cpumask); - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - else - return BAD_APICID; -} - -static unsigned int -physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static int physflat_probe(void) { - int cpu; + if (apic == &apic_physflat || num_possible_cpus() > 8) + return 1; - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - return per_cpu(x86_cpu_to_apicid, cpu); + return 0; } -struct apic apic_physflat = { +static struct apic apic_physflat = { .name = "physical flat", - .probe = NULL, + .probe = physflat_probe, .acpi_madt_oem_check = physflat_acpi_madt_oem_check, + .apic_id_valid = default_apic_id_valid, .apic_id_registered = flat_apic_id_registered, .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = physflat_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = physflat_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, /* not needed, but shouldn't hurt: */ .init_apic_ldr = flat_init_apic_ldr, .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, .multi_timer_check = NULL, - .apicid_to_node = NULL, - .cpu_to_logical_apicid = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, @@ -351,8 +304,7 @@ struct apic apic_physflat = { .set_apic_id = set_apic_id, .apic_id_mask = 0xFFu << 24, - .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = physflat_send_IPI_mask, .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, @@ -362,14 +314,20 @@ struct apic apic_physflat = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, .write = native_apic_mem_write, + .eoi_write = native_apic_mem_write, .icr_read = native_apic_icr_read, .icr_write = native_apic_icr_write, .wait_icr_idle = native_apic_wait_icr_idle, .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, }; + +/* + * We need to check for physflat first, so this order is important. + */ +apic_drivers(apic_physflat, apic_flat); diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index e31b9ffe25f..8c7c98249c2 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -15,7 +15,6 @@ #include <linux/string.h> #include <linux/kernel.h> #include <linux/ctype.h> -#include <linux/init.h> #include <linux/errno.h> #include <asm/fixmap.h> #include <asm/mpspec.h> @@ -54,11 +53,6 @@ static u64 noop_apic_icr_read(void) return 0; } -static int noop_cpu_to_logical_apicid(int cpu) -{ - return 0; -} - static int noop_phys_pkg_id(int cpuid_apic, int index_msb) { return 0; @@ -105,18 +99,12 @@ static unsigned long noop_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) +static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) { if (cpu != 0) pr_warning("APIC: Vector allocated for non-BSP cpu\n"); - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - -int noop_apicid_to_node(int logical_apicid) -{ - /* we're always on node 0 */ - return 0; + cpumask_copy(retmask, cpumask_of(cpu)); } static u32 noop_apic_read(u32 reg) @@ -135,6 +123,7 @@ struct apic apic_noop = { .probe = noop_probe, .acpi_madt_oem_check = NULL, + .apic_id_valid = default_apic_id_valid, .apic_id_registered = noop_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, @@ -153,9 +142,7 @@ struct apic apic_noop = { .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = NULL, .multi_timer_check = NULL, - .apicid_to_node = noop_apicid_to_node, - .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, @@ -171,8 +158,7 @@ struct apic apic_noop = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .send_IPI_mask = noop_send_IPI_mask, .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, @@ -186,15 +172,19 @@ struct apic apic_noop = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, - + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, .read = noop_apic_read, .write = noop_apic_write, + .eoi_write = noop_apic_write, .icr_read = noop_apic_icr_read, .icr_write = noop_apic_icr_write, .wait_icr_idle = noop_apic_wait_icr_idle, .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, + +#ifdef CONFIG_X86_32 + .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, +#endif }; diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c new file mode 100644 index 00000000000..a5b45df8bc8 --- /dev/null +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -0,0 +1,264 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Numascale NumaConnect-Specific APIC Code + * + * Copyright (C) 2011 Numascale AS. All rights reserved. + * + * Send feedback to <support@numascale.com> + * + */ + +#include <linux/errno.h> +#include <linux/threads.h> +#include <linux/cpumask.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/ctype.h> +#include <linux/init.h> +#include <linux/hardirq.h> +#include <linux/delay.h> + +#include <asm/numachip/numachip.h> +#include <asm/numachip/numachip_csr.h> +#include <asm/smp.h> +#include <asm/apic.h> +#include <asm/ipi.h> +#include <asm/apic_flat_64.h> +#include <asm/pgtable.h> + +static int numachip_system __read_mostly; + +static const struct apic apic_numachip __read_mostly; + +static unsigned int get_apic_id(unsigned long x) +{ + unsigned long value; + unsigned int id; + + rdmsrl(MSR_FAM10H_NODE_ID, value); + id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U); + + return id; +} + +static unsigned long set_apic_id(unsigned int id) +{ + unsigned long x; + + x = ((id & 0xffU) << 24); + return x; +} + +static unsigned int read_xapic_id(void) +{ + return get_apic_id(apic_read(APIC_ID)); +} + +static int numachip_apic_id_valid(int apicid) +{ + /* Trust what bootloader passes in MADT */ + return 1; +} + +static int numachip_apic_id_registered(void) +{ + return physid_isset(read_xapic_id(), phys_cpu_present_map); +} + +static int numachip_phys_pkg_id(int initial_apic_id, int index_msb) +{ + return initial_apic_id >> index_msb; +} + +static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip) +{ + union numachip_csr_g3_ext_irq_gen int_gen; + + int_gen.s._destination_apic_id = phys_apicid; + int_gen.s._vector = 0; + int_gen.s._msgtype = APIC_DM_INIT >> 8; + int_gen.s._index = 0; + + write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v); + + int_gen.s._msgtype = APIC_DM_STARTUP >> 8; + int_gen.s._vector = start_rip >> 12; + + write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v); + + atomic_set(&init_deasserted, 1); + return 0; +} + +static void numachip_send_IPI_one(int cpu, int vector) +{ + union numachip_csr_g3_ext_irq_gen int_gen; + int apicid = per_cpu(x86_cpu_to_apicid, cpu); + + int_gen.s._destination_apic_id = apicid; + int_gen.s._vector = vector; + int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8; + int_gen.s._index = 0; + + write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v); +} + +static void numachip_send_IPI_mask(const struct cpumask *mask, int vector) +{ + unsigned int cpu; + + for_each_cpu(cpu, mask) + numachip_send_IPI_one(cpu, vector); +} + +static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask, + int vector) +{ + unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; + + for_each_cpu(cpu, mask) { + if (cpu != this_cpu) + numachip_send_IPI_one(cpu, vector); + } +} + +static void numachip_send_IPI_allbutself(int vector) +{ + unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; + + for_each_online_cpu(cpu) { + if (cpu != this_cpu) + numachip_send_IPI_one(cpu, vector); + } +} + +static void numachip_send_IPI_all(int vector) +{ + numachip_send_IPI_mask(cpu_online_mask, vector); +} + +static void numachip_send_IPI_self(int vector) +{ + __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); +} + +static int __init numachip_probe(void) +{ + return apic == &apic_numachip; +} + +static void __init map_csrs(void) +{ + printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n", + NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1); + init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE); + + printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n", + NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1); + init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE); +} + +static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) +{ + + if (c->phys_proc_id != node) { + c->phys_proc_id = node; + per_cpu(cpu_llc_id, smp_processor_id()) = node; + } +} + +static int __init numachip_system_init(void) +{ + unsigned int val; + + if (!numachip_system) + return 0; + + x86_cpuinit.fixup_cpu_id = fixup_cpu_id; + x86_init.pci.arch_init = pci_numachip_init; + + map_csrs(); + + val = read_lcsr(CSR_G0_NODE_IDS); + printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val); + + return 0; +} +early_initcall(numachip_system_init); + +static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strncmp(oem_id, "NUMASC", 6)) { + numachip_system = 1; + return 1; + } + + return 0; +} + +static const struct apic apic_numachip __refconst = { + + .name = "NumaConnect system", + .probe = numachip_probe, + .acpi_madt_oem_check = numachip_acpi_madt_oem_check, + .apic_id_valid = numachip_apic_id_valid, + .apic_id_registered = numachip_apic_id_registered, + + .irq_delivery_mode = dest_Fixed, + .irq_dest_mode = 0, /* physical */ + + .target_cpus = online_target_cpus, + .disable_esr = 0, + .dest_logical = 0, + .check_apicid_used = NULL, + .check_apicid_present = NULL, + + .vector_allocation_domain = default_vector_allocation_domain, + .init_apic_ldr = flat_init_apic_ldr, + + .ioapic_phys_id_map = NULL, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, + .apicid_to_cpu_present = NULL, + .setup_portio_remap = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, + .enable_apic_mode = NULL, + .phys_pkg_id = numachip_phys_pkg_id, + .mps_oem_check = NULL, + + .get_apic_id = get_apic_id, + .set_apic_id = set_apic_id, + .apic_id_mask = 0xffU << 24, + + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + + .send_IPI_mask = numachip_send_IPI_mask, + .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself, + .send_IPI_allbutself = numachip_send_IPI_allbutself, + .send_IPI_all = numachip_send_IPI_all, + .send_IPI_self = numachip_send_IPI_self, + + .wakeup_secondary_cpu = numachip_wakeup_secondary, + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, + .wait_for_init_deassert = false, + .smp_callin_clear_local_apic = NULL, + .inquire_remote_apic = NULL, /* REMRD not supported */ + + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .eoi_write = native_apic_mem_write, + .icr_read = native_apic_icr_read, + .icr_write = native_apic_icr_write, + .wait_icr_idle = native_apic_wait_icr_idle, + .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, +}; +apic_driver(apic_numachip); + diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index cb804c5091b..e4840aa7a25 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -26,15 +26,6 @@ static int bigsmp_apic_id_registered(void) return 1; } -static const struct cpumask *bigsmp_target_cpus(void) -{ -#ifdef CONFIG_SMP - return cpu_online_mask; -#else - return cpumask_of(0); -#endif -} - static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) { return 0; @@ -45,6 +36,12 @@ static unsigned long bigsmp_check_apicid_present(int bit) return 1; } +static int bigsmp_early_logical_apicid(int cpu) +{ + /* on bigsmp, logical apicid is the same as physical */ + return early_per_cpu(x86_cpu_to_apicid, cpu); +} + static inline unsigned long calculate_ldr(int cpu) { unsigned long val, id; @@ -80,11 +77,6 @@ static void bigsmp_setup_apic_routing(void) nr_ioapics); } -static int bigsmp_apicid_to_node(int logical_apicid) -{ - return apicid_2_node[hard_smp_processor_id()]; -} - static int bigsmp_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids) @@ -93,14 +85,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -/* Mapping from cpu number to logical apicid */ -static inline int bigsmp_cpu_to_logical_apicid(int cpu) -{ - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return cpu_physical_id(cpu); -} - static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ @@ -112,28 +96,6 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid) return 1; } -/* As we are using single CPU as destination, pick only one CPU here */ -static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask)); -} - -static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - return bigsmp_cpu_to_logical_apicid(cpu); -} - static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; @@ -180,12 +142,6 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } /* NULL entry stops DMI scanning */ }; -static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - static int probe_bigsmp(void) { if (def_to_bigsmp) @@ -196,31 +152,30 @@ static int probe_bigsmp(void) return dmi_bigsmp; } -struct apic apic_bigsmp = { +static struct apic apic_bigsmp = { .name = "bigsmp", .probe = probe_bigsmp, .acpi_madt_oem_check = NULL, + .apic_id_valid = default_apic_id_valid, .apic_id_registered = bigsmp_apic_id_registered, .irq_delivery_mode = dest_Fixed, /* phys delivery to target CPU: */ .irq_dest_mode = 0, - .target_cpus = bigsmp_target_cpus, + .target_cpus = default_target_cpus, .disable_esr = 1, .dest_logical = 0, .check_apicid_used = bigsmp_check_apicid_used, .check_apicid_present = bigsmp_check_apicid_present, - .vector_allocation_domain = bigsmp_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = bigsmp_init_apic_ldr, .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .setup_apic_routing = bigsmp_setup_apic_routing, .multi_timer_check = NULL, - .apicid_to_node = bigsmp_apicid_to_node, - .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, @@ -233,8 +188,7 @@ struct apic apic_bigsmp = { .set_apic_id = NULL, .apic_id_mask = 0xFF << 24, - .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = bigsmp_send_IPI_mask, .send_IPI_mask_allbutself = NULL, @@ -245,15 +199,39 @@ struct apic apic_bigsmp = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = default_wait_for_init_deassert, - + .wait_for_init_deassert = true, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, .write = native_apic_mem_write, + .eoi_write = native_apic_mem_write, .icr_read = native_apic_icr_read, .icr_write = native_apic_icr_write, .wait_icr_idle = native_apic_wait_icr_idle, .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, + + .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, }; + +void __init generic_bigsmp_probe(void) +{ + unsigned int cpu; + + if (!probe_bigsmp()) + return; + + apic = &apic_bigsmp; + + for_each_possible_cpu(cpu) { + if (early_per_cpu(x86_cpu_to_logical_apicid, + cpu) == BAD_APICID) + continue; + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = + bigsmp_early_logical_apicid(cpu); + } + + pr_info("Overriding APIC driver with %s\n", apic_bigsmp.name); +} + +apic_driver(apic_bigsmp); diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c deleted file mode 100644 index 8593582d802..00000000000 --- a/arch/x86/kernel/apic/es7000_32.c +++ /dev/null @@ -1,761 +0,0 @@ -/* - * Written by: Garry Forsgren, Unisys Corporation - * Natalie Protasevich, Unisys Corporation - * - * This file contains the code to configure and interface - * with Unisys ES7000 series hardware system manager. - * - * Copyright (c) 2003 Unisys Corporation. - * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar - * - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Unisys Corporation, Township Line & Union Meeting - * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: - * - * http://www.unisys.com - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/notifier.h> -#include <linux/spinlock.h> -#include <linux/cpumask.h> -#include <linux/threads.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/reboot.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/acpi.h> -#include <linux/init.h> -#include <linux/gfp.h> -#include <linux/nmi.h> -#include <linux/smp.h> -#include <linux/io.h> - -#include <asm/apicdef.h> -#include <asm/atomic.h> -#include <asm/fixmap.h> -#include <asm/mpspec.h> -#include <asm/setup.h> -#include <asm/apic.h> -#include <asm/ipi.h> - -/* - * ES7000 chipsets - */ - -#define NON_UNISYS 0 -#define ES7000_CLASSIC 1 -#define ES7000_ZORRO 2 - -#define MIP_REG 1 -#define MIP_PSAI_REG 4 - -#define MIP_BUSY 1 -#define MIP_SPIN 0xf0000 -#define MIP_VALID 0x0100000000000000ULL -#define MIP_SW_APIC 0x1020b - -#define MIP_PORT(val) ((val >> 32) & 0xffff) - -#define MIP_RD_LO(val) (val & 0xffffffff) - -struct mip_reg { - unsigned long long off_0x00; - unsigned long long off_0x08; - unsigned long long off_0x10; - unsigned long long off_0x18; - unsigned long long off_0x20; - unsigned long long off_0x28; - unsigned long long off_0x30; - unsigned long long off_0x38; -}; - -struct mip_reg_info { - unsigned long long mip_info; - unsigned long long delivery_info; - unsigned long long host_reg; - unsigned long long mip_reg; -}; - -struct psai { - unsigned long long entry_type; - unsigned long long addr; - unsigned long long bep_addr; -}; - -#ifdef CONFIG_ACPI - -struct es7000_oem_table { - struct acpi_table_header Header; - u32 OEMTableAddr; - u32 OEMTableSize; -}; - -static unsigned long oem_addrX; -static unsigned long oem_size; - -#endif - -/* - * ES7000 Globals - */ - -static volatile unsigned long *psai; -static struct mip_reg *mip_reg; -static struct mip_reg *host_reg; -static int mip_port; -static unsigned long mip_addr; -static unsigned long host_addr; - -int es7000_plat; - -/* - * GSI override for ES7000 platforms. - */ - - -static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) -{ - unsigned long vect = 0, psaival = 0; - - if (psai == NULL) - return -1; - - vect = ((unsigned long)__pa(eip)/0x1000) << 16; - psaival = (0x1000000 | vect | cpu); - - while (*psai & 0x1000000) - ; - - *psai = psaival; - - return 0; -} - -static int es7000_apic_is_cluster(void) -{ - /* MPENTIUMIII */ - if (boot_cpu_data.x86 == 6 && - (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11)) - return 1; - - return 0; -} - -static void setup_unisys(void) -{ - /* - * Determine the generation of the ES7000 currently running. - * - * es7000_plat = 1 if the machine is a 5xx ES7000 box - * es7000_plat = 2 if the machine is a x86_64 ES7000 box - * - */ - if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) - es7000_plat = ES7000_ZORRO; - else - es7000_plat = ES7000_CLASSIC; -} - -/* - * Parse the OEM Table: - */ -static int parse_unisys_oem(char *oemptr) -{ - int i; - int success = 0; - unsigned char type, size; - unsigned long val; - char *tp = NULL; - struct psai *psaip = NULL; - struct mip_reg_info *mi; - struct mip_reg *host, *mip; - - tp = oemptr; - - tp += 8; - - for (i = 0; i <= 6; i++) { - type = *tp++; - size = *tp++; - tp -= 2; - switch (type) { - case MIP_REG: - mi = (struct mip_reg_info *)tp; - val = MIP_RD_LO(mi->host_reg); - host_addr = val; - host = (struct mip_reg *)val; - host_reg = __va(host); - val = MIP_RD_LO(mi->mip_reg); - mip_port = MIP_PORT(mi->mip_info); - mip_addr = val; - mip = (struct mip_reg *)val; - mip_reg = __va(mip); - pr_debug("host_reg = 0x%lx\n", - (unsigned long)host_reg); - pr_debug("mip_reg = 0x%lx\n", - (unsigned long)mip_reg); - success++; - break; - case MIP_PSAI_REG: - psaip = (struct psai *)tp; - if (tp != NULL) { - if (psaip->addr) - psai = __va(psaip->addr); - else - psai = NULL; - success++; - } - break; - default: - break; - } - tp += size; - } - - if (success < 2) - es7000_plat = NON_UNISYS; - else - setup_unisys(); - - return es7000_plat; -} - -#ifdef CONFIG_ACPI -static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) -{ - struct acpi_table_header *header = NULL; - struct es7000_oem_table *table; - acpi_size tbl_size; - acpi_status ret; - int i = 0; - - for (;;) { - ret = acpi_get_table_with_size("OEM1", i++, &header, &tbl_size); - if (!ACPI_SUCCESS(ret)) - return -1; - - if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) - break; - - early_acpi_os_unmap_memory(header, tbl_size); - } - - table = (void *)header; - - oem_addrX = table->OEMTableAddr; - oem_size = table->OEMTableSize; - - early_acpi_os_unmap_memory(header, tbl_size); - - *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size); - - return 0; -} - -static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) -{ - if (!oem_addr) - return; - - __acpi_unmap_table((char *)oem_addr, oem_size); -} - -static int es7000_check_dsdt(void) -{ - struct acpi_table_header header; - - if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && - !strncmp(header.oem_id, "UNISYS", 6)) - return 1; - return 0; -} - -static int es7000_acpi_ret; - -/* Hook from generic ACPI tables.c */ -static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - unsigned long oem_addr = 0; - int check_dsdt; - int ret = 0; - - /* check dsdt at first to avoid clear fix_map for oem_addr */ - check_dsdt = es7000_check_dsdt(); - - if (!find_unisys_acpi_oem_table(&oem_addr)) { - if (check_dsdt) { - ret = parse_unisys_oem((char *)oem_addr); - } else { - setup_unisys(); - ret = 1; - } - /* - * we need to unmap it - */ - unmap_unisys_acpi_oem_table(oem_addr); - } - - es7000_acpi_ret = ret; - - return ret && !es7000_apic_is_cluster(); -} - -static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) -{ - int ret = es7000_acpi_ret; - - return ret && es7000_apic_is_cluster(); -} - -#else /* !CONFIG_ACPI: */ -static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - return 0; -} - -static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) -{ - return 0; -} -#endif /* !CONFIG_ACPI */ - -static void es7000_spin(int n) -{ - int i = 0; - - while (i++ < n) - rep_nop(); -} - -static int es7000_mip_write(struct mip_reg *mip_reg) -{ - int status = 0; - int spin; - - spin = MIP_SPIN; - while ((host_reg->off_0x38 & MIP_VALID) != 0) { - if (--spin <= 0) { - WARN(1, "Timeout waiting for Host Valid Flag\n"); - return -1; - } - es7000_spin(MIP_SPIN); - } - - memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); - outb(1, mip_port); - - spin = MIP_SPIN; - - while ((mip_reg->off_0x38 & MIP_VALID) == 0) { - if (--spin <= 0) { - WARN(1, "Timeout waiting for MIP Valid Flag\n"); - return -1; - } - es7000_spin(MIP_SPIN); - } - - status = (mip_reg->off_0x00 & 0xffff0000000000ULL) >> 48; - mip_reg->off_0x38 &= ~MIP_VALID; - - return status; -} - -static void es7000_enable_apic_mode(void) -{ - struct mip_reg es7000_mip_reg; - int mip_status; - - if (!es7000_plat) - return; - - pr_info("Enabling APIC mode.\n"); - memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); - es7000_mip_reg.off_0x00 = MIP_SW_APIC; - es7000_mip_reg.off_0x38 = MIP_VALID; - - while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) - WARN(1, "Command failed, status = %x\n", mip_status); -} - -static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - - -static void es7000_wait_for_init_deassert(atomic_t *deassert) -{ - while (!atomic_read(deassert)) - cpu_relax(); -} - -static unsigned int es7000_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence_phys(mask, vector); -} - -static void es7000_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); -} - -static void es7000_send_IPI_all(int vector) -{ - es7000_send_IPI_mask(cpu_online_mask, vector); -} - -static int es7000_apic_id_registered(void) -{ - return 1; -} - -static const struct cpumask *target_cpus_cluster(void) -{ - return cpu_all_mask; -} - -static const struct cpumask *es7000_target_cpus(void) -{ - return cpumask_of(smp_processor_id()); -} - -static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid) -{ - return 0; -} - -static unsigned long es7000_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -static unsigned long calculate_ldr(int cpu) -{ - unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu); - - return SET_APIC_LOGICAL_ID(id); -} - -/* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LdR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ -static void es7000_init_apic_ldr_cluster(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_CLUSTER); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static void es7000_init_apic_ldr(void) -{ - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_FLAT); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); -} - -static void es7000_setup_apic_routing(void) -{ - int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - - pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", - (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", - nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); -} - -static int es7000_apicid_to_node(int logical_apicid) -{ - return 0; -} - - -static int es7000_cpu_present_to_apicid(int mps_cpu) -{ - if (!mps_cpu) - return boot_cpu_physical_apicid; - else if (mps_cpu < nr_cpu_ids) - return per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static int cpu_id; - -static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap) -{ - physid_set_mask_of_physid(cpu_id, retmap); - ++cpu_id; -} - -/* Mapping from cpu number to logical apicid */ -static int es7000_cpu_to_logical_apicid(int cpu) -{ -#ifdef CONFIG_SMP - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return cpu_2_logical_apicid[cpu]; -#else - return logical_smp_processor_id(); -#endif -} - -static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - /* For clustered we don't have a good way to do this yet - hack */ - physids_promote(0xFFL, retmap); -} - -static int es7000_check_phys_apicid_present(int cpu_physical_apicid) -{ - boot_cpu_physical_apicid = read_apic_id(); - return 1; -} - -static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - unsigned int round = 0; - int cpu, uninitialized_var(apicid); - - /* - * The cpus in the mask must all be on the apic cluster. - */ - for_each_cpu(cpu, cpumask) { - int new_apicid = es7000_cpu_to_logical_apicid(cpu); - - if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - WARN(1, "Not a valid mask!"); - - return BAD_APICID; - } - apicid = new_apicid; - round++; - } - return apicid; -} - -static unsigned int -es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int apicid = es7000_cpu_to_logical_apicid(0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = es7000_cpu_mask_to_apicid(cpumask); - - free_cpumask_var(cpumask); - - return apicid; -} - -static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -static int probe_es7000(void) -{ - /* probed later in mptable/ACPI hooks */ - return 0; -} - -static int es7000_mps_ret; -static int es7000_mps_oem_check(struct mpc_table *mpc, char *oem, - char *productid) -{ - int ret = 0; - - if (mpc->oemptr) { - struct mpc_oemtable *oem_table = - (struct mpc_oemtable *)mpc->oemptr; - - if (!strncmp(oem, "UNISYS", 6)) - ret = parse_unisys_oem((char *)oem_table); - } - - es7000_mps_ret = ret; - - return ret && !es7000_apic_is_cluster(); -} - -static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem, - char *productid) -{ - int ret = es7000_mps_ret; - - return ret && es7000_apic_is_cluster(); -} - -/* We've been warned by a false positive warning.Use __refdata to keep calm. */ -struct apic __refdata apic_es7000_cluster = { - - .name = "es7000", - .probe = probe_es7000, - .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster, - .apic_id_registered = es7000_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* logical delivery broadcast to all procs: */ - .irq_dest_mode = 1, - - .target_cpus = target_cpus_cluster, - .disable_esr = 1, - .dest_logical = 0, - .check_apicid_used = es7000_check_apicid_used, - .check_apicid_present = es7000_check_apicid_present, - - .vector_allocation_domain = es7000_vector_allocation_domain, - .init_apic_ldr = es7000_init_apic_ldr_cluster, - - .ioapic_phys_id_map = es7000_ioapic_phys_id_map, - .setup_apic_routing = es7000_setup_apic_routing, - .multi_timer_check = NULL, - .apicid_to_node = es7000_apicid_to_node, - .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, - .cpu_present_to_apicid = es7000_cpu_present_to_apicid, - .apicid_to_cpu_present = es7000_apicid_to_cpu_present, - .setup_portio_remap = NULL, - .check_phys_apicid_present = es7000_check_phys_apicid_present, - .enable_apic_mode = es7000_enable_apic_mode, - .phys_pkg_id = es7000_phys_pkg_id, - .mps_oem_check = es7000_mps_oem_check_cluster, - - .get_apic_id = es7000_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, - - .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, - - .send_IPI_mask = es7000_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = es7000_send_IPI_allbutself, - .send_IPI_all = es7000_send_IPI_all, - .send_IPI_self = default_send_IPI_self, - - .wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip, - - .trampoline_phys_low = 0x467, - .trampoline_phys_high = 0x469, - - .wait_for_init_deassert = NULL, - - /* Nothing to do for most platforms, since cleared by the INIT cycle: */ - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = default_inquire_remote_apic, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, -}; - -struct apic __refdata apic_es7000 = { - - .name = "es7000", - .probe = probe_es7000, - .acpi_madt_oem_check = es7000_acpi_madt_oem_check, - .apic_id_registered = es7000_apic_id_registered, - - .irq_delivery_mode = dest_Fixed, - /* phys delivery to target CPUs: */ - .irq_dest_mode = 0, - - .target_cpus = es7000_target_cpus, - .disable_esr = 1, - .dest_logical = 0, - .check_apicid_used = es7000_check_apicid_used, - .check_apicid_present = es7000_check_apicid_present, - - .vector_allocation_domain = es7000_vector_allocation_domain, - .init_apic_ldr = es7000_init_apic_ldr, - - .ioapic_phys_id_map = es7000_ioapic_phys_id_map, - .setup_apic_routing = es7000_setup_apic_routing, - .multi_timer_check = NULL, - .apicid_to_node = es7000_apicid_to_node, - .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, - .cpu_present_to_apicid = es7000_cpu_present_to_apicid, - .apicid_to_cpu_present = es7000_apicid_to_cpu_present, - .setup_portio_remap = NULL, - .check_phys_apicid_present = es7000_check_phys_apicid_present, - .enable_apic_mode = es7000_enable_apic_mode, - .phys_pkg_id = es7000_phys_pkg_id, - .mps_oem_check = es7000_mps_oem_check, - - .get_apic_id = es7000_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, - - .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, - - .send_IPI_mask = es7000_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = es7000_send_IPI_allbutself, - .send_IPI_all = es7000_send_IPI_all, - .send_IPI_self = default_send_IPI_self, - - .trampoline_phys_low = 0x467, - .trampoline_phys_high = 0x469, - - .wait_for_init_deassert = es7000_wait_for_init_deassert, - - /* Nothing to do for most platforms, since cleared by the INIT cycle: */ - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = default_inquire_remote_apic, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, -}; diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index cefd6942f0e..6a1e71bde32 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -9,6 +9,7 @@ * */ #include <asm/apic.h> +#include <asm/nmi.h> #include <linux/cpumask.h> #include <linux/kdebug.h> @@ -16,51 +17,65 @@ #include <linux/kprobes.h> #include <linux/nmi.h> #include <linux/module.h> +#include <linux/delay.h> +#ifdef CONFIG_HARDLOCKUP_DETECTOR +u64 hw_nmi_get_sample_period(int watchdog_thresh) +{ + return (u64)(cpu_khz) * 1000 * watchdog_thresh; +} +#endif + +#ifdef arch_trigger_all_cpu_backtrace /* For reliability, we're prepared to waste bits here. */ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; -u64 hw_nmi_get_sample_period(void) -{ - return (u64)(cpu_khz) * 1000 * 60; -} +/* "in progress" flag of arch_trigger_all_cpu_backtrace */ +static unsigned long backtrace_flag; -#ifdef ARCH_HAS_NMI_WATCHDOG -void arch_trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(bool include_self) { int i; + int cpu = get_cpu(); + + if (test_and_set_bit(0, &backtrace_flag)) { + /* + * If there is already a trigger_all_cpu_backtrace() in progress + * (backtrace_flag == 1), don't output double cpu dump infos. + */ + put_cpu(); + return; + } cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); + if (!include_self) + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - printk(KERN_INFO "sending NMI to all CPUs:\n"); - apic->send_IPI_all(NMI_VECTOR); + if (!cpumask_empty(to_cpumask(backtrace_mask))) { + pr_info("sending NMI to %s CPUs:\n", + (include_self ? "all" : "other")); + apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR); + } /* Wait for up to 10 seconds for all CPUs to do the backtrace */ for (i = 0; i < 10 * 1000; i++) { if (cpumask_empty(to_cpumask(backtrace_mask))) break; mdelay(1); + touch_softlockup_watchdog(); } + + clear_bit(0, &backtrace_flag); + smp_mb__after_atomic(); + put_cpu(); } -static int __kprobes -arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, - unsigned long cmd, void *__args) +static int +arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { - struct die_args *args = __args; - struct pt_regs *regs; - int cpu = smp_processor_id(); - - switch (cmd) { - case DIE_NMI: - case DIE_NMI_IPI: - break; - - default: - return NOTIFY_DONE; - } + int cpu; - regs = args->regs; + cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; @@ -68,40 +83,20 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, arch_spin_lock(&lock); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); show_regs(regs); - dump_stack(); arch_spin_unlock(&lock); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - return NOTIFY_STOP; + return NMI_HANDLED; } - return NOTIFY_DONE; + return NMI_DONE; } - -static __read_mostly struct notifier_block backtrace_notifier = { - .notifier_call = arch_trigger_all_cpu_backtrace_handler, - .next = NULL, - .priority = 1 -}; +NOKPROBE_SYMBOL(arch_trigger_all_cpu_backtrace_handler); static int __init register_trigger_all_cpu_backtrace(void) { - register_die_notifier(&backtrace_notifier); + register_nmi_handler(NMI_LOCAL, arch_trigger_all_cpu_backtrace_handler, + 0, "arch_bt"); return 0; } early_initcall(register_trigger_all_cpu_backtrace); #endif - -/* STUB calls to mimic old nmi_watchdog behaviour */ -#if defined(CONFIG_X86_LOCAL_APIC) -unsigned int nmi_watchdog = NMI_NONE; -EXPORT_SYMBOL(nmi_watchdog); -void acpi_nmi_enable(void) { return; } -void acpi_nmi_disable(void) { return; } -#endif -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -EXPORT_SYMBOL(nmi_active); -int unknown_nmi_panic; -void cpu_nmi_set_wd_enabled(void) { return; } -void stop_apic_nmi_watchdog(void *unused) { return; } -void setup_apic_nmi_watchdog(void *unused) { return; } -int __init check_nmi_watchdog(void) { return 0; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7cc0a721f62..81e08eff05e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -30,16 +30,13 @@ #include <linux/compiler.h> #include <linux/acpi.h> #include <linux/module.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/msi.h> #include <linux/htirq.h> #include <linux/freezer.h> #include <linux/kthread.h> #include <linux/jiffies.h> /* time_after() */ #include <linux/slab.h> -#ifdef CONFIG_ACPI -#include <acpi/acpi_bus.h> -#endif #include <linux/bootmem.h> #include <linux/dmar.h> #include <linux/hpet.h> @@ -54,7 +51,6 @@ #include <asm/dma.h> #include <asm/timer.h> #include <asm/i8259.h> -#include <asm/nmi.h> #include <asm/msidef.h> #include <asm/hypertransport.h> #include <asm/setup.h> @@ -65,6 +61,7 @@ #include <asm/apic.h> #define __apicdebuginit(type) static type __init + #define for_each_irq_pin(entry, head) \ for (entry = head; entry; entry = entry->next) @@ -77,17 +74,40 @@ int sis_apic_bug = -1; static DEFINE_RAW_SPINLOCK(ioapic_lock); static DEFINE_RAW_SPINLOCK(vector_lock); -/* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; +static struct ioapic { + /* + * # of IRQ routing registers + */ + int nr_registers; + /* + * Saved state during suspend/resume, or while enabling intr-remap. + */ + struct IO_APIC_route_entry *saved_registers; + /* I/O APIC config */ + struct mpc_ioapic mp_config; + /* IO APIC gsi routing info */ + struct mp_ioapic_gsi gsi_config; + DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); +} ioapics[MAX_IO_APICS]; -/* I/O APIC entries */ -struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; -int nr_ioapics; +#define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver + +int mpc_ioapic_id(int ioapic_idx) +{ + return ioapics[ioapic_idx].mp_config.apicid; +} -/* IO APIC gsi routing info */ -struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; +unsigned int mpc_ioapic_addr(int ioapic_idx) +{ + return ioapics[ioapic_idx].mp_config.apicaddr; +} + +struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) +{ + return &ioapics[ioapic_idx].gsi_config; +} + +int nr_ioapics; /* The one past the highest gsi number used */ u32 gsi_top; @@ -101,7 +121,7 @@ int mp_irq_entries; /* GSI interrupts */ static int nr_irqs_gsi = NR_IRQS_LEGACY; -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) +#ifdef CONFIG_EISA int mp_bus_id_to_type[MAX_MP_BUSSES]; #endif @@ -109,7 +129,10 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int skip_ioapic_setup; -void arch_disable_smp_support(void) +/** + * disable_ioapic_support() - disables ioapic support at runtime + */ +void disable_ioapic_support(void) { #ifdef CONFIG_PCI noioapicquirk = 1; @@ -121,11 +144,34 @@ void arch_disable_smp_support(void) static int __init parse_noapic(char *str) { /* disable IO-APIC */ - arch_disable_smp_support(); + disable_ioapic_support(); return 0; } early_param("noapic", parse_noapic); +static int io_apic_setup_irq_pin(unsigned int irq, int node, + struct io_apic_irq_attr *attr); + +/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ +void mp_save_irq(struct mpc_intsrc *m) +{ + int i; + + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," + " IRQ %02x, APIC ID %x, APIC INT %02x\n", + m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus, + m->srcbusirq, m->dstapic, m->dstirq); + + for (i = 0; i < mp_irq_entries; i++) { + if (!memcmp(&mp_irqs[i], m, sizeof(*m))) + return; + } + + memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m)); + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); +} + struct irq_pin_list { int apic, pin; struct irq_pin_list *next; @@ -136,51 +182,50 @@ static struct irq_pin_list *alloc_irq_pin_list(int node) return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node); } + /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -#ifdef CONFIG_SPARSE_IRQ static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; -#else -static struct irq_cfg irq_cfgx[NR_IRQS]; -#endif int __init arch_early_irq_init(void) { struct irq_cfg *cfg; int count, node, i; - if (!legacy_pic->nr_legacy_irqs) { - nr_irqs_gsi = 0; + if (!legacy_pic->nr_legacy_irqs) io_apic_irqs = ~0UL; + + for (i = 0; i < nr_ioapics; i++) { + ioapics[i].saved_registers = + kzalloc(sizeof(struct IO_APIC_route_entry) * + ioapics[i].nr_registers, GFP_KERNEL); + if (!ioapics[i].saved_registers) + pr_err("IOAPIC %d: suspend/resume impossible!\n", i); } cfg = irq_cfgx; count = ARRAY_SIZE(irq_cfgx); node = cpu_to_node(0); - /* Make sure the legacy interrupts are marked in the bitmap */ - irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs); - for (i = 0; i < count; i++) { - set_irq_chip_data(i, &cfg[i]); + irq_set_chip_data(i, &cfg[i]); zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node); zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node); /* * For legacy IRQ's, start with assigning irq0 to irq15 to - * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. + * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's. */ if (i < legacy_pic->nr_legacy_irqs) { cfg[i].vector = IRQ0_VECTOR + i; - cpumask_set_cpu(0, cfg[i].domain); + cpumask_setall(cfg[i].domain); } } return 0; } -#ifdef CONFIG_SPARSE_IRQ static struct irq_cfg *irq_cfg(unsigned int irq) { - return get_irq_chip_data(irq); + return irq_get_chip_data(irq); } static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) @@ -206,28 +251,12 @@ static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { if (!cfg) return; - set_irq_chip_data(at, NULL); + irq_set_chip_data(at, NULL); free_cpumask_var(cfg->domain); free_cpumask_var(cfg->old_domain); kfree(cfg); } -#else - -struct irq_cfg *irq_cfg(unsigned int irq) -{ - return irq < nr_irqs ? irq_cfgx + irq : NULL; -} - -static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) -{ - return irq_cfgx + irq; -} - -static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { } - -#endif - static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) { int res = irq_alloc_desc_at(at, node); @@ -236,30 +265,19 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) if (res < 0) { if (res != -EEXIST) return NULL; - cfg = get_irq_chip_data(at); + cfg = irq_get_chip_data(at); if (cfg) return cfg; } cfg = alloc_irq_cfg(at, node); if (cfg) - set_irq_chip_data(at, cfg); + irq_set_chip_data(at, cfg); else irq_free_desc(at); return cfg; } -static int alloc_irq_from(unsigned int from, int node) -{ - return irq_alloc_desc_from(from, node); -} - -static void free_irq_at(unsigned int at, struct irq_cfg *cfg) -{ - free_irq_cfg(at, cfg); - irq_free_desc(at); -} - struct io_apic { unsigned int index; unsigned int unused[3]; @@ -271,25 +289,26 @@ struct io_apic { static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) { return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); + + (mpc_ioapic_addr(idx) & ~PAGE_MASK); } -static inline void io_apic_eoi(unsigned int apic, unsigned int vector) +void io_apic_eoi(unsigned int apic, unsigned int vector) { struct io_apic __iomem *io_apic = io_apic_base(apic); writel(vector, &io_apic->eoi); } -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +unsigned int native_io_apic_read(unsigned int apic, unsigned int reg) { struct io_apic __iomem *io_apic = io_apic_base(apic); writel(reg, &io_apic->index); return readl(&io_apic->data); } -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(reg, &io_apic->index); writel(value, &io_apic->data); } @@ -300,7 +319,7 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i * * Older SiS APIC requires we rewrite the index register */ -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) +void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); @@ -309,42 +328,30 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned writel(value, &io_apic->data); } -static bool io_apic_level_ack_pending(struct irq_cfg *cfg) -{ - struct irq_pin_list *entry; - unsigned long flags; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - int pin; - - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - /* Is the remote IRR bit set? */ - if (reg & IO_APIC_REDIR_REMOTE_IRR) { - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - return true; - } - } - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - - return false; -} - union entry_union { struct { u32 w1, w2; }; struct IO_APIC_route_entry entry; }; +static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin) +{ + union entry_union eu; + + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); + + return eu.entry; +} + static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) { union entry_union eu; unsigned long flags; + raw_spin_lock_irqsave(&ioapic_lock, flags); - eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); - eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); + eu.entry = __ioapic_read_entry(apic, pin); raw_spin_unlock_irqrestore(&ioapic_lock, flags); + return eu.entry; } @@ -354,8 +361,7 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) * the interrupt, and we need to make sure the entry is fully populated * before that happens. */ -static void -__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { union entry_union eu = {{0, 0}}; @@ -367,6 +373,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { unsigned long flags; + raw_spin_lock_irqsave(&ioapic_lock, flags); __ioapic_write_entry(apic, pin, e); raw_spin_unlock_irqrestore(&ioapic_lock, flags); @@ -393,8 +400,7 @@ static void ioapic_mask_entry(int apic, int pin) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static int -__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) +static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { struct irq_pin_list **last, *entry; @@ -408,8 +414,8 @@ __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) entry = alloc_irq_pin_list(node); if (!entry) { - printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", - node, apic, pin); + pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", + node, apic, pin); return -ENOMEM; } entry->apic = apic; @@ -472,18 +478,6 @@ static void io_apic_modify_irq(struct irq_cfg *cfg, __io_apic_modify_irq(entry, mask_and, mask_or, final); } -static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry) -{ - __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED, NULL); -} - -static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) -{ - __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER, NULL); -} - static void io_apic_sync(struct irq_pin_list *entry) { /* @@ -491,6 +485,7 @@ static void io_apic_sync(struct irq_pin_list *entry) * a dummy read from the IO-APIC */ struct io_apic __iomem *io_apic; + io_apic = io_apic_base(entry->apic); readl(&io_apic->data); } @@ -528,6 +523,58 @@ static void unmask_ioapic_irq(struct irq_data *data) unmask_ioapic(data->chip_data); } +/* + * IO-APIC versions below 0x20 don't support EOI register. + * For the record, here is the information about various versions: + * 0Xh 82489DX + * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant + * 2Xh I/O(x)APIC which is PCI 2.2 Compliant + * 30h-FFh Reserved + * + * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic + * version as 0x2. This is an error with documentation and these ICH chips + * use io-apic's of version 0x20. + * + * For IO-APIC's with EOI register, we use that to do an explicit EOI. + * Otherwise, we simulate the EOI message manually by changing the trigger + * mode to edge and then back to level, with RTE being masked during this. + */ +void native_eoi_ioapic_pin(int apic, int pin, int vector) +{ + if (mpc_ioapic_ver(apic) >= 0x20) { + io_apic_eoi(apic, vector); + } else { + struct IO_APIC_route_entry entry, entry1; + + entry = entry1 = __ioapic_read_entry(apic, pin); + + /* + * Mask the entry and change the trigger mode to edge. + */ + entry1.mask = 1; + entry1.trigger = IOAPIC_EDGE; + + __ioapic_write_entry(apic, pin, entry1); + + /* + * Restore the previous level triggered entry. + */ + __ioapic_write_entry(apic, pin, entry); + } +} + +void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry; + unsigned long flags; + + raw_spin_lock_irqsave(&ioapic_lock, flags); + for_each_irq_pin(entry, cfg->irq_2_pin) + x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin, + cfg->vector); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); +} + static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; @@ -536,10 +583,44 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) entry = ioapic_read_entry(apic, pin); if (entry.delivery_mode == dest_SMI) return; + /* - * Disable it in the IO-APIC irq-routing table: + * Make sure the entry is masked and re-read the contents to check + * if it is a level triggered pin and if the remote-IRR is set. + */ + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + entry = ioapic_read_entry(apic, pin); + } + + if (entry.irr) { + unsigned long flags; + + /* + * Make sure the trigger mode is set to level. Explicit EOI + * doesn't clear the remote-IRR if the trigger mode is not + * set to level. + */ + if (!entry.trigger) { + entry.trigger = IOAPIC_LEVEL; + ioapic_write_entry(apic, pin, entry); + } + + raw_spin_lock_irqsave(&ioapic_lock, flags); + x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + } + + /* + * Clear the rest of the bits in the IO-APIC RTE except for the mask + * bit. */ ioapic_mask_entry(apic, pin); + entry = ioapic_read_entry(apic, pin); + if (entry.irr) + pr_err("Unable to reset IRR for apic: %d, pin :%d\n", + mpc_ioapic_id(apic), pin); } static void clear_IO_APIC (void) @@ -547,7 +628,7 @@ static void clear_IO_APIC (void) int apic, pin; for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + for (pin = 0; pin < ioapics[apic].nr_registers; pin++) clear_IO_APIC_pin(apic, pin); } @@ -589,74 +670,43 @@ static int __init ioapic_pirq_setup(char *str) __setup("pirq=", ioapic_pirq_setup); #endif /* CONFIG_X86_32 */ -struct IO_APIC_route_entry **alloc_ioapic_entries(void) -{ - int apic; - struct IO_APIC_route_entry **ioapic_entries; - - ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, - GFP_KERNEL); - if (!ioapic_entries) - return 0; - - for (apic = 0; apic < nr_ioapics; apic++) { - ioapic_entries[apic] = - kzalloc(sizeof(struct IO_APIC_route_entry) * - nr_ioapic_registers[apic], GFP_KERNEL); - if (!ioapic_entries[apic]) - goto nomem; - } - - return ioapic_entries; - -nomem: - while (--apic >= 0) - kfree(ioapic_entries[apic]); - kfree(ioapic_entries); - - return 0; -} - /* * Saves all the IO-APIC RTE's */ -int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) +int save_ioapic_entries(void) { int apic, pin; - - if (!ioapic_entries) - return -ENOMEM; + int err = 0; for (apic = 0; apic < nr_ioapics; apic++) { - if (!ioapic_entries[apic]) - return -ENOMEM; + if (!ioapics[apic].saved_registers) { + err = -ENOMEM; + continue; + } - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - ioapic_entries[apic][pin] = + for (pin = 0; pin < ioapics[apic].nr_registers; pin++) + ioapics[apic].saved_registers[pin] = ioapic_read_entry(apic, pin); } - return 0; + return err; } /* * Mask all IO APIC entries. */ -void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) +void mask_ioapic_entries(void) { int apic, pin; - if (!ioapic_entries) - return; - for (apic = 0; apic < nr_ioapics; apic++) { - if (!ioapic_entries[apic]) - break; + if (!ioapics[apic].saved_registers) + continue; - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { struct IO_APIC_route_entry entry; - entry = ioapic_entries[apic][pin]; + entry = ioapics[apic].saved_registers[pin]; if (!entry.mask) { entry.mask = 1; ioapic_write_entry(apic, pin, entry); @@ -666,46 +716,33 @@ void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) } /* - * Restore IO APIC entries which was saved in ioapic_entries. + * Restore IO APIC entries which was saved in the ioapic structure. */ -int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) +int restore_ioapic_entries(void) { int apic, pin; - if (!ioapic_entries) - return -ENOMEM; - for (apic = 0; apic < nr_ioapics; apic++) { - if (!ioapic_entries[apic]) - return -ENOMEM; + if (!ioapics[apic].saved_registers) + continue; - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + for (pin = 0; pin < ioapics[apic].nr_registers; pin++) ioapic_write_entry(apic, pin, - ioapic_entries[apic][pin]); + ioapics[apic].saved_registers[pin]); } return 0; } -void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) -{ - int apic; - - for (apic = 0; apic < nr_ioapics; apic++) - kfree(ioapic_entries[apic]); - - kfree(ioapic_entries); -} - /* * Find the IRQ entry number of a certain pin. */ -static int find_irq_entry(int apic, int pin, int type) +static int find_irq_entry(int ioapic_idx, int pin, int type) { int i; for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].irqtype == type && - (mp_irqs[i].dstapic == mp_ioapics[apic].apicid || + (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) || mp_irqs[i].dstapic == MP_APIC_ALL) && mp_irqs[i].dstirq == pin) return i; @@ -744,18 +781,19 @@ static int __init find_isa_irq_apic(int irq, int type) (mp_irqs[i].srcbusirq == irq)) break; } + if (i < mp_irq_entries) { - int apic; - for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic) - return apic; - } + int ioapic_idx; + + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic) + return ioapic_idx; } return -1; } -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) +#ifdef CONFIG_EISA /* * EISA Edge/Level control register, ELCR */ @@ -792,13 +830,7 @@ static int EISA_ELCR(unsigned int irq) #define default_PCI_trigger(idx) (1) #define default_PCI_polarity(idx) (1) -/* MCA interrupts are always polarity zero level triggered, - * when listed as conforming in the MP table. */ - -#define default_MCA_trigger(idx) (1) -#define default_MCA_polarity(idx) default_ISA_polarity(idx) - -static int MPBIOS_polarity(int idx) +static int irq_polarity(int idx) { int bus = mp_irqs[idx].srcbus; int polarity; @@ -821,7 +853,7 @@ static int MPBIOS_polarity(int idx) } case 2: /* reserved */ { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); polarity = 1; break; } @@ -832,7 +864,7 @@ static int MPBIOS_polarity(int idx) } default: /* invalid */ { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); polarity = 1; break; } @@ -840,7 +872,7 @@ static int MPBIOS_polarity(int idx) return polarity; } -static int MPBIOS_trigger(int idx) +static int irq_trigger(int idx) { int bus = mp_irqs[idx].srcbus; int trigger; @@ -855,7 +887,7 @@ static int MPBIOS_trigger(int idx) trigger = default_ISA_trigger(idx); else trigger = default_PCI_trigger(idx); -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) +#ifdef CONFIG_EISA switch (mp_bus_id_to_type[bus]) { case MP_BUS_ISA: /* ISA pin */ { @@ -872,14 +904,9 @@ static int MPBIOS_trigger(int idx) /* set before the switch */ break; } - case MP_BUS_MCA: /* MCA pin */ - { - trigger = default_MCA_trigger(idx); - break; - } default: { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); trigger = 1; break; } @@ -893,7 +920,7 @@ static int MPBIOS_trigger(int idx) } case 2: /* reserved */ { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); trigger = 1; break; } @@ -904,7 +931,7 @@ static int MPBIOS_trigger(int idx) } default: /* invalid */ { - printk(KERN_WARNING "broken BIOS!!\n"); + pr_warn("broken BIOS!!\n"); trigger = 0; break; } @@ -912,31 +939,22 @@ static int MPBIOS_trigger(int idx) return trigger; } -static inline int irq_polarity(int idx) -{ - return MPBIOS_polarity(idx); -} - -static inline int irq_trigger(int idx) -{ - return MPBIOS_trigger(idx); -} - static int pin_2_irq(int idx, int apic, int pin) { int irq; int bus = mp_irqs[idx].srcbus; + struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(apic); /* * Debugging check, we are in big trouble if this message pops up! */ if (mp_irqs[idx].dstirq != pin) - printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); + pr_err("broken BIOS or MPTABLE parser, ayiee!!\n"); if (test_bit(bus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; } else { - u32 gsi = mp_gsi_routing[apic].gsi_base + pin; + u32 gsi = gsi_cfg->gsi_base + pin; if (gsi >= NR_IRQS_LEGACY) irq = gsi; @@ -973,7 +991,7 @@ static int pin_2_irq(int idx, int apic, int pin) int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, struct io_apic_irq_attr *irq_attr) { - int apic, i, best_guess = -1; + int ioapic_idx, i, best_guess = -1; apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", @@ -986,8 +1004,8 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, for (i = 0; i < mp_irq_entries; i++) { int lbus = mp_irqs[i].srcbus; - for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic || mp_irqs[i].dstapic == MP_APIC_ALL) break; @@ -995,13 +1013,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, !mp_irqs[i].irqtype && (bus == lbus) && (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); + int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq); - if (!(apic || IO_APIC_IRQ(irq))) + if (!(ioapic_idx || IO_APIC_IRQ(irq))) continue; if (pin == (mp_irqs[i].srcbusirq & 3)) { - set_io_apic_irq_attr(irq_attr, apic, + set_io_apic_irq_attr(irq_attr, ioapic_idx, mp_irqs[i].dstirq, irq_trigger(i), irq_polarity(i)); @@ -1012,7 +1030,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, * best-guess fuzzy result for broken mptables. */ if (best_guess < 0) { - set_io_apic_irq_attr(irq_attr, apic, + set_io_apic_irq_attr(irq_attr, ioapic_idx, mp_irqs[i].dstirq, irq_trigger(i), irq_polarity(i)); @@ -1052,8 +1070,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) * 0x80, because int 0x80 is hm, kind of importantish. ;) */ static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; - static int current_offset = VECTOR_OFFSET_START % 8; - unsigned int old_vector; + static int current_offset = VECTOR_OFFSET_START % 16; int cpu, err; cpumask_var_t tmp_mask; @@ -1063,48 +1080,61 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) return -ENOMEM; - old_vector = cfg->vector; - if (old_vector) { - cpumask_and(tmp_mask, mask, cpu_online_mask); - cpumask_and(tmp_mask, cfg->domain, tmp_mask); - if (!cpumask_empty(tmp_mask)) { - free_cpumask_var(tmp_mask); - return 0; - } - } - /* Only try and allocate irqs on cpus that are present */ err = -ENOSPC; - for_each_cpu_and(cpu, mask, cpu_online_mask) { - int new_cpu; - int vector, offset; + cpumask_clear(cfg->old_domain); + cpu = cpumask_first_and(mask, cpu_online_mask); + while (cpu < nr_cpu_ids) { + int new_cpu, vector, offset; - apic->vector_allocation_domain(cpu, tmp_mask); + apic->vector_allocation_domain(cpu, tmp_mask, mask); + + if (cpumask_subset(tmp_mask, cfg->domain)) { + err = 0; + if (cpumask_equal(tmp_mask, cfg->domain)) + break; + /* + * New cpumask using the vector is a proper subset of + * the current in use mask. So cleanup the vector + * allocation for the members that are not used anymore. + */ + cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); + cfg->move_in_progress = + cpumask_intersects(cfg->old_domain, cpu_online_mask); + cpumask_and(cfg->domain, cfg->domain, tmp_mask); + break; + } vector = current_vector; offset = current_offset; next: - vector += 8; + vector += 16; if (vector >= first_system_vector) { - /* If out of vectors on large boxen, must share them. */ - offset = (offset + 1) % 8; + offset = (offset + 1) % 16; vector = FIRST_EXTERNAL_VECTOR + offset; } - if (unlikely(current_vector == vector)) + + if (unlikely(current_vector == vector)) { + cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); + cpumask_andnot(tmp_mask, mask, cfg->old_domain); + cpu = cpumask_first_and(tmp_mask, cpu_online_mask); continue; + } if (test_bit(vector, used_vectors)) goto next; - for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) - if (per_cpu(vector_irq, new_cpu)[vector] != -1) + for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) { + if (per_cpu(vector_irq, new_cpu)[vector] > VECTOR_UNDEFINED) goto next; + } /* Found one! */ current_vector = vector; current_offset = offset; - if (old_vector) { - cfg->move_in_progress = 1; + if (cfg->vector) { cpumask_copy(cfg->old_domain, cfg->domain); + cfg->move_in_progress = + cpumask_intersects(cfg->old_domain, cpu_online_mask); } for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) per_cpu(vector_irq, new_cpu)[vector] = irq; @@ -1136,7 +1166,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) vector = cfg->vector; for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) - per_cpu(vector_irq, cpu)[vector] = -1; + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; cfg->vector = 0; cpumask_clear(cfg->domain); @@ -1144,11 +1174,10 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) if (likely(!cfg->move_in_progress)) return; for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; - vector++) { + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) continue; - per_cpu(vector_irq, cpu)[vector] = -1; + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; break; } } @@ -1169,15 +1198,9 @@ void __setup_vector_irq(int cpu) raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_active_irq(irq) { - cfg = get_irq_chip_data(irq); + cfg = irq_get_chip_data(irq); if (!cfg) continue; - /* - * If it is a legacy IRQ handled by the legacy PIC, this cpu - * will be part of the irq_cfg's domain. - */ - if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq)) - cpumask_set_cpu(cpu, cfg->domain); if (!cpumask_test_cpu(cpu, cfg->domain)) continue; @@ -1187,22 +1210,17 @@ void __setup_vector_irq(int cpu) /* Mark the free vectors */ for (vector = 0; vector < NR_VECTORS; ++vector) { irq = per_cpu(vector_irq, cpu)[vector]; - if (irq < 0) + if (irq <= VECTOR_UNDEFINED) continue; cfg = irq_cfg(irq); if (!cpumask_test_cpu(cpu, cfg->domain)) - per_cpu(vector_irq, cpu)[vector] = -1; + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; } raw_spin_unlock(&vector_lock); } static struct irq_chip ioapic_chip; -static struct irq_chip ir_ioapic_chip; - -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 #ifdef CONFIG_X86_32 static inline int IO_APIC_irq_trigger(int irq) @@ -1210,7 +1228,7 @@ static inline int IO_APIC_irq_trigger(int irq) int apic, idx, pin; for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { idx = find_irq_entry(apic, pin, mp_INT); if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) return irq_trigger(idx); @@ -1228,174 +1246,120 @@ static inline int IO_APIC_irq_trigger(int irq) } #endif -static void ioapic_register_intr(unsigned int irq, unsigned long trigger) +static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, + unsigned long trigger) { + struct irq_chip *chip = &ioapic_chip; + irq_flow_handler_t hdl; + bool fasteoi; if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) + trigger == IOAPIC_LEVEL) { irq_set_status_flags(irq, IRQ_LEVEL); - else + fasteoi = true; + } else { irq_clear_status_flags(irq, IRQ_LEVEL); - - if (irq_remapped(get_irq_chip_data(irq))) { - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - if (trigger) - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_edge_irq, "edge"); - return; + fasteoi = false; } - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_edge_irq, "edge"); + if (setup_remapped_irq(irq, cfg, chip)) + fasteoi = trigger != 0; + + hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; + irq_set_chip_and_handler_name(irq, chip, hdl, + fasteoi ? "fasteoi" : "edge"); } -static int setup_ioapic_entry(int apic_id, int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, int trigger, - int polarity, int vector, int pin) +int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) { - /* - * add it to the IO-APIC irq-routing table: - */ - memset(entry,0,sizeof(*entry)); - - if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); - struct irte irte; - struct IR_IO_APIC_route_entry *ir_entry = - (struct IR_IO_APIC_route_entry *) entry; - int index; - - if (!iommu) - panic("No mapping iommu for ioapic %d\n", apic_id); - - index = alloc_irte(iommu, irq, 1); - if (index < 0) - panic("Failed to allocate IRTE for ioapic %d\n", apic_id); - - prepare_irte(&irte, vector, destination); - - /* Set source-id of interrupt request */ - set_ioapic_sid(&irte, apic_id); - - modify_irte(irq, &irte); - - ir_entry->index2 = (index >> 15) & 0x1; - ir_entry->zero = 0; - ir_entry->format = 1; - ir_entry->index = (index & 0x7fff); - /* - * IO-APIC RTE will be configured with virtual vector. - * irq handler will do the explicit EOI to the io-apic. - */ - ir_entry->vector = pin; - } else { - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->dest = destination; - entry->vector = vector; - } + memset(entry, 0, sizeof(*entry)); - entry->mask = 0; /* enable IRQ */ - entry->trigger = trigger; - entry->polarity = polarity; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->dest = destination; + entry->vector = vector; + entry->mask = 0; /* enable IRQ */ + entry->trigger = attr->trigger; + entry->polarity = attr->polarity; - /* Mask level triggered irqs. + /* + * Mask level triggered irqs. * Use IRQ_DELAYED_DISABLE for edge triggered irqs. */ - if (trigger) + if (attr->trigger) entry->mask = 1; + return 0; } -static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq, - struct irq_cfg *cfg, int trigger, int polarity) +static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, + struct io_apic_irq_attr *attr) { struct IO_APIC_route_entry entry; unsigned int dest; if (!IO_APIC_IRQ(irq)) return; - /* - * For legacy irqs, cfg->domain starts with cpu 0 for legacy - * controllers like 8259. Now that IO-APIC can handle this irq, update - * the cfg->domain. - */ - if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) - apic->vector_allocation_domain(0, cfg->domain); if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; - dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); + if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(), + &dest)) { + pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n", + mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); + __clear_irq_vector(irq, cfg); + + return; + } apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " - "IRQ %d Mode:%i Active:%i)\n", - apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector, - irq, trigger, polarity); + "IRQ %d Mode:%i Active:%i Dest:%d)\n", + attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, + cfg->vector, irq, attr->trigger, attr->polarity, dest); - - if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, - dest, trigger, polarity, cfg->vector, pin)) { - printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic_id].apicid, pin); + if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) { + pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); __clear_irq_vector(irq, cfg); + return; } - ioapic_register_intr(irq, trigger); + ioapic_register_intr(irq, cfg, attr->trigger); if (irq < legacy_pic->nr_legacy_irqs) legacy_pic->mask(irq); - ioapic_write_entry(apic_id, pin, entry); + ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry); } -static struct { - DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); -} mp_ioapic_routing[MAX_IO_APICS]; - -static void __init setup_IO_APIC_irqs(void) +static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin) { - int apic_id, pin, idx, irq, notcon = 0; - int node = cpu_to_node(0); - struct irq_cfg *cfg; + if (idx != -1) + return false; - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n", + mpc_ioapic_id(ioapic_idx), pin); + return true; +} + +static void __init __io_apic_setup_irqs(unsigned int ioapic_idx) +{ + int idx, node = cpu_to_node(0); + struct io_apic_irq_attr attr; + unsigned int pin, irq; - for (apic_id = 0; apic_id < nr_ioapics; apic_id++) - for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { - idx = find_irq_entry(apic_id, pin, mp_INT); - if (idx == -1) { - if (!notcon) { - notcon = 1; - apic_printk(APIC_VERBOSE, - KERN_DEBUG " %d-%d", - mp_ioapics[apic_id].apicid, pin); - } else - apic_printk(APIC_VERBOSE, " %d-%d", - mp_ioapics[apic_id].apicid, pin); + for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) { + idx = find_irq_entry(ioapic_idx, pin, mp_INT); + if (io_apic_pin_not_connected(idx, ioapic_idx, pin)) continue; - } - if (notcon) { - apic_printk(APIC_VERBOSE, - " (apicid-pin) not connected\n"); - notcon = 0; - } - irq = pin_2_irq(idx, apic_id, pin); + irq = pin_2_irq(idx, ioapic_idx, pin); - if ((apic_id > 0) && (irq > 16)) + if ((ioapic_idx > 0) && (irq > 16)) continue; /* @@ -1403,25 +1367,24 @@ static void __init setup_IO_APIC_irqs(void) * installed and if it returns 1: */ if (apic->multi_timer_check && - apic->multi_timer_check(apic_id, irq)) + apic->multi_timer_check(ioapic_idx, irq)) continue; - cfg = alloc_irq_and_cfg_at(irq, node); - if (!cfg) - continue; + set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), + irq_polarity(idx)); - add_pin_to_irq_node(cfg, node, apic_id, pin); - /* - * don't mark it in pin_programmed, so later acpi could - * set it correctly when irq < 16 - */ - setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx), - irq_polarity(idx)); + io_apic_setup_irq_pin(irq, node, &attr); } +} - if (notcon) - apic_printk(APIC_VERBOSE, - " (apicid-pin) not connected\n"); +static void __init setup_IO_APIC_irqs(void) +{ + unsigned int ioapic_idx; + + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + __io_apic_setup_irqs(ioapic_idx); } /* @@ -1431,54 +1394,41 @@ static void __init setup_IO_APIC_irqs(void) */ void setup_IO_APIC_irq_extra(u32 gsi) { - int apic_id = 0, pin, idx, irq, node = cpu_to_node(0); - struct irq_cfg *cfg; + int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0); + struct io_apic_irq_attr attr; /* * Convert 'gsi' to 'ioapic.pin'. */ - apic_id = mp_find_ioapic(gsi); - if (apic_id < 0) + ioapic_idx = mp_find_ioapic(gsi); + if (ioapic_idx < 0) return; - pin = mp_find_ioapic_pin(apic_id, gsi); - idx = find_irq_entry(apic_id, pin, mp_INT); + pin = mp_find_ioapic_pin(ioapic_idx, gsi); + idx = find_irq_entry(ioapic_idx, pin, mp_INT); if (idx == -1) return; - irq = pin_2_irq(idx, apic_id, pin); + irq = pin_2_irq(idx, ioapic_idx, pin); /* Only handle the non legacy irqs on secondary ioapics */ - if (apic_id == 0 || irq < NR_IRQS_LEGACY) - return; - - cfg = alloc_irq_and_cfg_at(irq, node); - if (!cfg) + if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY) return; - add_pin_to_irq_node(cfg, node, apic_id, pin); - - if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { - pr_debug("Pin %d-%d already programmed\n", - mp_ioapics[apic_id].apicid, pin); - return; - } - set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); + set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), + irq_polarity(idx)); - setup_ioapic_irq(apic_id, pin, irq, cfg, - irq_trigger(idx), irq_polarity(idx)); + io_apic_setup_irq_pin_once(irq, node, &attr); } /* * Set up the timer pin, possibly with the 8259A-master behind. */ -static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, - int vector) +static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, + unsigned int pin, int vector) { struct IO_APIC_route_entry entry; - - if (intr_remapping_enabled) - return; + unsigned int dest; memset(&entry, 0, sizeof(entry)); @@ -1486,9 +1436,13 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, * We use logical delivery to get the timer IRQ * to the first CPU. */ + if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(), + apic->target_cpus(), &dest))) + dest = BAD_APICID; + entry.dest_mode = apic->irq_dest_mode; entry.mask = 0; /* don't mask IRQ for edge */ - entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); + entry.dest = dest; entry.delivery_mode = apic->irq_delivery_mode; entry.polarity = 0; entry.trigger = 0; @@ -1498,60 +1452,105 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, * The timer IRQ doesn't have to know that behind the * scene we may have a 8259A-master in AEOI mode ... */ - set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); + irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, + "edge"); /* * Add it to the IO-APIC irq-routing table: */ - ioapic_write_entry(apic_id, pin, entry); + ioapic_write_entry(ioapic_idx, pin, entry); } +void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries) +{ + int i; + + pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n"); -__apicdebuginit(void) print_IO_APIC(void) + for (i = 0; i <= nr_entries; i++) { + struct IO_APIC_route_entry entry; + + entry = ioapic_read_entry(apic, i); + + pr_debug(" %02x %02X ", i, entry.dest); + pr_cont("%1d %1d %1d %1d %1d " + "%1d %1d %02X\n", + entry.mask, + entry.trigger, + entry.irr, + entry.polarity, + entry.delivery_status, + entry.dest_mode, + entry.delivery_mode, + entry.vector); + } +} + +void intel_ir_io_apic_print_entries(unsigned int apic, + unsigned int nr_entries) +{ + int i; + + pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n"); + + for (i = 0; i <= nr_entries; i++) { + struct IR_IO_APIC_route_entry *ir_entry; + struct IO_APIC_route_entry entry; + + entry = ioapic_read_entry(apic, i); + + ir_entry = (struct IR_IO_APIC_route_entry *)&entry; + + pr_debug(" %02x %04X ", i, ir_entry->index); + pr_cont("%1d %1d %1d %1d %1d " + "%1d %1d %X %02X\n", + ir_entry->format, + ir_entry->mask, + ir_entry->trigger, + ir_entry->irr, + ir_entry->polarity, + ir_entry->delivery_status, + ir_entry->index2, + ir_entry->zero, + ir_entry->vector); + } +} + +void ioapic_zap_locks(void) +{ + raw_spin_lock_init(&ioapic_lock); +} + +__apicdebuginit(void) print_IO_APIC(int ioapic_idx) { - int apic, i; union IO_APIC_reg_00 reg_00; union IO_APIC_reg_01 reg_01; union IO_APIC_reg_02 reg_02; union IO_APIC_reg_03 reg_03; unsigned long flags; - struct irq_cfg *cfg; - unsigned int irq; - - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); - for (i = 0; i < nr_ioapics; i++) - printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].apicid, nr_ioapic_registers[i]); - - /* - * We are a bit conservative about what we expect. We have to - * know about every hardware change ASAP. - */ - printk(KERN_INFO "testing the IO APIC.......................\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - reg_01.raw = io_apic_read(apic, 1); + reg_00.raw = io_apic_read(ioapic_idx, 0); + reg_01.raw = io_apic_read(ioapic_idx, 1); if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(apic, 2); + reg_02.raw = io_apic_read(ioapic_idx, 2); if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); + reg_03.raw = io_apic_read(ioapic_idx, 3); raw_spin_unlock_irqrestore(&ioapic_lock, flags); - printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); + printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); - printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); + printk(KERN_DEBUG "....... : max redirection entries: %02X\n", + reg_01.bits.entries); printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); - printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); + printk(KERN_DEBUG "....... : IO APIC version: %02X\n", + reg_01.bits.version); /* * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, @@ -1576,36 +1575,40 @@ __apicdebuginit(void) print_IO_APIC(void) printk(KERN_DEBUG ".... IRQ redirection table:\n"); - printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" - " Stat Dmod Deli Vect:\n"); + x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries); +} - for (i = 0; i <= reg_01.bits.entries; i++) { - struct IO_APIC_route_entry entry; +__apicdebuginit(void) print_IO_APICs(void) +{ + int ioapic_idx; + struct irq_cfg *cfg; + unsigned int irq; + struct irq_chip *chip; - entry = ioapic_read_entry(apic, i); + printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", + mpc_ioapic_id(ioapic_idx), + ioapics[ioapic_idx].nr_registers); - printk(KERN_DEBUG " %02x %03X ", - i, - entry.dest - ); + /* + * We are a bit conservative about what we expect. We have to + * know about every hardware change ASAP. + */ + printk(KERN_INFO "testing the IO APIC.......................\n"); + + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) + print_IO_APIC(ioapic_idx); - printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector - ); - } - } printk(KERN_DEBUG "IRQ to pin mappings:\n"); for_each_active_irq(irq) { struct irq_pin_list *entry; - cfg = get_irq_chip_data(irq); + chip = irq_get_chip(irq); + if (chip != &ioapic_chip) + continue; + + cfg = irq_get_chip_data(irq); if (!cfg) continue; entry = cfg->irq_2_pin; @@ -1613,13 +1616,11 @@ __apicdebuginit(void) print_IO_APIC(void) continue; printk(KERN_DEBUG "IRQ%d ", irq); for_each_irq_pin(entry, cfg->irq_2_pin) - printk("-> %d:%d", entry->apic, entry->pin); - printk("\n"); + pr_cont("-> %d:%d", entry->apic, entry->pin); + pr_cont("\n"); } printk(KERN_INFO ".................................... done.\n"); - - return; } __apicdebuginit(void) print_APIC_field(int base) @@ -1629,9 +1630,9 @@ __apicdebuginit(void) print_APIC_field(int base) printk(KERN_DEBUG); for (i = 0; i < 8; i++) - printk(KERN_CONT "%08x", apic_read(base + i*0x10)); + pr_cont("%08x", apic_read(base + i*0x10)); - printk(KERN_CONT "\n"); + pr_cont("\n"); } __apicdebuginit(void) print_local_APIC(void *dummy) @@ -1733,7 +1734,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v); } } - printk("\n"); + pr_cont("\n"); } __apicdebuginit(void) print_local_APICs(int maxcpu) @@ -1813,12 +1814,12 @@ __apicdebuginit(int) print_ICs(void) return 0; print_local_APICs(show_lapic); - print_IO_APIC(); + print_IO_APICs(); return 0; } -fs_initcall(print_ICs); +late_initcall(print_ICs); /* Where if anywhere is the i8259 connect in external int mode */ @@ -1835,7 +1836,7 @@ void __init enable_IO_APIC(void) for(apic = 0; apic < nr_ioapics; apic++) { int pin; /* See if any of the pins is in ExtINT mode */ - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + for (pin = 0; pin < ioapics[apic].nr_registers; pin++) { struct IO_APIC_route_entry entry; entry = ioapic_read_entry(apic, pin); @@ -1876,30 +1877,14 @@ void __init enable_IO_APIC(void) clear_IO_APIC(); } -/* - * Not an __init, needed by the reboot code - */ -void disable_IO_APIC(void) +void native_disable_io_apic(void) { /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - - if (!legacy_pic->nr_legacy_irqs) - return; - - /* * If the i8259 is routed through an IOAPIC * Put that IOAPIC in virtual wire mode * so legacy interrupts can be delivered. - * - * With interrupt-remapping, for now we will use virtual wire A mode, - * as virtual wire B is little complex (need to configure both - * IOAPIC RTE aswell as interrupt-remapping table entry). - * As this gets called during crash dump, keep this simple for now. */ - if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { + if (ioapic_i8259.pin != -1) { struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); @@ -1919,12 +1904,25 @@ void disable_IO_APIC(void) ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); } + if (cpu_has_apic || apic_from_smp_config()) + disconnect_bsp_APIC(ioapic_i8259.pin != -1); + +} + +/* + * Not an __init, needed by the reboot code + */ +void disable_IO_APIC(void) +{ /* - * Use virtual wire A mode when interrupt remapping is enabled. + * Clear the IO-APIC before rebooting: */ - if (cpu_has_apic || apic_from_smp_config()) - disconnect_bsp_APIC(!intr_remapping_enabled && - ioapic_i8259.pin != -1); + clear_IO_APIC(); + + if (!legacy_pic->nr_legacy_irqs) + return; + + x86_io_apic_ops.disable(); } #ifdef CONFIG_X86_32 @@ -1934,25 +1932,15 @@ void disable_IO_APIC(void) * * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 */ - -void __init setup_ioapic_ids_from_mpc(void) +void __init setup_ioapic_ids_from_mpc_nocheck(void) { union IO_APIC_reg_00 reg_00; physid_mask_t phys_id_present_map; - int apic_id; + int ioapic_idx; int i; unsigned char old_id; unsigned long flags; - if (acpi_ioapic) - return; - /* - * Don't check I/O APIC IDs for xAPIC systems. They have - * no meaning without the serial APIC bus. - */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return; /* * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. @@ -1962,21 +1950,20 @@ void __init setup_ioapic_ids_from_mpc(void) /* * Set the IOAPIC ID to the value stored in the MPC table. */ - for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { - + for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { /* Read the register 0 value */ raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic_id, 0); + reg_00.raw = io_apic_read(ioapic_idx, 0); raw_spin_unlock_irqrestore(&ioapic_lock, flags); - old_id = mp_ioapics[apic_id].apicid; + old_id = mpc_ioapic_id(ioapic_idx); - if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) { + if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic_id, mp_ioapics[apic_id].apicid); + ioapic_idx, mpc_ioapic_id(ioapic_idx)); printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); - mp_ioapics[apic_id].apicid = reg_00.bits.ID; + ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID; } /* @@ -1985,9 +1972,9 @@ void __init setup_ioapic_ids_from_mpc(void) * 'stuck on smp_invalidate_needed IPI wait' messages. */ if (apic->check_apicid_used(&phys_id_present_map, - mp_ioapics[apic_id].apicid)) { + mpc_ioapic_id(ioapic_idx))) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic_id, mp_ioapics[apic_id].apicid); + ioapic_idx, mpc_ioapic_id(ioapic_idx)); for (i = 0; i < get_physical_broadcast(); i++) if (!physid_isset(i, phys_id_present_map)) break; @@ -1996,52 +1983,70 @@ void __init setup_ioapic_ids_from_mpc(void) printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); - mp_ioapics[apic_id].apicid = i; + ioapics[ioapic_idx].mp_config.apicid = i; } else { physid_mask_t tmp; - apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp); + apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx), + &tmp); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", - mp_ioapics[apic_id].apicid); + mpc_ioapic_id(ioapic_idx)); physids_or(phys_id_present_map, phys_id_present_map, tmp); } - /* * We need to adjust the IRQ routing table * if the ID changed. */ - if (old_id != mp_ioapics[apic_id].apicid) + if (old_id != mpc_ioapic_id(ioapic_idx)) for (i = 0; i < mp_irq_entries; i++) if (mp_irqs[i].dstapic == old_id) mp_irqs[i].dstapic - = mp_ioapics[apic_id].apicid; + = mpc_ioapic_id(ioapic_idx); /* - * Read the right value from the MPC table and - * write it into the ID register. + * Update the ID register according to the right value + * from the MPC table if they are different. */ + if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID) + continue; + apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic_id].apicid); + mpc_ioapic_id(ioapic_idx)); - reg_00.bits.ID = mp_ioapics[apic_id].apicid; + reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); raw_spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic_id, 0, reg_00.raw); + io_apic_write(ioapic_idx, 0, reg_00.raw); raw_spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check */ raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic_id, 0); + reg_00.raw = io_apic_read(ioapic_idx, 0); raw_spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) - printk("could not set ID!\n"); + if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) + pr_cont("could not set ID!\n"); else apic_printk(APIC_VERBOSE, " ok.\n"); } } + +void __init setup_ioapic_ids_from_mpc(void) +{ + + if (acpi_ioapic) + return; + /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return; + setup_ioapic_ids_from_mpc_nocheck(); +} #endif int no_timer_check __initdata; @@ -2133,9 +2138,11 @@ static int ioapic_retrigger_irq(struct irq_data *data) { struct irq_cfg *cfg = data->chip_data; unsigned long flags; + int cpu; raw_spin_lock_irqsave(&vector_lock, flags); - apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); + cpu = cpumask_first_and(cfg->domain, cpu_online_mask); + apic->send_IPI_mask(cpumask_of(cpu), cfg->vector); raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -2167,144 +2174,23 @@ void send_cleanup_vector(struct irq_cfg *cfg) cfg->move_in_progress = 0; } -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - u8 vector = cfg->vector; - - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - - apic = entry->apic; - pin = entry->pin; - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(cfg)) - io_apic_write(apic, 0x11 + pin*2, dest); - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - } -} - -/* - * Either sets data->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and - * leaves data->affinity untouched. - */ -int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - unsigned int *dest_id) -{ - struct irq_cfg *cfg = data->chip_data; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return -1; - - if (assign_irq_vector(data->irq, data->chip_data, mask)) - return -1; - - cpumask_copy(data->affinity, mask); - - *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain); - return 0; -} - -static int -ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - unsigned int dest, irq = data->irq; - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&ioapic_lock, flags); - ret = __ioapic_set_affinity(data, mask, &dest); - if (!ret) { - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, data->chip_data); - } - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - return ret; -} - -#ifdef CONFIG_INTR_REMAP - -/* - * Migrate the IO-APIC irq in the presence of intr-remapping. - * - * For both level and edge triggered, irq migration is a simple atomic - * update(of vector and cpu destination) of IRTE and flush the hardware cache. - * - * For level triggered, we eliminate the io-apic RTE modification (with the - * updated vector information), by using a virtual vector (io-apic pin number). - * Real vector that is used for interrupting cpu will be coming from - * the interrupt-remapping table entry. - */ -static int -ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest, irq = data->irq; - struct irte irte; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return -EINVAL; - - if (get_irte(irq, &irte)) - return -EBUSY; - - if (assign_irq_vector(irq, cfg, mask)) - return -EBUSY; - - dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * Modified the IRTE and flushes the Interrupt entry cache. - */ - modify_irte(irq, &irte); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - cpumask_copy(data->affinity, mask); - return 0; -} - -#else -static inline int -ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - return 0; -} -#endif - -asmlinkage void smp_irq_move_cleanup_interrupt(void) +asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; ack_APIC_irq(); - exit_idle(); irq_enter(); + exit_idle(); me = smp_processor_id(); for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - unsigned int irq; + int irq; unsigned int irr; struct irq_desc *desc; struct irq_cfg *cfg; - irq = __get_cpu_var(vector_irq)[vector]; + irq = __this_cpu_read(vector_irq[vector]); - if (irq == -1) + if (irq <= VECTOR_UNDEFINED) continue; desc = irq_to_desc(irq); @@ -2312,6 +2198,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) continue; cfg = irq_cfg(irq); + if (!cfg) + continue; + raw_spin_lock(&desc->lock); /* @@ -2336,7 +2225,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); goto unlock; } - __get_cpu_var(vector_irq)[vector] = -1; + __this_cpu_write(vector_irq[vector], -1); unlock: raw_spin_unlock(&desc->lock); } @@ -2364,7 +2253,7 @@ static void irq_complete_move(struct irq_cfg *cfg) void irq_force_complete_move(int irq) { - struct irq_cfg *cfg = get_irq_chip_data(irq); + struct irq_cfg *cfg = irq_get_chip_data(irq); if (!cfg) return; @@ -2375,72 +2264,182 @@ void irq_force_complete_move(int irq) static inline void irq_complete_move(struct irq_cfg *cfg) { } #endif +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) +{ + int apic, pin; + struct irq_pin_list *entry; + u8 vector = cfg->vector; + + for_each_irq_pin(entry, cfg->irq_2_pin) { + unsigned int reg; + + apic = entry->apic; + pin = entry->pin; + + io_apic_write(apic, 0x11 + pin*2, dest); + reg = io_apic_read(apic, 0x10 + pin*2); + reg &= ~IO_APIC_REDIR_VECTOR_MASK; + reg |= vector; + io_apic_modify(apic, 0x10 + pin*2, reg); + } +} + +/* + * Either sets data->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and + * leaves data->affinity untouched. + */ +int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, + unsigned int *dest_id) +{ + struct irq_cfg *cfg = data->chip_data; + unsigned int irq = data->irq; + int err; + + if (!config_enabled(CONFIG_SMP)) + return -EPERM; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return -EINVAL; + + err = assign_irq_vector(irq, cfg, mask); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); + if (err) { + if (assign_irq_vector(irq, cfg, data->affinity)) + pr_err("Failed to recover vector for irq %d\n", irq); + return err; + } + + cpumask_copy(data->affinity, mask); + + return 0; +} + + +int native_ioapic_set_affinity(struct irq_data *data, + const struct cpumask *mask, + bool force) +{ + unsigned int dest, irq = data->irq; + unsigned long flags; + int ret; + + if (!config_enabled(CONFIG_SMP)) + return -EPERM; + + raw_spin_lock_irqsave(&ioapic_lock, flags); + ret = __ioapic_set_affinity(data, mask, &dest); + if (!ret) { + /* Only the high 8 bits are valid. */ + dest = SET_APIC_LOGICAL_ID(dest); + __target_IO_APIC_irq(irq, dest, data->chip_data); + ret = IRQ_SET_MASK_OK_NOCOPY; + } + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + return ret; +} + static void ack_apic_edge(struct irq_data *data) { irq_complete_move(data->chip_data); - move_native_irq(data->irq); + irq_move_irq(data); ack_APIC_irq(); } atomic_t irq_mis_count; -/* - * IO-APIC versions below 0x20 don't support EOI register. - * For the record, here is the information about various versions: - * 0Xh 82489DX - * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant - * 2Xh I/O(x)APIC which is PCI 2.2 Compliant - * 30h-FFh Reserved - * - * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic - * version as 0x2. This is an error with documentation and these ICH chips - * use io-apic's of version 0x20. - * - * For IO-APIC's with EOI register, we use that to do an explicit EOI. - * Otherwise, we simulate the EOI message manually by changing the trigger - * mode to edge and then back to level, with RTE being masked during this. -*/ -static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +#ifdef CONFIG_GENERIC_PENDING_IRQ +static bool io_apic_level_ack_pending(struct irq_cfg *cfg) { struct irq_pin_list *entry; unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); for_each_irq_pin(entry, cfg->irq_2_pin) { - if (mp_ioapics[entry->apic].apicver >= 0x20) { - /* - * Intr-remapping uses pin number as the virtual vector - * in the RTE. Actual vector is programmed in - * intr-remapping table entry. Hence for the io-apic - * EOI we use the pin number. - */ - if (irq_remapped(cfg)) - io_apic_eoi(entry->apic, entry->pin); - else - io_apic_eoi(entry->apic, cfg->vector); - } else { - __mask_and_edge_IO_APIC_irq(entry); - __unmask_and_level_IO_APIC_irq(entry); + unsigned int reg; + int pin; + + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin*2); + /* Is the remote IRR bit set? */ + if (reg & IO_APIC_REDIR_REMOTE_IRR) { + raw_spin_unlock_irqrestore(&ioapic_lock, flags); + return true; } } raw_spin_unlock_irqrestore(&ioapic_lock, flags); + + return false; +} + +static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) +{ + /* If we are moving the irq we need to mask it */ + if (unlikely(irqd_is_setaffinity_pending(data))) { + mask_ioapic(cfg); + return true; + } + return false; +} + +static inline void ioapic_irqd_unmask(struct irq_data *data, + struct irq_cfg *cfg, bool masked) +{ + if (unlikely(masked)) { + /* Only migrate the irq if the ack has been received. + * + * On rare occasions the broadcast level triggered ack gets + * delayed going to ioapics, and if we reprogram the + * vector while Remote IRR is still set the irq will never + * fire again. + * + * To prevent this scenario we read the Remote IRR bit + * of the ioapic. This has two effects. + * - On any sane system the read of the ioapic will + * flush writes (and acks) going to the ioapic from + * this cpu. + * - We get to see if the ACK has actually been delivered. + * + * Based on failed experiments of reprogramming the + * ioapic entry from outside of irq context starting + * with masking the ioapic entry and then polling until + * Remote IRR was clear before reprogramming the + * ioapic I don't trust the Remote IRR bit to be + * completey accurate. + * + * However there appears to be no other way to plug + * this race, so if the Remote IRR bit is not + * accurate and is causing problems then it is a hardware bug + * and you can go talk to the chipset vendor about it. + */ + if (!io_apic_level_ack_pending(cfg)) + irq_move_masked_irq(data); + unmask_ioapic(cfg); + } +} +#else +static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) +{ + return false; } +static inline void ioapic_irqd_unmask(struct irq_data *data, + struct irq_cfg *cfg, bool masked) +{ +} +#endif static void ack_apic_level(struct irq_data *data) { struct irq_cfg *cfg = data->chip_data; - int i, do_unmask_irq = 0, irq = data->irq; - struct irq_desc *desc = irq_to_desc(irq); + int i, irq = data->irq; unsigned long v; + bool masked; irq_complete_move(cfg); -#ifdef CONFIG_GENERIC_PENDING_IRQ - /* If we are moving the irq we need to mask it */ - if (unlikely(desc->status & IRQ_MOVE_PENDING)) { - do_unmask_irq = 1; - mask_ioapic(cfg); - } -#endif + masked = ioapic_irqd_mask(data, cfg); /* * It appears there is an erratum which affects at least version 0x11 @@ -2496,52 +2495,8 @@ static void ack_apic_level(struct irq_data *data) eoi_ioapic_irq(irq, cfg); } - /* Now we can move and renable the irq */ - if (unlikely(do_unmask_irq)) { - /* Only migrate the irq if the ack has been received. - * - * On rare occasions the broadcast level triggered ack gets - * delayed going to ioapics, and if we reprogram the - * vector while Remote IRR is still set the irq will never - * fire again. - * - * To prevent this scenario we read the Remote IRR bit - * of the ioapic. This has two effects. - * - On any sane system the read of the ioapic will - * flush writes (and acks) going to the ioapic from - * this cpu. - * - We get to see if the ACK has actually been delivered. - * - * Based on failed experiments of reprogramming the - * ioapic entry from outside of irq context starting - * with masking the ioapic entry and then polling until - * Remote IRR was clear before reprogramming the - * ioapic I don't trust the Remote IRR bit to be - * completey accurate. - * - * However there appears to be no other way to plug - * this race, so if the Remote IRR bit is not - * accurate and is causing problems then it is a hardware bug - * and you can go talk to the chipset vendor about it. - */ - if (!io_apic_level_ack_pending(cfg)) - move_masked_irq(irq); - unmask_ioapic(cfg); - } -} - -#ifdef CONFIG_INTR_REMAP -static void ir_ack_apic_edge(struct irq_data *data) -{ - ack_APIC_irq(); -} - -static void ir_ack_apic_level(struct irq_data *data) -{ - ack_APIC_irq(); - eoi_ioapic_irq(data->irq, data->chip_data); + ioapic_irqd_unmask(data, cfg, masked); } -#endif /* CONFIG_INTR_REMAP */ static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", @@ -2550,24 +2505,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_unmask = unmask_ioapic_irq, .irq_ack = ack_apic_edge, .irq_eoi = ack_apic_level, -#ifdef CONFIG_SMP - .irq_set_affinity = ioapic_set_affinity, -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - -static struct irq_chip ir_ioapic_chip __read_mostly = { - .name = "IR-IO-APIC", - .irq_startup = startup_ioapic_irq, - .irq_mask = mask_ioapic_irq, - .irq_unmask = unmask_ioapic_irq, -#ifdef CONFIG_INTR_REMAP - .irq_ack = ir_ack_apic_edge, - .irq_eoi = ir_ack_apic_level, -#ifdef CONFIG_SMP - .irq_set_affinity = ir_ioapic_set_affinity, -#endif -#endif + .irq_set_affinity = native_ioapic_set_affinity, .irq_retrigger = ioapic_retrigger_irq, }; @@ -2588,7 +2526,7 @@ static inline void init_IO_APIC_traps(void) * 0x80, because int 0x80 is hm, kind of importantish. ;) */ for_each_active_irq(irq) { - cfg = get_irq_chip_data(irq); + cfg = irq_get_chip_data(irq); if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { /* * Hmm.. We don't have an entry for this, @@ -2599,7 +2537,7 @@ static inline void init_IO_APIC_traps(void) legacy_pic->make_irq(irq); else /* Strange. Oh, well.. */ - set_irq_chip(irq, &no_irq_chip); + irq_set_chip(irq, &no_irq_chip); } } } @@ -2639,28 +2577,10 @@ static struct irq_chip lapic_chip __read_mostly = { static void lapic_register_intr(int irq) { irq_clear_status_flags(irq, IRQ_LEVEL); - set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, + irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); } -static void __init setup_nmi(void) -{ - /* - * Dirty trick to enable the NMI watchdog ... - * We put the 8259A master into AEOI mode and - * unmask on all local APICs LVT0 as NMI. - * - * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') - * is from Maciej W. Rozycki - so we do not have to EOI from - * the NMI handler or the timer interrupt. - */ - apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); - - enable_NMI_through_LVT0(); - - apic_printk(APIC_VERBOSE, " done.\n"); -} - /* * This looks a bit hackish but it's about the only one way of sending * a few INTA cycles to 8259As and any associated glue logic. ICR does @@ -2741,7 +2661,7 @@ int timer_through_8259 __initdata; */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = get_irq_chip_data(0); + struct irq_cfg *cfg = irq_get_chip_data(0); int node = cpu_to_node(0); int apic1, pin1, apic2, pin2; unsigned long flags; @@ -2766,15 +2686,6 @@ static inline void __init check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); legacy_pic->init(1); -#ifdef CONFIG_X86_32 - { - unsigned int ver; - - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); - } -#endif pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2793,8 +2704,7 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { - if (intr_remapping_enabled) - panic("BIOS bug: timer not connected to IO-APIC"); + panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2822,16 +2732,11 @@ static inline void __init check_timer(void) unmask_ioapic(cfg); } if (timer_irq_works()) { - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - legacy_pic->unmask(0); - } if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); goto out; } - if (intr_remapping_enabled) - panic("timer doesn't work through Interrupt-remapped IO-APIC"); + panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC"); local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) @@ -2851,11 +2756,6 @@ static inline void __init check_timer(void) if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); timer_through_8259 = 1; - if (nmi_watchdog == NMI_IO_APIC) { - legacy_pic->mask(0); - setup_nmi(); - legacy_pic->unmask(0); - } goto out; } /* @@ -2867,15 +2767,6 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); } - if (nmi_watchdog == NMI_IO_APIC) { - apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " - "through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = NMI_NONE; - } -#ifdef CONFIG_X86_32 - timer_ack = 0; -#endif - apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); @@ -2907,6 +2798,10 @@ static inline void __init check_timer(void) } local_irq_disable(); apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); + if (x2apic_preenabled) + apic_printk(APIC_QUIET, KERN_INFO + "Perhaps problem with the pre-enabled x2apic mode\n" + "Try booting with x2apic and interrupt-remapping disabled in the bios.\n"); panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " "report. Then try booting with the 'noapic' option.\n"); out: @@ -2954,7 +2849,7 @@ void __init setup_IO_APIC(void) } /* - * Called after all the initialization is done. If we didnt find any + * Called after all the initialization is done. If we didn't find any * APIC bugs then we can allow the modify fast path */ @@ -2967,159 +2862,113 @@ static int __init io_apic_bug_finalize(void) late_initcall(io_apic_bug_finalize); -struct sysfs_ioapic_data { - struct sys_device dev; - struct IO_APIC_route_entry entry[0]; -}; -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; - -static int ioapic_suspend(struct sys_device *dev, pm_message_t state) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) - *entry = ioapic_read_entry(dev->id, i); - - return 0; -} - -static int ioapic_resume(struct sys_device *dev) +static void resume_ioapic_id(int ioapic_idx) { - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; unsigned long flags; union IO_APIC_reg_00 reg_00; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].apicid; - io_apic_write(dev->id, 0, reg_00.raw); + reg_00.raw = io_apic_read(ioapic_idx, 0); + if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) { + reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); + io_apic_write(ioapic_idx, 0, reg_00.raw); } raw_spin_unlock_irqrestore(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i++) - ioapic_write_entry(dev->id, i, entry[i]); +} - return 0; +static void ioapic_resume(void) +{ + int ioapic_idx; + + for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--) + resume_ioapic_id(ioapic_idx); + + restore_ioapic_entries(); } -static struct sysdev_class ioapic_sysdev_class = { - .name = "ioapic", - .suspend = ioapic_suspend, +static struct syscore_ops ioapic_syscore_ops = { + .suspend = save_ioapic_entries, .resume = ioapic_resume, }; -static int __init ioapic_init_sysfs(void) +static int __init ioapic_init_ops(void) { - struct sys_device * dev; - int i, size, error; - - error = sysdev_class_register(&ioapic_sysdev_class); - if (error) - return error; - - for (i = 0; i < nr_ioapics; i++ ) { - size = sizeof(struct sys_device) + nr_ioapic_registers[i] - * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); - if (!mp_ioapic_data[i]) { - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - dev = &mp_ioapic_data[i]->dev; - dev->id = i; - dev->cls = &ioapic_sysdev_class; - error = sysdev_register(dev); - if (error) { - kfree(mp_ioapic_data[i]); - mp_ioapic_data[i] = NULL; - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - } + register_syscore_ops(&ioapic_syscore_ops); return 0; } -device_initcall(ioapic_init_sysfs); +device_initcall(ioapic_init_ops); /* - * Dynamic irq allocate and deallocation + * Dynamic irq allocate and deallocation. Should be replaced by irq domains! */ -unsigned int create_irq_nr(unsigned int from, int node) +int arch_setup_hwirq(unsigned int irq, int node) { struct irq_cfg *cfg; unsigned long flags; - unsigned int ret = 0; - int irq; - - if (from < nr_irqs_gsi) - from = nr_irqs_gsi; + int ret; - irq = alloc_irq_from(from, node); - if (irq < 0) - return 0; cfg = alloc_irq_cfg(irq, node); - if (!cfg) { - free_irq_at(irq, NULL); - return 0; - } + if (!cfg) + return -ENOMEM; raw_spin_lock_irqsave(&vector_lock, flags); - if (!__assign_irq_vector(irq, cfg, apic->target_cpus())) - ret = irq; + ret = __assign_irq_vector(irq, cfg, apic->target_cpus()); raw_spin_unlock_irqrestore(&vector_lock, flags); - if (ret) { - set_irq_chip_data(irq, cfg); - irq_clear_status_flags(irq, IRQ_NOREQUEST); - } else { - free_irq_at(irq, cfg); - } + if (!ret) + irq_set_chip_data(irq, cfg); + else + free_irq_cfg(irq, cfg); return ret; } -int create_irq(void) -{ - int node = cpu_to_node(0); - unsigned int irq_want; - int irq; - - irq_want = nr_irqs_gsi; - irq = create_irq_nr(irq_want, node); - - if (irq == 0) - irq = -1; - - return irq; -} - -void destroy_irq(unsigned int irq) +void arch_teardown_hwirq(unsigned int irq) { - struct irq_cfg *cfg = get_irq_chip_data(irq); + struct irq_cfg *cfg = irq_get_chip_data(irq); unsigned long flags; - irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); - - if (irq_remapped(cfg)) - free_irte(irq); + free_remapped_irq(irq); raw_spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); raw_spin_unlock_irqrestore(&vector_lock, flags); - free_irq_at(irq, cfg); + free_irq_cfg(irq, cfg); } /* * MSI message composition */ +void native_compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_cfg *cfg = irq_cfg(irq); + + msg->address_hi = MSI_ADDR_BASE_HI; + + if (x2apic_enabled()) + msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest); + + msg->address_lo = + MSI_ADDR_BASE_LO | + ((apic->irq_dest_mode == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL: + MSI_ADDR_DEST_MODE_LOGICAL) | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); +} + #ifdef CONFIG_PCI_MSI static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg, u8 hpet_id) @@ -3136,70 +2985,27 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, if (err) return err; - dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); - - if (irq_remapped(get_irq_chip_data(irq))) { - struct irte irte; - int ir_index; - u16 sub_handle; - - ir_index = map_irq_to_irte_handle(irq, &sub_handle); - BUG_ON(ir_index == -1); - - prepare_irte(&irte, cfg->vector, dest); - - /* Set source-id of interrupt request */ - if (pdev) - set_msi_sid(&irte, pdev); - else - set_hpet_sid(&irte, hpet_id); + err = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus(), &dest); + if (err) + return err; - modify_irte(irq, &irte); + x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id); - msg->address_hi = MSI_ADDR_BASE_HI; - msg->data = sub_handle; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | - MSI_ADDR_IR_SHV | - MSI_ADDR_IR_INDEX1(ir_index) | - MSI_ADDR_IR_INDEX2(ir_index); - } else { - if (x2apic_enabled()) - msg->address_hi = MSI_ADDR_BASE_HI | - MSI_ADDR_EXT_DEST_ID(dest); - else - msg->address_hi = MSI_ADDR_BASE_HI; - - msg->address_lo = - MSI_ADDR_BASE_LO | - ((apic->irq_dest_mode == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); - } - return err; + return 0; } -#ifdef CONFIG_SMP static int msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { struct irq_cfg *cfg = data->chip_data; struct msi_msg msg; unsigned int dest; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; __get_cached_msi_msg(data->msi_desc, &msg); @@ -3210,48 +3016,8 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) __write_msi_msg(data->msi_desc, &msg); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } -#ifdef CONFIG_INTR_REMAP -/* - * Migrate the MSI irq to another cpumask. This migration is - * done in the process context using interrupt-remapping hardware. - */ -static int -ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) -{ - struct irq_cfg *cfg = data->chip_data; - unsigned int dest, irq = data->irq; - struct irte irte; - - if (get_irte(irq, &irte)) - return -1; - - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * atomically update the IRTE with the new destination and vector. - */ - modify_irte(irq, &irte); - - /* - * After this point, all the interrupts will start arriving - * at the new destination. So, time to cleanup the previous - * vector allocation. - */ - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return 0; -} - -#endif -#endif /* CONFIG_SMP */ /* * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, @@ -3262,69 +3028,34 @@ static struct irq_chip msi_chip = { .irq_unmask = unmask_msi_irq, .irq_mask = mask_msi_irq, .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP .irq_set_affinity = msi_set_affinity, -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - -static struct irq_chip msi_ir_chip = { - .name = "IR-PCI-MSI", - .irq_unmask = unmask_msi_irq, - .irq_mask = mask_msi_irq, -#ifdef CONFIG_INTR_REMAP - .irq_ack = ir_ack_apic_edge, -#ifdef CONFIG_SMP - .irq_set_affinity = ir_msi_set_affinity, -#endif -#endif .irq_retrigger = ioapic_retrigger_irq, }; -/* - * Map the PCI dev to the corresponding remapping hardware unit - * and allocate 'nvec' consecutive interrupt-remapping table entries - * in it. - */ -static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) -{ - struct intel_iommu *iommu; - int index; - - iommu = map_dev_to_ir(dev); - if (!iommu) { - printk(KERN_ERR - "Unable to map PCI %s to iommu\n", pci_name(dev)); - return -ENOENT; - } - - index = alloc_irte(iommu, irq, nvec); - if (index < 0) { - printk(KERN_ERR - "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); - return -ENOSPC; - } - return index; -} - -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) +int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, + unsigned int irq_base, unsigned int irq_offset) { + struct irq_chip *chip = &msi_chip; struct msi_msg msg; + unsigned int irq = irq_base + irq_offset; int ret; ret = msi_compose_msg(dev, irq, &msg, -1); if (ret < 0) return ret; - set_irq_msi(irq, msidesc); - write_msi_msg(irq, &msg); + irq_set_msi_desc_off(irq_base, irq_offset, msidesc); + + /* + * MSI-X message is written per-IRQ, the offset is always 0. + * MSI message denotes a contiguous group of IRQs, written for 0th IRQ. + */ + if (!irq_offset) + write_msi_msg(irq, &msg); - if (irq_remapped(get_irq_chip_data(irq))) { - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); - } else - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + setup_remapped_irq(irq, irq_get_chip_data(irq), chip); + + irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); @@ -3333,69 +3064,37 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int node, ret, sub_handle, index = 0; - unsigned int irq, irq_want; struct msi_desc *msidesc; - struct intel_iommu *iommu = NULL; + unsigned int irq; + int node, ret; - /* x86 doesn't support multiple MSI yet */ + /* Multiple MSI vectors only supported with interrupt remapping */ if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; node = dev_to_node(&dev->dev); - irq_want = nr_irqs_gsi; - sub_handle = 0; - list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want, node); - if (irq == 0) - return -1; - irq_want = irq + 1; - if (!intr_remapping_enabled) - goto no_ir; - if (!sub_handle) { - /* - * allocate the consecutive block of IRTE's - * for 'nvec' - */ - index = msi_alloc_irte(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - iommu = map_dev_to_ir(dev); - if (!iommu) { - ret = -ENOENT; - goto error; - } - /* - * setup the mapping between the irq and the IRTE - * base index, the sub_handle pointing to the - * appropriate interrupt remap table entry. - */ - set_irte_irq(irq, iommu, index, sub_handle); + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = irq_alloc_hwirq(node); + if (!irq) + return -ENOSPC; + + ret = setup_msi_irq(dev, msidesc, irq, 0); + if (ret < 0) { + irq_free_hwirq(irq); + return ret; } -no_ir: - ret = setup_msi_irq(dev, msidesc, irq); - if (ret < 0) - goto error; - sub_handle++; + } return 0; - -error: - destroy_irq(irq); - return ret; } void native_teardown_msi_irq(unsigned int irq) { - destroy_irq(irq); + irq_free_hwirq(irq); } -#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) -#ifdef CONFIG_SMP +#ifdef CONFIG_DMAR_TABLE static int dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) @@ -3403,9 +3102,11 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_cfg *cfg = data->chip_data; unsigned int dest, irq = data->irq; struct msi_msg msg; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; dmar_msi_read(irq, &msg); @@ -3413,22 +3114,19 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, msg.data |= MSI_DATA_VECTOR(cfg->vector); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= MSI_ADDR_DEST_ID(dest); + msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); dmar_msi_write(irq, &msg); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } -#endif /* CONFIG_SMP */ - static struct irq_chip dmar_msi_type = { .name = "DMAR_MSI", .irq_unmask = dmar_msi_unmask, .irq_mask = dmar_msi_mask, .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP .irq_set_affinity = dmar_msi_set_affinity, -#endif .irq_retrigger = ioapic_retrigger_irq, }; @@ -3441,24 +3139,25 @@ int arch_setup_dmar_msi(unsigned int irq) if (ret < 0) return ret; dmar_msi_write(irq, &msg); - set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); + irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); return 0; } #endif #ifdef CONFIG_HPET_TIMER -#ifdef CONFIG_SMP static int hpet_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { struct irq_cfg *cfg = data->chip_data; struct msi_msg msg; unsigned int dest; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; hpet_msi_read(data->handler_data, &msg); @@ -3469,65 +3168,33 @@ static int hpet_msi_set_affinity(struct irq_data *data, hpet_msi_write(data->handler_data, &msg); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } -#endif /* CONFIG_SMP */ - -static struct irq_chip ir_hpet_msi_type = { - .name = "IR-HPET_MSI", - .irq_unmask = hpet_msi_unmask, - .irq_mask = hpet_msi_mask, -#ifdef CONFIG_INTR_REMAP - .irq_ack = ir_ack_apic_edge, -#ifdef CONFIG_SMP - .irq_set_affinity = ir_msi_set_affinity, -#endif -#endif - .irq_retrigger = ioapic_retrigger_irq, -}; - static struct irq_chip hpet_msi_type = { .name = "HPET_MSI", .irq_unmask = hpet_msi_unmask, .irq_mask = hpet_msi_mask, .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP .irq_set_affinity = hpet_msi_set_affinity, -#endif .irq_retrigger = ioapic_retrigger_irq, }; -int arch_setup_hpet_msi(unsigned int irq, unsigned int id) +int default_setup_hpet_msi(unsigned int irq, unsigned int id) { + struct irq_chip *chip = &hpet_msi_type; struct msi_msg msg; int ret; - if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_hpet_to_ir(id); - int index; - - if (!iommu) - return -1; - - index = alloc_irte(iommu, irq, 1); - if (index < 0) - return -1; - } - ret = msi_compose_msg(NULL, irq, &msg, id); if (ret < 0) return ret; - hpet_msi_write(get_irq_data(irq), &msg); + hpet_msi_write(irq_get_handler_data(irq), &msg); irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - if (irq_remapped(get_irq_chip_data(irq))) - set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, - handle_edge_irq, "edge"); - else - set_irq_chip_and_handler_name(irq, &hpet_msi_type, - handle_edge_irq, "edge"); + setup_remapped_irq(irq, irq_get_chip_data(irq), chip); + irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); return 0; } #endif @@ -3538,8 +3205,6 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) */ #ifdef CONFIG_HT_IRQ -#ifdef CONFIG_SMP - static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) { struct ht_irq_msg msg; @@ -3559,30 +3224,30 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { struct irq_cfg *cfg = data->chip_data; unsigned int dest; + int ret; - if (__ioapic_set_affinity(data, mask, &dest)) - return -1; + ret = __ioapic_set_affinity(data, mask, &dest); + if (ret) + return ret; target_ht_irq(data->irq, dest, cfg->vector); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } -#endif - static struct irq_chip ht_irq_chip = { .name = "PCI-HT", .irq_mask = mask_ht_irq, .irq_unmask = unmask_ht_irq, .irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP .irq_set_affinity = ht_set_affinity, -#endif .irq_retrigger = ioapic_retrigger_irq, }; int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { struct irq_cfg *cfg; + struct ht_irq_msg msg; + unsigned dest; int err; if (disable_apic) @@ -3590,40 +3255,76 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) cfg = irq_cfg(irq); err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (!err) { - struct ht_irq_msg msg; - unsigned dest; - - dest = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus()); - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(cfg->vector) | - ((apic->irq_dest_mode == 0) ? - HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((apic->irq_delivery_mode != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; - - write_ht_irq_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); - - dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); - } - return err; + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus(), &dest); + if (err) + return err; + + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); + + msg.address_lo = + HT_IRQ_LOW_BASE | + HT_IRQ_LOW_DEST_ID(dest) | + HT_IRQ_LOW_VECTOR(cfg->vector) | + ((apic->irq_dest_mode == 0) ? + HT_IRQ_LOW_DM_PHYSICAL : + HT_IRQ_LOW_DM_LOGICAL) | + HT_IRQ_LOW_RQEOI_EDGE | + ((apic->irq_delivery_mode != dest_LowestPrio) ? + HT_IRQ_LOW_MT_FIXED : + HT_IRQ_LOW_MT_ARBITRATED) | + HT_IRQ_LOW_IRQ_MASKED; + + write_ht_irq_msg(irq, &msg); + + irq_set_chip_and_handler_name(irq, &ht_irq_chip, + handle_edge_irq, "edge"); + + dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); + + return 0; } #endif /* CONFIG_HT_IRQ */ -int __init io_apic_get_redir_entries (int ioapic) +static int +io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) +{ + struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node); + int ret; + + if (!cfg) + return -EINVAL; + ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin); + if (!ret) + setup_ioapic_irq(irq, cfg, attr); + return ret; +} + +int io_apic_setup_irq_pin_once(unsigned int irq, int node, + struct io_apic_irq_attr *attr) +{ + unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin; + int ret; + struct IO_APIC_route_entry orig_entry; + + /* Avoid redundant programming */ + if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) { + pr_debug("Pin %d-%d already programmed\n", mpc_ioapic_id(ioapic_idx), pin); + orig_entry = ioapic_read_entry(attr->ioapic, pin); + if (attr->trigger == orig_entry.trigger && attr->polarity == orig_entry.polarity) + return 0; + return -EBUSY; + } + ret = io_apic_setup_irq_pin(irq, node, attr); + if (!ret) + set_bit(pin, ioapics[ioapic_idx].pin_programmed); + return ret; +} + +static int __init io_apic_get_redir_entries(int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -3639,7 +3340,7 @@ int __init io_apic_get_redir_entries (int ioapic) return reg_01.bits.entries + 1; } -void __init probe_nr_irqs_gsi(void) +static void __init probe_nr_irqs_gsi(void) { int nr; @@ -3650,12 +3351,11 @@ void __init probe_nr_irqs_gsi(void) printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } -int get_nr_irqs_gsi(void) +unsigned int arch_dynirq_lower_bound(unsigned int from) { - return nr_irqs_gsi; + return from < nr_irqs_gsi ? nr_irqs_gsi : from; } -#ifdef CONFIG_SPARSE_IRQ int __init arch_probe_nr_irqs(void) { int nr; @@ -3675,98 +3375,25 @@ int __init arch_probe_nr_irqs(void) return NR_IRQS_LEGACY; } -#endif -static int __io_apic_set_pci_routing(struct device *dev, int irq, - struct io_apic_irq_attr *irq_attr) +int io_apic_set_pci_routing(struct device *dev, int irq, + struct io_apic_irq_attr *irq_attr) { - struct irq_cfg *cfg; int node; - int ioapic, pin; - int trigger, polarity; - ioapic = irq_attr->ioapic; if (!IO_APIC_IRQ(irq)) { apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); + irq_attr->ioapic); return -EINVAL; } - if (dev) - node = dev_to_node(dev); - else - node = cpu_to_node(0); - - cfg = alloc_irq_and_cfg_at(irq, node); - if (!cfg) - return 0; - - pin = irq_attr->ioapic_pin; - trigger = irq_attr->trigger; - polarity = irq_attr->polarity; - - /* - * IRQs < 16 are already in the irq_2_pin[] map - */ - if (irq >= legacy_pic->nr_legacy_irqs) { - if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) { - printk(KERN_INFO "can not add pin %d for irq %d\n", - pin, irq); - return 0; - } - } - - setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity); - - return 0; -} - -int io_apic_set_pci_routing(struct device *dev, int irq, - struct io_apic_irq_attr *irq_attr) -{ - int ioapic, pin; - /* - * Avoid pin reprogramming. PRTs typically include entries - * with redundant pin->gsi mappings (but unique PCI devices); - * we only program the IOAPIC on the first. - */ - ioapic = irq_attr->ioapic; - pin = irq_attr->ioapic_pin; - if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) { - pr_debug("Pin %d-%d already programmed\n", - mp_ioapics[ioapic].apicid, pin); - return 0; - } - set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed); - - return __io_apic_set_pci_routing(dev, irq, irq_attr); -} - -u8 __init io_apic_unique_id(u8 id) -{ -#ifdef CONFIG_X86_32 - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return io_apic_get_unique_id(nr_ioapics, id); - else - return id; -#else - int i; - DECLARE_BITMAP(used, 256); + node = dev ? dev_to_node(dev) : cpu_to_node(0); - bitmap_zero(used, 256); - for (i = 0; i < nr_ioapics; i++) { - struct mpc_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->apicid, used); - } - if (!test_bit(id, used)) - return id; - return find_first_zero_bit(used, 256); -#endif + return io_apic_setup_irq_pin_once(irq, node, irq_attr); } #ifdef CONFIG_X86_32 -int __init io_apic_get_unique_id(int ioapic, int apic_id) +static int __init io_apic_get_unique_id(int ioapic, int apic_id) { union IO_APIC_reg_00 reg_00; static physid_mask_t apic_id_map = PHYSID_MASK_NONE; @@ -3829,7 +3456,8 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) /* Sanity check */ if (reg_00.bits.ID != apic_id) { - printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); + pr_err("IOAPIC[%d]: Unable to change apic_id!\n", + ioapic); return -1; } } @@ -3839,9 +3467,32 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) return apic_id; } + +static u8 __init io_apic_unique_id(u8 id) +{ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return io_apic_get_unique_id(nr_ioapics, id); + else + return id; +} +#else +static u8 __init io_apic_unique_id(u8 id) +{ + int i; + DECLARE_BITMAP(used, 256); + + bitmap_zero(used, 256); + for (i = 0; i < nr_ioapics; i++) { + __set_bit(mpc_ioapic_id(i), used); + } + if (!test_bit(id, used)) + return id; + return find_first_zero_bit(used, 256); +} #endif -int __init io_apic_get_version(int ioapic) +static int __init io_apic_get_version(int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -3886,14 +3537,14 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; - struct irq_desc *desc; const struct cpumask *mask; + struct irq_data *idata; if (skip_ioapic_setup == 1) return; for (ioapic = 0; ioapic < nr_ioapics; ioapic++) - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { + for (pin = 0; pin < ioapics[ioapic].nr_registers; pin++) { irq_entry = find_irq_entry(ioapic, pin, mp_INT); if (irq_entry == -1) continue; @@ -3902,21 +3553,17 @@ void __init setup_ioapic_dest(void) if ((ioapic > 0) && (irq > 16)) continue; - desc = irq_to_desc(irq); + idata = irq_get_irq_data(irq); /* * Honour affinities which have been set in early boot */ - if (desc->status & - (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = desc->irq_data.affinity; + if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata)) + mask = idata->affinity; else mask = apic->target_cpus(); - if (intr_remapping_enabled) - ir_ioapic_set_affinity(&desc->irq_data, mask, false); - else - ioapic_set_affinity(&desc->irq_data, mask, false); + x86_io_apic_ops.set_affinity(idata, mask, false); } } @@ -3956,7 +3603,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) return res; } -void __init ioapic_init_mappings(void) +void __init native_io_apic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; struct resource *ioapic_res; @@ -3965,7 +3612,7 @@ void __init ioapic_init_mappings(void) ioapic_res = ioapic_setup_resources(nr_ioapics); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { - ioapic_phys = mp_ioapics[i].apicaddr; + ioapic_phys = mpc_ioapic_addr(i); #ifdef CONFIG_X86_32 if (!ioapic_phys) { printk(KERN_ERR @@ -3994,6 +3641,8 @@ fake_ioapic_page: ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ioapic_res++; } + + probe_nr_irqs_gsi(); } void __init ioapic_insert_resources(void) @@ -4018,10 +3667,14 @@ int mp_find_ioapic(u32 gsi) { int i = 0; + if (nr_ioapics == 0) + return -1; + /* Find the IOAPIC that manages this GSI. */ for (i = 0; i < nr_ioapics; i++) { - if ((gsi >= mp_gsi_routing[i].gsi_base) - && (gsi <= mp_gsi_routing[i].gsi_end)) + struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i); + if ((gsi >= gsi_cfg->gsi_base) + && (gsi <= gsi_cfg->gsi_end)) return i; } @@ -4031,26 +3684,48 @@ int mp_find_ioapic(u32 gsi) int mp_find_ioapic_pin(int ioapic, u32 gsi) { + struct mp_ioapic_gsi *gsi_cfg; + if (WARN_ON(ioapic == -1)) return -1; - if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end)) + + gsi_cfg = mp_ioapic_gsi_routing(ioapic); + if (WARN_ON(gsi > gsi_cfg->gsi_end)) return -1; - return gsi - mp_gsi_routing[ioapic].gsi_base; + return gsi - gsi_cfg->gsi_base; } -static int bad_ioapic(unsigned long address) +static __init int bad_ioapic(unsigned long address) { if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded " - "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); + pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n", + MAX_IO_APICS, nr_ioapics); return 1; } if (!address) { - printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address" - " found in table, skipping!\n"); + pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n"); + return 1; + } + return 0; +} + +static __init int bad_ioapic_register(int idx) +{ + union IO_APIC_reg_00 reg_00; + union IO_APIC_reg_01 reg_01; + union IO_APIC_reg_02 reg_02; + + reg_00.raw = io_apic_read(idx, 0); + reg_01.raw = io_apic_read(idx, 1); + reg_02.raw = io_apic_read(idx, 2); + + if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) { + pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n", + mpc_ioapic_addr(idx)); return 1; } + return 0; } @@ -4058,40 +3733,48 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) { int idx = 0; int entries; + struct mp_ioapic_gsi *gsi_cfg; if (bad_ioapic(address)) return; idx = nr_ioapics; - mp_ioapics[idx].type = MP_IOAPIC; - mp_ioapics[idx].flags = MPC_APIC_USABLE; - mp_ioapics[idx].apicaddr = address; + ioapics[idx].mp_config.type = MP_IOAPIC; + ioapics[idx].mp_config.flags = MPC_APIC_USABLE; + ioapics[idx].mp_config.apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].apicid = io_apic_unique_id(id); - mp_ioapics[idx].apicver = io_apic_get_version(idx); + + if (bad_ioapic_register(idx)) { + clear_fixmap(FIX_IO_APIC_BASE_0 + idx); + return; + } + + ioapics[idx].mp_config.apicid = io_apic_unique_id(id); + ioapics[idx].mp_config.apicver = io_apic_get_version(idx); /* * Build basic GSI lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ entries = io_apic_get_redir_entries(idx); - mp_gsi_routing[idx].gsi_base = gsi_base; - mp_gsi_routing[idx].gsi_end = gsi_base + entries - 1; + gsi_cfg = mp_ioapic_gsi_routing(idx); + gsi_cfg->gsi_base = gsi_base; + gsi_cfg->gsi_end = gsi_base + entries - 1; /* * The number of IO-APIC IRQ registers (== #pins): */ - nr_ioapic_registers[idx] = entries; + ioapics[idx].nr_registers = entries; - if (mp_gsi_routing[idx].gsi_end >= gsi_top) - gsi_top = mp_gsi_routing[idx].gsi_end + 1; + if (gsi_cfg->gsi_end >= gsi_top) + gsi_top = gsi_cfg->gsi_end + 1; - printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " - "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, - mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, - mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end); + pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n", + idx, mpc_ioapic_id(idx), + mpc_ioapic_ver(idx), mpc_ioapic_addr(idx), + gsi_cfg->gsi_base, gsi_cfg->gsi_end); nr_ioapics++; } @@ -4099,19 +3782,16 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) /* Enable IOAPIC early just for system timer */ void __init pre_init_apic_IRQ0(void) { - struct irq_cfg *cfg; + struct io_apic_irq_attr attr = { 0, 0, 0, 0 }; printk(KERN_INFO "Early APIC setup for system timer0\n"); #ifndef CONFIG_SMP - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); + physid_set_mask_of_physid(boot_cpu_physical_apicid, + &phys_cpu_present_map); #endif - /* Make sure the irq descriptor is set up */ - cfg = alloc_irq_and_cfg_at(0, 0); - setup_local_APIC(); - add_pin_to_irq_node(cfg, 0, 0, 0); - set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); - - setup_ioapic_irq(0, 0, 0, cfg, 0, 0); + io_apic_setup_irq_pin(0, 0, &attr); + irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, + "edge"); } diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 08385e090a6..62071569bd5 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -1,6 +1,5 @@ #include <linux/cpumask.h> #include <linux/interrupt.h> -#include <linux/init.h> #include <linux/mm.h> #include <linux/delay.h> @@ -56,6 +55,8 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, local_irq_restore(flags); } +#ifdef CONFIG_X86_32 + void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector) { @@ -71,8 +72,8 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, local_irq_save(flags); for_each_cpu(query_cpu, mask) __default_send_IPI_dest_field( - apic->cpu_to_logical_apicid(query_cpu), vector, - apic->dest_logical); + early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, apic->dest_logical); local_irq_restore(flags); } @@ -90,14 +91,12 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, if (query_cpu == this_cpu) continue; __default_send_IPI_dest_field( - apic->cpu_to_logical_apicid(query_cpu), vector, - apic->dest_logical); + early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, apic->dest_logical); } local_irq_restore(flags); } -#ifdef CONFIG_X86_32 - /* * This is only used on smaller machines. */ @@ -106,7 +105,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; - if (WARN_ONCE(!mask, "empty IPI mask")) + if (!mask) return; local_irq_save(flags); diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c deleted file mode 100644 index c90041ccb74..00000000000 --- a/arch/x86/kernel/apic/nmi.c +++ /dev/null @@ -1,567 +0,0 @@ -/* - * NMI watchdog support on APIC systems - * - * Started by Ingo Molnar <mingo@redhat.com> - * - * Fixes: - * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. - * Mikael Pettersson : Power Management for local APIC NMI watchdog. - * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. Disable/enable API. - */ - -#include <asm/apic.h> - -#include <linux/nmi.h> -#include <linux/mm.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/sysdev.h> -#include <linux/sysctl.h> -#include <linux/percpu.h> -#include <linux/kprobes.h> -#include <linux/cpumask.h> -#include <linux/kernel_stat.h> -#include <linux/kdebug.h> -#include <linux/smp.h> - -#include <asm/i8259.h> -#include <asm/io_apic.h> -#include <asm/proto.h> -#include <asm/timer.h> - -#include <asm/mce.h> - -#include <asm/mach_traps.h> - -int unknown_nmi_panic; -int nmi_watchdog_enabled; - -/* For reliability, we're prepared to waste bits here. */ -static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; - -/* nmi_active: - * >0: the lapic NMI watchdog is active, but can be disabled - * <0: the lapic NMI watchdog has not been set up, and cannot - * be enabled - * 0: the lapic NMI watchdog is disabled, but can be enabled - */ -atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ -EXPORT_SYMBOL(nmi_active); - -unsigned int nmi_watchdog = NMI_NONE; -EXPORT_SYMBOL(nmi_watchdog); - -static int panic_on_timeout; - -static unsigned int nmi_hz = HZ; -static DEFINE_PER_CPU(short, wd_enabled); -static int endflag __initdata; - -static inline unsigned int get_nmi_count(int cpu) -{ - return per_cpu(irq_stat, cpu).__nmi_count; -} - -static inline int mce_in_progress(void) -{ -#if defined(CONFIG_X86_MCE) - return atomic_read(&mce_entry) > 0; -#endif - return 0; -} - -/* - * Take the local apic timer and PIT/HPET into account. We don't - * know which one is active, when we have highres/dyntick on - */ -static inline unsigned int get_timer_irqs(int cpu) -{ - return per_cpu(irq_stat, cpu).apic_timer_irqs + - per_cpu(irq_stat, cpu).irq0_irqs; -} - -#ifdef CONFIG_SMP -/* - * The performance counters used by NMI_LOCAL_APIC don't trigger when - * the CPU is idle. To make sure the NMI watchdog really ticks on all - * CPUs during the test make them busy. - */ -static __init void nmi_cpu_busy(void *data) -{ - local_irq_enable_in_hardirq(); - /* - * Intentionally don't use cpu_relax here. This is - * to make sure that the performance counter really ticks, - * even if there is a simulator or similar that catches the - * pause instruction. On a real HT machine this is fine because - * all other CPUs are busy with "useless" delay loops and don't - * care if they get somewhat less cycles. - */ - while (endflag == 0) - mb(); -} -#endif - -static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count) -{ - printk(KERN_CONT "\n"); - - printk(KERN_WARNING - "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", - cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); - - printk(KERN_WARNING - "Please report this to bugzilla.kernel.org,\n"); - printk(KERN_WARNING - "and attach the output of the 'dmesg' command.\n"); - - per_cpu(wd_enabled, cpu) = 0; - atomic_dec(&nmi_active); -} - -static void __acpi_nmi_disable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); -} - -int __init check_nmi_watchdog(void) -{ - unsigned int *prev_nmi_count; - int cpu; - - if (!nmi_watchdog_active() || !atomic_read(&nmi_active)) - return 0; - - prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); - if (!prev_nmi_count) - goto error; - - printk(KERN_INFO "Testing NMI watchdog ... "); - -#ifdef CONFIG_SMP - if (nmi_watchdog == NMI_LOCAL_APIC) - smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); -#endif - - for_each_possible_cpu(cpu) - prev_nmi_count[cpu] = get_nmi_count(cpu); - local_irq_enable(); - mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ - - for_each_online_cpu(cpu) { - if (!per_cpu(wd_enabled, cpu)) - continue; - if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) - report_broken_nmi(cpu, prev_nmi_count); - } - endflag = 1; - if (!atomic_read(&nmi_active)) { - kfree(prev_nmi_count); - atomic_set(&nmi_active, -1); - goto error; - } - printk("OK.\n"); - - /* - * now that we know it works we can reduce NMI frequency to - * something more reasonable; makes a difference in some configs - */ - if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = lapic_adjust_nmi_hz(1); - - kfree(prev_nmi_count); - return 0; -error: - if (nmi_watchdog == NMI_IO_APIC) { - if (!timer_through_8259) - legacy_pic->mask(0); - on_each_cpu(__acpi_nmi_disable, NULL, 1); - } - -#ifdef CONFIG_X86_32 - timer_ack = 0; -#endif - return -1; -} - -static int __init setup_nmi_watchdog(char *str) -{ - unsigned int nmi; - - if (!strncmp(str, "panic", 5)) { - panic_on_timeout = 1; - str = strchr(str, ','); - if (!str) - return 1; - ++str; - } - - if (!strncmp(str, "lapic", 5)) - nmi_watchdog = NMI_LOCAL_APIC; - else if (!strncmp(str, "ioapic", 6)) - nmi_watchdog = NMI_IO_APIC; - else { - get_option(&str, &nmi); - if (nmi >= NMI_INVALID) - return 0; - nmi_watchdog = nmi; - } - - return 1; -} -__setup("nmi_watchdog=", setup_nmi_watchdog); - -/* - * Suspend/resume support - */ -#ifdef CONFIG_PM - -static int nmi_pm_active; /* nmi_active before suspend */ - -static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) -{ - /* only CPU0 goes here, other CPUs should be offline */ - nmi_pm_active = atomic_read(&nmi_active); - stop_apic_nmi_watchdog(NULL); - BUG_ON(atomic_read(&nmi_active) != 0); - return 0; -} - -static int lapic_nmi_resume(struct sys_device *dev) -{ - /* only CPU0 goes here, other CPUs should be offline */ - if (nmi_pm_active > 0) { - setup_apic_nmi_watchdog(NULL); - touch_nmi_watchdog(); - } - return 0; -} - -static struct sysdev_class nmi_sysclass = { - .name = "lapic_nmi", - .resume = lapic_nmi_resume, - .suspend = lapic_nmi_suspend, -}; - -static struct sys_device device_lapic_nmi = { - .id = 0, - .cls = &nmi_sysclass, -}; - -static int __init init_lapic_nmi_sysfs(void) -{ - int error; - - /* - * should really be a BUG_ON but b/c this is an - * init call, it just doesn't work. -dcz - */ - if (nmi_watchdog != NMI_LOCAL_APIC) - return 0; - - if (atomic_read(&nmi_active) < 0) - return 0; - - error = sysdev_class_register(&nmi_sysclass); - if (!error) - error = sysdev_register(&device_lapic_nmi); - return error; -} - -/* must come after the local APIC's device_initcall() */ -late_initcall(init_lapic_nmi_sysfs); - -#endif /* CONFIG_PM */ - -static void __acpi_nmi_enable(void *__unused) -{ - apic_write(APIC_LVT0, APIC_DM_NMI); -} - -/* - * Enable timer based NMIs on all CPUs: - */ -void acpi_nmi_enable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_enable, NULL, 1); -} - -/* - * Disable timer based NMIs on all CPUs: - */ -void acpi_nmi_disable(void) -{ - if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) - on_each_cpu(__acpi_nmi_disable, NULL, 1); -} - -/* - * This function is called as soon the LAPIC NMI watchdog driver has everything - * in place and it's ready to check if the NMIs belong to the NMI watchdog - */ -void cpu_nmi_set_wd_enabled(void) -{ - __get_cpu_var(wd_enabled) = 1; -} - -void setup_apic_nmi_watchdog(void *unused) -{ - if (__get_cpu_var(wd_enabled)) - return; - - /* cheap hack to support suspend/resume */ - /* if cpu0 is not active neither should the other cpus */ - if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) - return; - - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - if (lapic_watchdog_init(nmi_hz) < 0) { - __get_cpu_var(wd_enabled) = 0; - return; - } - /* FALL THROUGH */ - case NMI_IO_APIC: - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - } -} - -void stop_apic_nmi_watchdog(void *unused) -{ - /* only support LOCAL and IO APICs for now */ - if (!nmi_watchdog_active()) - return; - if (__get_cpu_var(wd_enabled) == 0) - return; - if (nmi_watchdog == NMI_LOCAL_APIC) - lapic_watchdog_stop(); - else - __acpi_nmi_disable(NULL); - __get_cpu_var(wd_enabled) = 0; - atomic_dec(&nmi_active); -} - -/* - * the best way to detect whether a CPU has a 'hard lockup' problem - * is to check it's local APIC timer IRQ counts. If they are not - * changing then that CPU has some problem. - * - * as these watchdog NMI IRQs are generated on every CPU, we only - * have to check the current processor. - * - * since NMIs don't listen to _any_ locks, we have to be extremely - * careful not to rely on unsafe variables. The printk might lock - * up though, so we have to break up any console locks first ... - * [when there will be more tty-related locks, break them up here too!] - */ - -static DEFINE_PER_CPU(unsigned, last_irq_sum); -static DEFINE_PER_CPU(long, alert_counter); -static DEFINE_PER_CPU(int, nmi_touch); - -void touch_nmi_watchdog(void) -{ - if (nmi_watchdog_active()) { - unsigned cpu; - - /* - * Tell other CPUs to reset their alert counters. We cannot - * do it ourselves because the alert count increase is not - * atomic. - */ - for_each_present_cpu(cpu) { - if (per_cpu(nmi_touch, cpu) != 1) - per_cpu(nmi_touch, cpu) = 1; - } - } - - /* - * Tickle the softlockup detector too: - */ - touch_softlockup_watchdog(); -} -EXPORT_SYMBOL(touch_nmi_watchdog); - -notrace __kprobes int -nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) -{ - /* - * Since current_thread_info()-> is always on the stack, and we - * always switch the stack NMI-atomically, it's safe to use - * smp_processor_id(). - */ - unsigned int sum; - int touched = 0; - int cpu = smp_processor_id(); - int rc = 0; - - sum = get_timer_irqs(cpu); - - if (__get_cpu_var(nmi_touch)) { - __get_cpu_var(nmi_touch) = 0; - touched = 1; - } - - /* We can be called before check_nmi_watchdog, hence NULL check. */ - if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ - - raw_spin_lock(&lock); - printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); - show_regs(regs); - dump_stack(); - raw_spin_unlock(&lock); - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - - rc = 1; - } - - /* Could check oops_in_progress here too, but it's safer not to */ - if (mce_in_progress()) - touched = 1; - - /* if the none of the timers isn't firing, this cpu isn't doing much */ - if (!touched && __get_cpu_var(last_irq_sum) == sum) { - /* - * Ayiee, looks like this CPU is stuck ... - * wait a few IRQs (5 seconds) before doing the oops ... - */ - __this_cpu_inc(alert_counter); - if (__this_cpu_read(alert_counter) == 5 * nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. - */ - die_nmi("BUG: NMI Watchdog detected LOCKUP", - regs, panic_on_timeout); - } else { - __get_cpu_var(last_irq_sum) = sum; - __this_cpu_write(alert_counter, 0); - } - - /* see if the nmi watchdog went off */ - if (!__get_cpu_var(wd_enabled)) - return rc; - switch (nmi_watchdog) { - case NMI_LOCAL_APIC: - rc |= lapic_wd_event(nmi_hz); - break; - case NMI_IO_APIC: - /* - * don't know how to accurately check for this. - * just assume it was a watchdog timer interrupt - * This matches the old behaviour. - */ - rc = 1; - break; - } - return rc; -} - -#ifdef CONFIG_SYSCTL - -static void enable_ioapic_nmi_watchdog_single(void *unused) -{ - __get_cpu_var(wd_enabled) = 1; - atomic_inc(&nmi_active); - __acpi_nmi_enable(NULL); -} - -static void enable_ioapic_nmi_watchdog(void) -{ - on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); - touch_nmi_watchdog(); -} - -static void disable_ioapic_nmi_watchdog(void) -{ - on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); -} - -static int __init setup_unknown_nmi_panic(char *str) -{ - unknown_nmi_panic = 1; - return 1; -} -__setup("unknown_nmi_panic", setup_unknown_nmi_panic); - -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) -{ - unsigned char reason = get_nmi_reason(); - char buf[64]; - - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(buf, regs, 1); /* Always panic here */ - return 0; -} - -/* - * proc handler for /proc/sys/kernel/nmi - */ -int proc_nmi_enabled(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int old_state; - - nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; - old_state = nmi_watchdog_enabled; - proc_dointvec(table, write, buffer, length, ppos); - if (!!old_state == !!nmi_watchdog_enabled) - return 0; - - if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) { - printk(KERN_WARNING - "NMI watchdog is permanently disabled\n"); - return -EIO; - } - - if (nmi_watchdog == NMI_LOCAL_APIC) { - if (nmi_watchdog_enabled) - enable_lapic_nmi_watchdog(); - else - disable_lapic_nmi_watchdog(); - } else if (nmi_watchdog == NMI_IO_APIC) { - if (nmi_watchdog_enabled) - enable_ioapic_nmi_watchdog(); - else - disable_ioapic_nmi_watchdog(); - } else { - printk(KERN_WARNING - "NMI watchdog doesn't know what hardware to touch\n"); - return -EIO; - } - return 0; -} - -#endif /* CONFIG_SYSCTL */ - -int do_nmi_callback(struct pt_regs *regs, int cpu) -{ -#ifdef CONFIG_SYSCTL - if (unknown_nmi_panic) - return unknown_nmi_panic_callback(regs, cpu); -#endif - return 0; -} - -void arch_trigger_all_cpu_backtrace(void) -{ - int i; - - cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); - - printk(KERN_INFO "sending NMI to all CPUs:\n"); - apic->send_IPI_all(NMI_VECTOR); - - /* Wait for up to 10 seconds for all CPUs to do the backtrace */ - for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(to_cpumask(backtrace_mask))) - break; - mdelay(1); - } -} diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c deleted file mode 100644 index 960f26ab5c9..00000000000 --- a/arch/x86/kernel/apic/numaq_32.c +++ /dev/null @@ -1,550 +0,0 @@ -/* - * Written by: Patricia Gaughen, IBM Corporation - * - * Copyright (C) 2002, IBM Corp. - * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to <gone@us.ibm.com> - */ -#include <linux/nodemask.h> -#include <linux/topology.h> -#include <linux/bootmem.h> -#include <linux/memblock.h> -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/kernel.h> -#include <linux/mmzone.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/numa.h> -#include <linux/smp.h> -#include <linux/io.h> -#include <linux/mm.h> - -#include <asm/processor.h> -#include <asm/fixmap.h> -#include <asm/mpspec.h> -#include <asm/numaq.h> -#include <asm/setup.h> -#include <asm/apic.h> -#include <asm/e820.h> -#include <asm/ipi.h> - -#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) - -int found_numaq; - -/* - * Have to match translation table entries to main table entries by counter - * hence the mpc_record variable .... can't see a less disgusting way of - * doing this .... - */ -struct mpc_trans { - unsigned char mpc_type; - unsigned char trans_len; - unsigned char trans_type; - unsigned char trans_quad; - unsigned char trans_global; - unsigned char trans_local; - unsigned short trans_reserved; -}; - -static int mpc_record; - -static struct mpc_trans *translation_table[MAX_MPC_ENTRY]; - -int mp_bus_id_to_node[MAX_MP_BUSSES]; -int mp_bus_id_to_local[MAX_MP_BUSSES]; -int quad_local_to_mp_bus_id[NR_CPUS/4][4]; - - -static inline void numaq_register_node(int node, struct sys_cfg_data *scd) -{ - struct eachquadmem *eq = scd->eq + node; - - node_set_online(node); - - /* Convert to pages */ - node_start_pfn[node] = - MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size); - - node_end_pfn[node] = - MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); - - memblock_x86_register_active_regions(node, node_start_pfn[node], - node_end_pfn[node]); - - memory_present(node, node_start_pfn[node], node_end_pfn[node]); - - node_remap_size[node] = node_memmap_size_bytes(node, - node_start_pfn[node], - node_end_pfn[node]); -} - -/* - * Function: smp_dump_qct() - * - * Description: gets memory layout from the quad config table. This - * function also updates node_online_map with the nodes (quads) present. - */ -static void __init smp_dump_qct(void) -{ - struct sys_cfg_data *scd; - int node; - - scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); - - nodes_clear(node_online_map); - for_each_node(node) { - if (scd->quads_present31_0 & (1 << node)) - numaq_register_node(node, scd); - } -} - -void __cpuinit numaq_tsc_disable(void) -{ - if (!found_numaq) - return; - - if (num_online_nodes() > 1) { - printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); - setup_clear_cpu_cap(X86_FEATURE_TSC); - } -} - -static void __init numaq_tsc_init(void) -{ - numaq_tsc_disable(); -} - -static inline int generate_logical_apicid(int quad, int phys_apicid) -{ - return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); -} - -/* x86_quirks member */ -static int mpc_apic_id(struct mpc_cpu *m) -{ - int quad = translation_table[mpc_record]->trans_quad; - int logical_apicid = generate_logical_apicid(quad, m->apicid); - - printk(KERN_DEBUG - "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", - m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, - (m->cpufeature & CPU_MODEL_MASK) >> 4, - m->apicver, quad, logical_apicid); - - return logical_apicid; -} - -/* x86_quirks member */ -static void mpc_oem_bus_info(struct mpc_bus *m, char *name) -{ - int quad = translation_table[mpc_record]->trans_quad; - int local = translation_table[mpc_record]->trans_local; - - mp_bus_id_to_node[m->busid] = quad; - mp_bus_id_to_local[m->busid] = local; - - printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad); -} - -/* x86_quirks member */ -static void mpc_oem_pci_bus(struct mpc_bus *m) -{ - int quad = translation_table[mpc_record]->trans_quad; - int local = translation_table[mpc_record]->trans_local; - - quad_local_to_mp_bus_id[quad][local] = m->busid; -} - -/* - * Called from mpparse code. - * mode = 0: prescan - * mode = 1: one mpc entry scanned - */ -static void numaq_mpc_record(unsigned int mode) -{ - if (!mode) - mpc_record = 0; - else - mpc_record++; -} - -static void __init MP_translation_info(struct mpc_trans *m) -{ - printk(KERN_INFO - "Translation: record %d, type %d, quad %d, global %d, local %d\n", - mpc_record, m->trans_type, m->trans_quad, m->trans_global, - m->trans_local); - - if (mpc_record >= MAX_MPC_ENTRY) - printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); - else - translation_table[mpc_record] = m; /* stash this for later */ - - if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) - node_set_online(m->trans_quad); -} - -static int __init mpf_checksum(unsigned char *mp, int len) -{ - int sum = 0; - - while (len--) - sum += *mp++; - - return sum & 0xFF; -} - -/* - * Read/parse the MPC oem tables - */ -static void __init smp_read_mpc_oem(struct mpc_table *mpc) -{ - struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr; - int count = sizeof(*oemtable); /* the header size */ - unsigned char *oemptr = ((unsigned char *)oemtable) + count; - - mpc_record = 0; - printk(KERN_INFO - "Found an OEM MPC table at %8p - parsing it...\n", oemtable); - - if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { - printk(KERN_WARNING - "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", - oemtable->signature[0], oemtable->signature[1], - oemtable->signature[2], oemtable->signature[3]); - return; - } - - if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) { - printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); - return; - } - - while (count < oemtable->length) { - switch (*oemptr) { - case MP_TRANSLATION: - { - struct mpc_trans *m = (void *)oemptr; - - MP_translation_info(m); - oemptr += sizeof(*m); - count += sizeof(*m); - ++mpc_record; - break; - } - default: - printk(KERN_WARNING - "Unrecognised OEM table entry type! - %d\n", - (int)*oemptr); - return; - } - } -} - -static __init void early_check_numaq(void) -{ - /* - * get boot-time SMP configuration: - */ - if (smp_found_config) - early_get_smp_config(); - - if (found_numaq) { - x86_init.mpparse.mpc_record = numaq_mpc_record; - x86_init.mpparse.setup_ioapic_ids = x86_init_noop; - x86_init.mpparse.mpc_apic_id = mpc_apic_id; - x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem; - x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; - x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; - x86_init.timers.tsc_pre_init = numaq_tsc_init; - x86_init.pci.init = pci_numaq_init; - } -} - -int __init get_memcfg_numaq(void) -{ - early_check_numaq(); - if (!found_numaq) - return 0; - smp_dump_qct(); - - return 1; -} - -#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static inline unsigned int numaq_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0x0F; -} - -static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence_logical(mask, vector); -} - -static inline void numaq_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); -} - -static inline void numaq_send_IPI_all(int vector) -{ - numaq_send_IPI_mask(cpu_online_mask, vector); -} - -#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) -#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) - -/* - * Because we use NMIs rather than the INIT-STARTUP sequence to - * bootstrap the CPUs, the APIC may be in a weird state. Kick it: - */ -static inline void numaq_smp_callin_clear_local_apic(void) -{ - clear_local_APIC(); -} - -static inline const struct cpumask *numaq_target_cpus(void) -{ - return cpu_all_mask; -} - -static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid) -{ - return physid_isset(apicid, *map); -} - -static inline unsigned long numaq_check_apicid_present(int bit) -{ - return physid_isset(bit, phys_cpu_present_map); -} - -static inline int numaq_apic_id_registered(void) -{ - return 1; -} - -static inline void numaq_init_apic_ldr(void) -{ - /* Already done in NUMA-Q firmware */ -} - -static inline void numaq_setup_apic_routing(void) -{ - printk(KERN_INFO - "Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n", - nr_ioapics); -} - -/* - * Skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum. - */ -static inline int numaq_multi_timer_check(int apic, int irq) -{ - return apic != 0 && irq == 0; -} - -static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL, retmap); -} - -static inline int numaq_cpu_to_logical_apicid(int cpu) -{ - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return cpu_2_logical_apicid[cpu]; -} - -/* - * Supporting over 60 cpus on NUMA-Q requires a locality-dependent - * cpu to APIC ID relation to properly interact with the intelligent - * mode of the cluster controller. - */ -static inline int numaq_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < 60) - return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); - else - return BAD_APICID; -} - -static inline int numaq_apicid_to_node(int logical_apicid) -{ - return logical_apicid >> 4; -} - -static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) -{ - int node = numaq_apicid_to_node(logical_apicid); - int cpu = __ffs(logical_apicid & 0xf); - - physid_set_mask_of_physid(cpu + 4*node, retmap); -} - -/* Where the IO area was mapped on multiquad, always 0 otherwise */ -void *xquad_portio; - -static inline int numaq_check_phys_apicid_present(int phys_apicid) -{ - return 1; -} - -/* - * We use physical apicids here, not logical, so just return the default - * physical broadcast to stop people from breaking us - */ -static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - return 0x0F; -} - -static inline unsigned int -numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) -{ - return 0x0F; -} - -/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ -static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - -static int -numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) -{ - if (strncmp(oem, "IBM NUMA", 8)) - printk(KERN_ERR "Warning! Not a NUMA-Q system!\n"); - else - found_numaq = 1; - - return found_numaq; -} - -static int probe_numaq(void) -{ - /* already know from get_memcfg_numaq() */ - return found_numaq; -} - -static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - -static void numaq_setup_portio_remap(void) -{ - int num_quads = num_online_nodes(); - - if (num_quads <= 1) - return; - - printk(KERN_INFO - "Remapping cross-quad port I/O for %d quads\n", num_quads); - - xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); - - printk(KERN_INFO - "xquad_portio vaddr 0x%08lx, len %08lx\n", - (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); -} - -/* Use __refdata to keep false positive warning calm. */ -struct apic __refdata apic_numaq = { - - .name = "NUMAQ", - .probe = probe_numaq, - .acpi_madt_oem_check = NULL, - .apic_id_registered = numaq_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* physical delivery on LOCAL quad: */ - .irq_dest_mode = 0, - - .target_cpus = numaq_target_cpus, - .disable_esr = 1, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = numaq_check_apicid_used, - .check_apicid_present = numaq_check_apicid_present, - - .vector_allocation_domain = numaq_vector_allocation_domain, - .init_apic_ldr = numaq_init_apic_ldr, - - .ioapic_phys_id_map = numaq_ioapic_phys_id_map, - .setup_apic_routing = numaq_setup_apic_routing, - .multi_timer_check = numaq_multi_timer_check, - .apicid_to_node = numaq_apicid_to_node, - .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, - .cpu_present_to_apicid = numaq_cpu_present_to_apicid, - .apicid_to_cpu_present = numaq_apicid_to_cpu_present, - .setup_portio_remap = numaq_setup_portio_remap, - .check_phys_apicid_present = numaq_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = numaq_phys_pkg_id, - .mps_oem_check = numaq_mps_oem_check, - - .get_apic_id = numaq_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0x0F << 24, - - .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, - - .send_IPI_mask = numaq_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = numaq_send_IPI_allbutself, - .send_IPI_all = numaq_send_IPI_all, - .send_IPI_self = default_send_IPI_self, - - .wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi, - .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, - - /* We don't do anything here because we use NMI's to boot instead */ - .wait_for_init_deassert = NULL, - - .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, - .inquire_remote_apic = NULL, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, -}; diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 99d2fe01608..cceb352c968 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -52,29 +52,9 @@ static int __init print_ipi_mode(void) } late_initcall(print_ipi_mode); -void __init default_setup_apic_routing(void) +static int default_x86_32_early_logical_apicid(int cpu) { - int version = apic_version[boot_cpu_physical_apicid]; - - if (num_possible_cpus() > 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } - -#ifdef CONFIG_X86_BIGSMP - generic_bigsmp_probe(); -#endif - - if (apic->setup_apic_routing) - apic->setup_apic_routing(); + return 1 << cpu; } static void setup_apic_flat_routing(void) @@ -86,32 +66,18 @@ static void setup_apic_flat_routing(void) #endif } -static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* - * Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - /* should be called last. */ static int probe_default(void) { return 1; } -struct apic apic_default = { +static struct apic apic_default = { .name = "default", .probe = probe_default, .acpi_madt_oem_check = NULL, + .apic_id_valid = default_apic_id_valid, .apic_id_registered = default_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, @@ -124,14 +90,12 @@ struct apic apic_default = { .check_apicid_used = default_check_apicid_used, .check_apicid_present = default_check_apicid_present, - .vector_allocation_domain = default_vector_allocation_domain, + .vector_allocation_domain = flat_vector_allocation_domain, .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = setup_apic_flat_routing, .multi_timer_check = NULL, - .apicid_to_node = default_apicid_to_node, - .cpu_to_logical_apicid = default_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, @@ -144,8 +108,7 @@ struct apic apic_default = { .set_apic_id = NULL, .apic_id_mask = 0x0F << 24, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .send_IPI_mask = default_send_IPI_mask_logical, .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, @@ -156,57 +119,37 @@ struct apic apic_default = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = default_wait_for_init_deassert, - + .wait_for_init_deassert = true, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, .read = native_apic_mem_read, .write = native_apic_mem_write, + .eoi_write = native_apic_mem_write, .icr_read = native_apic_icr_read, .icr_write = native_apic_icr_write, .wait_icr_idle = native_apic_wait_icr_idle, .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, + + .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid, }; -extern struct apic apic_numaq; -extern struct apic apic_summit; -extern struct apic apic_bigsmp; -extern struct apic apic_es7000; -extern struct apic apic_es7000_cluster; +apic_driver(apic_default); struct apic *apic = &apic_default; EXPORT_SYMBOL_GPL(apic); -static struct apic *apic_probe[] __initdata = { -#ifdef CONFIG_X86_NUMAQ - &apic_numaq, -#endif -#ifdef CONFIG_X86_SUMMIT - &apic_summit, -#endif -#ifdef CONFIG_X86_BIGSMP - &apic_bigsmp, -#endif -#ifdef CONFIG_X86_ES7000 - &apic_es7000, - &apic_es7000_cluster, -#endif - &apic_default, /* must be last */ - NULL, -}; - static int cmdline_apic __initdata; static int __init parse_apic(char *arg) { - int i; + struct apic **drv; if (!arg) return -EINVAL; - for (i = 0; apic_probe[i]; i++) { - if (!strcmp(apic_probe[i]->name, arg)) { - apic = apic_probe[i]; + for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { + if (!strcmp((*drv)->name, arg)) { + apic = *drv; cmdline_apic = 1; return 0; } @@ -217,38 +160,55 @@ static int __init parse_apic(char *arg) } early_param("apic", parse_apic); -void __init generic_bigsmp_probe(void) +void __init default_setup_apic_routing(void) { + int version = apic_version[boot_cpu_physical_apicid]; + + if (num_possible_cpus() > 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } + #ifdef CONFIG_X86_BIGSMP /* - * This routine is used to switch to bigsmp mode when + * This is used to switch to bigsmp mode when * - There is no apic= option specified by the user * - generic_apic_probe() has chosen apic_default as the sub_arch * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support */ - if (!cmdline_apic && apic == &apic_default) { - if (apic_bigsmp.probe()) { - apic = &apic_bigsmp; - printk(KERN_INFO "Overriding APIC driver with %s\n", - apic->name); - } - } + if (!cmdline_apic && apic == &apic_default) + generic_bigsmp_probe(); #endif + + if (apic->setup_apic_routing) + apic->setup_apic_routing(); + + if (x86_platform.apic_post_init) + x86_platform.apic_post_init(); } void __init generic_apic_probe(void) { if (!cmdline_apic) { - int i; - for (i = 0; apic_probe[i]; i++) { - if (apic_probe[i]->probe()) { - apic = apic_probe[i]; + struct apic **drv; + + for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { + if ((*drv)->probe()) { + apic = *drv; break; } } /* Not visible without early console */ - if (!apic_probe[i]) + if (drv == __apicdrivers_end) panic("Didn't find an APIC driver"); } printk(KERN_INFO "Using APIC driver %s\n", apic->name); @@ -259,16 +219,16 @@ void __init generic_apic_probe(void) int __init generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) { - int i; + struct apic **drv; - for (i = 0; apic_probe[i]; ++i) { - if (!apic_probe[i]->mps_oem_check) + for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { + if (!((*drv)->mps_oem_check)) continue; - if (!apic_probe[i]->mps_oem_check(mpc, oem, productid)) + if (!(*drv)->mps_oem_check(mpc, oem, productid)) continue; if (!cmdline_apic) { - apic = apic_probe[i]; + apic = *drv; printk(KERN_INFO "Switched to APIC driver `%s'.\n", apic->name); } @@ -279,16 +239,16 @@ generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - int i; + struct apic **drv; - for (i = 0; apic_probe[i]; ++i) { - if (!apic_probe[i]->acpi_madt_oem_check) + for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { + if (!(*drv)->acpi_madt_oem_check) continue; - if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) + if (!(*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) continue; if (!cmdline_apic) { - apic = apic_probe[i]; + apic = *drv; printk(KERN_INFO "Switched to APIC driver `%s'.\n", apic->name); } diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index f9e4e6a5407..1793dba7a74 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -23,69 +23,28 @@ #include <asm/ipi.h> #include <asm/setup.h> -extern struct apic apic_flat; -extern struct apic apic_physflat; -extern struct apic apic_x2xpic_uv_x; -extern struct apic apic_x2apic_phys; -extern struct apic apic_x2apic_cluster; - -struct apic __read_mostly *apic = &apic_flat; -EXPORT_SYMBOL_GPL(apic); - -static struct apic *apic_probe[] __initdata = { -#ifdef CONFIG_X86_UV - &apic_x2apic_uv_x, -#endif -#ifdef CONFIG_X86_X2APIC - &apic_x2apic_phys, - &apic_x2apic_cluster, -#endif - &apic_physflat, - NULL, -}; - -static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) -{ - return hard_smp_processor_id() >> index_msb; -} - /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ void __init default_setup_apic_routing(void) { + struct apic **drv; enable_IR_x2apic(); -#ifdef CONFIG_X86_X2APIC - if (x2apic_mode -#ifdef CONFIG_X86_UV - && apic != &apic_x2apic_uv_x -#endif - ) { - if (x2apic_phys) - apic = &apic_x2apic_phys; - else - apic = &apic_x2apic_cluster; - } -#endif - - if (apic == &apic_flat && num_possible_cpus() > 8) - apic = &apic_physflat; - - printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); - - if (is_vsmp_box()) { - /* need to update phys_pkg_id */ - apic->phys_pkg_id = apicid_phys_pkg_id; + for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { + if ((*drv)->probe && (*drv)->probe()) { + if (apic != *drv) { + apic = *drv; + pr_info("Switched APIC routing to %s.\n", + apic->name); + } + break; + } } - /* - * Now that apic routing model is selected, configure the - * fault handling for intr remapping. - */ - if (intr_remapping_enabled) - enable_drhd_fault_handling(); + if (x86_platform.apic_post_init) + x86_platform.apic_post_init(); } /* Same for both flat and physical. */ @@ -97,13 +56,15 @@ void apic_send_IPI_self(int vector) int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - int i; - - for (i = 0; apic_probe[i]; ++i) { - if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { - apic = apic_probe[i]; - printk(KERN_INFO "Setting APIC routing to %s.\n", - apic->name); + struct apic **drv; + + for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) { + if ((*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) { + if (apic != *drv) { + apic = *drv; + pr_info("Setting APIC routing to %s.\n", + apic->name); + } return 1; } } diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c deleted file mode 100644 index 9b419263d90..00000000000 --- a/arch/x86/kernel/apic/summit_32.c +++ /dev/null @@ -1,568 +0,0 @@ -/* - * IBM Summit-Specific Code - * - * Written By: Matthew Dobson, IBM Corporation - * - * Copyright (c) 2003 IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to <colpatch@us.ibm.com> - * - */ - -#include <linux/mm.h> -#include <linux/init.h> -#include <asm/io.h> -#include <asm/bios_ebda.h> - -/* - * APIC driver for the IBM "Summit" chipset. - */ -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <asm/mpspec.h> -#include <asm/apic.h> -#include <asm/smp.h> -#include <asm/fixmap.h> -#include <asm/apicdef.h> -#include <asm/ipi.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/gfp.h> -#include <linux/smp.h> - -static unsigned summit_get_apic_id(unsigned long x) -{ - return (x >> 24) & 0xFF; -} - -static inline void summit_send_IPI_mask(const struct cpumask *mask, int vector) -{ - default_send_IPI_mask_sequence_logical(mask, vector); -} - -static void summit_send_IPI_allbutself(int vector) -{ - default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); -} - -static void summit_send_IPI_all(int vector) -{ - summit_send_IPI_mask(cpu_online_mask, vector); -} - -#include <asm/tsc.h> - -extern int use_cyclone; - -#ifdef CONFIG_X86_SUMMIT_NUMA -static void setup_summit(void); -#else -static inline void setup_summit(void) {} -#endif - -static int summit_mps_oem_check(struct mpc_table *mpc, char *oem, - char *productid) -{ - if (!strncmp(oem, "IBM ENSW", 8) && - (!strncmp(productid, "VIGIL SMP", 9) - || !strncmp(productid, "EXA", 3) - || !strncmp(productid, "RUTHLESS SMP", 12))){ - mark_tsc_unstable("Summit based system"); - use_cyclone = 1; /*enable cyclone-timer*/ - setup_summit(); - return 1; - } - return 0; -} - -/* Hook from generic ACPI tables.c */ -static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (!strncmp(oem_id, "IBM", 3) && - (!strncmp(oem_table_id, "SERVIGIL", 8) - || !strncmp(oem_table_id, "EXA", 3))){ - mark_tsc_unstable("Summit based system"); - use_cyclone = 1; /*enable cyclone-timer*/ - setup_summit(); - return 1; - } - return 0; -} - -struct rio_table_hdr { - unsigned char version; /* Version number of this data structure */ - /* Version 3 adds chassis_num & WP_index */ - unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ - unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ -} __attribute__((packed)); - -struct scal_detail { - unsigned char node_id; /* Scalability Node ID */ - unsigned long CBAR; /* Address of 1MB register space */ - unsigned char port0node; /* Node ID port connected to: 0xFF=None */ - unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port1node; /* Node ID port connected to: 0xFF = None */ - unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port2node; /* Node ID port connected to: 0xFF = None */ - unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ -} __attribute__((packed)); - -struct rio_detail { - unsigned char node_id; /* RIO Node ID */ - unsigned long BBAR; /* Address of 1MB register space */ - unsigned char type; /* Type of device */ - unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ - /* For CYC: Node ID of Twister that owns this CYC */ - unsigned char port0node; /* Node ID port connected to: 0xFF=None */ - unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char port1node; /* Node ID port connected to: 0xFF=None */ - unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ - unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ - /* For CYC: 0 */ - unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ - /* = 0 : the XAPIC is not used, ie:*/ - /* ints fwded to another XAPIC */ - /* Bits1:7 Reserved */ - /* For CYC: Bits0:7 Reserved */ - unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ - /* lower slot numbers/PCI bus numbers */ - /* For CYC: No meaning */ - unsigned char chassis_num; /* 1 based Chassis number */ - /* For LookOut WPEGs this field indicates the */ - /* Expansion Chassis #, enumerated from Boot */ - /* Node WPEG external port, then Boot Node CYC */ - /* external port, then Next Vigil chassis WPEG */ - /* external port, etc. */ - /* Shared Lookouts have only 1 chassis number (the */ - /* first one assigned) */ -} __attribute__((packed)); - - -typedef enum { - CompatTwister = 0, /* Compatibility Twister */ - AltTwister = 1, /* Alternate Twister of internal 8-way */ - CompatCyclone = 2, /* Compatibility Cyclone */ - AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ - CompatWPEG = 4, /* Compatibility WPEG */ - AltWPEG = 5, /* Second Planar WPEG */ - LookOutAWPEG = 6, /* LookOut WPEG */ - LookOutBWPEG = 7, /* LookOut WPEG */ -} node_type; - -static inline int is_WPEG(struct rio_detail *rio){ - return (rio->type == CompatWPEG || rio->type == AltWPEG || - rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); -} - -#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) - -static const struct cpumask *summit_target_cpus(void) -{ - /* CPU_MASK_ALL (0xff) has undefined behaviour with - * dest_LowestPrio mode logical clustered apic interrupt routing - * Just start on cpu 0. IRQ balancing will spread load - */ - return cpumask_of(0); -} - -static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid) -{ - return 0; -} - -/* we don't use the phys_cpu_present_map to indicate apicid presence */ -static unsigned long summit_check_apicid_present(int bit) -{ - return 1; -} - -static void summit_init_apic_ldr(void) -{ - unsigned long val, id; - int count = 0; - u8 my_id = (u8)hard_smp_processor_id(); - u8 my_cluster = APIC_CLUSTER(my_id); -#ifdef CONFIG_SMP - u8 lid; - int i; - - /* Create logical APIC IDs by counting CPUs already in cluster. */ - for (count = 0, i = nr_cpu_ids; --i >= 0; ) { - lid = cpu_2_logical_apicid[i]; - if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster) - ++count; - } -#endif - /* We only have a 4 wide bitmap in cluster mode. If a deranged - * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ - BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); - id = my_cluster | (1UL << count); - apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - val |= SET_APIC_LOGICAL_ID(id); - apic_write(APIC_LDR, val); -} - -static int summit_apic_id_registered(void) -{ - return 1; -} - -static void summit_setup_apic_routing(void) -{ - printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", - nr_ioapics); -} - -static int summit_apicid_to_node(int logical_apicid) -{ -#ifdef CONFIG_SMP - return apicid_2_node[hard_smp_processor_id()]; -#else - return 0; -#endif -} - -/* Mapping from cpu number to logical apicid */ -static inline int summit_cpu_to_logical_apicid(int cpu) -{ -#ifdef CONFIG_SMP - if (cpu >= nr_cpu_ids) - return BAD_APICID; - return cpu_2_logical_apicid[cpu]; -#else - return logical_smp_processor_id(); -#endif -} - -static int summit_cpu_present_to_apicid(int mps_cpu) -{ - if (mps_cpu < nr_cpu_ids) - return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); - else - return BAD_APICID; -} - -static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap) -{ - /* For clustered we don't have a good way to do this yet - hack */ - physids_promote(0x0FL, retmap); -} - -static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap) -{ - physid_set_mask_of_physid(0, retmap); -} - -static int summit_check_phys_apicid_present(int physical_apicid) -{ - return 1; -} - -static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ - unsigned int round = 0; - int cpu, apicid = 0; - - /* - * The cpus in the mask must all be on the apic cluster. - */ - for_each_cpu(cpu, cpumask) { - int new_apicid = summit_cpu_to_logical_apicid(cpu); - - if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { - printk("%s: Not a valid mask!\n", __func__); - return BAD_APICID; - } - apicid |= new_apicid; - round++; - } - return apicid; -} - -static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, - const struct cpumask *andmask) -{ - int apicid = summit_cpu_to_logical_apicid(0); - cpumask_var_t cpumask; - - if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return apicid; - - cpumask_and(cpumask, inmask, andmask); - cpumask_and(cpumask, cpumask, cpu_online_mask); - apicid = summit_cpu_mask_to_apicid(cpumask); - - free_cpumask_var(cpumask); - - return apicid; -} - -/* - * cpuid returns the value latched in the HW at reset, not the APIC ID - * register's value. For any box whose BIOS changes APIC IDs, like - * clustered APIC systems, we must use hard_smp_processor_id. - * - * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. - */ -static int summit_phys_pkg_id(int cpuid_apic, int index_msb) -{ - return hard_smp_processor_id() >> index_msb; -} - -static int probe_summit(void) -{ - /* probed later in mptable/ACPI hooks */ - return 0; -} - -static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - /* Careful. Some cpus do not strictly honor the set of cpus - * specified in the interrupt destination when using lowest - * priority interrupt delivery mode. - * - * In particular there was a hyperthreading cpu observed to - * deliver interrupts to the wrong hyperthread when only one - * hyperthread was specified in the interrupt desitination. - */ - cpumask_clear(retmask); - cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - -#ifdef CONFIG_X86_SUMMIT_NUMA -static struct rio_table_hdr *rio_table_hdr; -static struct scal_detail *scal_devs[MAX_NUMNODES]; -static struct rio_detail *rio_devs[MAX_NUMNODES*4]; - -#ifndef CONFIG_X86_NUMAQ -static int mp_bus_id_to_node[MAX_MP_BUSSES]; -#endif - -static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) -{ - int twister = 0, node = 0; - int i, bus, num_buses; - - for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { - if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) { - twister = rio_devs[i]->owner_id; - break; - } - } - if (i == rio_table_hdr->num_rio_dev) { - printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__); - return last_bus; - } - - for (i = 0; i < rio_table_hdr->num_scal_dev; i++) { - if (scal_devs[i]->node_id == twister) { - node = scal_devs[i]->node_id; - break; - } - } - if (i == rio_table_hdr->num_scal_dev) { - printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__); - return last_bus; - } - - switch (rio_devs[wpeg_num]->type) { - case CompatWPEG: - /* - * The Compatibility Winnipeg controls the 2 legacy buses, - * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case - * a PCI-PCI bridge card is used in either slot: total 5 buses. - */ - num_buses = 5; - break; - case AltWPEG: - /* - * The Alternate Winnipeg controls the 2 133MHz buses [1 slot - * each], their 2 "extra" buses, the 100MHz bus [2 slots] and - * the "extra" buses for each of those slots: total 7 buses. - */ - num_buses = 7; - break; - case LookOutAWPEG: - case LookOutBWPEG: - /* - * A Lookout Winnipeg controls 3 100MHz buses [2 slots each] - * & the "extra" buses for each of those slots: total 9 buses. - */ - num_buses = 9; - break; - default: - printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__); - return last_bus; - } - - for (bus = last_bus; bus < last_bus + num_buses; bus++) - mp_bus_id_to_node[bus] = node; - return bus; -} - -static int build_detail_arrays(void) -{ - unsigned long ptr; - int i, scal_detail_size, rio_detail_size; - - if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { - printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); - return 0; - } - - switch (rio_table_hdr->version) { - default: - printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version); - return 0; - case 2: - scal_detail_size = 11; - rio_detail_size = 13; - break; - case 3: - scal_detail_size = 12; - rio_detail_size = 15; - break; - } - - ptr = (unsigned long)rio_table_hdr + 3; - for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size) - scal_devs[i] = (struct scal_detail *)ptr; - - for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size) - rio_devs[i] = (struct rio_detail *)ptr; - - return 1; -} - -void setup_summit(void) -{ - unsigned long ptr; - unsigned short offset; - int i, next_wpeg, next_bus = 0; - - /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */ - ptr = get_bios_ebda(); - ptr = (unsigned long)phys_to_virt(ptr); - - rio_table_hdr = NULL; - offset = 0x180; - while (offset) { - /* The block id is stored in the 2nd word */ - if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) { - /* set the pointer past the offset & block id */ - rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); - break; - } - /* The next offset is stored in the 1st word. 0 means no more */ - offset = *((unsigned short *)(ptr + offset)); - } - if (!rio_table_hdr) { - printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__); - return; - } - - if (!build_detail_arrays()) - return; - - /* The first Winnipeg we're looking for has an index of 0 */ - next_wpeg = 0; - do { - for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { - if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) { - /* It's the Winnipeg we're looking for! */ - next_bus = setup_pci_node_map_for_wpeg(i, next_bus); - next_wpeg++; - break; - } - } - /* - * If we go through all Rio devices and don't find one with - * the next index, it means we've found all the Winnipegs, - * and thus all the PCI buses. - */ - if (i == rio_table_hdr->num_rio_dev) - next_wpeg = 0; - } while (next_wpeg != 0); -} -#endif - -struct apic apic_summit = { - - .name = "summit", - .probe = probe_summit, - .acpi_madt_oem_check = summit_acpi_madt_oem_check, - .apic_id_registered = summit_apic_id_registered, - - .irq_delivery_mode = dest_LowestPrio, - /* logical delivery broadcast to all CPUs: */ - .irq_dest_mode = 1, - - .target_cpus = summit_target_cpus, - .disable_esr = 1, - .dest_logical = APIC_DEST_LOGICAL, - .check_apicid_used = summit_check_apicid_used, - .check_apicid_present = summit_check_apicid_present, - - .vector_allocation_domain = summit_vector_allocation_domain, - .init_apic_ldr = summit_init_apic_ldr, - - .ioapic_phys_id_map = summit_ioapic_phys_id_map, - .setup_apic_routing = summit_setup_apic_routing, - .multi_timer_check = NULL, - .apicid_to_node = summit_apicid_to_node, - .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, - .cpu_present_to_apicid = summit_cpu_present_to_apicid, - .apicid_to_cpu_present = summit_apicid_to_cpu_present, - .setup_portio_remap = NULL, - .check_phys_apicid_present = summit_check_phys_apicid_present, - .enable_apic_mode = NULL, - .phys_pkg_id = summit_phys_pkg_id, - .mps_oem_check = summit_mps_oem_check, - - .get_apic_id = summit_get_apic_id, - .set_apic_id = NULL, - .apic_id_mask = 0xFF << 24, - - .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, - - .send_IPI_mask = summit_send_IPI_mask, - .send_IPI_mask_allbutself = NULL, - .send_IPI_allbutself = summit_send_IPI_allbutself, - .send_IPI_all = summit_send_IPI_all, - .send_IPI_self = default_send_IPI_self, - - .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, - .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - - .wait_for_init_deassert = default_wait_for_init_deassert, - - .smp_callin_clear_local_apic = NULL, - .inquire_remote_apic = default_inquire_remote_apic, - - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = native_apic_icr_read, - .icr_write = native_apic_icr_write, - .wait_icr_idle = native_apic_wait_icr_idle, - .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, -}; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index cf69c59f491..e66766bf164 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -3,224 +3,272 @@ #include <linux/string.h> #include <linux/kernel.h> #include <linux/ctype.h> -#include <linux/init.h> #include <linux/dmar.h> +#include <linux/cpu.h> #include <asm/smp.h> -#include <asm/apic.h> -#include <asm/ipi.h> +#include <asm/x2apic.h> static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); +static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster); +static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return x2apic_enabled(); } -/* - * need to use more than cpu 0, because we need more vectors when - * MSI-X are used. - */ -static const struct cpumask *x2apic_target_cpus(void) +static inline u32 x2apic_cluster(int cpu) { - return cpu_online_mask; -} - -/* - * for now each logical cpu is in its own vector allocation domain. - */ -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); + return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; } static void - __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) +__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) { - unsigned long cfg; - - cfg = __prepare_ICR(0, vector, dest); - - /* - * send the IPI. - */ - native_x2apic_icr_write(cfg, apicid); -} - -/* - * for now, we send the IPI's one by one in the cpumask. - * TBD: Based on the cpu mask, we can send the IPI's to the cluster group - * at once. We have 16 cpu's in a cluster. This will minimize IPI register - * writes. - */ -static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) -{ - unsigned long query_cpu; + struct cpumask *cpus_in_cluster_ptr; + struct cpumask *ipi_mask_ptr; + unsigned int cpu, this_cpu; unsigned long flags; + u32 dest; x2apic_wrmsr_fence(); local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->dest_logical); - } - local_irq_restore(flags); -} -static void - x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) -{ - unsigned long this_cpu = smp_processor_id(); - unsigned long query_cpu; - unsigned long flags; + this_cpu = smp_processor_id(); - x2apic_wrmsr_fence(); + /* + * We are to modify mask, so we need an own copy + * and be sure it's manipulated with irq off. + */ + ipi_mask_ptr = __raw_get_cpu_var(ipi_mask); + cpumask_copy(ipi_mask_ptr, mask); - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu == this_cpu) - continue; - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->dest_logical); - } - local_irq_restore(flags); -} + /* + * The idea is to send one IPI per cluster. + */ + for_each_cpu(cpu, ipi_mask_ptr) { + unsigned long i; -static void x2apic_send_IPI_allbutself(int vector) -{ - unsigned long this_cpu = smp_processor_id(); - unsigned long query_cpu; - unsigned long flags; + cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu); + dest = 0; - x2apic_wrmsr_fence(); + /* Collect cpus in cluster. */ + for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) { + if (apic_dest == APIC_DEST_ALLINC || i != this_cpu) + dest |= per_cpu(x86_cpu_to_logical_apicid, i); + } - local_irq_save(flags); - for_each_online_cpu(query_cpu) { - if (query_cpu == this_cpu) + if (!dest) continue; - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, apic->dest_logical); + + __x2apic_send_IPI_dest(dest, vector, apic->dest_logical); + /* + * Cluster sibling cpus should be discared now so + * we would not send IPI them second time. + */ + cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr); } + local_irq_restore(flags); } -static void x2apic_send_IPI_all(int vector) +static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { - x2apic_send_IPI_mask(cpu_online_mask, vector); + __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLINC); } -static int x2apic_apic_id_registered(void) +static void +x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - return 1; + __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static void x2apic_send_IPI_allbutself(int vector) { - /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. - * May as well be the first. - */ - int cpu = cpumask_first(cpumask); + __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT); +} - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_logical_apicid, cpu); - else - return BAD_APICID; +static void x2apic_send_IPI_all(int vector) +{ + __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); } -static unsigned int +static int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) + const struct cpumask *andmask, + unsigned int *apicid) { - int cpu; + u32 dest = 0; + u16 cluster; + int i; - /* - * We're using fixed IRQ delivery, can only return one logical APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; + for_each_cpu_and(i, cpumask, andmask) { + if (!cpumask_test_cpu(i, cpu_online_mask)) + continue; + dest = per_cpu(x86_cpu_to_logical_apicid, i); + cluster = x2apic_cluster(i); + break; + } + + if (!dest) + return -EINVAL; + + for_each_cpu_and(i, cpumask, andmask) { + if (!cpumask_test_cpu(i, cpu_online_mask)) + continue; + if (cluster != x2apic_cluster(i)) + continue; + dest |= per_cpu(x86_cpu_to_logical_apicid, i); } - return per_cpu(x86_cpu_to_logical_apicid, cpu); + *apicid = dest; + + return 0; } -static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) +static void init_x2apic_ldr(void) { - unsigned int id; + unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; - id = x; - return id; + per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR); + + __cpu_set(this_cpu, per_cpu(cpus_in_cluster, this_cpu)); + for_each_online_cpu(cpu) { + if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) + continue; + __cpu_set(this_cpu, per_cpu(cpus_in_cluster, cpu)); + __cpu_set(cpu, per_cpu(cpus_in_cluster, this_cpu)); + } } -static unsigned long set_apic_id(unsigned int id) + /* + * At CPU state changes, update the x2apic cluster sibling info. + */ +static int +update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu) { - unsigned long x; + unsigned int this_cpu = (unsigned long)hcpu; + unsigned int cpu; + int err = 0; + + switch (action) { + case CPU_UP_PREPARE: + if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu), + GFP_KERNEL)) { + err = -ENOMEM; + } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu), + GFP_KERNEL)) { + free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); + err = -ENOMEM; + } + break; + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + case CPU_DEAD: + for_each_online_cpu(cpu) { + if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) + continue; + __cpu_clear(this_cpu, per_cpu(cpus_in_cluster, cpu)); + __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu)); + } + free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); + free_cpumask_var(per_cpu(ipi_mask, this_cpu)); + break; + } - x = id; - return x; + return notifier_from_errno(err); } -static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) +static struct notifier_block __refdata x2apic_cpu_notifier = { + .notifier_call = update_clusterinfo, +}; + +static int x2apic_init_cpu_notifier(void) { - return initial_apicid >> index_msb; + int cpu = smp_processor_id(); + + zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL); + + BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu)); + + __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu)); + register_hotcpu_notifier(&x2apic_cpu_notifier); + return 1; } -static void x2apic_send_IPI_self(int vector) +static int x2apic_cluster_probe(void) { - apic_write(APIC_SELF_IPI, vector); + if (x2apic_mode) + return x2apic_init_cpu_notifier(); + else + return 0; } -static void init_x2apic_ldr(void) +static const struct cpumask *x2apic_cluster_target_cpus(void) { - int cpu = smp_processor_id(); + return cpu_all_mask; +} - per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); +/* + * Each x2apic cluster is an allocation domain. + */ +static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, + const struct cpumask *mask) +{ + /* + * To minimize vector pressure, default case of boot, device bringup + * etc will use a single cpu for the interrupt destination. + * + * On explicit migration requests coming from irqbalance etc, + * interrupts will be routed to the x2apic cluster (cluster-id + * derived from the first cpu in the mask) members specified + * in the mask. + */ + if (mask == x2apic_cluster_target_cpus()) + cpumask_copy(retmask, cpumask_of(cpu)); + else + cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); } -struct apic apic_x2apic_cluster = { +static struct apic apic_x2apic_cluster = { .name = "cluster x2apic", - .probe = NULL, + .probe = x2apic_cluster_probe, .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, + .apic_id_valid = x2apic_apic_id_valid, .apic_id_registered = x2apic_apic_id_registered, .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = x2apic_target_cpus, + .target_cpus = x2apic_cluster_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = x2apic_vector_allocation_domain, + .vector_allocation_domain = cluster_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, .multi_timer_check = NULL, - .apicid_to_node = NULL, - .cpu_to_logical_apicid = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = x2apic_cluster_phys_pkg_id, + .phys_pkg_id = x2apic_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = x2apic_cluster_phys_get_apic_id, - .set_apic_id = set_apic_id, + .get_apic_id = x2apic_get_apic_id, + .set_apic_id = x2apic_set_apic_id, .apic_id_mask = 0xFFFFFFFFu, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .send_IPI_mask = x2apic_send_IPI_mask, @@ -231,14 +279,17 @@ struct apic apic_x2apic_cluster = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, .read = native_apic_msr_read, .write = native_apic_msr_write, + .eoi_write = native_apic_msr_eoi_write, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, .wait_icr_idle = native_x2apic_wait_icr_idle, .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, }; + +apic_driver(apic_x2apic_cluster); diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 8972f38c5ce..6d600ebf6c1 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -3,15 +3,15 @@ #include <linux/string.h> #include <linux/kernel.h> #include <linux/ctype.h> -#include <linux/init.h> #include <linux/dmar.h> #include <asm/smp.h> -#include <asm/apic.h> -#include <asm/ipi.h> +#include <asm/x2apic.h> int x2apic_phys; +static struct apic apic_x2apic_phys; + static int set_x2apic_phys_mode(char *arg) { x2apic_phys = 1; @@ -19,87 +19,35 @@ static int set_x2apic_phys_mode(char *arg) } early_param("x2apic_phys", set_x2apic_phys_mode); -static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (x2apic_phys) - return x2apic_enabled(); - else - return 0; -} - -/* - * need to use more than cpu 0, because we need more vectors when - * MSI-X are used. - */ -static const struct cpumask *x2apic_target_cpus(void) -{ - return cpu_online_mask; -} - -static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) +static bool x2apic_fadt_phys(void) { - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - -static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, - unsigned int dest) -{ - unsigned long cfg; - - cfg = __prepare_ICR(0, vector, dest); - - /* - * send the IPI. - */ - native_x2apic_icr_write(cfg, apicid); + if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { + printk(KERN_DEBUG "System requires x2apic physical mode\n"); + return true; + } + return false; } -static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - unsigned long query_cpu; - unsigned long flags; - - x2apic_wrmsr_fence(); - - local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); + return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys()); } static void - x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) { - unsigned long this_cpu = smp_processor_id(); unsigned long query_cpu; + unsigned long this_cpu; unsigned long flags; x2apic_wrmsr_fence(); local_irq_save(flags); - for_each_cpu(query_cpu, mask) { - if (query_cpu != this_cpu) - __x2apic_send_IPI_dest( - per_cpu(x86_cpu_to_apicid, query_cpu), - vector, APIC_DEST_PHYSICAL); - } - local_irq_restore(flags); -} - -static void x2apic_send_IPI_allbutself(int vector) -{ - unsigned long this_cpu = smp_processor_id(); - unsigned long query_cpu; - unsigned long flags; - x2apic_wrmsr_fence(); - - local_irq_save(flags); - for_each_online_cpu(query_cpu) { - if (query_cpu == this_cpu) + this_cpu = smp_processor_id(); + for_each_cpu(query_cpu, mask) { + if (apic_dest == APIC_DEST_ALLBUT && this_cpu == query_cpu) continue; __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); @@ -107,96 +55,62 @@ static void x2apic_send_IPI_allbutself(int vector) local_irq_restore(flags); } -static void x2apic_send_IPI_all(int vector) -{ - x2apic_send_IPI_mask(cpu_online_mask, vector); -} - -static int x2apic_apic_id_registered(void) -{ - return 1; -} - -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - int cpu = cpumask_first(cpumask); - - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - else - return BAD_APICID; + __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLINC); } -static unsigned int -x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) +static void + x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) { - int cpu; - - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - for_each_cpu_and(cpu, cpumask, andmask) { - if (cpumask_test_cpu(cpu, cpu_online_mask)) - break; - } - - return per_cpu(x86_cpu_to_apicid, cpu); + __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT); } -static unsigned int x2apic_phys_get_apic_id(unsigned long x) +static void x2apic_send_IPI_allbutself(int vector) { - return x; + __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT); } -static unsigned long set_apic_id(unsigned int id) +static void x2apic_send_IPI_all(int vector) { - return id; + __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); } -static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) +static void init_x2apic_ldr(void) { - return initial_apicid >> index_msb; } -static void x2apic_send_IPI_self(int vector) +static int x2apic_phys_probe(void) { - apic_write(APIC_SELF_IPI, vector); -} + if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys())) + return 1; -static void init_x2apic_ldr(void) -{ + return apic == &apic_x2apic_phys; } -struct apic apic_x2apic_phys = { +static struct apic apic_x2apic_phys = { .name = "physical x2apic", - .probe = NULL, + .probe = x2apic_phys_probe, .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, + .apic_id_valid = x2apic_apic_id_valid, .apic_id_registered = x2apic_apic_id_registered, .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = x2apic_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = x2apic_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, .multi_timer_check = NULL, - .apicid_to_node = NULL, - .cpu_to_logical_apicid = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, @@ -205,12 +119,11 @@ struct apic apic_x2apic_phys = { .phys_pkg_id = x2apic_phys_pkg_id, .mps_oem_check = NULL, - .get_apic_id = x2apic_phys_get_apic_id, - .set_apic_id = set_apic_id, + .get_apic_id = x2apic_get_apic_id, + .set_apic_id = x2apic_set_apic_id, .apic_id_mask = 0xFFFFFFFFu, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, - .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, .send_IPI_mask = x2apic_send_IPI_mask, .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, @@ -220,14 +133,17 @@ struct apic apic_x2apic_phys = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, .read = native_apic_msr_read, .write = native_apic_msr_write, + .eoi_write = native_apic_msr_eoi_write, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, .wait_icr_idle = native_x2apic_wait_icr_idle, .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, }; + +apic_driver(apic_x2apic_phys); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 194539aea17..293b41df54e 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -5,7 +5,7 @@ * * SGI UV APIC functions (note: not an Intel compatible APIC) * - * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved. */ #include <linux/cpumask.h> #include <linux/hardirq.h> @@ -23,6 +23,9 @@ #include <linux/io.h> #include <linux/pci.h> #include <linux/kdebug.h> +#include <linux/delay.h> +#include <linux/crash_dump.h> +#include <linux/reboot.h> #include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> @@ -34,6 +37,7 @@ #include <asm/ipi.h> #include <asm/smp.h> #include <asm/x86_init.h> +#include <asm/nmi.h> DEFINE_PER_CPU(int, x2apic_extra_bits); @@ -41,14 +45,42 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; static u64 gru_start_paddr, gru_end_paddr; +static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr; +static u64 gru_dist_lmask, gru_dist_umask; static union uvh_apicid uvh_apicid; int uv_min_hub_revision_id; EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); -static DEFINE_SPINLOCK(uv_nmi_lock); +unsigned int uv_apicid_hibits; +EXPORT_SYMBOL_GPL(uv_apicid_hibits); + +static struct apic apic_x2apic_uv_x; + +static unsigned long __init uv_early_read_mmr(unsigned long addr) +{ + unsigned long val, *mmr; + + mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr)); + val = *mmr; + early_iounmap(mmr, sizeof(*mmr)); + return val; +} static inline bool is_GRU_range(u64 start, u64 end) { - return start >= gru_start_paddr && end <= gru_end_paddr; + if (gru_dist_base) { + u64 su = start & gru_dist_umask; /* upper (incl pnode) bits */ + u64 sl = start & gru_dist_lmask; /* base offset bits */ + u64 eu = end & gru_dist_umask; + u64 el = end & gru_dist_lmask; + + /* Must reside completely within a single GRU range */ + return (sl == gru_dist_base && el == gru_dist_base && + su >= gru_first_node_paddr && + su <= gru_last_node_paddr && + eu == su); + } else { + return start >= gru_start_paddr && end <= gru_end_paddr; + } } static bool uv_is_untracked_pat_range(u64 start, u64 end) @@ -56,28 +88,36 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end) return is_ISA_range(start, end) || is_GRU_range(start, end); } -static int early_get_nodeid(void) +static int __init early_get_pnodeid(void) { union uvh_node_id_u node_id; - unsigned long *mmr; - - mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); - node_id.v = *mmr; - early_iounmap(mmr, sizeof(*mmr)); + union uvh_rh_gam_config_mmr_u m_n_config; + int pnode; /* Currently, all blades have same revision number */ + node_id.v = uv_early_read_mmr(UVH_NODE_ID); + m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR); uv_min_hub_revision_id = node_id.s.revision; - return node_id.s.node_id; + switch (node_id.s.part_number) { + case UV2_HUB_PART_NUMBER: + case UV2_HUB_PART_NUMBER_X: + uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; + break; + case UV3_HUB_PART_NUMBER: + case UV3_HUB_PART_NUMBER_X: + uv_min_hub_revision_id += UV3_HUB_REVISION_BASE; + break; + } + + uv_hub_info->hub_revision = uv_min_hub_revision_id; + pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); + return pnode; } static void __init early_get_apic_pnode_shift(void) { - unsigned long *mmr; - - mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr)); - uvh_apicid.v = *mmr; - early_iounmap(mmr, sizeof(*mmr)); + uvh_apicid.v = uv_early_read_mmr(UVH_APICID); if (!uvh_apicid.v) /* * Old bios, use default value @@ -85,12 +125,36 @@ static void __init early_get_apic_pnode_shift(void) uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT; } -static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +/* + * Add an extra bit as dictated by bios to the destination apicid of + * interrupts potentially passing through the UV HUB. This prevents + * a deadlock between interrupts and IO port operations. + */ +static void __init uv_set_apicid_hibit(void) { - int nodeid; + union uv1h_lb_target_physical_apic_id_mask_u apicid_mask; + + if (is_uv1_hub()) { + apicid_mask.v = + uv_early_read_mmr(UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK); + uv_apicid_hibits = + apicid_mask.s1.bit_enables & UV_APICID_HIBIT_MASK; + } +} - if (!strcmp(oem_id, "SGI")) { - nodeid = early_get_nodeid(); +static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + int pnodeid, is_uv1, is_uv2, is_uv3; + + is_uv1 = !strcmp(oem_id, "SGI"); + is_uv2 = !strcmp(oem_id, "SGI2"); + is_uv3 = !strncmp(oem_id, "SGI3", 4); /* there are varieties of UV3 */ + if (is_uv1 || is_uv2 || is_uv3) { + uv_hub_info->hub_revision = + (is_uv1 ? UV1_HUB_REVISION_BASE : + (is_uv2 ? UV2_HUB_REVISION_BASE : + UV3_HUB_REVISION_BASE)); + pnodeid = early_get_pnodeid(); early_get_apic_pnode_shift(); x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; x86_platform.nmi_init = uv_nmi_init; @@ -99,9 +163,10 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) else if (!strcmp(oem_table_id, "UVX")) uv_system_type = UV_X2APIC; else if (!strcmp(oem_table_id, "UVH")) { - __get_cpu_var(x2apic_extra_bits) = - nodeid << (uvh_apicid.s.pnode_shift - 1); + __this_cpu_write(x2apic_extra_bits, + pnodeid << uvh_apicid.s.pnode_shift); uv_system_type = UV_NON_UNIQUE_APIC; + uv_set_apicid_hibit(); return 1; } } @@ -137,30 +202,19 @@ EXPORT_SYMBOL_GPL(uv_possible_blades); unsigned long sn_rtc_cycles_per_second; EXPORT_SYMBOL(sn_rtc_cycles_per_second); -static const struct cpumask *uv_target_cpus(void) -{ - return cpu_online_mask; -} - -static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ - cpumask_clear(retmask); - cpumask_set_cpu(cpu, retmask); -} - -static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) +static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) { #ifdef CONFIG_SMP unsigned long val; int pnode; pnode = uv_apicid_to_pnode(phys_apicid); + phys_apicid |= uv_apicid_hibits; val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | APIC_DM_INIT; uv_write_global_mmr64(pnode, UVH_IPI_INT, val); - mdelay(10); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | @@ -218,34 +272,26 @@ static void uv_send_IPI_all(int vector) uv_send_IPI_mask(cpu_online_mask, vector); } -static int uv_apic_id_registered(void) +static int uv_apic_id_valid(int apicid) { return 1; } -static void uv_init_apic_ldr(void) +static int uv_apic_id_registered(void) { + return 1; } -static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) +static void uv_init_apic_ldr(void) { - /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. - * May as well be the first. - */ - int cpu = cpumask_first(cpumask); - - if ((unsigned)cpu < nr_cpu_ids) - return per_cpu(x86_cpu_to_apicid, cpu); - else - return BAD_APICID; } -static unsigned int +static int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, - const struct cpumask *andmask) + const struct cpumask *andmask, + unsigned int *apicid) { - int cpu; + int unsigned cpu; /* * We're using fixed IRQ delivery, can only return one phys APIC ID. @@ -255,7 +301,13 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, if (cpumask_test_cpu(cpu, cpu_online_mask)) break; } - return per_cpu(x86_cpu_to_apicid, cpu); + + if (likely(cpu < nr_cpu_ids)) { + *apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; + return 0; + } + + return -EINVAL; } static unsigned int x2apic_get_apic_id(unsigned long x) @@ -263,7 +315,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x) unsigned int id; WARN_ON(preemptible() && num_online_cpus() > 1); - id = x | __get_cpu_var(x2apic_extra_bits); + id = x | __this_cpu_read(x2apic_extra_bits); return id; } @@ -293,30 +345,34 @@ static void uv_send_IPI_self(int vector) apic_write(APIC_SELF_IPI, vector); } -struct apic __refdata apic_x2apic_uv_x = { +static int uv_probe(void) +{ + return apic == &apic_x2apic_uv_x; +} + +static struct apic __refdata apic_x2apic_uv_x = { .name = "UV large system", - .probe = NULL, + .probe = uv_probe, .acpi_madt_oem_check = uv_acpi_madt_oem_check, + .apic_id_valid = uv_apic_id_valid, .apic_id_registered = uv_apic_id_registered, .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = uv_target_cpus, + .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, .check_apicid_present = NULL, - .vector_allocation_domain = uv_vector_allocation_domain, + .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = uv_init_apic_ldr, .ioapic_phys_id_map = NULL, .setup_apic_routing = NULL, .multi_timer_check = NULL, - .apicid_to_node = NULL, - .cpu_to_logical_apicid = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .apicid_to_cpu_present = NULL, .setup_portio_remap = NULL, @@ -329,7 +385,6 @@ struct apic __refdata apic_x2apic_uv_x = { .set_apic_id = set_apic_id, .apic_id_mask = 0xFFFFFFFFu, - .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, .send_IPI_mask = uv_send_IPI_mask, @@ -341,21 +396,22 @@ struct apic __refdata apic_x2apic_uv_x = { .wakeup_secondary_cpu = uv_wakeup_secondary, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, .read = native_apic_msr_read, .write = native_apic_msr_write, + .eoi_write = native_apic_msr_eoi_write, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write, .wait_icr_idle = native_x2apic_wait_icr_idle, .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, }; -static __cpuinit void set_x2apic_extra_bits(int pnode) +static void set_x2apic_extra_bits(int pnode) { - __get_cpu_var(x2apic_extra_bits) = (pnode << 6); + __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift); } /* @@ -384,6 +440,20 @@ static __initdata struct redir_addr redir_addrs[] = { {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR}, }; +static unsigned char get_n_lshift(int m_val) +{ + union uv3h_gr0_gam_gr_config_u m_gr_config; + + if (is_uv1_hub()) + return m_val; + + if (is_uv2_hub()) + return m_val == 40 ? 40 : 39; + + m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG); + return m_gr_config.s3.m_skt; +} + static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) { union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias; @@ -411,26 +481,67 @@ static __init void map_high(char *id, unsigned long base, int pshift, paddr = base << pshift; bytes = (1UL << bshift) * (max_pnode + 1); - printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, - paddr + bytes); + if (!paddr) { + pr_info("UV: Map %s_HI base address NULL\n", id); + return; + } + pr_debug("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes); if (map_type == map_uc) init_extra_mapping_uc(paddr, bytes); else init_extra_mapping_wb(paddr, bytes); +} +static __init void map_gru_distributed(unsigned long c) +{ + union uvh_rh_gam_gru_overlay_config_mmr_u gru; + u64 paddr; + unsigned long bytes; + int nid; + + gru.v = c; + /* only base bits 42:28 relevant in dist mode */ + gru_dist_base = gru.v & 0x000007fff0000000UL; + if (!gru_dist_base) { + pr_info("UV: Map GRU_DIST base address NULL\n"); + return; + } + bytes = 1UL << UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; + gru_dist_lmask = ((1UL << uv_hub_info->m_val) - 1) & ~(bytes - 1); + gru_dist_umask = ~((1UL << uv_hub_info->m_val) - 1); + gru_dist_base &= gru_dist_lmask; /* Clear bits above M */ + for_each_online_node(nid) { + paddr = ((u64)uv_node_to_pnode(nid) << uv_hub_info->m_val) | + gru_dist_base; + init_extra_mapping_wb(paddr, bytes); + gru_first_node_paddr = min(paddr, gru_first_node_paddr); + gru_last_node_paddr = max(paddr, gru_last_node_paddr); + } + /* Save upper (63:M) bits of address only for is_GRU_range */ + gru_first_node_paddr &= gru_dist_umask; + gru_last_node_paddr &= gru_dist_umask; + pr_debug("UV: Map GRU_DIST base 0x%016llx 0x%016llx - 0x%016llx\n", + gru_dist_base, gru_first_node_paddr, gru_last_node_paddr); } + static __init void map_gru_high(int max_pnode) { union uvh_rh_gam_gru_overlay_config_mmr_u gru; int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); - if (gru.s.enable) { - map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); - gru_start_paddr = ((u64)gru.s.base << shift); - gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); + if (!gru.s.enable) { + pr_info("UV: GRU disabled\n"); + return; + } + if (is_uv3_hub() && gru.s3.mode) { + map_gru_distributed(gru.v); + return; } + map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); + gru_start_paddr = ((u64)gru.s.base << shift); + gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); } static __init void map_mmr_high(int max_pnode) @@ -441,17 +552,147 @@ static __init void map_mmr_high(int max_pnode) mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); if (mmr.s.enable) map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc); + else + pr_info("UV: MMR disabled\n"); } -static __init void map_mmioh_high(int max_pnode) +/* + * This commonality works because both 0 & 1 versions of the MMIOH OVERLAY + * and REDIRECT MMR regs are exactly the same on UV3. + */ +struct mmioh_config { + unsigned long overlay; + unsigned long redirect; + char *id; +}; + +static __initdata struct mmioh_config mmiohs[] = { + { + UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR, + UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR, + "MMIOH0" + }, + { + UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR, + UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR, + "MMIOH1" + }, +}; + +static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) +{ + union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay; + unsigned long mmr; + unsigned long base; + int i, n, shift, m_io, max_io; + int nasid, lnasid, fi, li; + char *id; + + id = mmiohs[index].id; + overlay.v = uv_read_local_mmr(mmiohs[index].overlay); + pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n", + id, overlay.v, overlay.s3.base, overlay.s3.m_io); + if (!overlay.s3.enable) { + pr_info("UV: %s disabled\n", id); + return; + } + + shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT; + base = (unsigned long)overlay.s3.base; + m_io = overlay.s3.m_io; + mmr = mmiohs[index].redirect; + n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH; + min_pnode *= 2; /* convert to NASID */ + max_pnode *= 2; + max_io = lnasid = fi = li = -1; + + for (i = 0; i < n; i++) { + union uv3h_rh_gam_mmioh_redirect_config0_mmr_u redirect; + + redirect.v = uv_read_local_mmr(mmr + i * 8); + nasid = redirect.s3.nasid; + if (nasid < min_pnode || max_pnode < nasid) + nasid = -1; /* invalid NASID */ + + if (nasid == lnasid) { + li = i; + if (i != n-1) /* last entry check */ + continue; + } + + /* check if we have a cached (or last) redirect to print */ + if (lnasid != -1 || (i == n-1 && nasid != -1)) { + unsigned long addr1, addr2; + int f, l; + + if (lnasid == -1) { + f = l = i; + lnasid = nasid; + } else { + f = fi; + l = li; + } + addr1 = (base << shift) + + f * (unsigned long)(1 << m_io); + addr2 = (base << shift) + + (l + 1) * (unsigned long)(1 << m_io); + pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n", + id, fi, li, lnasid, addr1, addr2); + if (max_io < l) + max_io = l; + } + fi = li = i; + lnasid = nasid; + } + + pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n", + id, base, shift, m_io, max_io); + + if (max_io >= 0) + map_high(id, base, shift, m_io, max_io, map_uc); +} + +static __init void map_mmioh_high(int min_pnode, int max_pnode) { union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; - int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; + unsigned long mmr, base; + int shift, enable, m_io, n_io; - mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); - if (mmioh.s.enable) - map_high("MMIOH", mmioh.s.base, shift, mmioh.s.m_io, - max_pnode, map_uc); + if (is_uv3_hub()) { + /* Map both MMIOH Regions */ + map_mmioh_high_uv3(0, min_pnode, max_pnode); + map_mmioh_high_uv3(1, min_pnode, max_pnode); + return; + } + + if (is_uv1_hub()) { + mmr = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR; + shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; + mmioh.v = uv_read_local_mmr(mmr); + enable = !!mmioh.s1.enable; + base = mmioh.s1.base; + m_io = mmioh.s1.m_io; + n_io = mmioh.s1.n_io; + } else if (is_uv2_hub()) { + mmr = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR; + shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; + mmioh.v = uv_read_local_mmr(mmr); + enable = !!mmioh.s2.enable; + base = mmioh.s2.base; + m_io = mmioh.s2.m_io; + n_io = mmioh.s2.n_io; + } else + return; + + if (enable) { + max_pnode &= (1 << n_io) - 1; + pr_info( + "UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n", + base, shift, m_io, n_io, max_pnode); + map_high("MMIOH", base, shift, m_io, max_pnode, map_uc); + } else { + pr_info("UV: MMIOH disabled\n"); + } } static __init void map_low_mmrs(void) @@ -501,7 +742,7 @@ static void uv_heartbeat(unsigned long ignored) mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL); } -static void __cpuinit uv_heartbeat_enable(int cpu) +static void uv_heartbeat_enable(int cpu) { while (!uv_cpu_hub_info(cpu)->scir.enabled) { struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; @@ -518,7 +759,7 @@ static void __cpuinit uv_heartbeat_enable(int cpu) } #ifdef CONFIG_HOTPLUG_CPU -static void __cpuinit uv_heartbeat_disable(int cpu) +static void uv_heartbeat_disable(int cpu) { if (uv_cpu_hub_info(cpu)->scir.enabled) { uv_cpu_hub_info(cpu)->scir.enabled = 0; @@ -530,8 +771,8 @@ static void __cpuinit uv_heartbeat_disable(int cpu) /* * cpu hotplug notifier */ -static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int uv_scir_cpu_notify(struct notifier_block *self, unsigned long action, + void *hcpu) { long cpu = (long)hcpu; @@ -575,14 +816,14 @@ late_initcall(uv_init_heartbeat); /* Direct Legacy VGA I/O traffic to designated IOH */ int uv_set_vga_state(struct pci_dev *pdev, bool decode, - unsigned int command_bits, bool change_bridge) + unsigned int command_bits, u32 flags) { int domain, bus, rc; - PR_DEVEL("devfn %x decode %d cmd %x chg_brdg %d\n", - pdev->devfn, decode, command_bits, change_bridge); + PR_DEVEL("devfn %x decode %d cmd %x flags %d\n", + pdev->devfn, decode, command_bits, flags); - if (!change_bridge) + if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE)) return 0; if ((command_bits & PCI_COMMAND_IO) == 0) @@ -601,7 +842,7 @@ int uv_set_vga_state(struct pci_dev *pdev, bool decode, * Called on each cpu to initialize the per_cpu UV data area. * FIXME: hotplug not supported yet */ -void __cpuinit uv_cpu_init(void) +void uv_cpu_init(void) { /* CPU 0 initilization will be done via uv_system_init. */ if (!uv_blade_info) @@ -613,86 +854,55 @@ void __cpuinit uv_cpu_init(void) set_x2apic_extra_bits(uv_hub_info->pnode); } -/* - * When NMI is received, print a stack trace. - */ -int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) -{ - if (reason != DIE_NMI_IPI) - return NOTIFY_OK; - - if (in_crash_kexec) - /* do nothing if entering the crash kernel */ - return NOTIFY_OK; - /* - * Use a lock so only one cpu prints at a time - * to prevent intermixed output. - */ - spin_lock(&uv_nmi_lock); - pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); - dump_stack(); - spin_unlock(&uv_nmi_lock); - - return NOTIFY_STOP; -} - -static struct notifier_block uv_dump_stack_nmi_nb = { - .notifier_call = uv_handle_nmi -}; - -void uv_register_nmi_notifier(void) -{ - if (register_die_notifier(&uv_dump_stack_nmi_nb)) - printk(KERN_WARNING "UV NMI handler failed to register\n"); -} - -void uv_nmi_init(void) -{ - unsigned int value; - - /* - * Unmask NMI on all cpus - */ - value = apic_read(APIC_LVT1) | APIC_DM_NMI; - value &= ~APIC_LVT_MASKED; - apic_write(APIC_LVT1, value); -} - void __init uv_system_init(void) { union uvh_rh_gam_config_mmr_u m_n_config; union uvh_node_id_u node_id; unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; - int gnode_extra, max_pnode = 0; + int gnode_extra, min_pnode = 999999, max_pnode = -1; unsigned long mmr_base, present, paddr; unsigned short pnode_mask; + unsigned char n_lshift; + char *hub = (is_uv1_hub() ? "UV1" : + (is_uv2_hub() ? "UV2" : + "UV3")); + pr_info("UV: Found %s hub\n", hub); map_low_mmrs(); m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; + pnode_mask = (1 << n_val) - 1; + n_lshift = get_n_lshift(m_val); mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE; - pnode_mask = (1 << n_val) - 1; + node_id.v = uv_read_local_mmr(UVH_NODE_ID); gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; gnode_upper = ((unsigned long)gnode_extra << m_val); - printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n", - n_val, m_val, gnode_upper, gnode_extra); + pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x n_lshift 0x%x\n", + n_val, m_val, pnode_mask, gnode_upper, gnode_extra, + n_lshift); - printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); + pr_info("UV: global MMR base 0x%lx\n", mmr_base); for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) uv_possible_blades += hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); - printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); + + /* uv_num_possible_blades() is really the hub count */ + pr_info("UV: Found %d blades, %d hubs\n", + is_uv1_hub() ? uv_num_possible_blades() : + (uv_num_possible_blades() + 1) / 2, + uv_num_possible_blades()); bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); - uv_blade_info = kmalloc(bytes, GFP_KERNEL); + uv_blade_info = kzalloc(bytes, GFP_KERNEL); BUG_ON(!uv_blade_info); + for (blade = 0; blade < uv_num_possible_blades(); blade++) uv_blade_info[blade].memory_nid = -1; @@ -714,10 +924,12 @@ void __init uv_system_init(void) for (j = 0; j < 64; j++) { if (!test_bit(j, &present)) continue; - pnode = (i * 64 + j); + pnode = (i * 64 + j) & pnode_mask; uv_blade_info[blade].pnode = pnode; uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; + spin_lock_init(&uv_blade_info[blade].nmi_lock); + min_pnode = min(pnode, min_pnode); max_pnode = max(pnode, max_pnode); blade++; } @@ -735,7 +947,13 @@ void __init uv_system_init(void) /* * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); */ + uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; + uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision; + + uv_cpu_hub_info(cpu)->m_shift = 64 - m_val; + uv_cpu_hub_info(cpu)->n_lshift = n_lshift; + pnode = uv_apicid_to_pnode(apicid); blade = boot_pnode_to_blade(pnode); lcpu = uv_blade_info[blade].nr_possible_cpus; @@ -751,7 +969,6 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->numa_blade_id = blade; uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; uv_cpu_hub_info(cpu)->pnode = pnode; - uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; @@ -767,21 +984,29 @@ void __init uv_system_init(void) if (uv_node_to_blade[nid] >= 0) continue; paddr = node_start_pfn(nid) << PAGE_SHIFT; - paddr = uv_soc_phys_ram_to_gpa(paddr); - pnode = (paddr >> m_val) & pnode_mask; + pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); blade = boot_pnode_to_blade(pnode); uv_node_to_blade[nid] = blade; } map_gru_high(max_pnode); map_mmr_high(max_pnode); - map_mmioh_high(max_pnode); + map_mmioh_high(min_pnode, max_pnode); + uv_nmi_setup(); uv_cpu_init(); uv_scir_register_cpu_notifier(); - uv_register_nmi_notifier(); proc_mkdir("sgi_uv", NULL); /* register Legacy VGA I/O redirection handler */ pci_register_set_vga_state(uv_set_vga_state); + + /* + * For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as + * EFI is not enabled in the kdump kernel. + */ + if (is_kdump_kernel()) + reboot_type = BOOT_ACPI; } + +apic_driver(apic_x2apic_uv_x); |
