aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile6
-rw-r--r--arch/x86/kernel/acpi/boot.c65
-rw-r--r--arch/x86/kernel/alternative.c29
-rw-r--r--arch/x86/kernel/amd_iommu_init.c16
-rw-r--r--arch/x86/kernel/apic/apic.c76
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c4
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c16
-rw-r--r--arch/x86/kernel/apic/es7000_32.c25
-rw-r--r--arch/x86/kernel/apic/io_apic.c145
-rw-r--r--arch/x86/kernel/apic/nmi.c12
-rw-r--r--arch/x86/kernel/apic/numaq_32.c11
-rw-r--r--arch/x86/kernel/apic/probe_32.c3
-rw-r--r--arch/x86/kernel/apic/summit_32.c21
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c31
-rw-r--r--arch/x86/kernel/apm_32.c248
-rw-r--r--arch/x86/kernel/bios_uv.c3
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c1
-rw-r--r--arch/x86/kernel/cpu/common.c15
-rw-r--r--[-rwxr-xr-x]arch/x86/kernel/cpu/cpu_debug.c0
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c105
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.c7
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c13
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c2
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c24
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c43
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c12
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c6
-rw-r--r--arch/x86/kernel/cpu/proc.c6
-rw-r--r--arch/x86/kernel/dumpstack.c6
-rw-r--r--arch/x86/kernel/e820.c11
-rw-r--r--arch/x86/kernel/entry_64.S3
-rw-r--r--arch/x86/kernel/ftrace.c197
-rw-r--r--arch/x86/kernel/hpet.c24
-rw-r--r--arch/x86/kernel/i8253.c2
-rw-r--r--arch/x86/kernel/irq.c2
-rw-r--r--arch/x86/kernel/kgdb.c3
-rw-r--r--arch/x86/kernel/kprobes.c17
-rw-r--r--arch/x86/kernel/kvmclock.c7
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c4
-rw-r--r--arch/x86/kernel/microcode_amd.c43
-rw-r--r--arch/x86/kernel/microcode_core.c125
-rw-r--r--arch/x86/kernel/microcode_intel.c83
-rw-r--r--arch/x86/kernel/mpparse.c7
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/pci-dma.c8
-rw-r--r--arch/x86/kernel/pci-nommu.c2
-rw-r--r--arch/x86/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86/kernel/process.c25
-rw-r--r--arch/x86/kernel/ptrace.c8
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/smpboot.c70
-rw-r--r--arch/x86/kernel/tlb_uv.c198
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/uv_sysfs.c4
-rw-r--r--arch/x86/kernel/uv_time.c10
-rw-r--r--arch/x86/kernel/vmiclock_32.c2
-rw-r--r--arch/x86/kernel/xsave.c6
61 files changed, 1176 insertions, 664 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index c611ad64137..88d1bfc847d 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -66,7 +66,8 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-y += apic/
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
@@ -88,7 +89,8 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
-obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o
+obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a18eb7ce223..723989d7f80 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -230,6 +230,35 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
}
static int __init
+acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
+{
+ struct acpi_madt_local_x2apic *processor = NULL;
+
+ processor = (struct acpi_madt_local_x2apic *)header;
+
+ if (BAD_MADT_ENTRY(processor, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+#ifdef CONFIG_X86_X2APIC
+ /*
+ * We need to register disabled CPU as well to permit
+ * counting disabled CPUs. This allows us to size
+ * cpus_possible_map more accurately, to permit
+ * to not preallocating memory for all NR_CPUS
+ * when we use CPU hotplug.
+ */
+ acpi_register_lapic(processor->local_apic_id, /* APIC ID */
+ processor->lapic_flags & ACPI_MADT_ENABLED);
+#else
+ printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
+#endif
+
+ return 0;
+}
+
+static int __init
acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
{
struct acpi_madt_local_apic *processor = NULL;
@@ -289,6 +318,25 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
}
static int __init
+acpi_parse_x2apic_nmi(struct acpi_subtable_header *header,
+ const unsigned long end)
+{
+ struct acpi_madt_local_x2apic_nmi *x2apic_nmi = NULL;
+
+ x2apic_nmi = (struct acpi_madt_local_x2apic_nmi *)header;
+
+ if (BAD_MADT_ENTRY(x2apic_nmi, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ if (x2apic_nmi->lint != 1)
+ printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
+
+ return 0;
+}
+
+static int __init
acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end)
{
struct acpi_madt_local_apic_nmi *lapic_nmi = NULL;
@@ -793,6 +841,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
static int __init acpi_parse_madt_lapic_entries(void)
{
int count;
+ int x2count = 0;
if (!cpu_has_apic)
return -ENODEV;
@@ -816,22 +865,28 @@ static int __init acpi_parse_madt_lapic_entries(void)
count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
acpi_parse_sapic, MAX_APICS);
- if (!count)
+ if (!count) {
+ x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
+ acpi_parse_x2apic, MAX_APICS);
count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
acpi_parse_lapic, MAX_APICS);
- if (!count) {
+ }
+ if (!count && !x2count) {
printk(KERN_ERR PREFIX "No LAPIC entries present\n");
/* TBD: Cleanup to allow fallback to MPS */
return -ENODEV;
- } else if (count < 0) {
+ } else if (count < 0 || x2count < 0) {
printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n");
/* TBD: Cleanup to allow fallback to MPS */
return count;
}
+ x2count =
+ acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC_NMI,
+ acpi_parse_x2apic_nmi, 0);
count =
acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0);
- if (count < 0) {
+ if (count < 0 || x2count < 0) {
printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
/* TBD: Cleanup to allow fallback to MPS */
return count;
@@ -1470,7 +1525,7 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
/*
* If your system is blacklisted here, but you find that acpi=force
- * works for you, please contact acpi-devel@sourceforge.net
+ * works for you, please contact linux-acpi@vger.kernel.org
*/
static struct dmi_system_id __initdata acpi_dmi_table[] = {
/*
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4c80f155743..f5765870257 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -5,6 +5,7 @@
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
+#include <linux/memory.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
@@ -12,7 +13,9 @@
#include <asm/nmi.h>
#include <asm/vsyscall.h>
#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
#include <asm/io.h>
+#include <asm/fixmap.h>
#define MAX_PATCH_LEN (255-1)
@@ -226,6 +229,7 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
u8 **ptr;
+ mutex_lock(&text_mutex);
for (ptr = start; ptr < end; ptr++) {
if (*ptr < text)
continue;
@@ -234,6 +238,7 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
/* turn DS segment override prefix into lock prefix */
text_poke(*ptr, ((unsigned char []){0xf0}), 1);
};
+ mutex_unlock(&text_mutex);
}
static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
@@ -243,6 +248,7 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end
if (noreplace_smp)
return;
+ mutex_lock(&text_mutex);
for (ptr = start; ptr < end; ptr++) {
if (*ptr < text)
continue;
@@ -251,6 +257,7 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end
/* turn lock prefix into DS segment override prefix */
text_poke(*ptr, ((unsigned char []){0x3E}), 1);
};
+ mutex_unlock(&text_mutex);
}
struct smp_alt_module {
@@ -500,15 +507,16 @@ void *text_poke_early(void *addr, const void *opcode, size_t len)
* It means the size must be writable atomically and the address must be aligned
* in a way that permits an atomic write. It also makes sure we fit on a single
* page.
+ *
+ * Note: Must be called under text_mutex.
*/
void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
{
+ unsigned long flags;
char *vaddr;
- int nr_pages = 2;
struct page *pages[2];
int i;
- might_sleep();
if (!core_kernel_text((unsigned long)addr)) {
pages[0] = vmalloc_to_page(addr);
pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
@@ -518,18 +526,21 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
pages[1] = virt_to_page(addr + PAGE_SIZE);
}
BUG_ON(!pages[0]);
- if (!pages[1])
- nr_pages = 1;
- vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
- BUG_ON(!vaddr);
- local_irq_disable();
+ local_irq_save(flags);
+ set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
+ if (pages[1])
+ set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
+ vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
- local_irq_enable();
- vunmap(vaddr);
+ clear_fixmap(FIX_TEXT_POKE0);
+ if (pages[1])
+ clear_fixmap(FIX_TEXT_POKE1);
+ local_flush_tlb();
sync_core();
/* Could also do a CLFLUSH here to speed up CPU recovery; but
that causes hangs on some VIA CPUs. */
for (i = 0; i < len; i++)
BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
+ local_irq_restore(flags);
return addr;
}
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 42c33cebf00..8c0be0902da 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -49,10 +49,10 @@
#define IVHD_DEV_EXT_SELECT 0x46
#define IVHD_DEV_EXT_SELECT_RANGE 0x47
-#define IVHD_FLAG_HT_TUN_EN 0x00
-#define IVHD_FLAG_PASSPW_EN 0x01
-#define IVHD_FLAG_RESPASSPW_EN 0x02
-#define IVHD_FLAG_ISOC_EN 0x03
+#define IVHD_FLAG_HT_TUN_EN_MASK 0x01
+#define IVHD_FLAG_PASSPW_EN_MASK 0x02
+#define IVHD_FLAG_RESPASSPW_EN_MASK 0x04
+#define IVHD_FLAG_ISOC_EN_MASK 0x08
#define IVMD_FLAG_EXCL_RANGE 0x08
#define IVMD_FLAG_UNITY_MAP 0x01
@@ -569,19 +569,19 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
* First set the recommended feature enable bits from ACPI
* into the IOMMU control registers
*/
- h->flags & IVHD_FLAG_HT_TUN_EN ?
+ h->flags & IVHD_FLAG_HT_TUN_EN_MASK ?
iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
- h->flags & IVHD_FLAG_PASSPW_EN ?
+ h->flags & IVHD_FLAG_PASSPW_EN_MASK ?
iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
- h->flags & IVHD_FLAG_RESPASSPW_EN ?
+ h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
- h->flags & IVHD_FLAG_ISOC_EN ?
+ h->flags & IVHD_FLAG_ISOC_EN_MASK ?
iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
iommu_feature_disable(iommu, CONTROL_ISOC_EN);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 85eb8e10081..f2870920f24 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -431,6 +431,12 @@ static void __cpuinit setup_APIC_timer(void)
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+ if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) {
+ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
+ /* Make LAPIC timer preferrable over percpu HPET */
+ lapic_clockevent.rating = 150;
+ }
+
memcpy(levt, &lapic_clockevent, sizeof(*levt));
levt->cpumask = cpumask_of(smp_processor_id());
@@ -1304,6 +1310,7 @@ void __init enable_IR_x2apic(void)
#ifdef CONFIG_INTR_REMAP
int ret;
unsigned long flags;
+ struct IO_APIC_route_entry **ioapic_entries = NULL;
if (!cpu_has_x2apic)
return;
@@ -1334,17 +1341,23 @@ void __init enable_IR_x2apic(void)
return;
}
- ret = save_IO_APIC_setup();
+ ioapic_entries = alloc_ioapic_entries();
+ if (!ioapic_entries) {
+ pr_info("Allocate ioapic_entries failed: %d\n", ret);
+ goto end;
+ }
+
+ ret = save_IO_APIC_setup(ioapic_entries);
if (ret) {
pr_info("Saving IO-APIC state failed: %d\n", ret);
goto end;
}
local_irq_save(flags);
- mask_IO_APIC_setup();
+ mask_IO_APIC_setup(ioapic_entries);
mask_8259A();
- ret = enable_intr_remapping(1);
+ ret = enable_intr_remapping(EIM_32BIT_APIC_ID);
if (ret && x2apic_preenabled) {
local_irq_restore(flags);
@@ -1364,9 +1377,9 @@ end_restore:
/*
* IR enabling failed
*/
- restore_IO_APIC_setup();
+ restore_IO_APIC_setup(ioapic_entries);
else
- reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+ reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries);
unmask_8259A();
local_irq_restore(flags);
@@ -1379,6 +1392,8 @@ end:
pr_info("Enabled Interrupt-remapping\n");
} else
pr_err("Failed to enable Interrupt-remapping and x2apic\n");
+ if (ioapic_entries)
+ free_ioapic_entries(ioapic_entries);
#else
if (!cpu_has_x2apic)
return;
@@ -1954,6 +1969,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
local_irq_save(flags);
disable_local_APIC();
+#ifdef CONFIG_INTR_REMAP
+ if (intr_remapping_enabled)
+ disable_intr_remapping();
+#endif
local_irq_restore(flags);
return 0;
}
@@ -1964,15 +1983,41 @@ static int lapic_resume(struct sys_device *dev)
unsigned long flags;
int maxlvt;
+#ifdef CONFIG_INTR_REMAP
+ int ret;
+ struct IO_APIC_route_entry **ioapic_entries = NULL;
+
if (!apic_pm_state.active)
return 0;
- maxlvt = lapic_get_maxlvt();
-
local_irq_save(flags);
+ if (x2apic) {
+ ioapic_entries = alloc_ioapic_entries();
+ if (!ioapic_entries) {
+ WARN(1, "Alloc ioapic_entries in lapic resume failed.");
+ return -ENOMEM;
+ }
+
+ ret = save_IO_APIC_setup(ioapic_entries);
+ if (ret) {
+ WARN(1, "Saving IO-APIC state failed: %d\n", ret);
+ free_ioapic_entries(ioapic_entries);
+ return ret;
+ }
+
+ mask_IO_APIC_setup(ioapic_entries);
+ mask_8259A();
+ enable_x2apic();
+ }
+#else
+ if (!apic_pm_state.active)
+ return 0;
+ local_irq_save(flags);
if (x2apic)
enable_x2apic();
+#endif
+
else {
/*
* Make sure the APICBASE points to the right address
@@ -1986,6 +2031,7 @@ static int lapic_resume(struct sys_device *dev)
wrmsr(MSR_IA32_APICBASE, l, h);
}
+ maxlvt = lapic_get_maxlvt();
apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
apic_write(APIC_ID, apic_pm_state.apic_id);
apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -2009,8 +2055,20 @@ static int lapic_resume(struct sys_device *dev)
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
+#ifdef CONFIG_INTR_REMAP
+ if (intr_remapping_enabled)
+ reenable_intr_remapping(EIM_32BIT_APIC_ID);
+
+ if (x2apic) {
+ unmask_8259A();
+ restore_IO_APIC_setup(ioapic_entries);
+ free_ioapic_entries(ioapic_entries);
+ }
+#endif
+
local_irq_restore(flags);
+
return 0;
}
@@ -2048,7 +2106,9 @@ static int __init init_lapic_sysfs(void)
error = sysdev_register(&device_lapic);
return error;
}
-device_initcall(init_lapic_sysfs);
+
+/* local apic needs to resume before other devices access its registers. */
+core_initcall(init_lapic_sysfs);
#else /* CONFIG_PM */
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 0014714ea97..306e5e88fb6 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -212,7 +212,7 @@ struct apic apic_flat = {
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
.smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,
.write = native_apic_mem_write,
@@ -362,7 +362,7 @@ struct apic apic_physflat = {
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
.smp_callin_clear_local_apic = NULL,
- .inquire_remote_apic = NULL,
+ .inquire_remote_apic = default_inquire_remote_apic,
.read = native_apic_mem_read,
.write = native_apic_mem_write,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index d806ecaa948..676cdac385c 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -26,12 +26,12 @@ static int bigsmp_apic_id_registered(void)
return 1;
}
-static const cpumask_t *bigsmp_target_cpus(void)
+static const struct cpumask *bigsmp_target_cpus(void)
{
#ifdef CONFIG_SMP
- return &cpu_online_map;
+ return cpu_online_mask;
#else
- return &cpumask_of_cpu(0);
+ return cpumask_of(0);
#endif
}
@@ -118,9 +118,9 @@ static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid)
}
/* As we are using single CPU as destination, pick only one CPU here */
-static unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
- return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask));
+ return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask));
}
static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -188,10 +188,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
{ } /* NULL entry stops DMI scanning */
};
-static void bigsmp_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
- cpus_clear(*retmask);
- cpu_set(cpu, *retmask);
+ cpumask_clear(retmask);
+ cpumask_set_cpu(cpu, retmask);
}
static int probe_bigsmp(void)
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 19588f2770e..30294777557 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -254,7 +254,7 @@ static int parse_unisys_oem(char *oemptr)
}
#ifdef CONFIG_ACPI
-static int find_unisys_acpi_oem_table(unsigned long *oem_addr)
+static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
{
struct acpi_table_header *header = NULL;
struct es7000_oem_table *table;
@@ -285,7 +285,7 @@ static int find_unisys_acpi_oem_table(unsigned long *oem_addr)
return 0;
}
-static void unmap_unisys_acpi_oem_table(unsigned long oem_addr)
+static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
{
if (!oem_addr)
return;
@@ -306,7 +306,7 @@ static int es7000_check_dsdt(void)
static int es7000_acpi_ret;
/* Hook from generic ACPI tables.c */
-static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
unsigned long oem_addr = 0;
int check_dsdt;
@@ -410,7 +410,7 @@ static void es7000_enable_apic_mode(void)
WARN(1, "Command failed, status = %x\n", mip_status);
}
-static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
@@ -420,7 +420,8 @@ static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
+ cpumask_clear(retmask);
+ cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
}
@@ -455,14 +456,14 @@ static int es7000_apic_id_registered(void)
return 1;
}
-static const cpumask_t *target_cpus_cluster(void)
+static const struct cpumask *target_cpus_cluster(void)
{
- return &CPU_MASK_ALL;
+ return cpu_all_mask;
}
-static const cpumask_t *es7000_target_cpus(void)
+static const struct cpumask *es7000_target_cpus(void)
{
- return &cpumask_of_cpu(smp_processor_id());
+ return cpumask_of(smp_processor_id());
}
static unsigned long
@@ -517,7 +518,7 @@ static void es7000_setup_apic_routing(void)
"Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
(apic_version[apic] == 0x14) ?
"Physical Cluster" : "Logical Cluster",
- nr_ioapics, cpus_addr(*es7000_target_cpus())[0]);
+ nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
}
static int es7000_apicid_to_node(int logical_apicid)
@@ -572,7 +573,7 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
return 1;
}
-static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
unsigned int round = 0;
int cpu, uninitialized_var(apicid);
@@ -716,7 +717,7 @@ struct apic apic_es7000_cluster = {
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
};
-struct apic apic_es7000 = {
+struct apic __refdata apic_es7000 = {
.name = "es7000",
.probe = probe_es7000,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1bb5c6cee3e..30da617d18e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -851,63 +851,74 @@ __setup("pirq=", ioapic_pirq_setup);
#endif /* CONFIG_X86_32 */
#ifdef CONFIG_INTR_REMAP
-/* I/O APIC RTE contents at the OS boot up */
-static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+struct IO_APIC_route_entry **alloc_ioapic_entries(void)
+{
+ int apic;
+ struct IO_APIC_route_entry **ioapic_entries;
+
+ ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics,
+ GFP_ATOMIC);
+ if (!ioapic_entries)
+ return 0;
+
+ for (apic = 0; apic < nr_ioapics; apic++) {
+ ioapic_entries[apic] =
+ kzalloc(sizeof(struct IO_APIC_route_entry) *
+ nr_ioapic_registers[apic], GFP_ATOMIC);
+ if (!ioapic_entries[apic])
+ goto nomem;
+ }
+
+ return ioapic_entries;
+
+nomem:
+ while (--apic >= 0)
+ kfree(ioapic_entries[apic]);
+ kfree(ioapic_entries);
+
+ return 0;
+}
/*
* Saves all the IO-APIC RTE's
*/
-int save_IO_APIC_setup(void)
+int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
{
- union IO_APIC_reg_01 reg_01;
- unsigned long flags;
int apic, pin;
- /*
- * The number of IO-APIC IRQ registers (== #pins):
- */
- for (apic = 0; apic < nr_ioapics; apic++) {
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(apic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
- nr_ioapic_registers[apic] = reg_01.bits.entries+1;
- }
+ if (!ioapic_entries)
+ return -ENOMEM;
for (apic = 0; apic < nr_ioapics; apic++) {
- early_ioapic_entries[apic] =
- kzalloc(sizeof(struct IO_APIC_route_entry) *
- nr_ioapic_registers[apic], GFP_KERNEL);
- if (!early_ioapic_entries[apic])
- goto nomem;
- }
+ if (!ioapic_entries[apic])
+ return -ENOMEM;
- for (apic = 0; apic < nr_ioapics; apic++)
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
- early_ioapic_entries[apic][pin] =
+ ioapic_entries[apic][pin] =
ioapic_read_entry(apic, pin);
+ }
return 0;
-
-nomem:
- while (apic >= 0)
- kfree(early_ioapic_entries[apic--]);
- memset(early_ioapic_entries, 0,
- ARRAY_SIZE(early_ioapic_entries));
-
- return -ENOMEM;
}
-void mask_IO_APIC_setup(void)
+/*
+ * Mask all IO APIC entries.
+ */
+void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
{
int apic, pin;
+ if (!ioapic_entries)
+ return;
+
for (apic = 0; apic < nr_ioapics; apic++) {
- if (!early_ioapic_entries[apic])
+ if (!ioapic_entries[apic])
break;
+
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
struct IO_APIC_route_entry entry;
- entry = early_ioapic_entries[apic][pin];
+ entry = ioapic_entries[apic][pin];
if (!entry.mask) {
entry.mask = 1;
ioapic_write_entry(apic, pin, entry);
@@ -916,22 +927,30 @@ void mask_IO_APIC_setup(void)
}
}
-void restore_IO_APIC_setup(void)
+/*
+ * Restore IO APIC entries which was saved in ioapic_entries.
+ */
+int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
{
int apic, pin;
+ if (!ioapic_entries)
+ return -ENOMEM;
+
for (apic = 0; apic < nr_ioapics; apic++) {
- if (!early_ioapic_entries[apic])
- break;
+ if (!ioapic_entries[apic])
+ return -ENOMEM;
+
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
ioapic_write_entry(apic, pin,
- early_ioapic_entries[apic][pin]);
- kfree(early_ioapic_entries[apic]);
- early_ioapic_entries[apic] = NULL;
+ ioapic_entries[apic][pin]);
}
+ return 0;
}
-void reinit_intr_remapped_IO_APIC(int intr_remapping)
+void reinit_intr_remapped_IO_APIC(int intr_remapping,
+ struct IO_APIC_route_entry **ioapic_entries)
+
{
/*
* for now plain restore of previous settings.
@@ -940,7 +959,17 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping)
* table entries. for now, do a plain restore, and wait for
* the setup_IO_APIC_irqs() to do proper initialization.
*/
- restore_IO_APIC_setup();
+ restore_IO_APIC_setup(ioapic_entries);
+}
+
+void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
+{
+ int apic;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ kfree(ioapic_entries[apic]);
+
+ kfree(ioapic_entries);
}
#endif
@@ -2495,7 +2524,6 @@ static void irq_complete_move(struct irq_desc **descp)
static inline void irq_complete_move(struct irq_desc **descp) {}
#endif
-#ifdef CONFIG_INTR_REMAP
static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
int apic, pin;
@@ -2529,6 +2557,7 @@ eoi_ioapic_irq(struct irq_desc *desc)
spin_unlock_irqrestore(&ioapic_lock, flags);
}
+#ifdef CONFIG_X86_X2APIC
static void ack_x2apic_level(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -2540,7 +2569,6 @@ static void ack_x2apic_edge(unsigned int irq)
{
ack_x2APIC_irq();
}
-
#endif
static void ack_apic_edge(unsigned int irq)
@@ -2606,6 +2634,9 @@ static void ack_apic_level(unsigned int irq)
*/
ack_APIC_irq();
+ if (irq_remapped(irq))
+ eoi_ioapic_irq(desc);
+
/* Now we can move and renable the irq */
if (unlikely(do_unmask_irq)) {
/* Only migrate the irq if the ack has been received.
@@ -2651,6 +2682,26 @@ static void ack_apic_level(unsigned int irq)
#endif
}
+#ifdef CONFIG_INTR_REMAP
+static void ir_ack_apic_edge(unsigned int irq)
+{
+#ifdef CONFIG_X86_X2APIC
+ if (x2apic_enabled())
+ return ack_x2apic_edge(irq);
+#endif
+ return ack_apic_edge(irq);
+}
+
+static void ir_ack_apic_level(unsigned int irq)
+{
+#ifdef CONFIG_X86_X2APIC
+ if (x2apic_enabled())
+ return ack_x2apic_level(irq);
+#endif
+ return ack_apic_level(irq);
+}
+#endif /* CONFIG_INTR_REMAP */
+
static struct irq_chip ioapic_chip __read_mostly = {
.name = "IO-APIC",
.startup = startup_ioapic_irq,
@@ -2670,8 +2721,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
.mask = mask_IO_APIC_irq,
.unmask = unmask_IO_APIC_irq,
#ifdef CONFIG_INTR_REMAP
- .ack = ack_x2apic_edge,
- .eoi = ack_x2apic_level,
+ .ack = ir_ack_apic_edge,
+ .eoi = ir_ack_apic_level,
#ifdef CONFIG_SMP
.set_affinity = set_ir_ioapic_affinity_irq,
#endif
@@ -3397,7 +3448,7 @@ static struct irq_chip msi_ir_chip = {
.unmask = unmask_msi_irq,
.mask = mask_msi_irq,
#ifdef CONFIG_INTR_REMAP
- .ack = ack_x2apic_edge,
+ .ack = ir_ack_apic_edge,
#ifdef CONFIG_SMP
.set_affinity = ir_set_msi_irq_affinity,
#endif
@@ -3619,12 +3670,14 @@ int arch_setup_hpet_msi(unsigned int irq)
{
int ret;
struct msi_msg msg;
+ struct irq_desc *desc = irq_to_desc(irq);
ret = msi_compose_msg(NULL, irq, &msg);
if (ret < 0)
return ret;
hpet_msi_write(irq, &msg);
+ desc->status |= IRQ_MOVE_PCNTXT;
set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
"edge");
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index bdfad80c3cf..ce4fbfa315a 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -39,7 +39,7 @@
int unknown_nmi_panic;
int nmi_watchdog_enabled;
-static cpumask_t backtrace_mask = CPU_MASK_NONE;
+static cpumask_var_t backtrace_mask;
/* nmi_active:
* >0: the lapic NMI watchdog is active, but can be disabled
@@ -138,6 +138,7 @@ int __init check_nmi_watchdog(void)
if (!prev_nmi_count)
goto error;
+ alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO);
printk(KERN_INFO "Testing NMI watchdog ... ");
#ifdef CONFIG_SMP
@@ -413,14 +414,15 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
touched = 1;
}
- if (cpu_isset(cpu, backtrace_mask)) {
+ /* We can be called before check_nmi_watchdog, hence NULL check. */
+ if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) {
static DEFINE_SPINLOCK(lock); /* Serialise the printks */
spin_lock(&lock);
printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
dump_stack();
spin_unlock(&lock);
- cpu_clear(cpu, backtrace_mask);
+ cpumask_clear_cpu(cpu, backtrace_mask);
}
/* Could check oops_in_progress here too, but it's safer not to */
@@ -554,10 +556,10 @@ void __trigger_all_cpu_backtrace(void)
{
int i;
- backtrace_mask = cpu_online_map;
+ cpumask_copy(backtrace_mask, cpu_online_mask);
/* Wait for up to 10 seconds for all CPUs to do the backtrace */
for (i = 0; i < 10 * 1000; i++) {
- if (cpus_empty(backtrace_mask))
+ if (cpumask_empty(backtrace_mask))
break;
mdelay(1);
}
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index ba2fc646553..533e59c6fc8 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -334,9 +334,9 @@ static inline void numaq_smp_callin_clear_local_apic(void)
clear_local_APIC();
}
-static inline const cpumask_t *numaq_target_cpus(void)
+static inline const struct cpumask *numaq_target_cpus(void)
{
- return &CPU_MASK_ALL;
+ return cpu_all_mask;
}
static inline unsigned long
@@ -427,7 +427,7 @@ static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid)
* We use physical apicids here, not logical, so just return the default
* physical broadcast to stop people from breaking us
*/
-static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
return 0x0F;
}
@@ -462,7 +462,7 @@ static int probe_numaq(void)
return found_numaq;
}
-static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
@@ -472,7 +472,8 @@ static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
+ cpumask_clear(retmask);
+ cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
}
static void numaq_setup_portio_remap(void)
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 141c99a1c26..01eda2ac65e 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -83,7 +83,8 @@ static void default_vector_allocation_domain(int cpu, struct cpumask *retmask)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
+ cpumask_clear(retmask);
+ cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
}
/* should be called last. */
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index aac52fa873f..9cfe1f415d8 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -53,23 +53,19 @@ static unsigned summit_get_apic_id(unsigned long x)
return (x >> 24) & 0xFF;
}
-static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector)
+static inline void summit_send_IPI_mask(const struct cpumask *mask, int vector)
{
default_send_IPI_mask_sequence_logical(mask, vector);
}
static void summit_send_IPI_allbutself(int vector)
{
- cpumask_t mask = cpu_online_map;
- cpu_clear(smp_processor_id(), mask);
-
- if (!cpus_empty(mask))
- summit_send_IPI_mask(&mask, vector);
+ default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
}
static void summit_send_IPI_all(int vector)
{
- summit_send_IPI_mask(&cpu_online_map, vector);
+ summit_send_IPI_mask(cpu_online_mask, vector);
}
#include <asm/tsc.h>
@@ -186,13 +182,13 @@ static inline int is_WPEG(struct rio_detail *rio){
#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
-static const cpumask_t *summit_target_cpus(void)
+static const struct cpumask *summit_target_cpus(void)
{
/* CPU_MASK_ALL (0xff) has undefined behaviour with
* dest_LowestPrio mode logical clustered apic interrupt routing
* Just start on cpu 0. IRQ balancing will spread load
*/
- return &cpumask_of_cpu(0);
+ return cpumask_of(0);
}
static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid)
@@ -289,7 +285,7 @@ static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
return 1;
}
-static unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask)
+static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
unsigned int round = 0;
int cpu, apicid = 0;
@@ -346,7 +342,7 @@ static int probe_summit(void)
return 0;
}
-static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask)
+static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
/* Careful. Some cpus do not strictly honor the set of cpus
* specified in the interrupt destination when using lowest
@@ -356,7 +352,8 @@ static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask)
* deliver interrupts to the wrong hyperthread when only one
* hyperthread was specified in the interrupt desitination.
*/
- *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
+ cpumask_clear(retmask);
+ cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
}
#ifdef CONFIG_X86_SUMMIT_NUMA
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 1248318436e..2bda6935297 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -19,6 +19,7 @@
#include <linux/timer.h>
#include <linux/cpu.h>
#include <linux/init.h>
+#include <linux/io.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
@@ -34,6 +35,17 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
static enum uv_system_type uv_system_type;
+static int early_get_nodeid(void)
+{
+ union uvh_node_id_u node_id;
+ unsigned long *mmr;
+
+ mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr));
+ node_id.v = *mmr;
+ early_iounmap(mmr, sizeof(*mmr));
+ return node_id.s.node_id;
+}
+
static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
if (!strcmp(oem_id, "SGI")) {
@@ -42,6 +54,8 @@ static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
else if (!strcmp(oem_table_id, "UVX"))
uv_system_type = UV_X2APIC;
else if (!strcmp(oem_table_id, "UVH")) {
+ __get_cpu_var(x2apic_extra_bits) =
+ early_get_nodeid() << (UV_APIC_PNODE_SHIFT - 1);
uv_system_type = UV_NON_UNIQUE_APIC;
return 1;
}
@@ -549,7 +563,8 @@ void __init uv_system_init(void)
unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
int max_pnode = 0;
- unsigned long mmr_base, present;
+ unsigned long mmr_base, present, paddr;
+ unsigned short pnode_mask;
map_low_mmrs();
@@ -592,6 +607,7 @@ void __init uv_system_init(void)
}
}
+ pnode_mask = (1 << n_val) - 1;
node_id.v = uv_read_local_mmr(UVH_NODE_ID);
gnode_upper = (((unsigned long)node_id.s.node_id) &
~((1 << n_val) - 1)) << m_val;
@@ -615,7 +631,7 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->numa_blade_id = blade;
uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
uv_cpu_hub_info(cpu)->pnode = pnode;
- uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1;
+ uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
@@ -631,6 +647,17 @@ void __init uv_system_init(void)
lcpu, blade);
}
+ /* Add blade/pnode info for nodes without cpus */
+ for_each_online_node(nid) {
+ if (uv_node_to_blade[nid] >= 0)
+ continue;
+ paddr = node_start_pfn(nid) << PAGE_SHIFT;
+ paddr = uv_soc_phys_ram_to_gpa(paddr);
+ pnode = (paddr >> m_val) & pnode_mask;
+ blade = boot_pnode_to_blade(pnode);
+ uv_node_to_blade[nid] = blade;
+ }
+
map_gru_high(max_pnode);
map_mmr_high(max_pnode);
map_config_high(max_pnode);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index ac7783a6743..49e0939bac4 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -466,7 +466,7 @@ static const lookup_t error_table[] = {
* @err: APM BIOS return code
*
* Write a meaningful log entry to the kernel log in the event of
- * an APM error.
+ * an APM error. Note that this also handles (negative) kernel errors.
*/
static void apm_error(char *str, int err)
@@ -478,43 +478,14 @@ static void apm_error(char *str, int err)
break;
if (i < ERROR_COUNT)
printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg);
+ else if (err < 0)
+ printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err);
else
printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n",
str, err);
}
/*
- * Lock APM functionality to physical CPU 0
- */
-
-#ifdef CONFIG_SMP
-
-static cpumask_t apm_save_cpus(void)
-{
- cpumask_t x = current->cpus_allowed;
- /* Some bioses don't like being called from CPU != 0 */
- set_cpus_allowed(current, cpumask_of_cpu(0));
- BUG_ON(smp_processor_id() != 0);
- return x;
-}
-
-static inline void apm_restore_cpus(cpumask_t mask)
-{
- set_cpus_allowed(current, mask);
-}
-
-#else
-
-/*
- * No CPU lockdown needed on a uniprocessor
- */
-
-#define apm_save_cpus() (current->cpus_allowed)
-#define apm_restore_cpus(x) (void)(x)
-
-#endif
-
-/*
* These are the actual BIOS calls. Depending on APM_ZERO_SEGS and
* apm_info.allow_ints, we are being really paranoid here! Not only
* are interrupts disabled, but all the segment registers (except SS)
@@ -568,16 +539,23 @@ static inline void apm_irq_restore(unsigned long flags)
# define APM_DO_RESTORE_SEGS
#endif
+struct apm_bios_call {
+ u32 func;
+ /* In and out */
+ u32 ebx;
+ u32 ecx;
+ /* Out only */
+ u32 eax;
+ u32 edx;
+ u32 esi;
+
+ /* Error: -ENOMEM, or bits 8-15 of eax */
+ int err;
+};
+
/**
- * apm_bios_call - Make an APM BIOS 32bit call
- * @func: APM function to execute
- * @ebx_in: EBX register for call entry
- * @ecx_in: ECX register for call entry
- * @eax: EAX register return
- * @ebx: EBX register return
- * @ecx: ECX register return
- * @edx: EDX register return
- * @esi: ESI register return
+ * __apm_bios_call - Make an APM BIOS 32bit call
+ * @_call: pointer to struct apm_bios_call.
*
* Make an APM call using the 32bit protected mode interface. The
* caller is responsible for knowing if APM BIOS is configured and
@@ -586,80 +564,142 @@ static inline void apm_irq_restore(unsigned long flags)
* flag is loaded into AL. If there is an error, then the error
* code is returned in AH (bits 8-15 of eax) and this function
* returns non-zero.
+ *
+ * Note: this makes the call on the current CPU.
*/
-
-static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
- u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi)
+static long __apm_bios_call(void *_call)
{
APM_DECL_SEGS
unsigned long flags;
- cpumask_t cpus;
int cpu;
struct desc_struct save_desc_40;
struct desc_struct *gdt;
-
- cpus = apm_save_cpus();
+ struct apm_bios_call *call = _call;
cpu = get_cpu();
+ BUG_ON(cpu != 0);
gdt = get_cpu_gdt_table(cpu);
save_desc_40 = gdt[0x40 / 8];
gdt[0x40 / 8] = bad_bios_desc;
apm_irq_save(flags);
APM_DO_SAVE_SEGS;
- apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi);
+ apm_bios_call_asm(call->func, call->ebx, call->ecx,
+ &call->eax, &call->ebx, &call->ecx, &call->edx,
+ &call->esi);
APM_DO_RESTORE_SEGS;
apm_irq_restore(flags);
gdt[0x40 / 8] = save_desc_40;
put_cpu();
- apm_restore_cpus(cpus);
- return *eax & 0xff;
+ return call->eax & 0xff;
+}
+
+/* Run __apm_bios_call or __apm_bios_call_simple on CPU 0 */
+static int on_cpu0(long (*fn)(void *), struct apm_bios_call *call)
+{
+ int ret;
+
+ /* Don't bother with work_on_cpu in the common case, so we don't
+ * have to worry about OOM or overhead. */
+ if (get_cpu() == 0) {
+ ret = fn(call);
+ put_cpu();
+ } else {
+ put_cpu();
+ ret = work_on_cpu(0, fn, call);
+ }
+
+ /* work_on_cpu can fail with -ENOMEM */
+ if (ret < 0)
+ call->err = ret;
+ else
+ call->err = (call->eax >> 8) & 0xff;
+
+ return ret;
}
/**
- * apm_bios_call_simple - make a simple APM BIOS 32bit call
- * @func: APM function to invoke
- * @ebx_in: EBX register value for BIOS call
- * @ecx_in: ECX register value for BIOS call
- * @eax: EAX register on return from the BIOS call
+ * apm_bios_call - Make an APM BIOS 32bit call (on CPU 0)
+ * @call: the apm_bios_call registers.
+ *
+ * If there is an error, it is returned in @call.err.
+ */
+static int apm_bios_call(struct apm_bios_call *call)
+{
+ return on_cpu0(__apm_bios_call, call);
+}
+
+/**
+ * __apm_bios_call_simple - Make an APM BIOS 32bit call (on CPU 0)
+ * @_call: pointer to struct apm_bios_call.
*
* Make a BIOS call that returns one value only, or just status.
* If there is an error, then the error code is returned in AH
- * (bits 8-15 of eax) and this function returns non-zero. This is
- * used for simpler BIOS operations. This call may hold interrupts
- * off for a long time on some laptops.
+ * (bits 8-15 of eax) and this function returns non-zero (it can
+ * also return -ENOMEM). This is used for simpler BIOS operations.
+ * This call may hold interrupts off for a long time on some laptops.
+ *
+ * Note: this makes the call on the current CPU.
*/
-
-static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
+static long __apm_bios_call_simple(void *_call)
{
u8 error;
APM_DECL_SEGS
unsigned long flags;
- cpumask_t cpus;
int cpu;
struct desc_struct save_desc_40;
struct desc_struct *gdt;
-
- cpus = apm_save_cpus();
+ struct apm_bios_call *call = _call;
cpu = get_cpu();
+ BUG_ON(cpu != 0);
gdt = get_cpu_gdt_table(cpu);
save_desc_40 = gdt[0x40 / 8];
gdt[0x40 / 8] = bad_bios_desc;
apm_irq_save(flags);
APM_DO_SAVE_SEGS;
- error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax);
+ error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx,
+ &call->eax);
APM_DO_RESTORE_SEGS;
apm_irq_restore(flags);
gdt[0x40 / 8] = save_desc_40;
put_cpu();
- apm_restore_cpus(cpus);
return error;
}
/**
+ * apm_bios_call_simple - make a simple APM BIOS 32bit call
+ * @func: APM function to invoke
+ * @ebx_in: EBX register value for BIOS call
+ * @ecx_in: ECX register value for BIOS call
+ * @eax: EAX register on return from the BIOS call
+ * @err: bits
+ *
+ * Make a BIOS call that returns one value only, or just status.
+ * If there is an error, then the error code is returned in @err
+ * and this function returns non-zero. This is used for simpler
+ * BIOS operations. This call may hold interrupts off for a long
+ * time on some laptops.
+ */
+static int apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax,
+ int *err)
+{
+ struct apm_bios_call call;
+ int ret;
+
+ call.func = func;
+ call.ebx = ebx_in;
+ call.ecx = ecx_in;
+
+ ret = on_cpu0(__apm_bios_call_simple, &call);
+ *eax = call.eax;
+ *err = call.err;
+ return ret;
+}
+
+/**
* apm_driver_version - APM driver version
* @val: loaded with the APM version on return
*
@@ -678,9 +718,10 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
static int apm_driver_version(u_short *val)
{
u32 eax;
+ int err;
- if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax))
- return (eax >> 8) & 0xff;
+ if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax, &err))
+ return err;
*val = eax;
return APM_SUCCESS;
}
@@ -701,22 +742,21 @@ static int apm_driver_version(u_short *val)
* that APM 1.2 is in use. If no messges are pending the value 0x80
* is returned (No power management events pending).
*/
-
static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
{
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 dummy;
+ struct apm_bios_call call;
- if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx,
- &dummy, &dummy))
- return (eax >> 8) & 0xff;
- *event = ebx;
+ call.func = APM_FUNC_GET_EVENT;
+ call.ebx = call.ecx = 0;
+
+ if (apm_bios_call(&call))
+ return call.err;
+
+ *event = call.ebx;
if (apm_info.connection_version < 0x0102)
*info = ~0; /* indicate info not valid */
else
- *info = ecx;
+ *info = call.ecx;
return APM_SUCCESS;
}
@@ -737,9 +777,10 @@ static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
static int set_power_state(u_short what, u_short state)
{
u32 eax;
+ int err;
- if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax))
- return (eax >> 8) & 0xff;
+ if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax, &err))
+ return err;
return APM_SUCCESS;
}
@@ -770,6 +811,7 @@ static int apm_do_idle(void)
u8 ret = 0;
int idled = 0;
int polling;
+ int err;
polling = !!(current_thread_info()->status & TS_POLLING);
if (polling) {
@@ -782,7 +824,7 @@ static int apm_do_idle(void)
}
if (!need_resched()) {
idled = 1;
- ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
+ ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax, &err);
}
if (polling)
current_thread_info()->status |= TS_POLLING;
@@ -797,8 +839,7 @@ static int apm_do_idle(void)
* Only report the failure the first 5 times.
*/
if (++t < 5) {
- printk(KERN_DEBUG "apm_do_idle failed (%d)\n",
- (eax >> 8) & 0xff);
+ printk(KERN_DEBUG "apm_do_idle failed (%d)\n", err);
t = jiffies;
}
return -1;
@@ -816,9 +857,10 @@ static int apm_do_idle(void)
static void apm_do_busy(void)
{
u32 dummy;
+ int err;
if (clock_slowed || ALWAYS_CALL_BUSY) {
- (void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy);
+ (void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy, &err);
clock_slowed = 0;
}
}
@@ -937,7 +979,7 @@ static void apm_power_off(void)
/* Some bioses don't like being called from CPU != 0 */
if (apm_info.realmode_power_off) {
- (void)apm_save_cpus();
+ set_cpus_allowed_ptr(current, cpumask_of(0));
machine_real_restart(po_bios_call, sizeof(po_bios_call));
} else {
(void)set_system_power_state(APM_STATE_OFF);
@@ -956,12 +998,13 @@ static void apm_power_off(void)
static int apm_enable_power_management(int enable)
{
u32 eax;
+ int err;
if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED))
return APM_NOT_ENGAGED;
if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL,
- enable, &eax))
- return (eax >> 8) & 0xff;
+ enable, &eax, &err))
+ return err;
if (enable)
apm_info.bios.flags &= ~APM_BIOS_DISABLED;
else
@@ -986,24 +1029,23 @@ static int apm_enable_power_management(int enable)
static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
{
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- u32 dummy;
+ struct apm_bios_call call;
+
+ call.func = APM_FUNC_GET_STATUS;
+ call.ebx = APM_DEVICE_ALL;
+ call.ecx = 0;
if (apm_info.get_power_status_broken)
return APM_32_UNSUPPORTED;
- if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0,
- &eax, &ebx, &ecx, &edx, &dummy))
- return (eax >> 8) & 0xff;
- *status = ebx;
- *bat = ecx;
+ if (apm_bios_call(&call))
+ return call.err;
+ *status = call.ebx;
+ *bat = call.ecx;
if (apm_info.get_power_status_swabinminutes) {
- *life = swab16((u16)edx);
+ *life = swab16((u16)call.edx);
*life |= 0x8000;
} else
- *life = edx;
+ *life = call.edx;
return APM_SUCCESS;
}
@@ -1048,12 +1090,14 @@ static int apm_get_battery_status(u_short which, u_short *status,
static int apm_engage_power_management(u_short device, int enable)
{
u32 eax;
+ int err;
if ((enable == 0) && (device == APM_DEVICE_ALL)
&& (apm_info.bios.flags & APM_BIOS_DISABLED))
return APM_DISABLED;
- if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax))
- return (eax >> 8) & 0xff;
+ if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable,
+ &eax, &err))
+ return err;
if (device == APM_DEVICE_ALL) {
if (enable)
apm_info.bios.flags &= ~APM_BIOS_DISENGAGED;
@@ -1689,16 +1733,14 @@ static int apm(void *unused)
char *power_stat;
char *bat_stat;
-#ifdef CONFIG_SMP
/* 2002/08/01 - WT
* This is to avoid random crashes at boot time during initialization
* on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D.
* Some bioses don't like being called from CPU != 0.
* Method suggested by Ingo Molnar.
*/
- set_cpus_allowed(current, cpumask_of_cpu(0));
+ set_cpus_allowed_ptr(current, cpumask_of(0));
BUG_ON(smp_processor_id() != 0);
-#endif
if (apm_info.connection_version == 0) {
apm_info.connection_version = apm_info.bios.version;
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index f63882728d9..63a88e1f987 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -182,7 +182,8 @@ void uv_bios_init(void)
memcpy(&uv_systab, tab, sizeof(struct uv_systab));
iounmap(tab);
- printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision);
+ printk(KERN_INFO "EFI UV System Table Revision %d\n",
+ uv_systab.revision);
}
#else /* !CONFIG_EFI */
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 8220ae69849..c965e521271 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
+ { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
{ 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e2962cc1e27..c1caefc82e6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -41,8 +41,6 @@
#include "cpu.h"
-#ifdef CONFIG_X86_64
-
/* all of these masks are initialized in setup_cpu_local_masks() */
cpumask_var_t cpu_initialized_mask;
cpumask_var_t cpu_callout_mask;
@@ -60,16 +58,6 @@ void __init setup_cpu_local_masks(void)
alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
}
-#else /* CONFIG_X86_32 */
-
-cpumask_t cpu_sibling_setup_map;
-cpumask_t cpu_callout_map;
-cpumask_t cpu_initialized;
-cpumask_t cpu_callin_map;
-
-#endif /* CONFIG_X86_32 */
-
-
static const struct cpu_dev *this_cpu __cpuinitdata;
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
@@ -859,6 +847,7 @@ static void vgetcpu_set_mode(void)
void __init identify_boot_cpu(void)
{
identify_cpu(&boot_cpu_data);
+ init_c1e_mask();
#ifdef CONFIG_X86_32
sysenter_setup();
enable_sep_cpu();
@@ -1214,6 +1203,8 @@ void __cpuinit cpu_init(void)
load_TR_desc();
load_LDT(&init_mm.context);
+ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+
#ifdef CONFIG_DOUBLEFAULT
/* Set up doublefault TSS pointer in the GDT */
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
index 46e29ab96c6..46e29ab96c6 100755..100644
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ b/arch/x86/kernel/cpu/cpu_debug.c
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 23da96e57b1..208ecf6643d 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,7 +33,7 @@
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
-#include <linux/ftrace.h>
+#include <trace/power.h>
#include <linux/acpi.h>
#include <linux/io.h>
@@ -65,13 +65,20 @@ enum {
struct acpi_cpufreq_data {
struct acpi_processor_performance *acpi_data;
struct cpufreq_frequency_table *freq_table;
- unsigned int max_freq;
unsigned int resume;
unsigned int cpu_feature;
};
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
+struct acpi_msr_data {
+ u64 saved_aperf, saved_mperf;
+};
+
+static DEFINE_PER_CPU(struct acpi_msr_data, msr_data);
+
+DEFINE_TRACE(power_mark);
+
/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance *acpi_perf_data;
@@ -150,7 +157,8 @@ struct drv_cmd {
u32 val;
};
-static long do_drv_read(void *_cmd)
+/* Called via smp_call_function_single(), on the target CPU */
+static void do_drv_read(void *_cmd)
{
struct drv_cmd *cmd = _cmd;
u32 h;
@@ -167,10 +175,10 @@ static long do_drv_read(void *_cmd)
default:
break;
}
- return 0;
}
-static long do_drv_write(void *_cmd)
+/* Called via smp_call_function_many(), on the target CPUs */
+static void do_drv_write(void *_cmd)
{
struct drv_cmd *cmd = _cmd;
u32 lo, hi;
@@ -189,23 +197,24 @@ static long do_drv_write(void *_cmd)
default:
break;
}
- return 0;
}
static void drv_read(struct drv_cmd *cmd)
{
cmd->val = 0;
- work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd);
+ smp_call_function_single(cpumask_any(cmd->mask), do_drv_read, cmd, 1);
}
static void drv_write(struct drv_cmd *cmd)
{
- unsigned int i;
+ int this_cpu;
- for_each_cpu(i, cmd->mask) {
- work_on_cpu(i, do_drv_write, cmd);
- }
+ this_cpu = get_cpu();
+ if (cpumask_test_cpu(this_cpu, cmd->mask))
+ do_drv_write(cmd);
+ smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
+ put_cpu();
}
static u32 get_cur_val(const struct cpumask *mask)
@@ -239,28 +248,23 @@ static u32 get_cur_val(const struct cpumask *mask)
return cmd.val;
}
-struct perf_cur {
+struct perf_pair {
union {
struct {
u32 lo;
u32 hi;
} split;
u64 whole;
- } aperf_cur, mperf_cur;
+ } aperf, mperf;
};
-
-static long read_measured_perf_ctrs(void *_cur)
+/* Called via smp_call_function_single(), on the target CPU */
+static void read_measured_perf_ctrs(void *_cur)
{
- struct perf_cur *cur = _cur;
-
- rdmsr(MSR_IA32_APERF, cur->aperf_cur.split.lo, cur->aperf_cur.split.hi);
- rdmsr(MSR_IA32_MPERF, cur->mperf_cur.split.lo, cur->mperf_cur.split.hi);
+ struct perf_pair *cur = _cur;
- wrmsr(MSR_IA32_APERF, 0, 0);
- wrmsr(MSR_IA32_MPERF, 0, 0);
-
- return 0;
+ rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi);
+ rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
}
/*
@@ -279,58 +283,63 @@ static long read_measured_perf_ctrs(void *_cur)
static unsigned int get_measured_perf(struct cpufreq_policy *policy,
unsigned int cpu)
{
- struct perf_cur cur;
+ struct perf_pair readin, cur;
unsigned int perf_percent;
unsigned int retval;
- if (!work_on_cpu(cpu, read_measured_perf_ctrs, &cur))
+ if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1))
return 0;
+ cur.aperf.whole = readin.aperf.whole -
+ per_cpu(msr_data, cpu).saved_aperf;
+ cur.mperf.whole = readin.mperf.whole -
+ per_cpu(msr_data, cpu).saved_mperf;
+ per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole;
+ per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole;
+
#ifdef __i386__
/*
* We dont want to do 64 bit divide with 32 bit kernel
* Get an approximate value. Return failure in case we cannot get
* an approximate value.
*/
- if (unlikely(cur.aperf_cur.split.hi || cur.mperf_cur.split.hi)) {
+ if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
int shift_count;
u32 h;
- h = max_t(u32, cur.aperf_cur.split.hi, cur.mperf_cur.split.hi);
+ h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi);
shift_count = fls(h);
- cur.aperf_cur.whole >>= shift_count;
- cur.mperf_cur.whole >>= shift_count;
+ cur.aperf.whole >>= shift_count;
+ cur.mperf.whole >>= shift_count;
}
- if (((unsigned long)(-1) / 100) < cur.aperf_cur.split.lo) {
+ if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
int shift_count = 7;
- cur.aperf_cur.split.lo >>= shift_count;
- cur.mperf_cur.split.lo >>= shift_count;
+ cur.aperf.split.lo >>= shift_count;
+ cur.mperf.split.lo >>= shift_count;
}
- if (cur.aperf_cur.split.lo && cur.mperf_cur.split.lo)
- perf_percent = (cur.aperf_cur.split.lo * 100) /
- cur.mperf_cur.split.lo;
+ if (cur.aperf.split.lo && cur.mperf.split.lo)
+ perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
else
perf_percent = 0;
#else
- if (unlikely(((unsigned long)(-1) / 100) < cur.aperf_cur.whole)) {
+ if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
int shift_count = 7;
- cur.aperf_cur.whole >>= shift_count;
- cur.mperf_cur.whole >>= shift_count;
+ cur.aperf.whole >>= shift_count;
+ cur.mperf.whole >>= shift_count;
}
- if (cur.aperf_cur.whole && cur.mperf_cur.whole)
- perf_percent = (cur.aperf_cur.whole * 100) /
- cur.mperf_cur.whole;
+ if (cur.aperf.whole && cur.mperf.whole)
+ perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
else
perf_percent = 0;
#endif
- retval = per_cpu(drv_data, policy->cpu)->max_freq * perf_percent / 100;
+ retval = (policy->cpuinfo.max_freq * perf_percent) / 100;
return retval;
}
@@ -680,7 +689,14 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
perf->states[i].transition_latency * 1000;
}
- data->max_freq = perf->states[0].core_frequency * 1000;
+ /* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
+ if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
+ policy->cpuinfo.transition_latency > 20 * 1000) {
+ policy->cpuinfo.transition_latency = 20 * 1000;
+ printk_once(KERN_INFO "Capping off P-state tranision"
+ " latency at 20 uS\n");
+ }
+
/* table init */
for (i = 0; i < perf->state_count; i++) {
if (i > 0 && perf->states[i].core_frequency >=
@@ -699,6 +715,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (result)
goto err_freqfree;
+ if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
+ printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");
+
switch (perf->control_register.space_id) {
case ACPI_ADR_SPACE_SYSTEM_IO:
/* Current speed is unknown and not detectable by IO port */
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index f1c51aea064..ce2ed3e4aad 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -33,7 +33,6 @@
#include <linux/timex.h>
#include <linux/io.h>
#include <linux/acpi.h>
-#include <linux/kernel.h>
#include <asm/msr.h>
#include <acpi/processor.h>
@@ -305,7 +304,7 @@ retry_loop:
outb(3, 0x22);
} else if ((pr != NULL) && pr->flags.bm_control) {
/* Disable bus master arbitration */
- acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
+ acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 1);
}
switch (longhaul_version) {
@@ -328,7 +327,7 @@ retry_loop:
case TYPE_POWERSAVER:
if (longhaul_flags & USE_ACPI_C3) {
/* Don't allow wakeup */
- acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+ acpi_write_bit_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
do_powersaver(cx->address, mults_index, dir);
} else {
do_powersaver(0, mults_index, dir);
@@ -341,7 +340,7 @@ retry_loop:
outb(0, 0x22);
} else if ((pr != NULL) && pr->flags.bm_control) {
/* Enable bus master arbitration */
- acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
+ acpi_write_bit_register(ACPI_BITREG_ARB_DISABLE, 0);
}
outb(pic2_mask, 0xA1); /* restore mask */
outb(pic1_mask, 0x21);
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 41ed94915f9..6ac55bd341a 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -211,7 +211,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
unsigned int i;
#ifdef CONFIG_SMP
- cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu));
+ cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
#endif
/* Errata workaround */
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index a15ac94e0b9..4709ead2db5 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -54,7 +54,10 @@ static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data);
static int cpu_family = CPU_OPTERON;
#ifndef CONFIG_SMP
-DEFINE_PER_CPU(cpumask_t, cpu_core_map);
+static inline const struct cpumask *cpu_core_mask(int cpu)
+{
+ return cpumask_of(0);
+}
#endif
/* Return a frequency in MHz, given an input fid */
@@ -699,7 +702,7 @@ static int fill_powernow_table(struct powernow_k8_data *data,
dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
data->powernow_table = powernow_table;
- if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
+ if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
print_basics(data);
for (j = 0; j < data->numps; j++)
@@ -862,7 +865,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
/* fill in data */
data->numps = data->acpi_data.state_count;
- if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
+ if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
print_basics(data);
powernow_k8_acpi_pst_values(data, 0);
@@ -1300,7 +1303,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
if (cpu_family == CPU_HW_PSTATE)
cpumask_copy(pol->cpus, cpumask_of(pol->cpu));
else
- cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu));
+ cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu));
data->available_cores = pol->cpus;
if (cpu_family == CPU_HW_PSTATE)
@@ -1365,7 +1368,7 @@ static unsigned int powernowk8_get(unsigned int cpu)
unsigned int khz = 0;
unsigned int first;
- first = first_cpu(per_cpu(cpu_core_map, cpu));
+ first = cpumask_first(cpu_core_mask(cpu));
data = per_cpu(powernow_data, first);
if (!data)
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 8bbb11adb31..016c1a4fa3f 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -321,7 +321,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
/* only run on CPU to be set, or on its sibling */
#ifdef CONFIG_SMP
- cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu));
+ cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu));
#endif
cpus_allowed = current->cpus_allowed;
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c471eb1a389..483eda96e10 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -159,7 +159,7 @@ struct _cpuid4_info_regs {
unsigned long can_disable;
};
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS)
static struct pci_device_id k8_nb_id[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
@@ -324,15 +324,6 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
return 0;
}
-static int
-__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
-{
- struct _cpuid4_info_regs *leaf_regs =
- (struct _cpuid4_info_regs *)this_leaf;
-
- return cpuid4_cache_lookup_regs(index, leaf_regs);
-}
-
static int __cpuinit find_num_cache_leaves(void)
{
unsigned int eax, ebx, ecx, edx;
@@ -508,6 +499,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
return l2;
}
+#ifdef CONFIG_SYSFS
+
/* pointer to _cpuid4_info array (for each cache leaf) */
static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
@@ -571,6 +564,15 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
per_cpu(cpuid4_info, cpu) = NULL;
}
+static int
+__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+{
+ struct _cpuid4_info_regs *leaf_regs =
+ (struct _cpuid4_info_regs *)this_leaf;
+
+ return cpuid4_cache_lookup_regs(index, leaf_regs);
+}
+
static void __cpuinit get_cpu_leaves(void *_retval)
{
int j, *retval = _retval, cpu = smp_processor_id();
@@ -612,8 +614,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
return retval;
}
-#ifdef CONFIG_SYSFS
-
#include <linux/kobject.h>
#include <linux/sysfs.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index ca14604611e..6fb0b359d2a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -239,9 +239,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
-
- mce_log(&m);
- add_taint(TAINT_MACHINE_CHECK);
+ if (!(flags & MCP_DONTLOG)) {
+ mce_log(&m);
+ add_taint(TAINT_MACHINE_CHECK);
+ }
/*
* Clear state for this bank.
@@ -452,13 +453,14 @@ void mce_log_therm_throt_event(__u64 status)
*/
static int check_interval = 5 * 60; /* 5 minutes */
-static int next_interval; /* in jiffies */
+static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
static void mcheck_timer(unsigned long);
static DEFINE_PER_CPU(struct timer_list, mce_timer);
static void mcheck_timer(unsigned long data)
{
struct timer_list *t = &per_cpu(mce_timer, data);
+ int *n;
WARN_ON(smp_processor_id() != data);
@@ -470,14 +472,14 @@ static void mcheck_timer(unsigned long data)
* Alert userspace if needed. If we logged an MCE, reduce the
* polling interval, otherwise increase the polling interval.
*/
+ n = &__get_cpu_var(next_interval);
if (mce_notify_user()) {
- next_interval = max(next_interval/2, HZ/100);
+ *n = max(*n/2, HZ/100);
} else {
- next_interval = min(next_interval * 2,
- (int)round_jiffies_relative(check_interval*HZ));
+ *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
}
- t->expires = jiffies + next_interval;
+ t->expires = jiffies + *n;
add_timer(t);
}
@@ -584,7 +586,7 @@ static void mce_init(void *dummy)
* Log the machine checks left over from the previous reset.
*/
bitmap_fill(all_banks, MAX_NR_BANKS);
- machine_check_poll(MCP_UC, &all_banks);
+ machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
set_in_cr4(X86_CR4_MCE);
@@ -632,14 +634,13 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
static void mce_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
+ int *n = &__get_cpu_var(next_interval);
- /* data race harmless because everyone sets to the same value */
- if (!next_interval)
- next_interval = check_interval * HZ;
- if (!next_interval)
+ *n = check_interval * HZ;
+ if (!*n)
return;
setup_timer(t, mcheck_timer, smp_processor_id());
- t->expires = round_jiffies(jiffies + next_interval);
+ t->expires = round_jiffies(jiffies + *n);
add_timer(t);
}
@@ -907,7 +908,6 @@ static void mce_cpu_restart(void *data)
/* Reinit MCEs after user configuration changes */
static void mce_restart(void)
{
- next_interval = check_interval * HZ;
on_each_cpu(mce_cpu_restart, NULL, 1);
}
@@ -990,7 +990,7 @@ static struct sysdev_attribute *mce_attributes[] = {
NULL
};
-static cpumask_t mce_device_initialized = CPU_MASK_NONE;
+static cpumask_var_t mce_device_initialized;
/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
static __cpuinit int mce_create_device(unsigned int cpu)
@@ -1021,7 +1021,7 @@ static __cpuinit int mce_create_device(unsigned int cpu)
if (err)
goto error2;
}
- cpu_set(cpu, mce_device_initialized);
+ cpumask_set_cpu(cpu, mce_device_initialized);
return 0;
error2:
@@ -1043,7 +1043,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
{
int i;
- if (!cpu_isset(cpu, mce_device_initialized))
+ if (!cpumask_test_cpu(cpu, mce_device_initialized))
return;
for (i = 0; mce_attributes[i]; i++)
@@ -1053,7 +1053,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
sysdev_remove_file(&per_cpu(device_mce, cpu),
&bank_attrs[i]);
sysdev_unregister(&per_cpu(device_mce,cpu));
- cpu_clear(cpu, mce_device_initialized);
+ cpumask_clear_cpu(cpu, mce_device_initialized);
}
/* Make sure there are no machine checks on offlined CPUs. */
@@ -1110,7 +1110,8 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- t->expires = round_jiffies(jiffies + next_interval);
+ t->expires = round_jiffies(jiffies +
+ __get_cpu_var(next_interval));
add_timer_on(t, cpu);
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
break;
@@ -1162,6 +1163,8 @@ static __init int mce_init_device(void)
if (!mce_available(&boot_cpu_data))
return -EIO;
+ alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
+
err = mce_init_banks();
if (err)
return err;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 7d01be86887..56dde9c4bc9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -485,7 +485,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
#ifdef CONFIG_SMP
if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
- i = cpumask_first(&per_cpu(cpu_core_map, cpu));
+ i = cpumask_first(cpu_core_mask(cpu));
/* first core not up yet */
if (cpu_data(i).cpu_core_id)
@@ -505,7 +505,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out;
- cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
+ cpumask_copy(b->cpus, cpu_core_mask(cpu));
per_cpu(threshold_banks, cpu)[bank] = b;
goto out;
}
@@ -529,7 +529,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
#ifndef CONFIG_SMP
cpumask_setall(b->cpus);
#else
- cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
+ cpumask_copy(b->cpus, cpu_core_mask(cpu));
#endif
per_cpu(threshold_banks, cpu)[bank] = b;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 57df3d38347..cef3ee30744 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -151,10 +151,11 @@ static void print_update(char *type, int *hdr, int num)
static void cmci_discover(int banks, int boot)
{
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
+ unsigned long flags;
int hdr = 0;
int i;
- spin_lock(&cmci_discover_lock);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
u64 val;
@@ -184,7 +185,7 @@ static void cmci_discover(int banks, int boot)
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
}
}
- spin_unlock(&cmci_discover_lock);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
if (hdr)
printk(KERN_CONT "\n");
}
@@ -211,13 +212,14 @@ void cmci_recheck(void)
*/
void cmci_clear(void)
{
+ unsigned long flags;
int i;
int banks;
u64 val;
if (!cmci_supported(&banks))
return;
- spin_lock(&cmci_discover_lock);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
continue;
@@ -227,7 +229,7 @@ void cmci_clear(void)
wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
__clear_bit(i, __get_cpu_var(mce_banks_owned));
}
- spin_unlock(&cmci_discover_lock);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
/*
@@ -249,7 +251,7 @@ void cmci_rediscover(int dying)
for_each_online_cpu (cpu) {
if (cpu == dying)
continue;
- if (set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)))
+ if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
continue;
/* Recheck banks in case CPUs don't all have the same */
if (cmci_supported(&banks))
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0b776c09aff..d21d4fb161f 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -275,7 +275,11 @@ static void __init print_mtrr_state(void)
}
printk(KERN_DEBUG "MTRR variable ranges %sabled:\n",
mtrr_state.enabled & 2 ? "en" : "dis");
- high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4;
+ if (size_or_mask & 0xffffffffUL)
+ high_width = ffs(size_or_mask & 0xffffffffUL) - 1;
+ else
+ high_width = ffs(size_or_mask>>32) + 32 - 1;
+ high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4;
for (i = 0; i < num_var_ranges; ++i) {
if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n",
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index d67e0e48bc2..d5e30397246 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -14,7 +14,7 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
if (c->x86_max_cores * smp_num_siblings > 1) {
seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
seq_printf(m, "siblings\t: %d\n",
- cpus_weight(per_cpu(cpu_core_map, cpu)));
+ cpumask_weight(cpu_core_mask(cpu)));
seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
seq_printf(m, "apicid\t\t: %d\n", c->apicid);
@@ -143,9 +143,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
static void *c_start(struct seq_file *m, loff_t *pos)
{
if (*pos == 0) /* just in case, cpu 0 is not the first */
- *pos = first_cpu(cpu_online_map);
+ *pos = cpumask_first(cpu_online_mask);
else
- *pos = next_cpu_nr(*pos - 1, cpu_online_map);
+ *pos = cpumask_next(*pos - 1, cpu_online_mask);
if ((*pos) < nr_cpu_ids)
return &cpu_data(*pos);
return NULL;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index dd2130b0fb3..95ea5fa7d44 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -15,6 +15,7 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
+#include <linux/ftrace.h>
#include <asm/stacktrace.h>
@@ -196,6 +197,11 @@ unsigned __kprobes long oops_begin(void)
int cpu;
unsigned long flags;
+ /* notify the hw-branch tracer so it may disable tracing and
+ add the last trace to the trace buffer -
+ the earlier this happens, the more useful the trace. */
+ trace_hw_branch_oops();
+
oops_enter();
/* racy, but better than risking deadlock. */
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index ef2c3563357..00628130292 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1074,12 +1074,13 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
u64 addr;
u64 start;
- start = startt;
- while (size < sizet && (start + 1))
+ for (start = startt; ; start += size) {
start = find_e820_area_size(start, &size, align);
-
- if (size < sizet)
- return 0;
+ if (!(start + 1))
+ return 0;
+ if (size >= sizet)
+ break;
+ }
#ifdef CONFIG_X86_32
if (start >= MAXMEM)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a331ec38af9..38946c6e843 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1410,7 +1410,10 @@ ENTRY(paranoid_exit)
paranoid_swapgs:
TRACE_IRQS_IRETQ 0
SWAPGS_UNSAFE_STACK
+ RESTORE_ALL 8
+ jmp irq_return
paranoid_restore:
+ TRACE_IRQS_IRETQ 0
RESTORE_ALL 8
jmp irq_return
paranoid_userspace:
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 76f7141e0f9..b79c5533c42 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -18,14 +18,28 @@
#include <linux/init.h>
#include <linux/list.h>
+#include <trace/syscall.h>
+
+#include <asm/cacheflush.h>
#include <asm/ftrace.h>
-#include <linux/ftrace.h>
#include <asm/nops.h>
#include <asm/nmi.h>
#ifdef CONFIG_DYNAMIC_FTRACE
+int ftrace_arch_code_modify_prepare(void)
+{
+ set_kernel_text_rw();
+ return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+ set_kernel_text_ro();
+ return 0;
+}
+
union ftrace_code_union {
char code[MCOUNT_INSN_SIZE];
struct {
@@ -66,11 +80,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
*
* 1) Put the instruction pointer into the IP buffer
* and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ * we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag.
+ * 5) Wait for any running NMIs to finish.
*
* If an NMI is executed, the first thing it does is to call
* "ftrace_nmi_enter". This will check if the flag is set to write
@@ -82,9 +96,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
* are the same as what exists.
*/
-static atomic_t in_nmi = ATOMIC_INIT(0);
+#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
+static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status; /* holds return value of text write */
-static int mod_code_write; /* set when NMI should do the write */
static void *mod_code_ip; /* holds the IP to write to */
static void *mod_code_newcode; /* holds the text to write to the IP */
@@ -101,6 +115,20 @@ int ftrace_arch_read_dyn_info(char *buf, int size)
return r;
}
+static void clear_mod_flag(void)
+{
+ int old = atomic_read(&nmi_running);
+
+ for (;;) {
+ int new = old & ~MOD_CODE_WRITE_FLAG;
+
+ if (old == new)
+ break;
+
+ old = atomic_cmpxchg(&nmi_running, old, new);
+ }
+}
+
static void ftrace_mod_code(void)
{
/*
@@ -111,37 +139,52 @@ static void ftrace_mod_code(void)
*/
mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
MCOUNT_INSN_SIZE);
+
+ /* if we fail, then kill any new writers */
+ if (mod_code_status)
+ clear_mod_flag();
}
void ftrace_nmi_enter(void)
{
- atomic_inc(&in_nmi);
- /* Must have in_nmi seen before reading write flag */
- smp_mb();
- if (mod_code_write) {
+ if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+ smp_rmb();
ftrace_mod_code();
atomic_inc(&nmi_update_count);
}
+ /* Must have previous changes seen before executions */
+ smp_mb();
}
void ftrace_nmi_exit(void)
{
- /* Finish all executions before clearing in_nmi */
- smp_wmb();
- atomic_dec(&in_nmi);
+ /* Finish all executions before clearing nmi_running */
+ smp_mb();
+ atomic_dec(&nmi_running);
+}
+
+static void wait_for_nmi_and_set_mod_flag(void)
+{
+ if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
+ return;
+
+ do {
+ cpu_relax();
+ } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
+
+ nmi_wait_count++;
}
static void wait_for_nmi(void)
{
- int waited = 0;
+ if (!atomic_read(&nmi_running))
+ return;
- while (atomic_read(&in_nmi)) {
- waited = 1;
+ do {
cpu_relax();
- }
+ } while (atomic_read(&nmi_running));
- if (waited)
- nmi_wait_count++;
+ nmi_wait_count++;
}
static int
@@ -151,14 +194,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
mod_code_newcode = new_code;
/* The buffers need to be visible before we let NMIs write them */
- smp_wmb();
-
- mod_code_write = 1;
-
- /* Make sure write bit is visible before we wait on NMIs */
smp_mb();
- wait_for_nmi();
+ wait_for_nmi_and_set_mod_flag();
/* Make sure all running NMIs have finished before we write the code */
smp_mb();
@@ -166,13 +204,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
ftrace_mod_code();
/* Make sure the write happens before clearing the bit */
- smp_wmb();
-
- mod_code_write = 0;
-
- /* make sure NMIs see the cleared bit */
smp_mb();
+ clear_mod_flag();
wait_for_nmi();
return mod_code_status;
@@ -368,25 +402,6 @@ int ftrace_disable_ftrace_graph_caller(void)
return ftrace_mod_jmp(ip, old_offset, new_offset);
}
-#else /* CONFIG_DYNAMIC_FTRACE */
-
-/*
- * These functions are picked from those used on
- * this page for dynamic ftrace. They have been
- * simplified to ignore all traces in NMI context.
- */
-static atomic_t in_nmi;
-
-void ftrace_nmi_enter(void)
-{
- atomic_inc(&in_nmi);
-}
-
-void ftrace_nmi_exit(void)
-{
- atomic_dec(&in_nmi);
-}
-
#endif /* !CONFIG_DYNAMIC_FTRACE */
/*
@@ -396,14 +411,13 @@ void ftrace_nmi_exit(void)
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
{
unsigned long old;
- unsigned long long calltime;
int faulted;
struct ftrace_graph_ent trace;
unsigned long return_hooker = (unsigned long)
&return_to_handler;
/* Nmi's are currently unsupported */
- if (unlikely(atomic_read(&in_nmi)))
+ if (unlikely(in_nmi()))
return;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
@@ -428,7 +442,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
_ASM_EXTABLE(1b, 4b)
_ASM_EXTABLE(2b, 4b)
- : [old] "=r" (old), [faulted] "=r" (faulted)
+ : [old] "=&r" (old), [faulted] "=r" (faulted)
: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
: "memory"
);
@@ -439,17 +453,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
return;
}
- if (unlikely(!__kernel_text_address(old))) {
- ftrace_graph_stop();
- *parent = old;
- WARN_ON(1);
- return;
- }
-
- calltime = cpu_clock(raw_smp_processor_id());
-
- if (ftrace_push_return_trace(old, calltime,
- self_addr, &trace.depth) == -EBUSY) {
+ if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) {
*parent = old;
return;
}
@@ -463,3 +467,66 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_FTRACE_SYSCALLS
+
+extern unsigned long __start_syscalls_metadata[];
+extern unsigned long __stop_syscalls_metadata[];
+extern unsigned long *sys_call_table;
+
+static struct syscall_metadata **syscalls_metadata;
+
+static struct syscall_metadata *find_syscall_meta(unsigned long *syscall)
+{
+ struct syscall_metadata *start;
+ struct syscall_metadata *stop;
+ char str[KSYM_SYMBOL_LEN];
+
+
+ start = (struct syscall_metadata *)__start_syscalls_metadata;
+ stop = (struct syscall_metadata *)__stop_syscalls_metadata;
+ kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str);
+
+ for ( ; start < stop; start++) {
+ if (start->name && !strcmp(start->name, str))
+ return start;
+ }
+ return NULL;
+}
+
+struct syscall_metadata *syscall_nr_to_meta(int nr)
+{
+ if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0)
+ return NULL;
+
+ return syscalls_metadata[nr];
+}
+
+void arch_init_ftrace_syscalls(void)
+{
+ int i;
+ struct syscall_metadata *meta;
+ unsigned long **psys_syscall_table = &sys_call_table;
+ static atomic_t refs;
+
+ if (atomic_inc_return(&refs) != 1)
+ goto end;
+
+ syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
+ FTRACE_SYSCALL_MAX, GFP_KERNEL);
+ if (!syscalls_metadata) {
+ WARN_ON(1);
+ return;
+ }
+
+ for (i = 0; i < FTRACE_SYSCALL_MAX; i++) {
+ meta = find_syscall_meta(psys_syscall_table[i]);
+ syscalls_metadata[i] = meta;
+ }
+ return;
+
+ /* Paranoid: avoid overflow */
+end:
+ atomic_dec(&refs);
+}
+#endif
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 648b3a2a3a4..81408b93f88 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -236,6 +236,10 @@ static void hpet_stop_counter(void)
unsigned long cfg = hpet_readl(HPET_CFG);
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
+}
+
+static void hpet_reset_counter(void)
+{
hpet_writel(0, HPET_COUNTER);
hpet_writel(0, HPET_COUNTER + 4);
}
@@ -250,6 +254,7 @@ static void hpet_start_counter(void)
static void hpet_restart_counter(void)
{
hpet_stop_counter();
+ hpet_reset_counter();
hpet_start_counter();
}
@@ -309,7 +314,7 @@ static int hpet_setup_msi_irq(unsigned int irq);
static void hpet_set_mode(enum clock_event_mode mode,
struct clock_event_device *evt, int timer)
{
- unsigned long cfg;
+ unsigned long cfg, cmp, now;
uint64_t delta;
switch (mode) {
@@ -317,12 +322,23 @@ static void hpet_set_mode(enum clock_event_mode mode,
hpet_stop_counter();
delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
delta >>= evt->shift;
+ now = hpet_readl(HPET_COUNTER);
+ cmp = now + (unsigned long) delta;
cfg = hpet_readl(HPET_Tn_CFG(timer));
/* Make sure we use edge triggered interrupts */
cfg &= ~HPET_TN_LEVEL;
cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
HPET_TN_SETVAL | HPET_TN_32BIT;
hpet_writel(cfg, HPET_Tn_CFG(timer));
+ hpet_writel(cmp, HPET_Tn_CMP(timer));
+ udelay(1);
+ /*
+ * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL
+ * cleared) to T0_CMP to set the period. The HPET_TN_SETVAL
+ * bit is automatically cleared after the first write.
+ * (See AMD-8111 HyperTransport I/O Hub Data Sheet,
+ * Publication # 24674)
+ */
hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
hpet_start_counter();
hpet_print_config();
@@ -722,7 +738,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
/*
* Clock source related code
*/
-static cycle_t read_hpet(void)
+static cycle_t read_hpet(struct clocksource *cs)
{
return (cycle_t)hpet_readl(HPET_COUNTER);
}
@@ -756,7 +772,7 @@ static int hpet_clocksource_register(void)
hpet_restart_counter();
/* Verify whether hpet counter works */
- t1 = read_hpet();
+ t1 = hpet_readl(HPET_COUNTER);
rdtscll(start);
/*
@@ -770,7 +786,7 @@ static int hpet_clocksource_register(void)
rdtscll(now);
} while ((now - start) < 200000UL);
- if (t1 == read_hpet()) {
+ if (t1 == hpet_readl(HPET_COUNTER)) {
printk(KERN_WARNING
"HPET counter not counting. HPET disabled\n");
return -ENODEV;
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 3475440baa5..c2e0bb0890d 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -129,7 +129,7 @@ void __init setup_pit_timer(void)
* to just read by itself. So use jiffies to emulate a free
* running counter:
*/
-static cycle_t pit_read(void)
+static cycle_t pit_read(struct clocksource *cs)
{
static int old_count;
static u32 old_jifs;
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3aaf7b9e3a8..c3fe010d74c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -65,7 +65,7 @@ static int show_other_interrupts(struct seq_file *p, int prec)
seq_printf(p, " Spurious interrupts\n");
#endif
if (generic_interrupt_extension) {
- seq_printf(p, "PLT: ");
+ seq_printf(p, "%*s: ", prec, "PLT");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->generic_irqs);
seq_printf(p, " Platform interrupts\n");
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index eedfaebe106..b1f4dffb919 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -88,6 +88,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs[GDB_SS] = __KERNEL_DS;
gdb_regs[GDB_FS] = 0xFFFF;
gdb_regs[GDB_GS] = 0xFFFF;
+ gdb_regs[GDB_SP] = (int)&regs->sp;
#else
gdb_regs[GDB_R8] = regs->r8;
gdb_regs[GDB_R9] = regs->r9;
@@ -100,8 +101,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
gdb_regs32[GDB_PS] = regs->flags;
gdb_regs32[GDB_CS] = regs->cs;
gdb_regs32[GDB_SS] = regs->ss;
-#endif
gdb_regs[GDB_SP] = regs->sp;
+#endif
}
/**
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 55b94614e34..7b5169d2b00 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -638,13 +638,13 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
#else
" pushf\n"
/*
- * Skip cs, ip, orig_ax.
+ * Skip cs, ip, orig_ax and gs.
* trampoline_handler() will plug in these values
*/
- " subl $12, %esp\n"
+ " subl $16, %esp\n"
" pushl %fs\n"
- " pushl %ds\n"
" pushl %es\n"
+ " pushl %ds\n"
" pushl %eax\n"
" pushl %ebp\n"
" pushl %edi\n"
@@ -655,10 +655,10 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
" movl %esp, %eax\n"
" call trampoline_handler\n"
/* Move flags to cs */
- " movl 52(%esp), %edx\n"
- " movl %edx, 48(%esp)\n"
+ " movl 56(%esp), %edx\n"
+ " movl %edx, 52(%esp)\n"
/* Replace saved flags with true return address. */
- " movl %eax, 52(%esp)\n"
+ " movl %eax, 56(%esp)\n"
" popl %ebx\n"
" popl %ecx\n"
" popl %edx\n"
@@ -666,8 +666,8 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
" popl %edi\n"
" popl %ebp\n"
" popl %eax\n"
- /* Skip ip, orig_ax, es, ds, fs */
- " addl $20, %esp\n"
+ /* Skip ds, es, fs, gs, orig_ax and ip */
+ " addl $24, %esp\n"
" popf\n"
#endif
" ret\n");
@@ -691,6 +691,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
regs->cs = __KERNEL_CS;
#else
regs->cs = __KERNEL_CS | get_kernel_rpl();
+ regs->gs = 0;
#endif
regs->ip = trampoline_address;
regs->orig_ax = ~0UL;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 137f2e8132d..223af43f152 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -77,6 +77,11 @@ static cycle_t kvm_clock_read(void)
return ret;
}
+static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
+{
+ return kvm_clock_read();
+}
+
/*
* If we don't do that, there is the possibility that the guest
* will calibrate under heavy load - thus, getting a lower lpj -
@@ -107,7 +112,7 @@ static void kvm_get_preset_lpj(void)
static struct clocksource kvm_clock = {
.name = "kvm-clock",
- .read = kvm_clock_read,
+ .read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
.mult = 1 << KVM_SCALE,
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index e7368c1da01..c1c429d0013 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -194,7 +194,7 @@ void machine_kexec(struct kimage *image)
unsigned int preserve_context);
#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context)
+ if (image->preserve_context)
save_processor_state();
#endif
@@ -253,7 +253,7 @@ void machine_kexec(struct kimage *image)
image->preserve_context);
#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context)
+ if (image->preserve_context)
restore_processor_state();
#endif
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 89cea4d4467..84c3bf209e9 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -274,7 +274,7 @@ void machine_kexec(struct kimage *image)
int save_ftrace_enabled;
#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context)
+ if (image->preserve_context)
save_processor_state();
#endif
@@ -333,7 +333,7 @@ void machine_kexec(struct kimage *image)
image->preserve_context);
#ifdef CONFIG_KEXEC_JUMP
- if (kexec_image->preserve_context)
+ if (image->preserve_context)
restore_processor_state();
#endif
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index c25fdb38229..453b5795a5c 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -12,31 +12,30 @@
*
* Licensed under the terms of the GNU General Public
* License version 2. See file COPYING for details.
-*/
-
+ */
+#include <linux/platform_device.h>
#include <linux/capability.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/cpumask.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
#include <linux/miscdevice.h>
+#include <linux/firmware.h>
#include <linux/spinlock.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
+#include <linux/cpumask.h>
+#include <linux/pci_ids.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/cpu.h>
-#include <linux/firmware.h>
-#include <linux/platform_device.h>
#include <linux/pci.h>
-#include <linux/pci_ids.h>
-#include <linux/uaccess.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
-#include <asm/msr.h>
-#include <asm/processor.h>
#include <asm/microcode.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
MODULE_DESCRIPTION("AMD Microcode Update Driver");
MODULE_AUTHOR("Peter Oruba");
@@ -72,8 +71,8 @@ struct microcode_header_amd {
} __attribute__((packed));
struct microcode_amd {
- struct microcode_header_amd hdr;
- unsigned int mpb[0];
+ struct microcode_header_amd hdr;
+ unsigned int mpb[0];
};
#define UCODE_MAX_SIZE 2048
@@ -184,8 +183,8 @@ static int get_ucode_data(void *to, const u8 *from, size_t n)
return 0;
}
-static void *get_next_ucode(const u8 *buf, unsigned int size,
- unsigned int *mc_size)
+static void *
+get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
{
unsigned int total_size;
u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
@@ -223,7 +222,6 @@ static void *get_next_ucode(const u8 *buf, unsigned int size,
return mc;
}
-
static int install_equiv_cpu_table(const u8 *buf)
{
u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
@@ -372,4 +370,3 @@ struct microcode_ops * __init init_amd_microcode(void)
{
return &microcode_amd_ops;
}
-
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index c9b721ba968..98c470c069d 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -70,41 +70,41 @@
* Fix sigmatch() macro to handle old CPUs with pf == 0.
* Thanks to Stuart Swales for pointing out this bug.
*/
+#include <linux/platform_device.h>
#include <linux/capability.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
+#include <linux/miscdevice.h>
+#include <linux/firmware.h>
#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
#include <linux/cpumask.h>
-#include <linux/module.h>
-#include <linux/slab.h>
+#include <linux/uaccess.h>
#include <linux/vmalloc.h>
-#include <linux/miscdevice.h>
-#include <linux/spinlock.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/cpu.h>
-#include <linux/firmware.h>
-#include <linux/platform_device.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
-#include <asm/msr.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
#include <asm/microcode.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
MODULE_DESCRIPTION("Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
MODULE_LICENSE("GPL");
-#define MICROCODE_VERSION "2.00"
+#define MICROCODE_VERSION "2.00"
-static struct microcode_ops *microcode_ops;
+static struct microcode_ops *microcode_ops;
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
static DEFINE_MUTEX(microcode_mutex);
-struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
+struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
EXPORT_SYMBOL_GPL(ucode_cpu_info);
#ifdef CONFIG_MICROCODE_OLD_INTERFACE
@@ -198,18 +198,33 @@ static void microcode_dev_exit(void)
MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
#else
-#define microcode_dev_init() 0
-#define microcode_dev_exit() do { } while (0)
+#define microcode_dev_init() 0
+#define microcode_dev_exit() do { } while (0)
#endif
/* fake device for request_firmware */
-static struct platform_device *microcode_pdev;
+static struct platform_device *microcode_pdev;
+
+static long reload_for_cpu(void *unused)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id();
+ int err = 0;
+
+ mutex_lock(&microcode_mutex);
+ if (uci->valid) {
+ err = microcode_ops->request_microcode_fw(smp_processor_id(),
+ &microcode_pdev->dev);
+ if (!err)
+ microcode_ops->apply_microcode(smp_processor_id());
+ }
+ mutex_unlock(&microcode_mutex);
+ return err;
+}
static ssize_t reload_store(struct sys_device *dev,
struct sysdev_attribute *attr,
const char *buf, size_t sz)
{
- struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
char *end;
unsigned long val = simple_strtoul(buf, &end, 0);
int err = 0;
@@ -218,21 +233,9 @@ static ssize_t reload_store(struct sys_device *dev,
if (end == buf)
return -EINVAL;
if (val == 1) {
- cpumask_t old = current->cpus_allowed;
-
get_online_cpus();
- if (cpu_online(cpu)) {
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
- mutex_lock(&microcode_mutex);
- if (uci->valid) {
- err = microcode_ops->request_microcode_fw(cpu,
- &microcode_pdev->dev);
- if (!err)
- microcode_ops->apply_microcode(cpu);
- }
- mutex_unlock(&microcode_mutex);
- set_cpus_allowed_ptr(current, &old);
- }
+ if (cpu_online(cpu))
+ err = work_on_cpu(cpu, reload_for_cpu, NULL);
put_online_cpus();
}
if (err)
@@ -268,8 +271,8 @@ static struct attribute *mc_default_attrs[] = {
};
static struct attribute_group mc_attr_group = {
- .attrs = mc_default_attrs,
- .name = "microcode",
+ .attrs = mc_default_attrs,
+ .name = "microcode",
};
static void __microcode_fini_cpu(int cpu)
@@ -328,9 +331,9 @@ static int microcode_resume_cpu(int cpu)
return 0;
}
-static void microcode_update_cpu(int cpu)
+static long microcode_update_cpu(void *unused)
{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id();
int err = 0;
/*
@@ -338,30 +341,27 @@ static void microcode_update_cpu(int cpu)
* otherwise just request a firmware:
*/
if (uci->valid) {
- err = microcode_resume_cpu(cpu);
- } else {
- collect_cpu_info(cpu);
+ err = microcode_resume_cpu(smp_processor_id());
+ } else {
+ collect_cpu_info(smp_processor_id());
if (uci->valid && system_state == SYSTEM_RUNNING)
- err = microcode_ops->request_microcode_fw(cpu,
+ err = microcode_ops->request_microcode_fw(
+ smp_processor_id(),
&microcode_pdev->dev);
}
if (!err)
- microcode_ops->apply_microcode(cpu);
+ microcode_ops->apply_microcode(smp_processor_id());
+ return err;
}
-static void microcode_init_cpu(int cpu)
+static int microcode_init_cpu(int cpu)
{
- cpumask_t old = current->cpus_allowed;
-
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
- /* We should bind the task to the CPU */
- BUG_ON(raw_smp_processor_id() != cpu);
-
+ int err;
mutex_lock(&microcode_mutex);
- microcode_update_cpu(cpu);
+ err = work_on_cpu(cpu, microcode_update_cpu, NULL);
mutex_unlock(&microcode_mutex);
- set_cpus_allowed_ptr(current, &old);
+ return err;
}
static int mc_sysdev_add(struct sys_device *sys_dev)
@@ -379,8 +379,9 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
if (err)
return err;
- microcode_init_cpu(cpu);
- return 0;
+ err = microcode_init_cpu(cpu);
+
+ return err;
}
static int mc_sysdev_remove(struct sys_device *sys_dev)
@@ -404,14 +405,14 @@ static int mc_sysdev_resume(struct sys_device *dev)
return 0;
/* only CPU 0 will apply ucode here */
- microcode_update_cpu(0);
+ microcode_update_cpu(NULL);
return 0;
}
static struct sysdev_driver mc_sysdev_driver = {
- .add = mc_sysdev_add,
- .remove = mc_sysdev_remove,
- .resume = mc_sysdev_resume,
+ .add = mc_sysdev_add,
+ .remove = mc_sysdev_remove,
+ .resume = mc_sysdev_resume,
};
static __cpuinit int
@@ -424,7 +425,9 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
- microcode_init_cpu(cpu);
+ if (microcode_init_cpu(cpu))
+ printk(KERN_ERR "microcode: failed to init CPU%d\n",
+ cpu);
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
pr_debug("microcode: CPU%d added\n", cpu);
@@ -448,7 +451,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
}
static struct notifier_block __refdata mc_cpu_notifier = {
- .notifier_call = mc_cpu_callback,
+ .notifier_call = mc_cpu_callback,
};
static int __init microcode_init(void)
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 5e9f4fc5138..149b9ec7c1a 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -70,28 +70,28 @@
* Fix sigmatch() macro to handle old CPUs with pf == 0.
* Thanks to Stuart Swales for pointing out this bug.
*/
+#include <linux/platform_device.h>
#include <linux/capability.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
+#include <linux/miscdevice.h>
+#include <linux/firmware.h>
#include <linux/smp_lock.h>
+#include <linux/spinlock.h>
#include <linux/cpumask.h>
-#include <linux/module.h>
-#include <linux/slab.h>
+#include <linux/uaccess.h>
#include <linux/vmalloc.h>
-#include <linux/miscdevice.h>
-#include <linux/spinlock.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/cpu.h>
-#include <linux/firmware.h>
-#include <linux/platform_device.h>
-#include <linux/uaccess.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
-#include <asm/msr.h>
-#include <asm/processor.h>
#include <asm/microcode.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
MODULE_DESCRIPTION("Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -129,12 +129,13 @@ struct extended_sigtable {
struct extended_signature sigs[0];
};
-#define DEFAULT_UCODE_DATASIZE (2000)
+#define DEFAULT_UCODE_DATASIZE (2000)
#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
#define DWSIZE (sizeof(u32))
+
#define get_totalsize(mc) \
(((struct microcode_intel *)mc)->hdr.totalsize ? \
((struct microcode_intel *)mc)->hdr.totalsize : \
@@ -197,30 +198,31 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
}
static inline int
-update_match_revision(struct microcode_header_intel *mc_header, int rev)
+update_match_revision(struct microcode_header_intel *mc_header, int rev)
{
return (mc_header->rev <= rev) ? 0 : 1;
}
static int microcode_sanity_check(void *mc)
{
+ unsigned long total_size, data_size, ext_table_size;
struct microcode_header_intel *mc_header = mc;
struct extended_sigtable *ext_header = NULL;
- struct extended_signature *ext_sig;
- unsigned long total_size, data_size, ext_table_size;
int sum, orig_sum, ext_sigcount = 0, i;
+ struct extended_signature *ext_sig;
total_size = get_totalsize(mc_header);
data_size = get_datasize(mc_header);
+
if (data_size + MC_HEADER_SIZE > total_size) {
printk(KERN_ERR "microcode: error! "
- "Bad data size in microcode data file\n");
+ "Bad data size in microcode data file\n");
return -EINVAL;
}
if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
printk(KERN_ERR "microcode: error! "
- "Unknown microcode update format\n");
+ "Unknown microcode update format\n");
return -EINVAL;
}
ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
@@ -318,11 +320,15 @@ get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
static void apply_microcode(int cpu)
{
+ struct microcode_intel *mc_intel;
+ struct ucode_cpu_info *uci;
unsigned long flags;
unsigned int val[2];
- int cpu_num = raw_smp_processor_id();
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- struct microcode_intel *mc_intel = uci->mc;
+ int cpu_num;
+
+ cpu_num = raw_smp_processor_id();
+ uci = ucode_cpu_info + cpu;
+ mc_intel = uci->mc;
/* We should bind the task to the CPU */
BUG_ON(cpu_num != cpu);
@@ -348,15 +354,17 @@ static void apply_microcode(int cpu)
spin_unlock_irqrestore(&microcode_update_lock, flags);
if (val[1] != mc_intel->hdr.rev) {
printk(KERN_ERR "microcode: CPU%d update from revision "
- "0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]);
+ "0x%x to 0x%x failed\n",
+ cpu_num, uci->cpu_sig.rev, val[1]);
return;
}
printk(KERN_INFO "microcode: CPU%d updated from revision "
- "0x%x to 0x%x, date = %04x-%02x-%02x \n",
+ "0x%x to 0x%x, date = %04x-%02x-%02x \n",
cpu_num, uci->cpu_sig.rev, val[1],
mc_intel->hdr.date & 0xffff,
mc_intel->hdr.date >> 24,
(mc_intel->hdr.date >> 16) & 0xff);
+
uci->cpu_sig.rev = val[1];
}
@@ -404,18 +412,23 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
leftover -= mc_size;
}
- if (new_mc) {
- if (!leftover) {
- if (uci->mc)
- vfree(uci->mc);
- uci->mc = (struct microcode_intel *)new_mc;
- pr_debug("microcode: CPU%d found a matching microcode update with"
- " version 0x%x (current=0x%x)\n",
- cpu, new_rev, uci->cpu_sig.rev);
- } else
- vfree(new_mc);
+ if (!new_mc)
+ goto out;
+
+ if (leftover) {
+ vfree(new_mc);
+ goto out;
}
+ if (uci->mc)
+ vfree(uci->mc);
+ uci->mc = (struct microcode_intel *)new_mc;
+
+ pr_debug("microcode: CPU%d found a matching microcode update with"
+ " version 0x%x (current=0x%x)\n",
+ cpu, new_rev, uci->cpu_sig.rev);
+
+ out:
return (int)leftover;
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index dce99dca6cf..70fd7e414c1 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -679,7 +679,7 @@ void __init get_smp_config(void)
__get_smp_config(0);
}
-static void smp_reserve_bootmem(struct mpf_intel *mpf)
+static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
{
unsigned long size = get_mpc_size(mpf->physptr);
#ifdef CONFIG_X86_32
@@ -838,7 +838,7 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
-static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
+static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
{
int i;
@@ -866,7 +866,8 @@ static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
}
}
#else /* CONFIG_X86_IO_APIC */
-static inline void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
+static
+inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
#endif /* CONFIG_X86_IO_APIC */
static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 8e45f446488..9faf43bea33 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -134,7 +134,9 @@ static void *get_call_destination(u8 type)
.pv_irq_ops = pv_irq_ops,
.pv_apic_ops = pv_apic_ops,
.pv_mmu_ops = pv_mmu_ops,
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
.pv_lock_ops = pv_lock_ops,
+#endif
};
return *((void **)&tmpl + type);
}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 90f5b9ef5de..745579bc825 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -40,7 +40,7 @@ EXPORT_SYMBOL(bad_dma_address);
to older i386. */
struct device x86_dma_fallback_dev = {
.init_name = "fallback device",
- .coherent_dma_mask = DMA_32BIT_MASK,
+ .coherent_dma_mask = DMA_BIT_MASK(32),
.dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
};
EXPORT_SYMBOL(x86_dma_fallback_dev);
@@ -148,7 +148,7 @@ again:
if (!is_buffer_dma_capable(dma_mask, addr, size)) {
__free_pages(page, get_order(size));
- if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) {
+ if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
flag = (flag & ~GFP_DMA32) | GFP_DMA;
goto again;
}
@@ -243,7 +243,7 @@ int dma_supported(struct device *dev, u64 mask)
/* Copied from i386. Doesn't make much sense, because it will
only work for pci_alloc_coherent.
The caller just has to use GFP_DMA in this case. */
- if (mask < DMA_24BIT_MASK)
+ if (mask < DMA_BIT_MASK(24))
return 0;
/* Tell the device to use SAC when IOMMU force is on. This
@@ -258,7 +258,7 @@ int dma_supported(struct device *dev, u64 mask)
SAC for these. Assume all masks <= 40 bits are of this
type. Normally this doesn't make any difference, but gives
more gentle handling of IOMMU overflow. */
- if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
+ if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) {
dev_info(dev, "Force SAC with mask %Lx\n", mask);
return 0;
}
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index c6d703b3932..71d412a09f3 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -15,7 +15,7 @@ static int
check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
{
if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) {
- if (*hwdev->dma_mask >= DMA_32BIT_MASK)
+ if (*hwdev->dma_mask >= DMA_BIT_MASK(32))
printk(KERN_ERR
"nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
name, (long long)bus, size,
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 34f12e9996e..221a3853e26 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -50,7 +50,7 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
}
-struct dma_map_ops swiotlb_dma_ops = {
+static struct dma_map_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
.alloc_coherent = x86_swiotlb_alloc_coherent,
.free_coherent = swiotlb_free_coherent,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 156f87582c6..ca989158e84 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -8,7 +8,7 @@
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/clockchips.h>
-#include <linux/ftrace.h>
+#include <trace/power.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/idle.h>
@@ -22,6 +22,9 @@ EXPORT_SYMBOL(idle_nomwait);
struct kmem_cache *task_xstate_cachep;
+DEFINE_TRACE(power_start);
+DEFINE_TRACE(power_end);
+
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
*dst = *src;
@@ -325,7 +328,7 @@ void stop_this_cpu(void *dummy)
/*
* Remove this CPU:
*/
- cpu_clear(smp_processor_id(), cpu_online_map);
+ set_cpu_online(smp_processor_id(), false);
disable_local_APIC();
for (;;) {
@@ -475,12 +478,13 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
return 1;
}
-static cpumask_t c1e_mask = CPU_MASK_NONE;
+static cpumask_var_t c1e_mask;
static int c1e_detected;
void c1e_remove_cpu(int cpu)
{
- cpu_clear(cpu, c1e_mask);
+ if (c1e_mask != NULL)
+ cpumask_clear_cpu(cpu, c1e_mask);
}
/*
@@ -509,8 +513,8 @@ static void c1e_idle(void)
if (c1e_detected) {
int cpu = smp_processor_id();
- if (!cpu_isset(cpu, c1e_mask)) {
- cpu_set(cpu, c1e_mask);
+ if (!cpumask_test_cpu(cpu, c1e_mask)) {
+ cpumask_set_cpu(cpu, c1e_mask);
/*
* Force broadcast so ACPI can not interfere. Needs
* to run with interrupts enabled as it uses
@@ -562,6 +566,15 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
pm_idle = default_idle;
}
+void __init init_c1e_mask(void)
+{
+ /* If we're using c1e_idle, we need to allocate c1e_mask. */
+ if (pm_idle == c1e_idle) {
+ alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
+ cpumask_clear(c1e_mask);
+ }
+}
+
static int __init idle_setup(char *str)
{
if (!str)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index b7cc21bc6ae..23b7c8f017e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -34,6 +34,8 @@
#include <asm/proto.h>
#include <asm/ds.h>
+#include <trace/syscall.h>
+
#include "tls.h"
enum x86_regset {
@@ -1415,6 +1417,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
tracehook_report_syscall_entry(regs))
ret = -1L;
+ if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
+ ftrace_syscall_enter(regs);
+
if (unlikely(current->audit_context)) {
if (IS_IA32)
audit_syscall_entry(AUDIT_ARCH_I386,
@@ -1438,6 +1443,9 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
if (unlikely(current->audit_context))
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
+ if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
+ ftrace_syscall_exit(regs);
+
if (test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, 0);
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index e95022e4f5d..7563b31b4f0 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -261,8 +261,6 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
{
if (hpet_force_user)
old_ich_force_enable_hpet(dev);
- else
- hpet_print_force_info();
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2aef36d8aca..1340dad417f 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -224,6 +224,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"),
},
},
+ { /* Handle problems with rebooting on Dell DXP061 */
+ .callback = set_bios_reboot,
+ .ident = "Dell DXP061",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Dell DXP061"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ef7d10170c3..58d24ef917d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -101,11 +101,11 @@ EXPORT_SYMBOL(smp_num_siblings);
DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
/* representing HT siblings of each logical CPU */
-DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
/* representing HT and core siblings of each logical CPU */
-DEFINE_PER_CPU(cpumask_t, cpu_core_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
/* Per CPU bogomips and other parameters */
@@ -115,11 +115,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
atomic_t init_deasserted;
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
-
-/* which logical CPUs are on which nodes */
-cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly =
- { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
-EXPORT_SYMBOL(node_to_cpumask_map);
/* which node each logical CPU is on */
int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
EXPORT_SYMBOL(cpu_to_node_map);
@@ -128,7 +123,7 @@ EXPORT_SYMBOL(cpu_to_node_map);
static void map_cpu_to_node(int cpu, int node)
{
printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
- cpumask_set_cpu(cpu, &node_to_cpumask_map[node]);
+ cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
cpu_to_node_map[cpu] = node;
}
@@ -139,7 +134,7 @@ static void unmap_cpu_to_node(int cpu)
printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
for (node = 0; node < MAX_NUMNODES; node++)
- cpumask_clear_cpu(cpu, &node_to_cpumask_map[node]);
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
cpu_to_node_map[cpu] = 0;
}
#else /* !(CONFIG_NUMA && CONFIG_X86_32) */
@@ -301,7 +296,7 @@ notrace static void __cpuinit start_secondary(void *unused)
__flush_tlb_all();
#endif
- /* This must be done before setting cpu_online_map */
+ /* This must be done before setting cpu_online_mask */
set_cpu_sibling_map(raw_smp_processor_id());
wmb();
@@ -334,6 +329,23 @@ notrace static void __cpuinit start_secondary(void *unused)
cpu_idle();
}
+#ifdef CONFIG_CPUMASK_OFFSTACK
+/* In this case, llc_shared_map is a pointer to a cpumask. */
+static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
+ const struct cpuinfo_x86 *src)
+{
+ struct cpumask *llc = dst->llc_shared_map;
+ *dst = *src;
+ dst->llc_shared_map = llc;
+}
+#else
+static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
+ const struct cpuinfo_x86 *src)
+{
+ *dst = *src;
+}
+#endif /* CONFIG_CPUMASK_OFFSTACK */
+
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
@@ -343,7 +355,7 @@ void __cpuinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
- *c = boot_cpu_data;
+ copy_cpuinfo_x86(c, &boot_cpu_data);
c->cpu_index = id;
if (id != 0)
identify_secondary_cpu(c);
@@ -367,15 +379,15 @@ void __cpuinit set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, cpu_sibling_mask(i));
cpumask_set_cpu(i, cpu_core_mask(cpu));
cpumask_set_cpu(cpu, cpu_core_mask(i));
- cpumask_set_cpu(i, &c->llc_shared_map);
- cpumask_set_cpu(cpu, &o->llc_shared_map);
+ cpumask_set_cpu(i, c->llc_shared_map);
+ cpumask_set_cpu(cpu, o->llc_shared_map);
}
}
} else {
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
}
- cpumask_set_cpu(cpu, &c->llc_shared_map);
+ cpumask_set_cpu(cpu, c->llc_shared_map);
if (current_cpu_data.x86_max_cores == 1) {
cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
@@ -386,8 +398,8 @@ void __cpuinit set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
- cpumask_set_cpu(i, &c->llc_shared_map);
- cpumask_set_cpu(cpu, &cpu_data(i).llc_shared_map);
+ cpumask_set_cpu(i, c->llc_shared_map);
+ cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map);
}
if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
cpumask_set_cpu(i, cpu_core_mask(cpu));
@@ -425,12 +437,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
if (sched_mc_power_savings || sched_smt_power_savings)
return cpu_core_mask(cpu);
else
- return &c->llc_shared_map;
-}
-
-cpumask_t cpu_coregroup_map(int cpu)
-{
- return *cpu_coregroup_mask(cpu);
+ return c->llc_shared_map;
}
static void impress_friends(void)
@@ -897,9 +904,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
*/
static __init void disable_smp(void)
{
- /* use the read/write pointers to the present and possible maps */
- cpumask_copy(&cpu_present_map, cpumask_of(0));
- cpumask_copy(&cpu_possible_map, cpumask_of(0));
+ init_cpu_present(cpumask_of(0));
+ init_cpu_possible(cpumask_of(0));
smpboot_clear_io_apic_irqs();
if (smp_found_config)
@@ -1031,6 +1037,8 @@ static void __init smp_cpu_index_default(void)
*/
void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
+ unsigned int i;
+
preempt_disable();
smp_cpu_index_default();
current_cpu_data = boot_cpu_data;
@@ -1044,6 +1052,14 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
boot_cpu_logical_apicid = logical_smp_processor_id();
#endif
current_thread_info()->cpu = 0; /* needed? */
+ for_each_possible_cpu(i) {
+ alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
+ alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+ alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
+ cpumask_clear(per_cpu(cpu_core_map, i));
+ cpumask_clear(per_cpu(cpu_sibling_map, i));
+ cpumask_clear(cpu_data(i).llc_shared_map);
+ }
set_cpu_sibling_map(0);
enable_IR_x2apic();
@@ -1132,11 +1148,11 @@ early_param("possible_cpus", _setup_possible_cpus);
/*
- * cpu_possible_map should be static, it cannot change as cpu's
+ * cpu_possible_mask should be static, it cannot change as cpu's
* are onlined, or offlined. The reason is per-cpu data-structures
* are allocated by some modules at init time, and dont expect to
* do this dynamically on cpu arrival/departure.
- * cpu_present_map on the other hand can change dynamically.
+ * cpu_present_mask on the other hand can change dynamically.
* In case when cpu_hotplug is not compiled, then we resort to current
* behaviour, which is cpu_possible == cpu_present.
* - Ashok Raj
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 79c07324728..ed0c33761e6 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -25,6 +25,8 @@ static int uv_bau_retry_limit __read_mostly;
/* position of pnode (which is nasid>>1): */
static int uv_nshift __read_mostly;
+/* base pnode in this partition */
+static int uv_partition_base_pnode __read_mostly;
static unsigned long uv_mmask __read_mostly;
@@ -32,6 +34,34 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
static DEFINE_PER_CPU(struct bau_control, bau_control);
/*
+ * Determine the first node on a blade.
+ */
+static int __init blade_to_first_node(int blade)
+{
+ int node, b;
+
+ for_each_online_node(node) {
+ b = uv_node_to_blade_id(node);
+ if (blade == b)
+ return node;
+ }
+ return -1; /* shouldn't happen */
+}
+
+/*
+ * Determine the apicid of the first cpu on a blade.
+ */
+static int __init blade_to_first_apicid(int blade)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu)
+ if (blade == uv_cpu_to_blade_id(cpu))
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ return -1;
+}
+
+/*
* Free a software acknowledge hardware resource by clearing its Pending
* bit. This will return a reply to the sender.
* If the message has timed out, a reply has already been sent by the
@@ -67,7 +97,7 @@ static void uv_bau_process_message(struct bau_payload_queue_entry *msg,
msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
cpu = uv_blade_processor_id();
msg->number_of_cpus =
- uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
+ uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
this_cpu_mask = 1UL << cpu;
if (msp->seen_by.bits & this_cpu_mask)
return;
@@ -215,14 +245,14 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
* Returns @flush_mask if some remote flushing remains to be done. The
* mask will have some bits still set.
*/
-const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
+const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode,
struct bau_desc *bau_desc,
struct cpumask *flush_mask)
{
int completion_status = 0;
int right_shift;
int tries = 0;
- int blade;
+ int pnode;
int bit;
unsigned long mmr_offset;
unsigned long index;
@@ -265,8 +295,8 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
* use the IPI method of shootdown on them.
*/
for_each_cpu(bit, flush_mask) {
- blade = uv_cpu_to_blade_id(bit);
- if (blade == this_blade)
+ pnode = uv_cpu_to_pnode(bit);
+ if (pnode == this_pnode)
continue;
cpumask_clear_cpu(bit, flush_mask);
}
@@ -275,6 +305,8 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
return NULL;
}
+static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
+
/**
* uv_flush_tlb_others - globally purge translation cache of a virtual
* address or all TLB's
@@ -304,20 +336,19 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long va, unsigned int cpu)
{
- static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
- struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
+ struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask);
int i;
int bit;
- int blade;
+ int pnode;
int uv_cpu;
- int this_blade;
+ int this_pnode;
int locals = 0;
struct bau_desc *bau_desc;
cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
uv_cpu = uv_blade_processor_id();
- this_blade = uv_numa_blade_id();
+ this_pnode = uv_hub_info->pnode;
bau_desc = __get_cpu_var(bau_control).descriptor_base;
bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
@@ -325,13 +356,14 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
i = 0;
for_each_cpu(bit, flush_mask) {
- blade = uv_cpu_to_blade_id(bit);
- BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
- if (blade == this_blade) {
+ pnode = uv_cpu_to_pnode(bit);
+ BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1));
+ if (pnode == this_pnode) {
locals++;
continue;
}
- bau_node_set(blade, &bau_desc->distribution);
+ bau_node_set(pnode - uv_partition_base_pnode,
+ &bau_desc->distribution);
i++;
}
if (i == 0) {
@@ -349,7 +381,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
bau_desc->payload.address = va;
bau_desc->payload.sending_cpu = cpu;
- return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
+ return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask);
}
/*
@@ -417,24 +449,58 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
set_irq_regs(old_regs);
}
+/*
+ * uv_enable_timeouts
+ *
+ * Each target blade (i.e. blades that have cpu's) needs to have
+ * shootdown message timeouts enabled. The timeout does not cause
+ * an interrupt, but causes an error message to be returned to
+ * the sender.
+ */
static void uv_enable_timeouts(void)
{
- int i;
int blade;
- int last_blade;
+ int nblades;
int pnode;
- int cur_cpu = 0;
- unsigned long apicid;
+ unsigned long mmr_image;
- last_blade = -1;
- for_each_online_node(i) {
- blade = uv_node_to_blade_id(i);
- if (blade == last_blade)
+ nblades = uv_num_possible_blades();
+
+ for (blade = 0; blade < nblades; blade++) {
+ if (!uv_blade_nr_possible_cpus(blade))
continue;
- last_blade = blade;
- apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+
pnode = uv_blade_to_pnode(blade);
- cur_cpu += uv_blade_nr_possible_cpus(i);
+ mmr_image =
+ uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
+ /*
+ * Set the timeout period and then lock it in, in three
+ * steps; captures and locks in the period.
+ *
+ * To program the period, the SOFT_ACK_MODE must be off.
+ */
+ mmr_image &= ~((unsigned long)1 <<
+ UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+ /*
+ * Set the 4-bit period.
+ */
+ mmr_image &= ~((unsigned long)0xf <<
+ UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+ mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
+ UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+ /*
+ * Subsequent reversals of the timebase bit (3) cause an
+ * immediate timeout of one or all INTD resources as
+ * indicated in bits 2:0 (7 causes all of them to timeout).
+ */
+ mmr_image |= ((unsigned long)1 <<
+ UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
}
}
@@ -481,8 +547,7 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
stat->requestee, stat->onetlb, stat->alltlb,
stat->s_retry, stat->d_retry, stat->ptc_i);
seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
- uv_read_global_mmr64(uv_blade_to_pnode
- (uv_cpu_to_blade_id(cpu)),
+ uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
stat->sflush, stat->dflush,
stat->retriesok, stat->nomsg,
@@ -616,16 +681,18 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
* finish the initialization of the per-blade control structures
*/
static void __init
-uv_table_bases_finish(int blade, int node, int cur_cpu,
+uv_table_bases_finish(int blade,
struct bau_control *bau_tablesp,
struct bau_desc *adp)
{
struct bau_control *bcp;
- int i;
+ int cpu;
- for (i = cur_cpu; i < cur_cpu + uv_blade_nr_possible_cpus(blade); i++) {
- bcp = (struct bau_control *)&per_cpu(bau_control, i);
+ for_each_present_cpu(cpu) {
+ if (blade != uv_cpu_to_blade_id(cpu))
+ continue;
+ bcp = (struct bau_control *)&per_cpu(bau_control, cpu);
bcp->bau_msg_head = bau_tablesp->va_queue_first;
bcp->va_queue_first = bau_tablesp->va_queue_first;
bcp->va_queue_last = bau_tablesp->va_queue_last;
@@ -648,11 +715,10 @@ uv_activation_descriptor_init(int node, int pnode)
struct bau_desc *adp;
struct bau_desc *ad2;
- adp = (struct bau_desc *)
- kmalloc_node(16384, GFP_KERNEL, node);
+ adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node);
BUG_ON(!adp);
- pa = __pa((unsigned long)adp);
+ pa = uv_gpa(adp); /* need the real nasid*/
n = pa >> uv_nshift;
m = pa & uv_mmask;
@@ -666,8 +732,12 @@ uv_activation_descriptor_init(int node, int pnode)
for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
memset(ad2, 0, sizeof(struct bau_desc));
ad2->header.sw_ack_flag = 1;
- ad2->header.base_dest_nodeid =
- uv_blade_to_pnode(uv_cpu_to_blade_id(0));
+ /*
+ * base_dest_nodeid is the first node in the partition, so
+ * the bit map will indicate partition-relative node numbers.
+ * note that base_dest_nodeid is actually a nasid.
+ */
+ ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
ad2->header.command = UV_NET_ENDPOINT_INTD;
ad2->header.int_both = 1;
/*
@@ -685,6 +755,8 @@ static struct bau_payload_queue_entry * __init
uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
{
struct bau_payload_queue_entry *pqp;
+ unsigned long pa;
+ int pn;
char *cp;
pqp = (struct bau_payload_queue_entry *) kmalloc_node(
@@ -695,10 +767,14 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
cp = (char *)pqp + 31;
pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
bau_tablesp->va_queue_first = pqp;
+ /*
+ * need the pnode of where the memory was really allocated
+ */
+ pa = uv_gpa(pqp);
+ pn = pa >> uv_nshift;
uv_write_global_mmr64(pnode,
UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
- ((unsigned long)pnode <<
- UV_PAYLOADQ_PNODE_SHIFT) |
+ ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
uv_physnodeaddr(pqp));
uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
uv_physnodeaddr(pqp));
@@ -714,8 +790,9 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
/*
* Initialization of each UV blade's structures
*/
-static int __init uv_init_blade(int blade, int node, int cur_cpu)
+static int __init uv_init_blade(int blade)
{
+ int node;
int pnode;
unsigned long pa;
unsigned long apicid;
@@ -723,16 +800,17 @@ static int __init uv_init_blade(int blade, int node, int cur_cpu)
struct bau_payload_queue_entry *pqp;
struct bau_control *bau_tablesp;
+ node = blade_to_first_node(blade);
bau_tablesp = uv_table_bases_init(blade, node);
pnode = uv_blade_to_pnode(blade);
adp = uv_activation_descriptor_init(node, pnode);
pqp = uv_payload_queue_init(node, pnode, bau_tablesp);
- uv_table_bases_finish(blade, node, cur_cpu, bau_tablesp, adp);
+ uv_table_bases_finish(blade, bau_tablesp, adp);
/*
* the below initialization can't be in firmware because the
* messaging IRQ will be determined by the OS
*/
- apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+ apicid = blade_to_first_apicid(blade);
pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
if ((pa & 0xff) != UV_BAU_MESSAGE) {
uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -747,40 +825,34 @@ static int __init uv_init_blade(int blade, int node, int cur_cpu)
static int __init uv_bau_init(void)
{
int blade;
- int node;
int nblades;
- int last_blade;
int cur_cpu;
if (!is_uv_system())
return 0;
+ for_each_possible_cpu(cur_cpu)
+ alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
+ GFP_KERNEL, cpu_to_node(cur_cpu));
+
uv_bau_retry_limit = 1;
uv_nshift = uv_hub_info->n_val;
uv_mmask = (1UL << uv_hub_info->n_val) - 1;
- nblades = 0;
- last_blade = -1;
- cur_cpu = 0;
- for_each_online_node(node) {
- blade = uv_node_to_blade_id(node);
- if (blade == last_blade)
- continue;
- last_blade = blade;
- nblades++;
- }
+ nblades = uv_num_possible_blades();
+
uv_bau_table_bases = (struct bau_control **)
kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
BUG_ON(!uv_bau_table_bases);
- last_blade = -1;
- for_each_online_node(node) {
- blade = uv_node_to_blade_id(node);
- if (blade == last_blade)
- continue;
- last_blade = blade;
- uv_init_blade(blade, node, cur_cpu);
- cur_cpu += uv_blade_nr_possible_cpus(blade);
- }
+ uv_partition_base_pnode = 0x7fffffff;
+ for (blade = 0; blade < nblades; blade++)
+ if (uv_blade_nr_possible_cpus(blade) &&
+ (uv_blade_to_pnode(blade) < uv_partition_base_pnode))
+ uv_partition_base_pnode = uv_blade_to_pnode(blade);
+ for (blade = 0; blade < nblades; blade++)
+ if (uv_blade_nr_possible_cpus(blade))
+ uv_init_blade(blade);
+
alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
uv_enable_timeouts();
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7a567ebe636..d57de05dc43 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -699,7 +699,7 @@ static struct clocksource clocksource_tsc;
* code, which is necessary to support wrapping clocksources like pm
* timer.
*/
-static cycle_t read_tsc(void)
+static cycle_t read_tsc(struct clocksource *cs)
{
cycle_t ret = (cycle_t)get_cycles();
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
index 67f9b9dbf80..36afb98675a 100644
--- a/arch/x86/kernel/uv_sysfs.c
+++ b/arch/x86/kernel/uv_sysfs.c
@@ -21,6 +21,7 @@
#include <linux/sysdev.h>
#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
struct kobject *sgi_uv_kobj;
@@ -47,6 +48,9 @@ static int __init sgi_uv_sysfs_init(void)
{
unsigned long ret;
+ if (!is_uv_system())
+ return -ENODEV;
+
if (!sgi_uv_kobj)
sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
if (!sgi_uv_kobj) {
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
index 2ffb6c53326..583f11d5c48 100644
--- a/arch/x86/kernel/uv_time.c
+++ b/arch/x86/kernel/uv_time.c
@@ -29,7 +29,7 @@
#define RTC_NAME "sgi_rtc"
-static cycle_t uv_read_rtc(void);
+static cycle_t uv_read_rtc(struct clocksource *cs);
static int uv_rtc_next_event(unsigned long, struct clock_event_device *);
static void uv_rtc_timer_setup(enum clock_event_mode,
struct clock_event_device *);
@@ -123,7 +123,7 @@ static int uv_setup_intr(int cpu, u64 expires)
/* Initialize comparator value */
uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires);
- return (expires < uv_read_rtc() && !uv_intr_pending(pnode));
+ return (expires < uv_read_rtc(NULL) && !uv_intr_pending(pnode));
}
/*
@@ -256,7 +256,7 @@ static int uv_rtc_unset_timer(int cpu)
spin_lock_irqsave(&head->lock, flags);
- if (head->next_cpu == bcpu && uv_read_rtc() >= *t)
+ if (head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t)
rc = 1;
*t = ULLONG_MAX;
@@ -278,7 +278,7 @@ static int uv_rtc_unset_timer(int cpu)
/*
* Read the RTC.
*/
-static cycle_t uv_read_rtc(void)
+static cycle_t uv_read_rtc(struct clocksource *cs)
{
return (cycle_t)uv_read_local_mmr(UVH_RTC);
}
@@ -291,7 +291,7 @@ static int uv_rtc_next_event(unsigned long delta,
{
int ced_cpu = cpumask_first(ced->cpumask);
- return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc());
+ return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL));
}
/*
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index d303369a7ba..2b3eb82efee 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -283,7 +283,7 @@ void __devinit vmi_time_ap_init(void)
/** vmi clocksource */
static struct clocksource clocksource_vmi;
-static cycle_t read_real_cycles(void)
+static cycle_t read_real_cycles(struct clocksource *cs)
{
cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
return max(ret, clocksource_vmi.cycle_last);
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 2b54fe002e9..c5ee17e8c6d 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -89,7 +89,7 @@ int save_i387_xstate(void __user *buf)
if (!used_math())
return 0;
- clear_used_math(); /* trigger finit */
+
if (task_thread_info(tsk)->status & TS_USEDFPU) {
/*
* Start with clearing the user buffer. This will present a
@@ -114,6 +114,8 @@ int save_i387_xstate(void __user *buf)
return -1;
}
+ clear_used_math(); /* trigger finit */
+
if (task_thread_info(tsk)->status & TS_XSAVE) {
struct _fpstate __user *fx = buf;
struct _xstate __user *x = buf;
@@ -324,7 +326,7 @@ void __ref xsave_cntxt_init(void)
}
/*
- * for now OS knows only about FP/SSE
+ * Support only the state known to OS.
*/
pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
xsave_init();