diff options
Diffstat (limited to 'arch/x86')
32 files changed, 564 insertions, 360 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 97b023f0cbe..f8130a77065 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -28,7 +28,6 @@ config X86 select HAVE_OPROFILE select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS - select HAVE_IRQ_WORK select HAVE_IOREMAP_PROT select HAVE_KPROBES select HAVE_MEMBLOCK @@ -40,10 +39,12 @@ config X86 select HAVE_DMA_CONTIGUOUS if !SWIOTLB select HAVE_KRETPROBES select HAVE_OPTPROBES + select HAVE_KPROBES_ON_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FENTRY if X86_64 select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_FP_TEST diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 102ff7cb3e4..142c4ceff11 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -207,7 +207,7 @@ sysexit_from_sys_call: testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jnz ia32_ret_from_sys_call TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS(CLBR_NONE) movl %eax,%esi /* second arg, syscall return value */ cmpl $-MAX_ERRNO,%eax /* is it an error ? */ jbe 1f @@ -217,7 +217,7 @@ sysexit_from_sys_call: call __audit_syscall_exit movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jz \exit diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 2d9075e863a..93fe929d1ce 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -167,6 +167,7 @@ #define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ #define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ #define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -309,6 +310,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) +#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB) #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9a25b522d37..86cb51e1ca9 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -44,7 +44,6 @@ #ifdef CONFIG_DYNAMIC_FTRACE #define ARCH_SUPPORTS_FTRACE_OPS 1 -#define ARCH_SUPPORTS_FTRACE_SAVE_REGS #endif #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ecdfee60ee4..f4076af1f4e 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -3,6 +3,90 @@ #include <uapi/asm/mce.h> +/* + * Machine Check support for x86 + */ + +/* MCG_CAP register defines */ +#define MCG_BANKCNT_MASK 0xff /* Number of Banks */ +#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */ +#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ +#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ +#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ +#define MCG_EXT_CNT_SHIFT 16 +#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) +#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ + +/* MCG_STATUS register defines */ +#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ +#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ +#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ + +/* MCi_STATUS register defines */ +#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ +#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ +#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ +#define MCI_STATUS_EN (1ULL<<60) /* error enabled */ +#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ +#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ +#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ +#define MCI_STATUS_AR (1ULL<<55) /* Action required */ +#define MCACOD 0xffff /* MCA Error Code */ + +/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ +#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ +#define MCACOD_SCRUBMSK 0xfff0 +#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ +#define MCACOD_DATA 0x0134 /* Data Load */ +#define MCACOD_INSTR 0x0150 /* Instruction Fetch */ + +/* MCi_MISC register defines */ +#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f) +#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7) +#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */ +#define MCI_MISC_ADDR_LINEAR 1 /* linear address */ +#define MCI_MISC_ADDR_PHYS 2 /* physical address */ +#define MCI_MISC_ADDR_MEM 3 /* memory address */ +#define MCI_MISC_ADDR_GENERIC 7 /* generic */ + +/* CTL2 register defines */ +#define MCI_CTL2_CMCI_EN (1ULL << 30) +#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL + +#define MCJ_CTX_MASK 3 +#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) +#define MCJ_CTX_RANDOM 0 /* inject context: random */ +#define MCJ_CTX_PROCESS 0x1 /* inject context: process */ +#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ +#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ +#define MCJ_EXCEPTION 0x8 /* raise as exception */ +#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */ + +#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ + +/* Software defined banks */ +#define MCE_EXTENDED_BANK 128 +#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0) +#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) + +#define MCE_LOG_LEN 32 +#define MCE_LOG_SIGNATURE "MACHINECHECK" + +/* + * This structure contains all data related to the MCE log. Also + * carries a signature to make it easier to find from external + * debugging tools. Each entry is only valid when its finished flag + * is set. + */ +struct mce_log { + char signature[12]; /* "MACHINECHECK" */ + unsigned len; /* = MCE_LOG_LEN */ + unsigned next; + unsigned flags; + unsigned recordlen; /* length of struct mce */ + struct mce entry[MCE_LOG_LEN]; +}; struct mca_config { bool dont_log_ce; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 4fabcdf1cfa..57cb6340221 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -29,8 +29,13 @@ #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL -#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40) -#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41) +#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) +#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) +#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) + +#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37 +#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \ + (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT) #define AMD64_EVENTSEL_EVENT \ (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) @@ -46,8 +51,12 @@ #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) +#define AMD64_RAW_EVENT_MASK_NB \ + (AMD64_EVENTSEL_EVENT | \ + ARCH_PERFMON_EVENTSEL_UMASK) #define AMD64_NUM_COUNTERS 4 #define AMD64_NUM_COUNTERS_CORE 6 +#define AMD64_NUM_COUNTERS_NB 4 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5199db2923d..1c1a955e67c 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -142,6 +142,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; } +static inline unsigned long pud_pfn(pud_t pud) +{ + return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + #define pte_page(pte) pfn_to_page(pte_pfn(pte)) static inline int pmd_large(pmd_t pte) diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 58c829871c3..a0eab85ce7b 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -4,66 +4,6 @@ #include <linux/types.h> #include <asm/ioctls.h> -/* - * Machine Check support for x86 - */ - -/* MCG_CAP register defines */ -#define MCG_BANKCNT_MASK 0xff /* Number of Banks */ -#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */ -#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ -#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ -#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ -#define MCG_EXT_CNT_SHIFT 16 -#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) -#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ - -/* MCG_STATUS register defines */ -#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ -#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ -#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ - -/* MCi_STATUS register defines */ -#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ -#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ -#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ -#define MCI_STATUS_EN (1ULL<<60) /* error enabled */ -#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ -#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ -#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ -#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ -#define MCI_STATUS_AR (1ULL<<55) /* Action required */ -#define MCACOD 0xffff /* MCA Error Code */ - -/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ -#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ -#define MCACOD_SCRUBMSK 0xfff0 -#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ -#define MCACOD_DATA 0x0134 /* Data Load */ -#define MCACOD_INSTR 0x0150 /* Instruction Fetch */ - -/* MCi_MISC register defines */ -#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f) -#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7) -#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */ -#define MCI_MISC_ADDR_LINEAR 1 /* linear address */ -#define MCI_MISC_ADDR_PHYS 2 /* physical address */ -#define MCI_MISC_ADDR_MEM 3 /* memory address */ -#define MCI_MISC_ADDR_GENERIC 7 /* generic */ - -/* CTL2 register defines */ -#define MCI_CTL2_CMCI_EN (1ULL << 30) -#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL - -#define MCJ_CTX_MASK 3 -#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) -#define MCJ_CTX_RANDOM 0 /* inject context: random */ -#define MCJ_CTX_PROCESS 0x1 /* inject context: process */ -#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ -#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ -#define MCJ_EXCEPTION 0x8 /* raise as exception */ -#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */ - /* Fields are zero when not available */ struct mce { __u64 status; @@ -87,35 +27,8 @@ struct mce { __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ }; -/* - * This structure contains all data related to the MCE log. Also - * carries a signature to make it easier to find from external - * debugging tools. Each entry is only valid when its finished flag - * is set. - */ - -#define MCE_LOG_LEN 32 - -struct mce_log { - char signature[12]; /* "MACHINECHECK" */ - unsigned len; /* = MCE_LOG_LEN */ - unsigned next; - unsigned flags; - unsigned recordlen; /* length of struct mce */ - struct mce entry[MCE_LOG_LEN]; -}; - -#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ - -#define MCE_LOG_SIGNATURE "MACHINECHECK" - #define MCE_GET_RECORD_LEN _IOR('M', 1, int) #define MCE_GET_LOG_LEN _IOR('M', 2, int) #define MCE_GETCLEAR_FLAGS _IOR('M', 3, int) -/* Software defined banks */ -#define MCE_EXTENDED_BANK 128 -#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 -#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) - #endif /* _UAPI_ASM_X86_MCE_H */ diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 433a59fb1a7..075a4025559 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -194,6 +194,8 @@ /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 #define MSR_F15H_PERF_CTR 0xc0010201 +#define MSR_F15H_NB_PERF_CTL 0xc0010240 +#define MSR_F15H_NB_PERF_CTR 0xc0010241 /* Fam 10h MSRs */ #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 34e923a5376..ac3b3d00283 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -65,8 +65,7 @@ obj-$(CONFIG_X86_TSC) += trace_clock.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o -obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_OPTPROBES) += kprobes-opt.o +obj-y += kprobes/ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index e03a1e180e8..562a76d433c 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -20,18 +20,19 @@ static int set_x2apic_phys_mode(char *arg) } early_param("x2apic_phys", set_x2apic_phys_mode); -static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static bool x2apic_fadt_phys(void) { - if (x2apic_phys) - return x2apic_enabled(); - else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) && - x2apic_enabled()) { + if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { printk(KERN_DEBUG "System requires x2apic physical mode\n"); - return 1; + return true; } - else - return 0; + return false; +} + +static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys()); } static void @@ -82,7 +83,7 @@ static void init_x2apic_ldr(void) static int x2apic_phys_probe(void) { - if (x2apic_mode && x2apic_phys) + if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys())) return 1; return apic == &apic_x2apic_phys; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index d65464e4350..8d7012b7f40 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -899,6 +899,7 @@ static void apm_cpu_idle(void) static int use_apm_idle; /* = 0 */ static unsigned int last_jiffies; /* = 0 */ static unsigned int last_stime; /* = 0 */ + cputime_t stime; int apm_idle_done = 0; unsigned int jiffies_since_last_check = jiffies - last_jiffies; @@ -906,23 +907,23 @@ static void apm_cpu_idle(void) WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012"); recalc: + task_cputime(current, NULL, &stime); if (jiffies_since_last_check > IDLE_CALC_LIMIT) { use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; } else if (jiffies_since_last_check > idle_period) { unsigned int idle_percentage; - idle_percentage = current->stime - last_stime; + idle_percentage = stime - last_stime; idle_percentage *= 100; idle_percentage /= jiffies_since_last_check; use_apm_idle = (idle_percentage > idle_threshold); if (apm_info.forbid_idle) use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; } + last_jiffies = jiffies; + last_stime = stime; + bucket = IDLE_LEAKY_MAX; while (!need_resched()) { diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index fe9edec6698..84c1309c4c0 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -298,8 +298,7 @@ struct _cache_attr { unsigned int); }; -#ifdef CONFIG_AMD_NB - +#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS) /* * L3 cache descriptors */ @@ -524,9 +523,9 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count, static struct _cache_attr subcaches = __ATTR(subcaches, 0644, show_subcaches, store_subcaches); -#else /* CONFIG_AMD_NB */ +#else #define amd_init_l3_cache(x, y) -#endif /* CONFIG_AMD_NB */ +#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */ static int __cpuinit cpuid4_cache_lookup_regs(int index, diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 6774c17a557..bf0f01aea99 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -829,7 +829,7 @@ static inline void x86_assign_hw_event(struct perf_event *event, } else { hwc->config_base = x86_pmu_config_addr(hwc->idx); hwc->event_base = x86_pmu_event_addr(hwc->idx); - hwc->event_base_rdpmc = hwc->idx; + hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx); } } @@ -1310,11 +1310,6 @@ static struct attribute_group x86_pmu_format_group = { .attrs = NULL, }; -struct perf_pmu_events_attr { - struct device_attribute attr; - u64 id; -}; - /* * Remove all undefined events (x86_pmu.event_map(id) == 0) * out of events_attr attributes. @@ -1348,11 +1343,9 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr -#define EVENT_ATTR(_name, _id) \ -static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ - .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ - .id = PERF_COUNT_HW_##_id, \ -}; +#define EVENT_ATTR(_name, _id) \ + PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \ + events_sysfs_show) EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 115c1ea9774..7f5c75c2afd 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -325,6 +325,8 @@ struct x86_pmu { int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; unsigned perfctr; + int (*addr_offset)(int index, bool eventsel); + int (*rdpmc_index)(int index); u64 (*event_map)(int); int max_events; int num_counters; @@ -446,28 +448,21 @@ extern u64 __read_mostly hw_cache_extra_regs u64 x86_perf_event_update(struct perf_event *event); -static inline int x86_pmu_addr_offset(int index) +static inline unsigned int x86_pmu_config_addr(int index) { - int offset; - - /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */ - alternative_io(ASM_NOP2, - "shll $1, %%eax", - X86_FEATURE_PERFCTR_CORE, - "=a" (offset), - "a" (index)); - - return offset; + return x86_pmu.eventsel + (x86_pmu.addr_offset ? + x86_pmu.addr_offset(index, true) : index); } -static inline unsigned int x86_pmu_config_addr(int index) +static inline unsigned int x86_pmu_event_addr(int index) { - return x86_pmu.eventsel + x86_pmu_addr_offset(index); + return x86_pmu.perfctr + (x86_pmu.addr_offset ? + x86_pmu.addr_offset(index, false) : index); } -static inline unsigned int x86_pmu_event_addr(int index) +static inline int x86_pmu_rdpmc_index(int index) { - return x86_pmu.perfctr + x86_pmu_addr_offset(index); + return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index; } int x86_setup_perfctr(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index c93bc4e813a..dfdab42aed2 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -132,21 +132,102 @@ static u64 amd_pmu_event_map(int hw_event) return amd_perfmon_event_map[hw_event]; } -static int amd_pmu_hw_config(struct perf_event *event) +static struct event_constraint *amd_nb_event_constraint; + +/* + * Previously calculated offsets + */ +static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly; +static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; +static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly; + +/* + * Legacy CPUs: + * 4 counters starting at 0xc0010000 each offset by 1 + * + * CPUs with core performance counter extensions: + * 6 counters starting at 0xc0010200 each offset by 2 + * + * CPUs with north bridge performance counter extensions: + * 4 additional counters starting at 0xc0010240 each offset by 2 + * (indexed right above either one of the above core counters) + */ +static inline int amd_pmu_addr_offset(int index, bool eventsel) { - int ret; + int offset, first, base; - /* pass precise event sampling to ibs: */ - if (event->attr.precise_ip && get_ibs_caps()) - return -ENOENT; + if (!index) + return index; + + if (eventsel) + offset = event_offsets[index]; + else + offset = count_offsets[index]; + + if (offset) + return offset; + + if (amd_nb_event_constraint && + test_bit(index, amd_nb_event_constraint->idxmsk)) { + /* + * calculate the offset of NB counters with respect to + * base eventsel or perfctr + */ + + first = find_first_bit(amd_nb_event_constraint->idxmsk, + X86_PMC_IDX_MAX); + + if (eventsel) + base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel; + else + base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr; + + offset = base + ((index - first) << 1); + } else if (!cpu_has_perfctr_core) + offset = index; + else + offset = index << 1; + + if (eventsel) + event_offsets[index] = offset; + else + count_offsets[index] = offset; + + return offset; +} + +static inline int amd_pmu_rdpmc_index(int index) +{ + int ret, first; + + if (!index) + return index; + + ret = rdpmc_indexes[index]; - ret = x86_pmu_hw_config(event); if (ret) return ret; - if (has_branch_stack(event)) - return -EOPNOTSUPP; + if (amd_nb_event_constraint && + test_bit(index, amd_nb_event_constraint->idxmsk)) { + /* + * according to the mnual, ECX value of the NB counters is + * the index of the NB counter (0, 1, 2 or 3) plus 6 + */ + + first = find_first_bit(amd_nb_event_constraint->idxmsk, + X86_PMC_IDX_MAX); + ret = index - first + 6; + } else + ret = index; + + rdpmc_indexes[index] = ret; + + return ret; +} +static int amd_core_hw_config(struct perf_event *event) +{ if (event->attr.exclude_host && event->attr.exclude_guest) /* * When HO == GO == 1 the hardware treats that as GO == HO == 0 @@ -156,14 +237,37 @@ static int amd_pmu_hw_config(struct perf_event *event) event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_OS); else if (event->attr.exclude_host) - event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY; + event->hw.config |= AMD64_EVENTSEL_GUESTONLY; else if (event->attr.exclude_guest) - event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY; + event->hw.config |= AMD64_EVENTSEL_HOSTONLY; + + return 0; +} + +/* + * NB counters do not support the following event select bits: + * Host/Guest only + * Counter mask + * Invert counter mask + * Edge detect + * OS/User mode + */ +static int amd_nb_hw_config(struct perf_event *event) +{ + /* for NB, we only allow system wide counting mode */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + if (event->attr.exclude_user || event->attr.exclude_kernel || + event->attr.exclude_host || event->attr.exclude_guest) + return -EINVAL; - if (event->attr.type != PERF_TYPE_RAW) - return 0; + event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | + ARCH_PERFMON_EVENTSEL_OS); - event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; + if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB | + ARCH_PERFMON_EVENTSEL_INT)) + return -EINVAL; return 0; } @@ -181,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc) return (hwc->config & 0xe0) == 0xe0; } +static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc) +{ + return amd_nb_event_constraint && amd_is_nb_event(hwc); +} + static inline int amd_has_nb(struct cpu_hw_events *cpuc) { struct amd_nb *nb = cpuc->amd_nb; @@ -188,20 +297,37 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc) return nb && nb->nb_id != -1; } -static void amd_put_event_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) +static int amd_pmu_hw_config(struct perf_event *event) +{ + int ret; + + /* pass precise event sampling to ibs: */ + if (event->attr.precise_ip && get_ibs_caps()) + return -ENOE |