diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/kernel/alternative.c | 71 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 280 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_amd.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_ds.c | 13 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_p4.c | 292 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 6 | ||||
-rw-r--r-- | arch/x86/kernel/ftrace.c | 63 | ||||
-rw-r--r-- | arch/x86/kernel/irq.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/irq_work.c | 30 | ||||
-rw-r--r-- | arch/x86/kernel/irqinit.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/jump_label.c | 50 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/module.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 6 |
16 files changed, 579 insertions, 278 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index fedf32a8c3e..7490bf8d145 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -34,7 +34,8 @@ GCOV_PROFILE_paravirt.o := n obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o ldt.o dumpstack.o -obj-y += setup.o x86_init.o i8259.o irqinit.o +obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o +obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f65ab8b014c..a36bb90aef5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; -static void *text_poke_early(void *addr, const void *opcode, size_t len); +void *text_poke_early(void *addr, const void *opcode, size_t len); /* Replace instructions with better alternatives for this CPU type. This runs before SMP is initialized to avoid SMP problems with @@ -522,7 +522,7 @@ void __init alternative_instructions(void) * instructions. And on the local CPU you need to be protected again NMI or MCE * handlers seeing an inconsistent instruction while you patch. */ -static void *__init_or_module text_poke_early(void *addr, const void *opcode, +void *__init_or_module text_poke_early(void *addr, const void *opcode, size_t len) { unsigned long flags; @@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) tpp.len = len; atomic_set(&stop_machine_first, 1); wrote_text = 0; - stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); + /* Use __stop_machine() because the caller already got online_cpus. */ + __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); return addr; } +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) + +unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; + +void __init arch_init_ideal_nop5(void) +{ + extern const unsigned char ftrace_test_p6nop[]; + extern const unsigned char ftrace_test_nop5[]; + extern const unsigned char ftrace_test_jmp[]; + int faulted = 0; + + /* + * There is no good nop for all x86 archs. + * We will default to using the P6_NOP5, but first we + * will test to make sure that the nop will actually + * work on this CPU. If it faults, we will then + * go to a lesser efficient 5 byte nop. If that fails + * we then just use a jmp as our nop. This isn't the most + * efficient nop, but we can not use a multi part nop + * since we would then risk being preempted in the middle + * of that nop, and if we enabled tracing then, it might + * cause a system crash. + * + * TODO: check the cpuid to determine the best nop. + */ + asm volatile ( + "ftrace_test_jmp:" + "jmp ftrace_test_p6nop\n" + "nop\n" + "nop\n" + "nop\n" /* 2 byte jmp + 3 bytes */ + "ftrace_test_p6nop:" + P6_NOP5 + "jmp 1f\n" + "ftrace_test_nop5:" + ".byte 0x66,0x66,0x66,0x66,0x90\n" + "1:" + ".section .fixup, \"ax\"\n" + "2: movl $1, %0\n" + " jmp ftrace_test_nop5\n" + "3: movl $2, %0\n" + " jmp 1b\n" + ".previous\n" + _ASM_EXTABLE(ftrace_test_p6nop, 2b) + _ASM_EXTABLE(ftrace_test_nop5, 3b) + : "=r"(faulted) : "0" (faulted)); + + switch (faulted) { + case 0: + pr_info("converting mcount calls to 0f 1f 44 00 00\n"); + memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5); + break; + case 1: + pr_info("converting mcount calls to 66 66 66 66 90\n"); + memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5); + break; + case 2: + pr_info("converting mcount calls to jmp . + 5\n"); + memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5); + break; + } + +} +#endif diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 03a5b0385ad..fe73c1844a9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -531,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event) /* * Setup the hardware configuration for a given attr_type */ -static int __hw_perf_event_init(struct perf_event *event) +static int __x86_pmu_event_init(struct perf_event *event) { int err; @@ -584,7 +584,7 @@ static void x86_pmu_disable_all(void) } } -void hw_perf_disable(void) +static void x86_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -619,7 +619,7 @@ static void x86_pmu_enable_all(int added) } } -static const struct pmu pmu; +static struct pmu pmu; static inline int is_x86_event(struct perf_event *event) { @@ -801,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, hwc->last_tag == cpuc->tags[i]; } -static int x86_pmu_start(struct perf_event *event); -static void x86_pmu_stop(struct perf_event *event); +static void x86_pmu_start(struct perf_event *event, int flags); +static void x86_pmu_stop(struct perf_event *event, int flags); -void hw_perf_enable(void) +static void x86_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct perf_event *event; @@ -840,7 +840,14 @@ void hw_perf_enable(void) match_prev_assignment(hwc, cpuc, i)) continue; - x86_pmu_stop(event); + /* + * Ensure we don't accidentally enable a stopped + * counter simply because we rescheduled. + */ + if (hwc->state & PERF_HES_STOPPED) + hwc->state |= PERF_HES_ARCH; + + x86_pmu_stop(event, PERF_EF_UPDATE); } for (i = 0; i < cpuc->n_events; i++) { @@ -852,7 +859,10 @@ void hw_perf_enable(void) else if (i < n_running) continue; - x86_pmu_start(event); + if (hwc->state & PERF_HES_ARCH) + continue; + + x86_pmu_start(event, PERF_EF_RELOAD); } cpuc->n_added = 0; perf_events_lapic_init(); @@ -953,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event) } /* - * activate a single event + * Add a single event to the PMU. * * The event is added to the group of enabled events * but only if it can be scehduled with existing events. - * - * Called with PMU disabled. If successful and return value 1, - * then guaranteed to call perf_enable() and hw_perf_enable() */ -static int x86_pmu_enable(struct perf_event *event) +static int x86_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc; @@ -970,58 +977,67 @@ static int x86_pmu_enable(struct perf_event *event) hwc = &event->hw; + perf_pmu_disable(event->pmu); n0 = cpuc->n_events; - n = collect_events(cpuc, event, false); - if (n < 0) - return n; + ret = n = collect_events(cpuc, event, false); + if (ret < 0) + goto out; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed - * at commit time(->commit_txn) as a whole + * at commit time (->commit_txn) as a whole */ if (cpuc->group_flag & PERF_EVENT_TXN) - goto out; + goto done_collect; ret = x86_pmu.schedule_events(cpuc, n, assign); if (ret) - return ret; + goto out; /* * copy new assignment, now we know it is possible * will be used by hw_perf_enable() */ memcpy(cpuc->assign, assign, n*sizeof(int)); -out: +done_collect: cpuc->n_events = n; cpuc->n_added += n - n0; cpuc->n_txn += n - n0; - return 0; + ret = 0; +out: + perf_pmu_enable(event->pmu); + return ret; } -static int x86_pmu_start(struct perf_event *event) +static void x86_pmu_start(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx = event->hw.idx; - if (idx == -1) - return -EAGAIN; + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + x86_perf_event_set_period(event); + } + + event->hw.state = 0; - x86_perf_event_set_period(event); cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); __set_bit(idx, cpuc->running); x86_pmu.enable(event); perf_event_update_userpage(event); - - return 0; -} - -static void x86_pmu_unthrottle(struct perf_event *event) -{ - int ret = x86_pmu_start(event); - WARN_ON_ONCE(ret); } void perf_event_print_debug(void) @@ -1078,27 +1094,29 @@ void perf_event_print_debug(void) local_irq_restore(flags); } -static void x86_pmu_stop(struct perf_event *event) +static void x86_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - if (!__test_and_clear_bit(idx, cpuc->active_mask)) - return; - - x86_pmu.disable(event); - - /* - * Drain the remaining delta count out of a event - * that we are disabling: - */ - x86_perf_event_update(event); + if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { + x86_pmu.disable(event); + cpuc->events[hwc->idx] = NULL; + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + } - cpuc->events[idx] = NULL; + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + x86_perf_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } } -static void x86_pmu_disable(struct perf_event *event) +static void x86_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int i; @@ -1111,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event) if (cpuc->group_flag & PERF_EVENT_TXN) return; - x86_pmu_stop(event); + x86_pmu_stop(event, PERF_EF_UPDATE); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { @@ -1134,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) struct perf_sample_data data; struct cpu_hw_events *cpuc; struct perf_event *event; - struct hw_perf_event *hwc; int idx, handled = 0; u64 val; @@ -1155,7 +1172,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) } event = cpuc->events[idx]; - hwc = &event->hw; val = x86_perf_event_update(event); if (val & (1ULL << (x86_pmu.cntval_bits - 1))) @@ -1171,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) continue; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } if (handled) @@ -1180,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) return handled; } -void smp_perf_pending_interrupt(struct pt_regs *regs) -{ - irq_enter(); - ack_APIC_irq(); - inc_irq_stat(apic_pending_irqs); - perf_event_do_pending(); - irq_exit(); -} - -void set_perf_event_pending(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - if (!x86_pmu.apic || !x86_pmu_initialized()) - return; - - apic->send_IPI_self(LOCAL_PENDING_VECTOR); -#endif -} - void perf_events_lapic_init(void) { if (!x86_pmu.apic || !x86_pmu_initialized()) @@ -1388,7 +1385,6 @@ void __init init_hw_perf_events(void) x86_pmu.num_counters = X86_PMC_MAX_GENERIC; } x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; - perf_max_events = x86_pmu.num_counters; if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", @@ -1424,6 +1420,7 @@ void __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); + perf_pmu_register(&pmu); perf_cpu_notifier(x86_pmu_notifier); } @@ -1437,10 +1434,11 @@ static inline void x86_pmu_read(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void x86_pmu_start_txn(const struct pmu *pmu) +static void x86_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + perf_pmu_disable(pmu); cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1450,7 +1448,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -static void x86_pmu_cancel_txn(const struct pmu *pmu) +static void x86_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1460,6 +1458,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) */ cpuc->n_added -= cpuc->n_txn; cpuc->n_events -= cpuc->n_txn; + perf_pmu_enable(pmu); } /* @@ -1467,7 +1466,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -static int x86_pmu_commit_txn(const struct pmu *pmu) +static int x86_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int assign[X86_PMC_IDX_MAX]; @@ -1489,22 +1488,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) memcpy(cpuc->assign, assign, n*sizeof(int)); cpuc->group_flag &= ~PERF_EVENT_TXN; - + perf_pmu_enable(pmu); return 0; } -static const struct pmu pmu = { - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, - .start = x86_pmu_start, - .stop = x86_pmu_stop, - .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, - .start_txn = x86_pmu_start_txn, - .cancel_txn = x86_pmu_cancel_txn, - .commit_txn = x86_pmu_commit_txn, -}; - /* * validate that we can schedule this event */ @@ -1579,12 +1566,22 @@ out: return ret; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +int x86_pmu_event_init(struct perf_event *event) { - const struct pmu *tmp; + struct pmu *tmp; int err; - err = __hw_perf_event_init(event); + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + + err = __x86_pmu_event_init(event); if (!err) { /* * we temporarily connect event to its pmu @@ -1604,26 +1601,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) if (err) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; } -/* - * callchain support - */ +static struct pmu pmu = { + .pmu_enable = x86_pmu_enable, + .pmu_disable = x86_pmu_disable, -static inline -void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} + .event_init = x86_pmu_event_init, -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); + .add = x86_pmu_add, + .del = x86_pmu_del, + .start = x86_pmu_start, + .stop = x86_pmu_stop, + .read = x86_pmu_read, + .start_txn = x86_pmu_start_txn, + .cancel_txn = x86_pmu_cancel_txn, + .commit_txn = x86_pmu_commit_txn, +}; + +/* + * callchain support + */ static void backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) @@ -1645,7 +1647,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops backtrace_ops = { @@ -1656,11 +1658,15 @@ static const struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack_bp, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->ip); + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } + + perf_callchain_store(entry, regs->ip); dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } @@ -1689,7 +1695,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (fp < compat_ptr(regs->sp)) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = compat_ptr(frame.next_frame); } return 1; @@ -1702,19 +1708,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) } #endif -static void -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stack_frame frame; const void __user *fp; - if (!user_mode(regs)) - regs = task_pt_regs(current); + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } fp = (void __user *)regs->bp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->ip); + perf_callchain_store(entry, regs->ip); if (perf_callchain_user32(regs, entry)) return; @@ -1731,52 +1738,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) if ((unsigned long)fp < regs->sp) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = frame.next_frame; } } -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry; - - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* TODO: We don't support guest os callchain now */ - return NULL; - } - - if (in_nmi()) - entry = &__get_cpu_var(pmc_nmi_entry); - else - entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} - unsigned long perf_instruction_pointer(struct pt_regs *regs) { unsigned long ip; diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index c2897b7b4a3..46d58448c3a 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids [ C(DTLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ + [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0, @@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids [ C(ITLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ - [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ + [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ee05c90012d..c8f5c088cad 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) struct cpu_hw_events *cpuc; int bit, loops; u64 status; - int handled = 0; + int handled; perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); intel_pmu_disable_all(); - intel_pmu_drain_bts_buffer(); + handled = intel_pmu_drain_bts_buffer(); status = intel_pmu_get_status(); if (!status) { intel_pmu_enable_all(0); - return 0; + return handled; } loops = 0; @@ -763,7 +763,7 @@ again: data.period = event->hw.last_period; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 18018d1311c..4977f9c400e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void) update_debugctlmsr(debugctlmsr); } -static void intel_pmu_drain_bts_buffer(void) +static int intel_pmu_drain_bts_buffer(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void) struct pt_regs regs; if (!event) - return; + return 0; if (!ds) - return; + return 0; at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; top = (struct bts_record *)(unsigned long)ds->bts_index; if (top <= at) - return; + return 0; ds->bts_index = ds->bts_buffer_base; @@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void) perf_prepare_sample(&header, &data, event, ®s); if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) - return; + return 1; for (; at < top; at++) { data.ip = at->from; @@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void) /* There's new data available. */ event->hw.interrupts++; event->pending_kill = POLL_IN; + return 1; } /* @@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.flags &= ~PERF_EFLAGS_EXACT; if (perf_event_overflow(event, 1, &data, ®s)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 24901517399..81400b93e69 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -18,6 +18,8 @@ struct p4_event_bind { unsigned int opcode; /* Event code and ESCR selector */ unsigned int escr_msr[2]; /* ESCR MSR for this event */ + unsigned int escr_emask; /* valid ESCR EventMask bits */ + unsigned int shared; /* event is shared across threads */ char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ }; @@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = { [P4_EVENT_TC_DELIVER_MODE] = { .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID), + .shared = 1, .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_BPU_FETCH_REQUEST] = { .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_ITLB_REFERENCE] = { .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) | + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_MEMORY_CANCEL] = { .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) | + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_MEMORY_COMPLETE] = { .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) | + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_LOAD_PORT_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_STORE_PORT_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_MOB_LOAD_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_PAGE_WALK_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS), + .shared = 1, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BSQ_CACHE_REFERENCE] = { .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_IOQ_ALLOCATION] = { .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH), .cntr = { {2, -1, -1}, {3, -1, -1} }, }, [P4_EVENT_FSB_DATA_ACTIVITY] = { .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER), + .shared = 1, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2), .cntr = { {0, -1, -1}, {1, -1, -1} }, }, [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, RE |