diff options
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 279 |
1 files changed, 197 insertions, 82 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 6774c17a557..2879ecdaac4 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) continue; if (event->attr.config1 & ~er->valid_mask) return -EINVAL; + /* Check if the extra msrs can be safely accessed*/ + if (!er->extra_msr_access) + return -ENXIO; reg->idx = er->idx; reg->config = event->attr.config1; @@ -180,8 +183,9 @@ static void release_pmc_hardware(void) {} static bool check_hw_exists(void) { - u64 val, val_new = ~0; - int i, reg, ret = 0; + u64 val, val_fail, val_new= ~0; + int i, reg, reg_fail, ret = 0; + int bios_fail = 0; /* * Check to see if the BIOS enabled any of the counters, if so @@ -192,8 +196,11 @@ static bool check_hw_exists(void) ret = rdmsrl_safe(reg, &val); if (ret) goto msr_fail; - if (val & ARCH_PERFMON_EVENTSEL_ENABLE) - goto bios_fail; + if (val & ARCH_PERFMON_EVENTSEL_ENABLE) { + bios_fail = 1; + val_fail = val; + reg_fail = reg; + } } if (x86_pmu.num_counters_fixed) { @@ -202,8 +209,11 @@ static bool check_hw_exists(void) if (ret) goto msr_fail; for (i = 0; i < x86_pmu.num_counters_fixed; i++) { - if (val & (0x03 << i*4)) - goto bios_fail; + if (val & (0x03 << i*4)) { + bios_fail = 1; + val_fail = val; + reg_fail = reg; + } } } @@ -221,14 +231,13 @@ static bool check_hw_exists(void) if (ret || val != val_new) goto msr_fail; - return true; - -bios_fail: /* * We still allow the PMU driver to operate: */ - printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n"); - printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val); + if (bios_fail) { + printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n"); + printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail); + } return true; @@ -297,15 +306,6 @@ int x86_setup_perfctr(struct perf_event *event) hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); - } else { - /* - * If we have a PMU initialized but no APIC - * interrupts, we cannot sample hardware - * events (user-space has to fall back and - * sample via a hrtimer based software event): - */ - if (!x86_pmu.apic) - return -EOPNOTSUPP; } if (attr->type == PERF_TYPE_RAW) @@ -397,7 +397,8 @@ int x86_pmu_hw_config(struct perf_event *event) * check that PEBS LBR correction does not conflict with * whatever the user is asking with attr->branch_sample_type */ - if (event->attr.precise_ip > 1) { + if (event->attr.precise_ip > 1 && + x86_pmu.intel_cap.pebs_format < 2) { u64 *br_type = &event->attr.branch_sample_type; if (has_branch_stack(event)) { @@ -562,7 +563,7 @@ struct sched_state { struct perf_sched { int max_weight; int max_events; - struct event_constraint **constraints; + struct perf_event **events; struct sched_state state; int saved_states; struct sched_state saved[SCHED_STATES_MAX]; @@ -571,7 +572,7 @@ struct perf_sched { /* * Initialize interator that runs through all events and counters. */ -static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c, +static void perf_sched_init(struct perf_sched *sched, struct perf_event **events, int num, int wmin, int wmax) { int idx; @@ -579,10 +580,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint ** memset(sched, 0, sizeof(*sched)); sched->max_events = num; sched->max_weight = wmax; - sched->constraints = c; + sched->events = events; for (idx = 0; idx < num; idx++) { - if (c[idx]->weight == wmin) + if (events[idx]->hw.constraint->weight == wmin) break; } @@ -629,8 +630,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) if (sched->state.event >= sched->max_events) return false; - c = sched->constraints[sched->state.event]; - + c = sched->events[sched->state.event]->hw.constraint; /* Prefer fixed purpose counters */ if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { idx = INTEL_PMC_IDX_FIXED; @@ -688,7 +688,7 @@ static bool perf_sched_next_event(struct perf_sched *sched) if (sched->state.weight > sched->max_weight) return false; } - c = sched->constraints[sched->state.event]; + c = sched->events[sched->state.event]->hw.constraint; } while (c->weight != sched->state.weight); sched->state.counter = 0; /* start with first counter */ @@ -699,12 +699,12 @@ static bool perf_sched_next_event(struct perf_sched *sched) /* * Assign a counter for each event. */ -int perf_assign_events(struct event_constraint **constraints, int n, +int perf_assign_events(struct perf_event **events, int n, int wmin, int wmax, int *assign) { struct perf_sched sched; - perf_sched_init(&sched, constraints, n, wmin, wmax); + perf_sched_init(&sched, events, n, wmin, wmax); do { if (!perf_sched_find_counter(&sched)) @@ -715,19 +715,23 @@ int perf_assign_events(struct event_constraint **constraints, int n, return sched.state.unassigned; } +EXPORT_SYMBOL_GPL(perf_assign_events); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { - struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; + struct event_constraint *c; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + struct perf_event *e; int i, wmin, wmax, num = 0; struct hw_perf_event *hwc; bitmap_zero(used_mask, X86_PMC_IDX_MAX); for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { + hwc = &cpuc->event_list[i]->hw; c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); - constraints[i] = c; + hwc->constraint = c; + wmin = min(wmin, c->weight); wmax = max(wmax, c->weight); } @@ -737,7 +741,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) */ for (i = 0; i < n; i++) { hwc = &cpuc->event_list[i]->hw; - c = constraints[i]; + c = hwc->constraint; /* never assigned */ if (hwc->idx == -1) @@ -758,16 +762,35 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) /* slow path */ if (i != n) - num = perf_assign_events(constraints, n, wmin, wmax, assign); + num = perf_assign_events(cpuc->event_list, n, wmin, + wmax, assign); /* + * Mark the event as committed, so we do not put_constraint() + * in case new events are added and fail scheduling. + */ + if (!num && assign) { + for (i = 0; i < n; i++) { + e = cpuc->event_list[i]; + e->hw.flags |= PERF_X86_EVENT_COMMITTED; + } + } + /* * scheduling failed or is just a simulation, * free resources if necessary */ if (!assign || num) { for (i = 0; i < n; i++) { + e = cpuc->event_list[i]; + /* + * do not put_constraint() on comitted events, + * because they are good to go + */ + if ((e->hw.flags & PERF_X86_EVENT_COMMITTED)) + continue; + if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); + x86_pmu.put_event_constraints(cpuc, e); } } return num ? -EINVAL : 0; @@ -829,7 +852,7 @@ static inline void x86_assign_hw_event(struct perf_event *event, } else { hwc->config_base = x86_pmu_config_addr(hwc->idx); hwc->event_base = x86_pmu_event_addr(hwc->idx); - hwc->event_base_rdpmc = hwc->idx; + hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx); } } @@ -864,7 +887,6 @@ static void x86_pmu_enable(struct pmu *pmu) * hw_perf_group_sched_in() or x86_pmu_enable() * * step1: save events moving to new counters - * step2: reprogram moved events into new counters */ for (i = 0; i < n_running; i++) { event = cpuc->event_list[i]; @@ -890,6 +912,9 @@ static void x86_pmu_enable(struct pmu *pmu) x86_pmu_stop(event, PERF_EF_UPDATE); } + /* + * step2: reprogram moved events into new counters + */ for (i = 0; i < cpuc->n_events; i++) { event = cpuc->event_list[i]; hwc = &event->hw; @@ -1015,7 +1040,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be performed - * at commit time (->commit_txn) as a whole + * at commit time (->commit_txn) as a whole. */ if (cpuc->group_flag & PERF_EVENT_TXN) goto done_collect; @@ -1030,6 +1055,10 @@ static int x86_pmu_add(struct perf_event *event, int flags) memcpy(cpuc->assign, assign, n*sizeof(int)); done_collect: + /* + * Commit the collect_events() state. See x86_pmu_del() and + * x86_pmu_*_txn(). + */ cpuc->n_events = n; cpuc->n_added += n - n0; cpuc->n_txn += n - n0; @@ -1147,28 +1176,46 @@ static void x86_pmu_del(struct perf_event *event, int flags) int i; /* + * event is descheduled + */ + event->hw.flags &= ~PERF_X86_EVENT_COMMITTED; + + /* * If we're called during a txn, we don't need to do anything. * The events never got scheduled and ->cancel_txn will truncate * the event_list. + * + * XXX assumes any ->del() called during a TXN will only be on + * an event added during that same TXN. */ if (cpuc->group_flag & PERF_EVENT_TXN) return; + /* + * Not a TXN, therefore cleanup properly. + */ x86_pmu_stop(event, PERF_EF_UPDATE); for (i = 0; i < cpuc->n_events; i++) { - if (event == cpuc->event_list[i]) { + if (event == cpuc->event_list[i]) + break; + } - if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(cpuc, event); + if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */ + return; - while (++i < cpuc->n_events) - cpuc->event_list[i-1] = cpuc->event_list[i]; + /* If we have a newly added event; make sure to decrease n_added. */ + if (i >= cpuc->n_events - cpuc->n_added) + --cpuc->n_added; + + if (x86_pmu.put_event_constraints) + x86_pmu.put_event_constraints(cpuc, event); + + /* Delete the array entry. */ + while (++i < cpuc->n_events) + cpuc->event_list[i-1] = cpuc->event_list[i]; + --cpuc->n_events; - --cpuc->n_events; - break; - } - } perf_event_update_userpage(event); } @@ -1240,19 +1287,30 @@ void perf_events_lapic_init(void) apic_write(APIC_LVTPC, APIC_DM_NMI); } -static int __kprobes +static int perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { + u64 start_clock; + u64 finish_clock; + int ret; + if (!atomic_read(&active_events)) return NMI_DONE; - return x86_pmu.handle_irq(regs); + start_clock = sched_clock(); + ret = x86_pmu.handle_irq(regs); + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + + return ret; } +NOKPROBE_SYMBOL(perf_event_nmi_handler); struct event_constraint emptyconstraint; struct event_constraint unconstrained; -static int __cpuinit +static int x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (long)hcpu; @@ -1303,6 +1361,15 @@ static void __init pmu_check_apic(void) x86_pmu.apic = 0; pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); pr_info("no hardware sampling interrupt available.\n"); + + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * events (user-space has to fall back and + * sample via a hrtimer based software event): + */ + pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + } static struct attribute_group x86_pmu_format_group = { @@ -1310,20 +1377,22 @@ static struct attribute_group x86_pmu_format_group = { .attrs = NULL, }; -struct perf_pmu_events_attr { - struct device_attribute attr; - u64 id; -}; - /* * Remove all undefined events (x86_pmu.event_map(id) == 0) * out of events_attr attributes. */ static void __init filter_events(struct attribute **attrs) { + struct device_attribute *d; + struct perf_pmu_events_attr *pmu_attr; int i, j; for (i = 0; attrs[i]; i++) { + d = (struct device_attribute *)attrs[i]; + pmu_attr = container_of(d, struct perf_pmu_events_attr, attr); + /* str trumps id */ + if (pmu_attr->event_str) + continue; if (x86_pmu.event_map(i)) continue; @@ -1335,24 +1404,45 @@ static void __init filter_events(struct attribute **attrs) } } -static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, +/* Merge two pointer arrays */ +static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b) +{ + struct attribute **new; + int j, i; + + for (j = 0; a[j]; j++) + ; + for (i = 0; b[i]; i++) + j++; + j++; + + new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL); + if (!new) + return NULL; + + j = 0; + for (i = 0; a[i]; i++) + new[j++] = a[i]; + for (i = 0; b[i]; i++) + new[j++] = b[i]; + new[j] = NULL; + + return new; +} + +ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { struct perf_pmu_events_attr *pmu_attr = \ container_of(attr, struct perf_pmu_events_attr, attr); - u64 config = x86_pmu.event_map(pmu_attr->id); - return x86_pmu.events_sysfs_show(page, config); -} -#define EVENT_VAR(_id) event_attr_##_id -#define EVENT_PTR(_id) &event_attr_##_id.attr.attr + /* string trumps id */ + if (pmu_attr->event_str) + return sprintf(page, "%s", pmu_attr->event_str); -#define EVENT_ATTR(_name, _id) \ -static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ - .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ - .id = PERF_COUNT_HW_##_id, \ -}; + return x86_pmu.events_sysfs_show(page, config); +} EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); @@ -1440,7 +1530,7 @@ static int __init init_hw_perf_events(void) err = amd_pmu_init(); break; default: - return 0; + err = -ENOTSUPP; } if (err != 0) { pr_cont("no PMU driver, software events only.\n"); @@ -1455,6 +1545,8 @@ static int __init init_hw_perf_events(void) pr_cont("%s PMU driver.\n", x86_pmu.name); + x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ + for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) quirk->func(); @@ -1466,16 +1558,26 @@ static int __init init_hw_perf_events(void) unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, - 0, x86_pmu.num_counters, 0); + 0, x86_pmu.num_counters, 0, 0); - x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ x86_pmu_format_group.attrs = x86_pmu.format_attrs; + if (x86_pmu.event_attrs) + x86_pmu_events_group.attrs = x86_pmu.event_attrs; + if (!x86_pmu.events_sysfs_show) x86_pmu_events_group.attrs = &empty_attrs; else filter_events(x86_pmu_events_group.attrs); + if (x86_pmu.cpu_events) { + struct attribute **tmp; + + tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events); + if (!WARN_ON(!tmp)) + x86_pmu_events_group.attrs = tmp; + } + pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.cntval_bits); pr_info("... generic registers: %d\n", x86_pmu.num_counters); @@ -1517,7 +1619,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) { __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); /* - * Truncate the collected events. + * Truncate collected array by the number of events added in this + * transaction. See x86_pmu_add() and x86_pmu_*_txn(). */ __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); @@ -1528,6 +1631,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) * Commit group events scheduling transaction * Perform the group schedulability test as a whole * Return 0 if success + * + * Does not cancel the transaction on failure; expects the caller to do this. */ static int x86_pmu_commit_txn(struct pmu *pmu) { @@ -1743,9 +1848,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev, if (ret) return ret; + if (x86_pmu.attr_rdpmc_broken) + return -ENOTSUPP; + if (!!val != !!x86_pmu.attr_rdpmc) { x86_pmu.attr_rdpmc = !!val; - smp_call_function(change_rdpmc, (void *)val, 1); + on_each_cpu(change_rdpmc, (void *)val, 1); } return count; @@ -1806,20 +1914,27 @@ static struct pmu pmu = { void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { - userpg->cap_usr_time = 0; - userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; + struct cyc2ns_data *data; + + userpg->cap_user_time = 0; + userpg->cap_user_time_zero = 0; + userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; userpg->pmc_width = x86_pmu.cntval_bits; - if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + if (!sched_clock_stable()) return; - if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) - return; + data = cyc2ns_read_begin(); + + userpg->cap_user_time = 1; + userpg->time_mult = data->cyc2ns_mul; + userpg->time_shift = data->cyc2ns_shift; + userpg->time_offset = data->cyc2ns_offset - now; + + userpg->cap_user_time_zero = 1; + userpg->time_zero = data->cyc2ns_offset; - userpg->cap_usr_time = 1; - userpg->time_mult = this_cpu_read(cyc2ns); - userpg->time_shift = CYC2NS_SCALE_FACTOR; - userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; + cyc2ns_read_end(data); } /* @@ -1911,7 +2026,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) frame.return_address = 0; bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); - if (bytes != sizeof(frame)) + if (bytes != 0) break; if (!valid_user_frame(fp, sizeof(frame))) @@ -1963,7 +2078,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) frame.return_address = 0; bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); - if (bytes != sizeof(frame)) + if (bytes != 0) break; if (!valid_user_frame(fp, sizeof(frame))) |
