aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu/perf_event.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r-- arch/x86/kernel/cpu/perf_event.c 279
1 files changed, 197 insertions, 82 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6774c17a557..2879ecdaac4 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
continue;
if (event->attr.config1 & ~er->valid_mask)
return -EINVAL;
+ /* Check if the extra MSRs can be safely accessed */
+ if (!er->extra_msr_access)
+ return -ENXIO;
reg->idx = er->idx;
reg->config = event->attr.config1;
@@ -180,8 +183,9 @@ static void release_pmc_hardware(void) {}
static bool check_hw_exists(void)
{
- u64 val, val_new = ~0;
- int i, reg, ret = 0;
+ u64 val, val_fail, val_new= ~0;
+ int i, reg, reg_fail, ret = 0;
+ int bios_fail = 0;
/*
* Check to see if the BIOS enabled any of the counters, if so
@@ -192,8 +196,11 @@ static bool check_hw_exists(void)
ret = rdmsrl_safe(reg, &val);
if (ret)
goto msr_fail;
- if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
- goto bios_fail;
+ if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
+ bios_fail = 1;
+ val_fail = val;
+ reg_fail = reg;
+ }
}
if (x86_pmu.num_counters_fixed) {
@@ -202,8 +209,11 @@ static bool check_hw_exists(void)
if (ret)
goto msr_fail;
for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
- if (val & (0x03 << i*4))
- goto bios_fail;
+ if (val & (0x03 << i*4)) {
+ bios_fail = 1;
+ val_fail = val;
+ reg_fail = reg;
+ }
}
}
@@ -221,14 +231,13 @@ static bool check_hw_exists(void)
if (ret || val != val_new)
goto msr_fail;
- return true;
-
-bios_fail:
/*
* We still allow the PMU driver to operate:
*/
- printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
- printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
+ if (bios_fail) {
+ printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
+ printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail);
+ }
return true;
@@ -297,15 +306,6 @@ int x86_setup_perfctr(struct perf_event *event)
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
local64_set(&hwc->period_left, hwc->sample_period);
- } else {
- /*
- * If we have a PMU initialized but no APIC
- * interrupts, we cannot sample hardware
- * events (user-space has to fall back and
- * sample via a hrtimer based software event):
- */
- if (!x86_pmu.apic)
- return -EOPNOTSUPP;
}
if (attr->type == PERF_TYPE_RAW)
@@ -397,7 +397,8 @@ int x86_pmu_hw_config(struct perf_event *event)
* check that PEBS LBR correction does not conflict with
* whatever the user is asking with attr->branch_sample_type
*/
- if (event->attr.precise_ip > 1) {
+ if (event->attr.precise_ip > 1 &&
+ x86_pmu.intel_cap.pebs_format < 2) {
u64 *br_type = &event->attr.branch_sample_type;
if (has_branch_stack(event)) {
@@ -562,7 +563,7 @@ struct sched_state {
struct perf_sched {
int max_weight;
int max_events;
- struct event_constraint **constraints;
+ struct perf_event **events;
struct sched_state state;
int saved_states;
struct sched_state saved[SCHED_STATES_MAX];
@@ -571,7 +572,7 @@ struct perf_sched {
/*
* Initialize interator that runs through all events and counters.
*/
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
int num, int wmin, int wmax)
{
int idx;
@@ -579,10 +580,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
memset(sched, 0, sizeof(*sched));
sched->max_events = num;
sched->max_weight = wmax;
- sched->constraints = c;
+ sched->events = events;
for (idx = 0; idx < num; idx++) {
- if (c[idx]->weight == wmin)
+ if (events[idx]->hw.constraint->weight == wmin)
break;
}
@@ -629,8 +630,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
if (sched->state.event >= sched->max_events)
return false;
- c = sched->constraints[sched->state.event];
-
+ c = sched->events[sched->state.event]->hw.constraint;
/* Prefer fixed purpose counters */
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
idx = INTEL_PMC_IDX_FIXED;
@@ -688,7 +688,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
if (sched->state.weight > sched->max_weight)
return false;
}
- c = sched->constraints[sched->state.event];
+ c = sched->events[sched->state.event]->hw.constraint;
} while (c->weight != sched->state.weight);
sched->state.counter = 0; /* start with first counter */
@@ -699,12 +699,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
/*
* Assign a counter for each event.
*/
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
int wmin, int wmax, int *assign)
{
struct perf_sched sched;
- perf_sched_init(&sched, constraints, n, wmin, wmax);
+ perf_sched_init(&sched, events, n, wmin, wmax);
do {
if (!perf_sched_find_counter(&sched))
@@ -715,19 +715,23 @@ int perf_assign_events(struct event_constraint **constraints, int n,
return sched.state.unassigned;
}
+EXPORT_SYMBOL_GPL(perf_assign_events);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
- struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
+ struct event_constraint *c;
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ struct perf_event *e;
int i, wmin, wmax, num = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+ hwc = &cpuc->event_list[i]->hw;
c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
- constraints[i] = c;
+ hwc->constraint = c;
+
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
}
@@ -737,7 +741,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
*/
for (i = 0; i < n; i++) {
hwc = &cpuc->event_list[i]->hw;
- c = constraints[i];
+ c = hwc->constraint;
/* never assigned */
if (hwc->idx == -1)
@@ -758,16 +762,35 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
/* slow path */
if (i != n)
- num = perf_assign_events(constraints, n, wmin, wmax, assign);
+ num = perf_assign_events(cpuc->event_list, n, wmin,
+ wmax, assign);
/*
+ * Mark the event as committed, so we do not put_constraint()
+ * in case new events are added and fail scheduling.
+ */
+ if (!num && assign) {
+ for (i = 0; i < n; i++) {
+ e = cpuc->event_list[i];
+ e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+ }
+ }
+ /*
* scheduling failed or is just a simulation,
* free resources if necessary
*/
if (!assign || num) {
for (i = 0; i < n; i++) {
+ e = cpuc->event_list[i];
+ /*
+ * do not put_constraint() on committed events,
+ * because they are good to go
+ */
+ if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
+ continue;
+
if (x86_pmu.put_event_constraints)
- x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+ x86_pmu.put_event_constraints(cpuc, e);
}
}
return num ? -EINVAL : 0;
@@ -829,7 +852,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
} else {
hwc->config_base = x86_pmu_config_addr(hwc->idx);
hwc->event_base = x86_pmu_event_addr(hwc->idx);
- hwc->event_base_rdpmc = hwc->idx;
+ hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
}
}
@@ -864,7 +887,6 @@ static void x86_pmu_enable(struct pmu *pmu)
* hw_perf_group_sched_in() or x86_pmu_enable()
*
* step1: save events moving to new counters
- * step2: reprogram moved events into new counters
*/
for (i = 0; i < n_running; i++) {
event = cpuc->event_list[i];
@@ -890,6 +912,9 @@ static void x86_pmu_enable(struct pmu *pmu)
x86_pmu_stop(event, PERF_EF_UPDATE);
}
+ /*
+ * step2: reprogram moved events into new counters
+ */
for (i = 0; i < cpuc->n_events; i++) {
event = cpuc->event_list[i];
hwc = &event->hw;
@@ -1015,7 +1040,7 @@ static int x86_pmu_add(struct perf_event *event, int flags)
/*
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be performed
- * at commit time (->commit_txn) as a whole
+ * at commit time (->commit_txn) as a whole.
*/
if (cpuc->group_flag & PERF_EVENT_TXN)
goto done_collect;
@@ -1030,6 +1055,10 @@ static int x86_pmu_add(struct perf_event *event, int flags)
memcpy(cpuc->assign, assign, n*sizeof(int));
done_collect:
+ /*
+ * Commit the collect_events() state. See x86_pmu_del() and
+ * x86_pmu_*_txn().
+ */
cpuc->n_events = n;
cpuc->n_added += n - n0;
cpuc->n_txn += n - n0;
@@ -1147,28 +1176,46 @@ static void x86_pmu_del(struct perf_event *event, int flags)
int i;
/*
+ * event is descheduled
+ */
+ event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
+
+ /*
* If we're called during a txn, we don't need to do anything.
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
+ *
+ * XXX assumes any ->del() called during a TXN will only be on
+ * an event added during that same TXN.
*/
if (cpuc->group_flag & PERF_EVENT_TXN)
return;
+ /*
+ * Not a TXN, therefore cleanup properly.
+ */
x86_pmu_stop(event, PERF_EF_UPDATE);
for (i = 0; i < cpuc->n_events; i++) {
- if (event == cpuc->event_list[i]) {
+ if (event == cpuc->event_list[i])
+ break;
+ }
- if (x86_pmu.put_event_constraints)
- x86_pmu.put_event_constraints(cpuc, event);
+ if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */
+ return;
- while (++i < cpuc->n_events)
- cpuc->event_list[i-1] = cpuc->event_list[i];
+ /* If we have a newly added event; make sure to decrease n_added. */
+ if (i >= cpuc->n_events - cpuc->n_added)
+ --cpuc->n_added;
+
+ if (x86_pmu.put_event_constraints)
+ x86_pmu.put_event_constraints(cpuc, event);
+
+ /* Delete the array entry. */
+ while (++i < cpuc->n_events)
+ cpuc->event_list[i-1] = cpuc->event_list[i];
+ --cpuc->n_events;
- --cpuc->n_events;
- break;
- }
- }
perf_event_update_userpage(event);
}
@@ -1240,19 +1287,30 @@ void perf_events_lapic_init(void)
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
-static int __kprobes
+static int
perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
+ u64 start_clock;
+ u64 finish_clock;
+ int ret;
+
if (!atomic_read(&active_events))
return NMI_DONE;
- return x86_pmu.handle_irq(regs);
+ start_clock = sched_clock();
+ ret = x86_pmu.handle_irq(regs);
+ finish_clock = sched_clock();
+
+ perf_sample_event_took(finish_clock - start_clock);
+
+ return ret;
}
+NOKPROBE_SYMBOL(perf_event_nmi_handler);
struct event_constraint emptyconstraint;
struct event_constraint unconstrained;
-static int __cpuinit
+static int
x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
@@ -1303,6 +1361,15 @@ static void __init pmu_check_apic(void)
x86_pmu.apic = 0;
pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
pr_info("no hardware sampling interrupt available.\n");
+
+ /*
+ * If we have a PMU initialized but no APIC
+ * interrupts, we cannot sample hardware
+ * events (user-space has to fall back and
+ * sample via a hrtimer based software event):
+ */
+ pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
}
static struct attribute_group x86_pmu_format_group = {
@@ -1310,20 +1377,22 @@ static struct attribute_group x86_pmu_format_group = {
.attrs = NULL,
};
-struct perf_pmu_events_attr {
- struct device_attribute attr;
- u64 id;
-};
-
/*
* Remove all undefined events (x86_pmu.event_map(id) == 0)
* out of events_attr attributes.
*/
static void __init filter_events(struct attribute **attrs)
{
+ struct device_attribute *d;
+ struct perf_pmu_events_attr *pmu_attr;
int i, j;
for (i = 0; attrs[i]; i++) {
+ d = (struct device_attribute *)attrs[i];
+ pmu_attr = container_of(d, struct perf_pmu_events_attr, attr);
+ /* str trumps id */
+ if (pmu_attr->event_str)
+ continue;
if (x86_pmu.event_map(i))
continue;
@@ -1335,24 +1404,45 @@ static void __init filter_events(struct attribute **attrs)
}
}
-static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+/* Merge two pointer arrays */
+static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
+{
+ struct attribute **new;
+ int j, i;
+
+ for (j = 0; a[j]; j++)
+ ;
+ for (i = 0; b[i]; i++)
+ j++;
+ j++;
+
+ new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ j = 0;
+ for (i = 0; a[i]; i++)
+ new[j++] = a[i];
+ for (i = 0; b[i]; i++)
+ new[j++] = b[i];
+ new[j] = NULL;
+
+ return new;
+}
+
+ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
char *page)
{
struct perf_pmu_events_attr *pmu_attr = \
container_of(attr, struct perf_pmu_events_attr, attr);
-
u64 config = x86_pmu.event_map(pmu_attr->id);
- return x86_pmu.events_sysfs_show(page, config);
-}
-#define EVENT_VAR(_id) event_attr_##_id
-#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+ /* string trumps id */
+ if (pmu_attr->event_str)
+ return sprintf(page, "%s", pmu_attr->event_str);
-#define EVENT_ATTR(_name, _id) \
-static struct perf_pmu_events_attr EVENT_VAR(_id) = { \
- .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \
- .id = PERF_COUNT_HW_##_id, \
-};
+ return x86_pmu.events_sysfs_show(page, config);
+}
EVENT_ATTR(cpu-cycles, CPU_CYCLES );
EVENT_ATTR(instructions, INSTRUCTIONS );
@@ -1440,7 +1530,7 @@ static int __init init_hw_perf_events(void)
err = amd_pmu_init();
break;
default:
- return 0;
+ err = -ENOTSUPP;
}
if (err != 0) {
pr_cont("no PMU driver, software events only.\n");
@@ -1455,6 +1545,8 @@ static int __init init_hw_perf_events(void)
pr_cont("%s PMU driver.\n", x86_pmu.name);
+ x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
quirk->func();
@@ -1466,16 +1558,26 @@ static int __init init_hw_perf_events(void)
unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
- 0, x86_pmu.num_counters, 0);
+ 0, x86_pmu.num_counters, 0, 0);
- x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
+ if (x86_pmu.event_attrs)
+ x86_pmu_events_group.attrs = x86_pmu.event_attrs;
+
if (!x86_pmu.events_sysfs_show)
x86_pmu_events_group.attrs = &empty_attrs;
else
filter_events(x86_pmu_events_group.attrs);
+ if (x86_pmu.cpu_events) {
+ struct attribute **tmp;
+
+ tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events);
+ if (!WARN_ON(!tmp))
+ x86_pmu_events_group.attrs = tmp;
+ }
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
@@ -1517,7 +1619,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
{
__this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
/*
- * Truncate the collected events.
+ * Truncate collected array by the number of events added in this
+ * transaction. See x86_pmu_add() and x86_pmu_*_txn().
*/
__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
@@ -1528,6 +1631,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
* Commit group events scheduling transaction
* Perform the group schedulability test as a whole
* Return 0 if success
+ *
+ * Does not cancel the transaction on failure; expects the caller to do this.
*/
static int x86_pmu_commit_txn(struct pmu *pmu)
{
@@ -1743,9 +1848,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
if (ret)
return ret;
+ if (x86_pmu.attr_rdpmc_broken)
+ return -ENOTSUPP;
+
if (!!val != !!x86_pmu.attr_rdpmc) {
x86_pmu.attr_rdpmc = !!val;
- smp_call_function(change_rdpmc, (void *)val, 1);
+ on_each_cpu(change_rdpmc, (void *)val, 1);
}
return count;
@@ -1806,20 +1914,27 @@ static struct pmu pmu = {
void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
{
- userpg->cap_usr_time = 0;
- userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
+ struct cyc2ns_data *data;
+
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
userpg->pmc_width = x86_pmu.cntval_bits;
- if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ if (!sched_clock_stable())
return;
- if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
- return;
+ data = cyc2ns_read_begin();
+
+ userpg->cap_user_time = 1;
+ userpg->time_mult = data->cyc2ns_mul;
+ userpg->time_shift = data->cyc2ns_shift;
+ userpg->time_offset = data->cyc2ns_offset - now;
+
+ userpg->cap_user_time_zero = 1;
+ userpg->time_zero = data->cyc2ns_offset;
- userpg->cap_usr_time = 1;
- userpg->time_mult = this_cpu_read(cyc2ns);
- userpg->time_shift = CYC2NS_SCALE_FACTOR;
- userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+ cyc2ns_read_end(data);
}
/*
@@ -1911,7 +2026,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
frame.return_address = 0;
bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
- if (bytes != sizeof(frame))
+ if (bytes != 0)
break;
if (!valid_user_frame(fp, sizeof(frame)))
@@ -1963,7 +2078,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
frame.return_address = 0;
bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
- if (bytes != sizeof(frame))
+ if (bytes != 0)
break;
if (!valid_user_frame(fp, sizeof(frame)))