author		Linus Torvalds <torvalds@linux-foundation.org>	2013-07-02 16:15:23 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-02 16:15:23 -0700
commit		f0bb4c0ab064a8aeeffbda1cee380151a594eaab (patch)
tree		14d55a89c5db455aa10ff9a96ca14c474a9c4d55 /arch/x86/kernel
parent		a4883ef6af5e513a1e8c2ab9aab721604aa3a4f5 (diff)
parent		983433b5812c5cf33a9008fa38c6f9b407fedb76 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"Kernel improvements:
- watchdog driver improvements by Li Zefan
- Power7 CPI stack events related improvements by Sukadev Bhattiprolu
- event multiplexing via hrtimers and other improvements by Stephane
Eranian
- kernel stack use optimization by Andrew Hunter
- AMD IOMMU uncore PMU support by Suravee Suthikulpanit
- NMI handling rate-limits by Dave Hansen
- various hw_breakpoint fixes by Oleg Nesterov
- hw_breakpoint overflow period sampling and related signal handling
fixes by Jiri Olsa
- Intel Haswell PMU support by Andi Kleen
Tooling improvements:
- Reset SIGTERM handler in workload child process, fix from David
Ahern.
- Makefile reorganization, prep work for Kconfig patches, from Jiri
Olsa.
- Add automated make test suite, from Jiri Olsa.
- Add --percent-limit option to 'top' and 'report', from Namhyung
Kim.
- Sorting improvements, from Namhyung Kim.
- Expand definition of sysfs format attribute, from Michael Ellerman.
Tooling fixes:
- 'perf tests' fixes from Jiri Olsa.
- Make Power7 CPI stack events available in sysfs, from Sukadev
Bhattiprolu.
- Handle death by SIGTERM in 'perf record', fix from David Ahern.
- Fix printing of perf_event_paranoid message, from David Ahern.
- Handle realloc failures in 'perf kvm', from David Ahern.
- Fix divide by 0 in variance, from David Ahern.
- Save parent pid in thread struct, from David Ahern.
- Handle JITed code in shared memory, from Andi Kleen.
- Fixes for 'perf diff', from Jiri Olsa.
- Remove some unused struct members, from Jiri Olsa.
- Add missing liblk.a dependency for python/perf.so, fix from Jiri
Olsa.
- Respect CROSS_COMPILE in liblk.a, from Rabin Vincent.
- No need to do locking when adding hists in perf report, only 'top'
needs that, from Namhyung Kim.
 - Fix alignment of symbol column in the hists browser (top,
 report) when -v is given, from Namhyung Kim.
- Fix 'perf top' -E option behavior, from Namhyung Kim.
- Fix bug in isupper() and islower(), from Sukadev Bhattiprolu.
- Fix compile errors in bp_signal 'perf test', from Sukadev
Bhattiprolu.
... and more things"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (102 commits)
perf/x86: Disable PEBS-LL in intel_pmu_pebs_disable()
perf/x86: Fix shared register mutual exclusion enforcement
perf/x86/intel: Support full width counting
x86: Add NMI duration tracepoints
perf: Drop sample rate when sampling is too slow
x86: Warn when NMI handlers take large amounts of time
hw_breakpoint: Introduce "struct bp_cpuinfo"
hw_breakpoint: Simplify *register_wide_hw_breakpoint()
hw_breakpoint: Introduce cpumask_of_bp()
hw_breakpoint: Simplify the "weight" usage in toggle_bp_slot() paths
hw_breakpoint: Simplify list/idx mess in toggle_bp_slot() paths
perf/x86/intel: Add mem-loads/stores support for Haswell
perf/x86/intel: Support Haswell/v4 LBR format
perf/x86/intel: Move NMI clearing to end of PMI handler
perf/x86/intel: Add Haswell PEBS support
perf/x86/intel: Add simple Haswell PMU support
perf/x86/intel: Add Haswell PEBS record support
perf/x86/intel: Fix sparse warning
perf/x86/amd: AMD IOMMU Performance Counter PERF uncore PMU implementation
perf/x86/amd: Add IOMMU Performance Counter resource management
...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--	arch/x86/kernel/cpu/Makefile                  |   4
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c              |  69
-rw-r--r--	arch/x86/kernel/cpu/perf_event.h              |  24
-rw-r--r--	arch/x86/kernel/cpu/perf_event_amd.c          |  34
-rw-r--r--	arch/x86/kernel/cpu/perf_event_amd_iommu.c    | 504
-rw-r--r--	arch/x86/kernel/cpu/perf_event_amd_iommu.h    |  40
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c        | 136
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_ds.c     | 178
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_lbr.c    |  69
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_uncore.c |  16
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_uncore.h |   4
-rw-r--r--	arch/x86/kernel/nmi.c                         |  36
-rw-r--r--	arch/x86/kernel/signal.c                      |  16
13 files changed, 1019 insertions(+), 111 deletions(-)
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index b0684e4a73a..47b56a7e99c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,11 +31,15 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
 
 ifdef CONFIG_PERF_EVENTS
 obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o perf_event_amd_uncore.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd_iommu.o
+endif
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o
 endif
+
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1025f3c99d2..9e581c5cf6d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -403,7 +403,8 @@ int x86_pmu_hw_config(struct perf_event *event)
 	 * check that PEBS LBR correction does not conflict with
 	 * whatever the user is asking with attr->branch_sample_type
 	 */
-	if (event->attr.precise_ip > 1) {
+	if (event->attr.precise_ip > 1 &&
+	    x86_pmu.intel_cap.pebs_format < 2) {
 		u64 *br_type = &event->attr.branch_sample_type;
 
 		if (has_branch_stack(event)) {
@@ -568,7 +569,7 @@ struct sched_state {
 struct perf_sched {
 	int			max_weight;
 	int			max_events;
-	struct event_constraint	**constraints;
+	struct perf_event	**events;
 	struct sched_state	state;
 	int			saved_states;
 	struct sched_state	saved[SCHED_STATES_MAX];
@@ -577,7 +578,7 @@ struct perf_sched {
 /*
  * Initialize interator that runs through all events and counters.
  */
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
 			    int num, int wmin, int wmax)
 {
 	int idx;
@@ -585,10 +586,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
 	memset(sched, 0, sizeof(*sched));
 	sched->max_events	= num;
 	sched->max_weight	= wmax;
-	sched->constraints	= c;
+	sched->events		= events;
 
 	for (idx = 0; idx < num; idx++) {
-		if (c[idx]->weight == wmin)
+		if (events[idx]->hw.constraint->weight == wmin)
 			break;
 	}
@@ -635,8 +636,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
 	if (sched->state.event >= sched->max_events)
 		return false;
 
-	c = sched->constraints[sched->state.event];
-
+	c = sched->events[sched->state.event]->hw.constraint;
 	/* Prefer fixed purpose counters */
 	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
 		idx = INTEL_PMC_IDX_FIXED;
@@ -694,7 +694,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 			if (sched->state.weight > sched->max_weight)
 				return false;
 		}
-		c = sched->constraints[sched->state.event];
+		c = sched->events[sched->state.event]->hw.constraint;
 	} while (c->weight != sched->state.weight);
 
 	sched->state.counter = 0;	/* start with first counter */
@@ -705,12 +705,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
 /*
  * Assign a counter for each event.
  */
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
 			int wmin, int wmax, int *assign)
 {
 	struct perf_sched sched;
 
-	perf_sched_init(&sched, constraints, n, wmin, wmax);
+	perf_sched_init(&sched, events, n, wmin, wmax);
 
 	do {
 		if (!perf_sched_find_counter(&sched))
@@ -724,16 +724,19 @@ int perf_assign_events(struct event_constraint **constraints, int n,
 
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
-	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
+	struct event_constraint *c;
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	struct perf_event *e;
 	int i, wmin, wmax, num = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+		hwc = &cpuc->event_list[i]->hw;
 		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
-		constraints[i] = c;
+		hwc->constraint = c;
+
 		wmin = min(wmin, c->weight);
 		wmax = max(wmax, c->weight);
 	}
@@ -743,7 +746,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 */
 	for (i = 0; i < n; i++) {
 		hwc = &cpuc->event_list[i]->hw;
-		c = constraints[i];
+		c = hwc->constraint;
 
 		/* never assigned */
 		if (hwc->idx == -1)
@@ -764,16 +767,35 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 
 	/* slow path */
 	if (i != n)
-		num = perf_assign_events(constraints, n, wmin, wmax, assign);
+		num = perf_assign_events(cpuc->event_list, n, wmin,
+				wmax, assign);
 
 	/*
+	 * Mark the event as committed, so we do not put_constraint()
+	 * in case new events are added and fail scheduling.
+	 */
+	if (!num && assign) {
+		for (i = 0; i < n; i++) {
+			e = cpuc->event_list[i];
+			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+		}
+	}
+	/*
 	 * scheduling failed or is just a simulation,
 	 * free resources if necessary
 	 */
 	if (!assign || num) {
 		for (i = 0; i < n; i++) {
+			e = cpuc->event_list[i];
+			/*
+			 * do not put_constraint() on comitted events,
+			 * because they are good to go
+			 */
+			if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
+				continue;
+
 			if (x86_pmu.put_event_constraints)
-				x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+				x86_pmu.put_event_constraints(cpuc, e);
 		}
 	}
 	return num ? -EINVAL : 0;
@@ -1153,6 +1175,11 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	int i;
 
 	/*
+	 * event is descheduled
+	 */
+	event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
+
+	/*
 	 * If we're called during a txn, we don't need to do anything.
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * the event_list.
@@ -1249,10 +1276,20 @@ void perf_events_lapic_init(void)
 static int __kprobes
 perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
 {
+	int ret;
+	u64 start_clock;
+	u64 finish_clock;
+
 	if (!atomic_read(&active_events))
 		return NMI_DONE;
 
-	return x86_pmu.handle_irq(regs);
+	start_clock = local_clock();
+	ret = x86_pmu.handle_irq(regs);
+	finish_clock = local_clock();
+
+	perf_sample_event_took(finish_clock - start_clock);
+
+	return ret;
 }
 
 struct event_constraint emptyconstraint;
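The hunk above makes the PMI handler time itself: local_clock() samples are taken around x86_pmu.handle_irq(), and the elapsed time is fed to perf_sample_event_took(), which can drop the maximum sample rate when NMIs run long (the "Drop sample rate when sampling is too slow" change in this pull). A minimal user-space analogue of the pattern, using clock_gettime() in place of local_clock(); the throttle hook, its 100us budget, and the helper names are assumptions for the sketch, not kernel code:

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    /* Hypothetical stand-in for perf_sample_event_took(): callers report
     * how long one handler invocation took, and the policy side can back
     * off the sampling rate when invocations run long. */
    static void sample_event_took(uint64_t ns)
    {
        if (ns > 100000) /* assumed 100us budget for the demo */
            fprintf(stderr, "handler took %llu ns, would throttle\n",
                    (unsigned long long)ns);
    }

    static uint64_t clock_ns(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
    }

    /* Bracket a handler with clock reads, as the PMI handler now does. */
    static int timed_handler(int (*handler)(void *), void *arg)
    {
        uint64_t start = clock_ns();
        int ret = handler(arg);

        sample_event_took(clock_ns() - start);
        return ret;
    }

    static int dummy_handler(void *arg)
    {
        (void)arg;
        return 0;
    }

    int main(void)
    {
        return timed_handler(dummy_handler, NULL);
    }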
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ba9aadfa683..97e557bc4c9 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -63,10 +63,12 @@ struct event_constraint {
 	int	flags;
 };
 /*
- * struct event_constraint flags
+ * struct hw_perf_event.flags flags
 */
 #define PERF_X86_EVENT_PEBS_LDLAT	0x1 /* ld+ldlat data address sampling */
 #define PERF_X86_EVENT_PEBS_ST		0x2 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW	0x4 /* haswell style st data sampling */
+#define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -227,11 +229,14 @@ struct cpu_hw_events {
  *  - inv
  *  - edge
  *  - cnt-mask
+ *  - in_tx
+ *  - in_tx_checkpointed
  *  The other filters are supported by fixed counters.
  *  The any-thread option is supported starting with v3.
  */
+#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
 
 /*
  * Constraint on the Event code + UMask
@@ -247,6 +252,11 @@ struct cpu_hw_events {
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
 
+/* DataLA version of store sampling without extra enable bit. */
+#define INTEL_PST_HSW_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -301,6 +311,11 @@ union perf_capabilities {
 		u64	pebs_arch_reg:1;
 		u64	pebs_format:4;
 		u64	smm_freeze:1;
+		/*
+		 * PMU supports separate counter range for writing
+		 * values > 32bit.
+		 */
+		u64	full_width_write:1;
 	};
 	u64	capabilities;
 };
@@ -375,6 +390,7 @@ struct x86_pmu {
 	struct event_constraint *event_constraints;
 	struct x86_pmu_quirk *quirks;
 	int		perfctr_second_write;
+	bool		late_ack;
 
 	/*
 	 * sysfs attrs
@@ -528,7 +544,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 
 void x86_pmu_enable_all(int added);
 
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
 			int wmin, int wmax, int *assign);
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
 
@@ -633,6 +649,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[];
 
 extern struct event_constraint intel_ivb_pebs_event_constraints[];
 
+extern struct event_constraint intel_hsw_pebs_event_constraints[];
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
 void intel_pmu_pebs_enable(struct perf_event *event);
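The FIXED_EVENT_FLAGS change widens the set of config bits a fixed-counter constraint filters on: once HSW_IN_TX and HSW_IN_TX_CHECKPOINTED are part of the filter mask, an event that sets either transaction bit no longer matches a plain fixed-counter constraint. A simplified sketch of the matching rule, modelled on the cmask/code comparison used by the x86 constraint code; RAW_EVENT_MASK is narrowed to 16 bits here for brevity, though the HSW_IN_TX bit positions match the kernel's:

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified event constraint: an event matches when the config bits
     * named by cmask equal the constraint's code. */
    struct constraint {
        uint64_t code;
        uint64_t cmask;
    };

    #define HSW_IN_TX               (1ULL << 32)
    #define HSW_IN_TX_CHECKPOINTED  (1ULL << 33)
    #define RAW_EVENT_MASK          0xFFFFULL /* simplified */

    static int constraint_matches(const struct constraint *c, uint64_t config)
    {
        return (config & c->cmask) == c->code;
    }

    int main(void)
    {
        /* Fixed-counter constraint for event 0x3c, now filtering the
         * transaction bits too, per the FIXED_EVENT_FLAGS hunk above. */
        struct constraint fixed = {
            .code  = 0x3c,
            .cmask = RAW_EVENT_MASK | HSW_IN_TX | HSW_IN_TX_CHECKPOINTED,
        };

        printf("plain 0x3c matches:  %d\n",
               constraint_matches(&fixed, 0x3c));
        printf("0x3c|IN_TX matches:  %d\n",
               constraint_matches(&fixed, 0x3c | HSW_IN_TX));
        return 0;
    }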
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 7e28d9467bb..4cbe03287b0 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -648,48 +648,48 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.cpu_dead		= amd_pmu_cpu_dead,
 };
 
-static int setup_event_constraints(void)
+static int __init amd_core_pmu_init(void)
 {
-	if (boot_cpu_data.x86 == 0x15)
+	if (!cpu_has_perfctr_core)
+		return 0;
+
+	switch (boot_cpu_data.x86) {
+	case 0x15:
+		pr_cont("Fam15h ");
 		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
-	return 0;
-}
+		break;
 
-static int setup_perfctr_core(void)
-{
-	if (!cpu_has_perfctr_core) {
-		WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h,
-		     KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!");
+	default:
+		pr_err("core perfctr but no constraints; unknown hardware!\n");
 		return -ENODEV;
 	}
 
-	WARN(x86_pmu.get_event_constraints == amd_get_event_constraints,
-	     KERN_ERR "hw perf events core counters need constraints handler!");
-
 	/*
 	 * If core performance counter extensions exists, we must use
 	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
-	 * x86_pmu_addr_offset().
+	 * amd_pmu_addr_offset().
 	 */
 	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
 	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
 	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
 
-	printk(KERN_INFO "perf: AMD core performance counters detected\n");
-
+	pr_cont("core perfctr, ");
 	return 0;
 }
 
 __init int amd_pmu_init(void)
 {
+	int ret;
+
 	/* Performance-monitoring supported from K7 and later: */
 	if (boot_cpu_data.x86 < 6)
 		return -ENODEV;
 
 	x86_pmu = amd_pmu;
 
-	setup_event_constraints();
-	setup_perfctr_core();
+	ret = amd_core_pmu_init();
+	if (ret)
+		return ret;
 
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
new file mode 100644
index 00000000000..0db655ef391
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
+
+#include "perf_event.h"
+#include "perf_event_amd_iommu.h"
+
+#define COUNTER_SHIFT		16
+
+#define _GET_BANK(ev)       ((u8)(ev->hw.extra_reg.reg >> 8))
+#define _GET_CNTR(ev)       ((u8)(ev->hw.extra_reg.reg))
+
+/* iommu pmu config masks */
+#define _GET_CSOURCE(ev)    ((ev->hw.config & 0xFFULL))
+#define _GET_DEVID(ev)      ((ev->hw.config >> 8)  & 0xFFFFULL)
+#define _GET_PASID(ev)      ((ev->hw.config >> 24) & 0xFFFFULL)
+#define _GET_DOMID(ev)      ((ev->hw.config >> 40) & 0xFFFFULL)
+#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config)  & 0xFFFFULL)
+#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
+#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
+
+static struct perf_amd_iommu __perf_iommu;
+
+struct perf_amd_iommu {
+	struct pmu pmu;
+	u8 max_banks;
+	u8 max_counters;
+	u64 cntr_assign_mask;
+	raw_spinlock_t lock;
+	const struct attribute_group *attr_groups[4];
+};
+
+#define format_group	attr_groups[0]
+#define cpumask_group	attr_groups[1]
+#define events_group	attr_groups[2]
+#define null_group	attr_groups[3]
+
+/*---------------------------------------------
+ * sysfs format attributes
+ *---------------------------------------------*/
+PMU_FORMAT_ATTR(csource, "config:0-7");
+PMU_FORMAT_ATTR(devid, "config:8-23");
+PMU_FORMAT_ATTR(pasid, "config:24-39");
+PMU_FORMAT_ATTR(domid, "config:40-55");
+PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
+PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
+PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
+
+static struct attribute *iommu_format_attrs[] = {
+	&format_attr_csource.attr,
+	&format_attr_devid.attr,
+	&format_attr_pasid.attr,
+	&format_attr_domid.attr,
+	&format_attr_devid_mask.attr,
+	&format_attr_pasid_mask.attr,
+	&format_attr_domid_mask.attr,
+	NULL,
+};
+
+static struct attribute_group amd_iommu_format_group = {
+	.name = "format",
+	.attrs = iommu_format_attrs,
+};
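Per the format attributes just defined, an event's config word packs csource into bits 0-7, devid into bits 8-23, pasid into 24-39 and domid into 40-55, with the corresponding match masks in config1. A small helper showing how a user of this PMU could pack those fields; the field layout is taken directly from the PMU_FORMAT_ATTR lines above, while the helper functions themselves are illustrative:

    #include <stdint.h>

    /* Pack the amd_iommu PMU config fields as the sysfs format attributes
     * describe them: csource config:0-7, devid config:8-23,
     * pasid config:24-39, domid config:40-55. */
    static uint64_t iommu_pmu_config(uint8_t csource, uint16_t devid,
                                     uint16_t pasid, uint16_t domid)
    {
        return (uint64_t)csource |
               ((uint64_t)devid << 8) |
               ((uint64_t)pasid << 24) |
               ((uint64_t)domid << 40);
    }

    /* The match masks live in config1: devid_mask config1:0-15,
     * pasid_mask config1:16-31, domid_mask config1:32-47. */
    static uint64_t iommu_pmu_config1(uint16_t devid_mask, uint16_t pasid_mask,
                                      uint16_t domid_mask)
    {
        return (uint64_t)devid_mask |
               ((uint64_t)pasid_mask << 16) |
               ((uint64_t)domid_mask << 32);
    }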
+
+/*---------------------------------------------
+ * sysfs events attributes
+ *---------------------------------------------*/
+struct amd_iommu_event_desc {
+	struct kobj_attribute attr;
+	const char *event;
+};
+
+static ssize_t _iommu_event_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *buf)
+{
+	struct amd_iommu_event_desc *event =
+		container_of(attr, struct amd_iommu_event_desc, attr);
+	return sprintf(buf, "%s\n", event->event);
+}
+
+#define AMD_IOMMU_EVENT_DESC(_name, _event)			\
+{								\
+	.attr  = __ATTR(_name, 0444, _iommu_event_show, NULL),	\
+	.event = _event,					\
+}
+
+static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
+	AMD_IOMMU_EVENT_DESC(mem_pass_untrans,       "csource=0x01"),
+	AMD_IOMMU_EVENT_DESC(mem_pass_pretrans,      "csource=0x02"),
+	AMD_IOMMU_EVENT_DESC(mem_pass_excl,          "csource=0x03"),
+	AMD_IOMMU_EVENT_DESC(mem_target_abort,       "csource=0x04"),
+	AMD_IOMMU_EVENT_DESC(mem_trans_total,        "csource=0x05"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit,  "csource=0x06"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis,  "csource=0x07"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit,  "csource=0x08"),
+	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis,  "csource=0x09"),
+	AMD_IOMMU_EVENT_DESC(mem_dte_hit,            "csource=0x0a"),
+	AMD_IOMMU_EVENT_DESC(mem_dte_mis,            "csource=0x0b"),
+	AMD_IOMMU_EVENT_DESC(page_tbl_read_tot,      "csource=0x0c"),
+	AMD_IOMMU_EVENT_DESC(page_tbl_read_nst,      "csource=0x0d"),
+	AMD_IOMMU_EVENT_DESC(page_tbl_read_gst,      "csource=0x0e"),
+	AMD_IOMMU_EVENT_DESC(int_dte_hit,            "csource=0x0f"),
+	AMD_IOMMU_EVENT_DESC(int_dte_mis,            "csource=0x10"),
+	AMD_IOMMU_EVENT_DESC(cmd_processed,          "csource=0x11"),
+	AMD_IOMMU_EVENT_DESC(cmd_processed_inv,      "csource=0x12"),
+	AMD_IOMMU_EVENT_DESC(tlb_inv,                "csource=0x13"),
+	{ /* end: all zeroes */ },
+};
+
+/*---------------------------------------------
+ * sysfs cpumask attributes
+ *---------------------------------------------*/
+static cpumask_t iommu_cpumask;
+
+static ssize_t _iommu_cpumask_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &iommu_cpumask);
+	buf[n++] = '\n';
+	buf[n] = '\0';
+	return n;
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
+
+static struct attribute *iommu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group amd_iommu_cpumask_group = {
+	.attrs = iommu_cpumask_attrs,
+};
+
+/*---------------------------------------------*/
+
+static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
+{
+	unsigned long flags;
+	int shift, bank, cntr, retval;
+	int max_banks = perf_iommu->max_banks;
+	int max_cntrs = perf_iommu->max_counters;
+
+	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+
+	for (bank = 0, shift = 0; bank < max_banks; bank++) {
+		for (cntr = 0; cntr < max_cntrs; cntr++) {
+			shift = bank + (bank*3) + cntr;
+			if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
+				continue;
+			} else {
+				perf_iommu->cntr_assign_mask |= (1ULL<<shift);
+				retval = ((u16)((u16)bank<<8) | (u8)(cntr));
+				goto out;
+			}
+		}
+	}
+	retval = -ENOSPC;
+out:
+	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+	return retval;
+}
+
+static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
+					u8 bank, u8 cntr)
+{
+	unsigned long flags;
+	int max_banks, max_cntrs;
+	int shift = 0;
+
+	max_banks = perf_iommu->max_banks;
+	max_cntrs = perf_iommu->max_counters;
+
+	if ((bank > max_banks) || (cntr > max_cntrs))
+		return -EINVAL;
+
+	shift = bank + cntr + (bank*3);
+
+	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+	perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
+	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+
+	return 0;
+}
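get_next_avail_iommu_bnk_cntr() flattens a (bank, counter) pair into one bit of cntr_assign_mask: the bit index is bank + bank*3 + cntr, i.e. bank*4 + cntr (a stride of four counters per bank), and the returned value packs the bank into bits 8-15 and the counter into bits 0-7. A standalone sketch of that allocation scheme, same index math with the locking simplified away; the bank/counter limits are assumptions for the demo:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_BANKS       8
    #define CNTRS_PER_BANK  4 /* the stride implied by bank + bank*3 + cntr */

    static uint64_t assign_mask;

    /* Allocate the first free (bank, counter) pair; returns bank<<8 | cntr
     * like get_next_avail_iommu_bnk_cntr(), or -1 when everything is busy. */
    static int alloc_bnk_cntr(void)
    {
        for (int bank = 0; bank < MAX_BANKS; bank++) {
            for (int cntr = 0; cntr < CNTRS_PER_BANK; cntr++) {
                int shift = bank * 4 + cntr; /* == bank + bank*3 + cntr */

                if (assign_mask & (1ULL << shift))
                    continue;
                assign_mask |= 1ULL << shift;
                return (bank << 8) | cntr;
            }
        }
        return -1;
    }

    static void free_bnk_cntr(uint8_t bank, uint8_t cntr)
    {
        assign_mask &= ~(1ULL << (bank * 4 + cntr));
    }

    int main(void)
    {
        int slot = alloc_bnk_cntr();

        printf("bank=%d cntr=%d\n", slot >> 8, slot & 0xff);
        free_bnk_cntr(slot >> 8, slot & 0xff);
        return 0;
    }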
+
+static int perf_iommu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_amd_iommu *perf_iommu;
+	u64 config, config1;
+
+	/* test the event attr type check for PMU enumeration */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/*
+	 * IOMMU counters are shared across all cores.
+	 * Therefore, it does not support per-process mode.
+	 * Also, it does not support event sampling mode.
+	 */
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	/* IOMMU counters do not have usr/os/guest/host bits */
+	if (event->attr.exclude_user || event->attr.exclude_kernel ||
+	    event->attr.exclude_host || event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	perf_iommu = &__perf_iommu;
+
+	if (event->pmu != &perf_iommu->pmu)
+		return -ENOENT;
+
+	if (perf_iommu) {
+		config = event->attr.config;
+		config1 = event->attr.config1;
+	} else {
+		return -EINVAL;
+	}
+
+	/* integrate with iommu base devid (0000), assume one iommu */
+	perf_iommu->max_banks =
+		amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
+	perf_iommu->max_counters =
+		amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
+	if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
+		return -EINVAL;
+
+	/* update the hw_perf_event struct with the iommu config data */
+	hwc->config = config;
+	hwc->extra_reg.config = config1;
+
+	return 0;
+}
+
+static void perf_iommu_enable_event(struct perf_event *ev)
+{
+	u8 csource = _GET_CSOURCE(ev);
+	u16 devid = _GET_DEVID(ev);
+	u64 reg = 0ULL;
+
+	reg = csource;
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev),
+			IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+
+	reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
+	if (reg)
+		reg |= (1UL << 31);
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev),
+			IOMMU_PC_DEVID_MATCH_REG, &reg, true);
+
+	reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
+	if (reg)
+		reg |= (1UL << 31);
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev),
+			IOMMU_PC_PASID_MATCH_REG, &reg, true);
+
+	reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
+	if (reg)
+		reg |= (1UL << 31);
+	amd_iommu_pc_get_set_reg_val(devid,
+			_GET_BANK(ev), _GET_CNTR(ev),
+			IOMMU_PC_DOMID_MATCH_REG, &reg, true);
+}
+
+static void perf_iommu_disable_event(struct perf_event *event)
+{
+	u64 reg = 0ULL;
+
+	amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+			_GET_BANK(event), _GET_CNTR(event),
+			IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+}
+
+static void perf_iommu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	pr_debug("perf: amd_iommu:perf_iommu_start\n");
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+
+	if (flags & PERF_EF_RELOAD) {
+		u64 prev_raw_count = local64_read(&hwc->prev_count);
+		amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+				_GET_BANK(event), _GET_CNTR(event),
+				IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
+	}
+
+	perf_iommu_enable_event(event);
+	perf_event_update_userpage(event);
+
+}
+
+static void perf_iommu_read(struct perf_event *event)
+{
+	u64 count = 0ULL;
+	u64 prev_raw_count = 0ULL;
+	u64 delta = 0ULL;
+	struct hw_perf_event *hwc = &event->hw;
+	pr_debug("perf: amd_iommu:perf_iommu_read\n");
+
+	amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+				_GET_BANK(event), _GET_CNTR(event),
+				IOMMU_PC_COUNTER_REG, &count, false);
+
+	/* IOMMU pc counter register is only 48 bits */
+	count &= 0xFFFFFFFFFFFFULL;
+
+	prev_raw_count = local64_read(&hwc->prev_count);
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+					count) != prev_raw_count)
+		return;
+
+	/* Handling 48-bit counter overflowing */
+	delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
+	delta >>= COUNTER_SHIFT;
+	local64_add(delta, &event->count);
+
+}
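perf_iommu_read() handles the 48-bit hardware counter by shifting both readings left by COUNTER_SHIFT (16) before subtracting: the subtraction then wraps naturally modulo 2^64, and shifting back down yields a correct delta even when the counter overflowed between reads. The same trick in isolation, with illustrative input values:

    #include <stdint.h>
    #include <stdio.h>

    #define COUNTER_SHIFT 16 /* 64 - 48: the IOMMU counter width */

    /* Delta between two 48-bit counter readings, overflow-safe: promote
     * both values into the top 48 bits of a u64, let the subtraction wrap
     * modulo 2^64, then shift the result back down. */
    static uint64_t delta48(uint64_t prev, uint64_t now)
    {
        return ((now << COUNTER_SHIFT) - (prev << COUNTER_SHIFT))
                >> COUNTER_SHIFT;
    }

    int main(void)
    {
        uint64_t prev = 0xFFFFFFFFFFF0ULL; /* near the 48-bit limit */
        uint64_t now  = 0x000000000010ULL; /* after wrapping around */

        /* Prints 32: 0x10 counts before the wrap plus 0x10 after. */
        printf("delta = %llu\n", (unsigned long long)delta48(prev, now));
        return 0;
    }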
+
+static void perf_iommu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 config;
+
+	pr_debug("perf: amd_iommu:perf_iommu_stop\n");
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	perf_iommu_disable_event(event);
+	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	config = hwc->config;
+	perf_iommu_read(event);
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int perf_iommu_add(struct perf_event *event, int flags)
+{
+	int retval;
+	struct perf_amd_iommu *perf_iommu =
+			container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+	pr_debug("perf: amd_iommu:perf_iommu_add\n");
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	/* request an iommu bank/counter */
+	retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
+	if (retval != -ENOSPC)
+		event->hw.extra_reg.reg = (u16)retval;
+	else
+		return retval;
+
+	if (flags & PERF_EF_START)
+		perf_iommu_start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+static void perf_iommu_del(struct perf_event *event, int flags)
+{
+	struct perf_amd_iommu *perf_iommu =
+			container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+	pr_debug("perf: amd_iommu:perf_iommu_del\n");
+	perf_iommu_stop(event, PERF_EF_UPDATE);
+
+	/* clear the assigned iommu bank/counter */
+	clear_avail_iommu_bnk_cntr(perf_iommu,
+				     _GET_BANK(event),
+				     _GET_CNTR(event));
+
+	perf_event_update_userpage(event);
+}
+
+static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
+{
+	struct attribute **attrs;
+	struct attribute_group *attr_group;
+	int i = 0, j;
+
+	while (amd_iommu_v2_event_descs[i].attr.attr.name)
+		i++;
+
+	attr_group = kzalloc(sizeof(struct attribute *)
+		* (i + 1) + sizeof(*attr_group), GFP_KERNEL);
+	if (!attr_group)
+		return -ENOMEM;
+
+	attrs = (struct attribute **)(attr_group + 1);
+	for (j = 0; j < i; j++)
+		attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
+
+	attr_group->name = "events";
+	attr_group->attrs = attrs;
+	perf_iommu->events_group = attr_group;
+
+	return 0;
+}
+
+static __init void amd_iommu_pc_exit(void)
+{
+	if (__perf_iommu.events_group != NULL) {
+		kfree(__perf_iommu.events_group);
+		__perf_iommu.events_group = NULL;
+	}
+}
+
+static __init int _init_perf_amd_iommu(
+	struct perf_amd_iommu *perf_iommu, char *name)
+{
+	int ret;
+
+	raw_spin_lock_init(&perf_iommu->lock);
+
+	/* Init format attributes */
+	perf_iommu->format_group = &amd_iommu_format_group;
+
+	/* Init cpumask attributes to only core 0 */
+	cpumask_set_cpu(0, &iommu_cpumask);
+	perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
+
+	/* Init events attributes */
+	if (_init_events_attrs(perf_iommu) != 0)
+		pr_err("perf: amd_iommu: Only support raw events.\n");
+
+	/* Init null attributes */
+	perf_iommu->null_group = NULL;
+	perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
+
+	ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
+	if (ret) {
+		pr_err("perf: amd_iommu: Failed to initialized.\n");
+		amd_iommu_pc_exit();
+	} else {
+		pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
+			amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
+			amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
+	}
+
+	return ret;
+}
+
+static struct perf_amd_iommu __perf_iommu = {
+	.pmu = {
+		.event_init	= perf_iommu_event_init,
+		.add		= perf_iommu_add,
+		.del		= perf_iommu_del,
+		.start		= perf_iommu_start,
+		.stop		= perf_iommu_stop,
+		.read		= perf_iommu_read,
+	},
+	.max_banks		= 0x00,
+	.max_counters		= 0x00,
+	.cntr_assign_mask	= 0ULL,
+	.format_group		= NULL,
+	.cpumask_group		= NULL,
+	.events_group		= NULL,
+	.null_group		= NULL,
+};
+
+static __init int amd_iommu_pc_init(void)
+{
+	/* Make sure the IOMMU PC resource is available */
+	if (!amd_iommu_pc_supported()) {
+		pr_err("perf: amd_iommu PMU not installed. No support!\n");
+		return -ENODEV;
+	}
+
+	_init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
+
+	return 0;
+}
+
+device_initcall(amd_iommu_pc_init);
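Once registered, the PMU appears under /sys/bus/event_source/devices/amd_iommu and can be opened as a system-wide counting event. A hypothetical user-space reader, assuming the PMU registered under the name "amd_iommu" as above and using csource=0x05 (mem_trans_total from the event list); the program itself is a sketch, not part of the patch:

    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
        struct perf_event_attr attr;
        uint64_t count;
        int type, fd;
        FILE *f;

        /* The dynamic PMU type id is published at registration time
         * (perf_pmu_register() in the driver above). */
        f = fopen("/sys/bus/event_source/devices/amd_iommu/type", "r");
        if (!f || fscanf(f, "%d", &type) != 1) {
            perror("amd_iommu PMU not present");
            return 1;
        }
        fclose(f);

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = type;
        attr.config = 0x05; /* csource=0x05: mem_trans_total */

        /* System-wide on CPU 0: the driver rejects per-task and
         * sampling events in perf_iommu_event_init(). */
        fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
        if (fd < 0) {
            perror("perf_event_open");
            return 1;
        }

        sleep(1);
        if (read(fd, &count, sizeof(count)) == sizeof(count))
            printf("mem_trans_total: %llu\n", (unsigned long long)count);
        close(fd);
        return 0;
    }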
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
new file mode 100644
index 00000000000..845d173278e
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _PERF_EVENT_AMD_IOMMU_H_
+#define _PERF_EVENT_AMD_IOMMU_H_
+
+/* iommu pc mmio region register indexes */
+#define IOMMU_PC_COUNTER_REG			0x00
+#define IOMMU_PC_COUNTER_SRC_REG		0x08
+#define IOMMU_PC_PASID_MATCH_REG		0x10
+#define IOMMU_PC_DOMID_MATCH_REG		0x18
+#define IOMMU_PC_DEVID_MATCH_REG		0x20
+#define IOMMU_PC_COUNTER_REPORT_REG		0x28
+
+/* maximun specified bank/counters */
+#define PC_MAX_SPEC_BNKS			64
+#define PC_MAX_SPEC_CNTRS			16
+
+/* iommu pc reg masks*/
+#define IOMMU_BASE_DEVID			0x0000
+
+/* amd_iommu_init.c external support functions */
+extern bool amd_iommu_pc_supported(void);
+
+extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+
+extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+
+extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
+			u8 fxn, u64 *value, bool is_write);
+
+#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a9e22073bd5..fbc9210b45b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -13,6 +13,7 @@