Diffstat (limited to 'arch/powerpc/perf')
-rw-r--r--   arch/powerpc/perf/Makefile               |    2
-rw-r--r--   arch/powerpc/perf/core-book3s.c          |  201
-rw-r--r--   arch/powerpc/perf/hv-24x7-catalog.h      |   33
-rw-r--r--   arch/powerpc/perf/hv-24x7.c              |  523
-rw-r--r--   arch/powerpc/perf/hv-24x7.h              |  109
-rw-r--r--   arch/powerpc/perf/hv-common.c            |   39
-rw-r--r--   arch/powerpc/perf/hv-common.h            |   36
-rw-r--r--   arch/powerpc/perf/hv-gpci.c              |  294
-rw-r--r--   arch/powerpc/perf/hv-gpci.h              |   73
-rw-r--r--   arch/powerpc/perf/power7-events-list.h   |   10
-rw-r--r--   arch/powerpc/perf/power8-pmu.c           |  229
11 files changed, 1512 insertions, 37 deletions
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 60d71eea919..f9c083a5652 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -11,5 +11,7 @@ obj32-$(CONFIG_PPC_PERF_CTRS)	+= mpc7450-pmu.o  obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o  obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o +obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o +  obj-$(CONFIG_PPC64)		+= $(obj64-y)  obj-$(CONFIG_PPC32)		+= $(obj32-y) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 29b89e863d7..fe52db2eea6 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -78,6 +78,7 @@ static unsigned int freeze_events_kernel = MMCR0_FCS;  #define MMCR0_FC56		0  #define MMCR0_PMAO		0  #define MMCR0_EBE		0 +#define MMCR0_BHRBA		0  #define MMCR0_PMCC		0  #define MMCR0_PMCC_U6		0 @@ -120,6 +121,7 @@ static inline void power_pmu_bhrb_enable(struct perf_event *event) {}  static inline void power_pmu_bhrb_disable(struct perf_event *event) {}  void power_pmu_flush_branch_stack(void) {}  static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} +static void pmao_restore_workaround(bool ebb) { }  #endif /* CONFIG_PPC32 */  static bool regs_use_siar(struct pt_regs *regs) @@ -483,7 +485,7 @@ static bool is_ebb_event(struct perf_event *event)  	 * check that the PMU supports EBB, meaning those that don't can still  	 * use bit 63 of the event code for something else if they wish.  	 */ -	return (ppmu->flags & PPMU_EBB) && +	return (ppmu->flags & PPMU_ARCH_207S) &&  	       ((event->attr.config >> PERF_EVENT_CONFIG_EBB_SHIFT) & 1);  } @@ -502,8 +504,11 @@ static int ebb_event_check(struct perf_event *event)  		if (!leader->attr.pinned || !leader->attr.exclusive)  			return -EINVAL; -		if (event->attr.inherit || event->attr.sample_period || -		    event->attr.enable_on_exec || event->attr.freq) +		if (event->attr.freq || +		    event->attr.inherit || +		    event->attr.sample_type || +		    event->attr.sample_period || +		    event->attr.enable_on_exec)  			return -EINVAL;  	} @@ -542,13 +547,21 @@ static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0)  	if (!ebb)  		goto out; -	/* Enable EBB and read/write to all 6 PMCs for userspace */ -	mmcr0 |= MMCR0_EBE | MMCR0_PMCC_U6; +	/* Enable EBB and read/write to all 6 PMCs and BHRB for userspace */ +	mmcr0 |= MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC_U6; -	/* Add any bits from the user reg, FC or PMAO */ +	/* +	 * Add any bits from the user MMCR0, FC or PMAO. This is compatible +	 * with pmao_restore_workaround() because we may add PMAO but we never +	 * clear it here. +	 */  	mmcr0 |= current->thread.mmcr0; -	/* Be careful not to set PMXE if userspace had it cleared */ +	/* +	 * Be careful not to set PMXE if userspace had it cleared. This is also +	 * compatible with pmao_restore_workaround() because it has already +	 * cleared PMXE and we leave PMAO alone. +	 */  	if (!(current->thread.mmcr0 & MMCR0_PMXE))  		mmcr0 &= ~MMCR0_PMXE; @@ -559,13 +572,94 @@ static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0)  out:  	return mmcr0;  } -#endif /* CONFIG_PPC64 */ -static void perf_event_interrupt(struct pt_regs *regs); - -void perf_event_print_debug(void) +static void pmao_restore_workaround(bool ebb)  { +	unsigned pmcs[6]; + +	if (!cpu_has_feature(CPU_FTR_PMAO_BUG)) +		return; + +	/* +	 * On POWER8E there is a hardware defect which affects the PMU context +	 * switch logic, ie. power_pmu_disable/enable(). 
+	 * +	 * When a counter overflows PMXE is cleared and FC/PMAO is set in MMCR0 +	 * by the hardware. Sometime later the actual PMU exception is +	 * delivered. +	 * +	 * If we context switch, or simply disable/enable, the PMU prior to the +	 * exception arriving, the exception will be lost when we clear PMAO. +	 * +	 * When we reenable the PMU, we will write the saved MMCR0 with PMAO +	 * set, and this _should_ generate an exception. However because of the +	 * defect no exception is generated when we write PMAO, and we get +	 * stuck with no counters counting but no exception delivered. +	 * +	 * The workaround is to detect this case and tweak the hardware to +	 * create another pending PMU exception. +	 * +	 * We do that by setting up PMC6 (cycles) for an imminent overflow and +	 * enabling the PMU. That causes a new exception to be generated in the +	 * chip, but we don't take it yet because we have interrupts hard +	 * disabled. We then write back the PMU state as we want it to be seen +	 * by the exception handler. When we reenable interrupts the exception +	 * handler will be called and see the correct state. +	 * +	 * The logic is the same for EBB, except that the exception is gated by +	 * us having interrupts hard disabled as well as the fact that we are +	 * not in userspace. The exception is finally delivered when we return +	 * to userspace. +	 */ + +	/* Only if PMAO is set and PMAO_SYNC is clear */ +	if ((current->thread.mmcr0 & (MMCR0_PMAO | MMCR0_PMAO_SYNC)) != MMCR0_PMAO) +		return; + +	/* If we're doing EBB, only if BESCR[GE] is set */ +	if (ebb && !(current->thread.bescr & BESCR_GE)) +		return; + +	/* +	 * We are already soft-disabled in power_pmu_enable(). We need to hard +	 * enable to actually prevent the PMU exception from firing. +	 */ +	hard_irq_disable(); + +	/* +	 * This is a bit gross, but we know we're on POWER8E and have 6 PMCs. +	 * Using read/write_pmc() in a for loop adds 12 function calls and +	 * almost doubles our code size. +	 */ +	pmcs[0] = mfspr(SPRN_PMC1); +	pmcs[1] = mfspr(SPRN_PMC2); +	pmcs[2] = mfspr(SPRN_PMC3); +	pmcs[3] = mfspr(SPRN_PMC4); +	pmcs[4] = mfspr(SPRN_PMC5); +	pmcs[5] = mfspr(SPRN_PMC6); + +	/* Ensure all freeze bits are unset */ +	mtspr(SPRN_MMCR2, 0); + +	/* Set up PMC6 to overflow in one cycle */ +	mtspr(SPRN_PMC6, 0x7FFFFFFE); + +	/* Enable exceptions and unfreeze PMC6 */ +	mtspr(SPRN_MMCR0, MMCR0_PMXE | MMCR0_PMCjCE | MMCR0_PMAO); + +	/* Now we need to refreeze and restore the PMCs */ +	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMAO); + +	mtspr(SPRN_PMC1, pmcs[0]); +	mtspr(SPRN_PMC2, pmcs[1]); +	mtspr(SPRN_PMC3, pmcs[2]); +	mtspr(SPRN_PMC4, pmcs[3]); +	mtspr(SPRN_PMC5, pmcs[4]); +	mtspr(SPRN_PMC6, pmcs[5]);  } +#endif /* CONFIG_PPC64 */ + +static void perf_event_interrupt(struct pt_regs *regs);  /*   * Read one performance monitor counter (PMC). 
@@ -645,6 +739,57 @@ static void write_pmc(int idx, unsigned long val)  	}  } +/* Called from sysrq_handle_showregs() */ +void perf_event_print_debug(void) +{ +	unsigned long sdar, sier, flags; +	u32 pmcs[MAX_HWEVENTS]; +	int i; + +	if (!ppmu->n_counter) +		return; + +	local_irq_save(flags); + +	pr_info("CPU: %d PMU registers, ppmu = %s n_counters = %d", +		 smp_processor_id(), ppmu->name, ppmu->n_counter); + +	for (i = 0; i < ppmu->n_counter; i++) +		pmcs[i] = read_pmc(i + 1); + +	for (; i < MAX_HWEVENTS; i++) +		pmcs[i] = 0xdeadbeef; + +	pr_info("PMC1:  %08x PMC2: %08x PMC3: %08x PMC4: %08x\n", +		 pmcs[0], pmcs[1], pmcs[2], pmcs[3]); + +	if (ppmu->n_counter > 4) +		pr_info("PMC5:  %08x PMC6: %08x PMC7: %08x PMC8: %08x\n", +			 pmcs[4], pmcs[5], pmcs[6], pmcs[7]); + +	pr_info("MMCR0: %016lx MMCR1: %016lx MMCRA: %016lx\n", +		mfspr(SPRN_MMCR0), mfspr(SPRN_MMCR1), mfspr(SPRN_MMCRA)); + +	sdar = sier = 0; +#ifdef CONFIG_PPC64 +	sdar = mfspr(SPRN_SDAR); + +	if (ppmu->flags & PPMU_HAS_SIER) +		sier = mfspr(SPRN_SIER); + +	if (ppmu->flags & PPMU_ARCH_207S) { +		pr_info("MMCR2: %016lx EBBHR: %016lx\n", +			mfspr(SPRN_MMCR2), mfspr(SPRN_EBBHR)); +		pr_info("EBBRR: %016lx BESCR: %016lx\n", +			mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR)); +	} +#endif +	pr_info("SIAR:  %016lx SDAR:  %016lx SIER:  %016lx\n", +		mfspr(SPRN_SIAR), sdar, sier); + +	local_irq_restore(flags); +} +  /*   * Check if a set of events can all go on the PMU at once.   * If they can't, this will look at alternative codes for the events @@ -851,7 +996,22 @@ static void power_pmu_read(struct perf_event *event)  	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);  	local64_add(delta, &event->count); -	local64_sub(delta, &event->hw.period_left); + +	/* +	 * A number of places program the PMC with (0x80000000 - period_left). +	 * We never want period_left to be less than 1 because we will program +	 * the PMC with a value >= 0x800000000 and an edge detected PMC will +	 * roll around to 0 before taking an exception. We have seen this +	 * on POWER8. +	 * +	 * To fix this, clamp the minimum value of period_left to 1. +	 */ +	do { +		prev = local64_read(&event->hw.period_left); +		val = prev - delta; +		if (val < 1) +			val = 1; +	} while (local64_cmpxchg(&event->hw.period_left, prev, val) != prev);  }  /* @@ -973,11 +1133,12 @@ static void power_pmu_disable(struct pmu *pmu)  		}  		/* -		 * Set the 'freeze counters' bit, clear EBE/PMCC/PMAO/FC56. 
+		 * Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56  		 */  		val  = mmcr0 = mfspr(SPRN_MMCR0);  		val |= MMCR0_FC; -		val &= ~(MMCR0_EBE | MMCR0_PMCC | MMCR0_PMAO | MMCR0_FC56); +		val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO | +			 MMCR0_FC56);  		/*  		 * The barrier is to make sure the mtspr has been @@ -1144,9 +1305,17 @@ static void power_pmu_enable(struct pmu *pmu)  	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;   out_enable: +	pmao_restore_workaround(ebb); + +	if (ppmu->flags & PPMU_ARCH_207S) +		mtspr(SPRN_MMCR2, 0); +  	mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]);  	mb(); +	if (cpuhw->bhrb_users) +		ppmu->config_bhrb(cpuhw->bhrb_filter); +  	write_mmcr0(cpuhw, mmcr0);  	/* @@ -1158,8 +1327,6 @@ static void power_pmu_enable(struct pmu *pmu)  	}   out: -	if (cpuhw->bhrb_users) -		ppmu->config_bhrb(cpuhw->bhrb_filter);  	local_irq_restore(flags);  } @@ -1547,7 +1714,7 @@ static int power_pmu_event_init(struct perf_event *event)  	if (has_branch_stack(event)) {  	        /* PMU has BHRB enabled */ -		if (!(ppmu->flags & PPMU_BHRB)) +		if (!(ppmu->flags & PPMU_ARCH_207S))  			return -EOPNOTSUPP;  	} diff --git a/arch/powerpc/perf/hv-24x7-catalog.h b/arch/powerpc/perf/hv-24x7-catalog.h new file mode 100644 index 00000000000..21b19dd86d9 --- /dev/null +++ b/arch/powerpc/perf/hv-24x7-catalog.h @@ -0,0 +1,33 @@ +#ifndef LINUX_POWERPC_PERF_HV_24X7_CATALOG_H_ +#define LINUX_POWERPC_PERF_HV_24X7_CATALOG_H_ + +#include <linux/types.h> + +/* From document "24x7 Event and Group Catalog Formats Proposal" v0.15 */ + +struct hv_24x7_catalog_page_0 { +#define HV_24X7_CATALOG_MAGIC 0x32347837 /* "24x7" in ASCII */ +	__be32 magic; +	__be32 length; /* In 4096 byte pages */ +	__be64 version; /* XXX: arbitrary? what's the meaning/useage/purpose? */ +	__u8 build_time_stamp[16]; /* "YYYYMMDDHHMMSS\0\0" */ +	__u8 reserved2[32]; +	__be16 schema_data_offs; /* in 4096 byte pages */ +	__be16 schema_data_len;  /* in 4096 byte pages */ +	__be16 schema_entry_count; +	__u8 reserved3[2]; +	__be16 event_data_offs; +	__be16 event_data_len; +	__be16 event_entry_count; +	__u8 reserved4[2]; +	__be16 group_data_offs; /* in 4096 byte pages */ +	__be16 group_data_len;  /* in 4096 byte pages */ +	__be16 group_entry_count; +	__u8 reserved5[2]; +	__be16 formula_data_offs; /* in 4096 byte pages */ +	__be16 formula_data_len;  /* in 4096 byte pages */ +	__be16 formula_entry_count; +	__u8 reserved6[2]; +} __packed; + +#endif diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c new file mode 100644 index 00000000000..e0766b82e16 --- /dev/null +++ b/arch/powerpc/perf/hv-24x7.c @@ -0,0 +1,523 @@ +/* + * Hypervisor supplied "24x7" performance counter support + * + * Author: Cody P Schafer <cody@linux.vnet.ibm.com> + * Copyright 2014 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#define pr_fmt(fmt) "hv-24x7: " fmt + +#include <linux/perf_event.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <asm/firmware.h> +#include <asm/hvcall.h> +#include <asm/io.h> + +#include "hv-24x7.h" +#include "hv-24x7-catalog.h" +#include "hv-common.h" + +/* + * TODO: Merging events: + * - Think of the hcall as an interface to a 4d array of counters: + *   - x = domains + *   - y = indexes in the domain (core, chip, vcpu, node, etc) + *   - z = offset into the counter space + *   - w = lpars (guest vms, "logical partitions") + * - A single request is: x,y,y_last,z,z_last,w,w_last + *   - this means we can retrieve a rectangle of counters in y,z for a single x. + * + * - Things to consider (ignoring w): + *   - input  cost_per_request = 16 + *   - output cost_per_result(ys,zs)  = 8 + 8 * ys + ys * zs + *   - limited number of requests per hcall (must fit into 4K bytes) + *     - 4k = 16 [buffer header] - 16 [request size] * request_count + *     - 255 requests per hcall + *   - sometimes it will be more efficient to read extra data and discard + */ + +/* + * Example usage: + *  perf stat -e 'hv_24x7/domain=2,offset=8,starting_index=0,lpar=0xffffffff/' + */ + +/* u3 0-6, one of HV_24X7_PERF_DOMAIN */ +EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3); +/* u16 */ +EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 16, 31); +/* u32, see "data_offset" */ +EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63); +/* u16 */ +EVENT_DEFINE_RANGE_FORMAT(lpar, config1, 0, 15); + +EVENT_DEFINE_RANGE(reserved1, config,   4, 15); +EVENT_DEFINE_RANGE(reserved2, config1, 16, 63); +EVENT_DEFINE_RANGE(reserved3, config2,  0, 63); + +static struct attribute *format_attrs[] = { +	&format_attr_domain.attr, +	&format_attr_offset.attr, +	&format_attr_starting_index.attr, +	&format_attr_lpar.attr, +	NULL, +}; + +static struct attribute_group format_group = { +	.name = "format", +	.attrs = format_attrs, +}; + +static struct kmem_cache *hv_page_cache; + +/* + * read_offset_data - copy data from one buffer to another while treating the + *                    source buffer as a small view on the total avaliable + *                    source data. + * + * @dest: buffer to copy into + * @dest_len: length of @dest in bytes + * @requested_offset: the offset within the source data we want. Must be > 0 + * @src: buffer to copy data from + * @src_len: length of @src in bytes + * @source_offset: the offset in the sorce data that (src,src_len) refers to. + *                 Must be > 0 + * + * returns the number of bytes copied. + * + * The following ascii art shows the various buffer possitioning we need to + * handle, assigns some arbitrary varibles to points on the buffer, and then + * shows how we fiddle with those values to get things we care about (copy + * start in src and copy len) + * + * s = @src buffer + * d = @dest buffer + * '.' areas in d are written to. 
+ * + *                       u + *   x         w	 v  z + * d           |.........| + * s |----------------------| + * + *                      u + *   x         w	z     v + * d           |........------| + * s |------------------| + * + *   x         w        u,z,v + * d           |........| + * s |------------------| + * + *   x,w                u,v,z + * d |..................| + * s |------------------| + * + *   x        u + *   w        v		z + * d |........| + * s |------------------| + * + *   x      z   w      v + * d            |------| + * s |------| + * + * x = source_offset + * w = requested_offset + * z = source_offset + src_len + * v = requested_offset + dest_len + * + * w_offset_in_s = w - x = requested_offset - source_offset + * z_offset_in_s = z - x = src_len + * v_offset_in_s = v - x = request_offset + dest_len - src_len + */ +static ssize_t read_offset_data(void *dest, size_t dest_len, +				loff_t requested_offset, void *src, +				size_t src_len, loff_t source_offset) +{ +	size_t w_offset_in_s = requested_offset - source_offset; +	size_t z_offset_in_s = src_len; +	size_t v_offset_in_s = requested_offset + dest_len - src_len; +	size_t u_offset_in_s = min(z_offset_in_s, v_offset_in_s); +	size_t copy_len = u_offset_in_s - w_offset_in_s; + +	if (requested_offset < 0 || source_offset < 0) +		return -EINVAL; + +	if (z_offset_in_s <= w_offset_in_s) +		return 0; + +	memcpy(dest, src + w_offset_in_s, copy_len); +	return copy_len; +} + +static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096, +					      unsigned long version, +					      unsigned long index) +{ +	pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)", +			phys_4096, +			version, +			index); +	WARN_ON(!IS_ALIGNED(phys_4096, 4096)); +	return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE, +			phys_4096, +			version, +			index); +} + +static unsigned long h_get_24x7_catalog_page(char page[], +					     u64 version, u32 index) +{ +	return h_get_24x7_catalog_page_(virt_to_phys(page), +					version, index); +} + +static ssize_t catalog_read(struct file *filp, struct kobject *kobj, +			    struct bin_attribute *bin_attr, char *buf, +			    loff_t offset, size_t count) +{ +	unsigned long hret; +	ssize_t ret = 0; +	size_t catalog_len = 0, catalog_page_len = 0, page_count = 0; +	loff_t page_offset = 0; +	uint64_t catalog_version_num = 0; +	void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); +	struct hv_24x7_catalog_page_0 *page_0 = page; +	if (!page) +		return -ENOMEM; + +	hret = h_get_24x7_catalog_page(page, 0, 0); +	if (hret) { +		ret = -EIO; +		goto e_free; +	} + +	catalog_version_num = be64_to_cpu(page_0->version); +	catalog_page_len = be32_to_cpu(page_0->length); +	catalog_len = catalog_page_len * 4096; + +	page_offset = offset / 4096; +	page_count  = count  / 4096; + +	if (page_offset >= catalog_page_len) +		goto e_free; + +	if (page_offset != 0) { +		hret = h_get_24x7_catalog_page(page, catalog_version_num, +					       page_offset); +		if (hret) { +			ret = -EIO; +			goto e_free; +		} +	} + +	ret = read_offset_data(buf, count, offset, +				page, 4096, page_offset * 4096); +e_free: +	if (hret) +		pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:" +		       " rc=%ld\n", +		       catalog_version_num, page_offset, hret); +	kfree(page); + +	pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n", +			offset, page_offset, count, page_count, catalog_len, +			catalog_page_len, ret); + +	return ret; +} + +#define PAGE_0_ATTR(_name, _fmt, _expr)				\ +static ssize_t 
_name##_show(struct device *dev,			\ +			    struct device_attribute *dev_attr,	\ +			    char *buf)				\ +{								\ +	unsigned long hret;					\ +	ssize_t ret = 0;					\ +	void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);	\ +	struct hv_24x7_catalog_page_0 *page_0 = page;		\ +	if (!page)						\ +		return -ENOMEM;					\ +	hret = h_get_24x7_catalog_page(page, 0, 0);		\ +	if (hret) {						\ +		ret = -EIO;					\ +		goto e_free;					\ +	}							\ +	ret = sprintf(buf, _fmt, _expr);			\ +e_free:								\ +	kfree(page);						\ +	return ret;						\ +}								\ +static DEVICE_ATTR_RO(_name) + +PAGE_0_ATTR(catalog_version, "%lld\n", +		(unsigned long long)be64_to_cpu(page_0->version)); +PAGE_0_ATTR(catalog_len, "%lld\n", +		(unsigned long long)be32_to_cpu(page_0->length) * 4096); +static BIN_ATTR_RO(catalog, 0/* real length varies */); + +static struct bin_attribute *if_bin_attrs[] = { +	&bin_attr_catalog, +	NULL, +}; + +static struct attribute *if_attrs[] = { +	&dev_attr_catalog_len.attr, +	&dev_attr_catalog_version.attr, +	NULL, +}; + +static struct attribute_group if_group = { +	.name = "interface", +	.bin_attrs = if_bin_attrs, +	.attrs = if_attrs, +}; + +static const struct attribute_group *attr_groups[] = { +	&format_group, +	&if_group, +	NULL, +}; + +static bool is_physical_domain(int domain) +{ +	return  domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP || +		domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE; +} + +static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, +					 u16 lpar, u64 *res, +					 bool success_expected) +{ +	unsigned long ret; + +	/* +	 * request_buffer and result_buffer are not required to be 4k aligned, +	 * but are not allowed to cross any 4k boundary. Aligning them to 4k is +	 * the simplest way to ensure that. +	 */ +	struct reqb { +		struct hv_24x7_request_buffer buf; +		struct hv_24x7_request req; +	} __packed __aligned(4096) request_buffer = { +		.buf = { +			.interface_version = HV_24X7_IF_VERSION_CURRENT, +			.num_requests = 1, +		}, +		.req = { +			.performance_domain = domain, +			.data_size = cpu_to_be16(8), +			.data_offset = cpu_to_be32(offset), +			.starting_lpar_ix = cpu_to_be16(lpar), +			.max_num_lpars = cpu_to_be16(1), +			.starting_ix = cpu_to_be16(ix), +			.max_ix = cpu_to_be16(1), +		} +	}; + +	struct resb { +		struct hv_24x7_data_result_buffer buf; +		struct hv_24x7_result res; +		struct hv_24x7_result_element elem; +		__be64 result; +	} __packed __aligned(4096) result_buffer = {}; + +	ret = plpar_hcall_norets(H_GET_24X7_DATA, +			virt_to_phys(&request_buffer), sizeof(request_buffer), +			virt_to_phys(&result_buffer),  sizeof(result_buffer)); + +	if (ret) { +		if (success_expected) +			pr_err_ratelimited("hcall failed: %d %#x %#x %d => 0x%lx (%ld) detail=0x%x failing ix=%x\n", +					domain, offset, ix, lpar, +					ret, ret, +					result_buffer.buf.detailed_rc, +					result_buffer.buf.failing_request_ix); +		return ret; +	} + +	*res = be64_to_cpu(result_buffer.result); +	return ret; +} + +static unsigned long event_24x7_request(struct perf_event *event, u64 *res, +		bool success_expected) +{ +	return single_24x7_request(event_get_domain(event), +				event_get_offset(event), +				event_get_starting_index(event), +				event_get_lpar(event), +				res, +				success_expected); +} + +static int h_24x7_event_init(struct perf_event *event) +{ +	struct hv_perf_caps caps; +	unsigned domain; +	unsigned long hret; +	u64 ct; + +	/* Not our event */ +	if (event->attr.type != event->pmu->type) +		return -ENOENT; + +	/* Unused areas must be 0 */ +	if 
(event_get_reserved1(event) || +	    event_get_reserved2(event) || +	    event_get_reserved3(event)) { +		pr_devel("reserved set when forbidden 0x%llx(0x%llx) 0x%llx(0x%llx) 0x%llx(0x%llx)\n", +				event->attr.config, +				event_get_reserved1(event), +				event->attr.config1, +				event_get_reserved2(event), +				event->attr.config2, +				event_get_reserved3(event)); +		return -EINVAL; +	} + +	/* unsupported modes and filters */ +	if (event->attr.exclude_user   || +	    event->attr.exclude_kernel || +	    event->attr.exclude_hv     || +	    event->attr.exclude_idle   || +	    event->attr.exclude_host   || +	    event->attr.exclude_guest  || +	    is_sampling_event(event)) /* no sampling */ +		return -EINVAL; + +	/* no branch sampling */ +	if (has_branch_stack(event)) +		return -EOPNOTSUPP; + +	/* offset must be 8 byte aligned */ +	if (event_get_offset(event) % 8) { +		pr_devel("bad alignment\n"); +		return -EINVAL; +	} + +	/* Domains above 6 are invalid */ +	domain = event_get_domain(event); +	if (domain > 6) { +		pr_devel("invalid domain %d\n", domain); +		return -EINVAL; +	} + +	hret = hv_perf_caps_get(&caps); +	if (hret) { +		pr_devel("could not get capabilities: rc=%ld\n", hret); +		return -EIO; +	} + +	/* PHYSICAL domains & other lpars require extra capabilities */ +	if (!caps.collect_privileged && (is_physical_domain(domain) || +		(event_get_lpar(event) != event_get_lpar_max()))) { +		pr_devel("hv permisions disallow: is_physical_domain:%d, lpar=0x%llx\n", +				is_physical_domain(domain), +				event_get_lpar(event)); +		return -EACCES; +	} + +	/* see if the event complains */ +	if (event_24x7_request(event, &ct, false)) { +		pr_devel("test hcall failed\n"); +		return -EIO; +	} + +	return 0; +} + +static u64 h_24x7_get_value(struct perf_event *event) +{ +	unsigned long ret; +	u64 ct; +	ret = event_24x7_request(event, &ct, true); +	if (ret) +		/* We checked this in event init, shouldn't fail here... 
*/ +		return 0; + +	return ct; +} + +static void h_24x7_event_update(struct perf_event *event) +{ +	s64 prev; +	u64 now; +	now = h_24x7_get_value(event); +	prev = local64_xchg(&event->hw.prev_count, now); +	local64_add(now - prev, &event->count); +} + +static void h_24x7_event_start(struct perf_event *event, int flags) +{ +	if (flags & PERF_EF_RELOAD) +		local64_set(&event->hw.prev_count, h_24x7_get_value(event)); +} + +static void h_24x7_event_stop(struct perf_event *event, int flags) +{ +	h_24x7_event_update(event); +} + +static int h_24x7_event_add(struct perf_event *event, int flags) +{ +	if (flags & PERF_EF_START) +		h_24x7_event_start(event, flags); + +	return 0; +} + +static int h_24x7_event_idx(struct perf_event *event) +{ +	return 0; +} + +static struct pmu h_24x7_pmu = { +	.task_ctx_nr = perf_invalid_context, + +	.name = "hv_24x7", +	.attr_groups = attr_groups, +	.event_init  = h_24x7_event_init, +	.add         = h_24x7_event_add, +	.del         = h_24x7_event_stop, +	.start       = h_24x7_event_start, +	.stop        = h_24x7_event_stop, +	.read        = h_24x7_event_update, +	.event_idx   = h_24x7_event_idx, +}; + +static int hv_24x7_init(void) +{ +	int r; +	unsigned long hret; +	struct hv_perf_caps caps; + +	if (!firmware_has_feature(FW_FEATURE_LPAR)) { +		pr_debug("not a virtualized system, not enabling\n"); +		return -ENODEV; +	} + +	hret = hv_perf_caps_get(&caps); +	if (hret) { +		pr_debug("could not obtain capabilities, not enabling, rc=%ld\n", +				hret); +		return -ENODEV; +	} + +	hv_page_cache = kmem_cache_create("hv-page-4096", 4096, 4096, 0, NULL); +	if (!hv_page_cache) +		return -ENOMEM; + +	r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1); +	if (r) +		return r; + +	return 0; +} + +device_initcall(hv_24x7_init); diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h new file mode 100644 index 00000000000..720ebce4b43 --- /dev/null +++ b/arch/powerpc/perf/hv-24x7.h @@ -0,0 +1,109 @@ +#ifndef LINUX_POWERPC_PERF_HV_24X7_H_ +#define LINUX_POWERPC_PERF_HV_24X7_H_ + +#include <linux/types.h> + +struct hv_24x7_request { +	/* PHYSICAL domains require enabling via phyp/hmc. */ +#define HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP 0x01 +#define HV_24X7_PERF_DOMAIN_PHYSICAL_CORE 0x02 +#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CORE   0x03 +#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CHIP   0x04 +#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_NODE   0x05 +#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_REMOTE_NODE 0x06 +	__u8 performance_domain; +	__u8 reserved[0x1]; + +	/* bytes to read starting at @data_offset. must be a multiple of 8 */ +	__be16 data_size; + +	/* +	 * byte offset within the perf domain to read from. must be 8 byte +	 * aligned +	 */ +	__be32 data_offset; + +	/* +	 * only valid for VIRTUAL_PROCESSOR domains, ignored for others. +	 * -1 means "current partition only" +	 *  Enabling via phyp/hmc required for non-"-1" values. 0 forbidden +	 *  unless requestor is 0. +	 */ +	__be16 starting_lpar_ix; + +	/* +	 * Ignored when @starting_lpar_ix == -1 +	 * Ignored when @performance_domain is not VIRTUAL_PROCESSOR_* +	 * -1 means "infinite" or all +	 */ +	__be16 max_num_lpars; + +	/* chip, core, or virtual processor based on @performance_domain */ +	__be16 starting_ix; +	__be16 max_ix; +} __packed; + +struct hv_24x7_request_buffer { +	/* 0 - ? */ +	/* 1 - ? 
*/ +#define HV_24X7_IF_VERSION_CURRENT 0x01 +	__u8 interface_version; +	__u8 num_requests; +	__u8 reserved[0xE]; +	struct hv_24x7_request requests[]; +} __packed; + +struct hv_24x7_result_element { +	__be16 lpar_ix; + +	/* +	 * represents the core, chip, or virtual processor based on the +	 * request's @performance_domain +	 */ +	__be16 domain_ix; + +	/* -1 if @performance_domain does not refer to a virtual processor */ +	__be32 lpar_cfg_instance_id; + +	/* size = @result_element_data_size of cointaining result. */ +	__u8 element_data[]; +} __packed; + +struct hv_24x7_result { +	__u8 result_ix; + +	/* +	 * 0 = not all result elements fit into the buffer, additional requests +	 *     required +	 * 1 = all result elements were returned +	 */ +	__u8 results_complete; +	__be16 num_elements_returned; + +	/* This is a copy of @data_size from the coresponding hv_24x7_request */ +	__be16 result_element_data_size; +	__u8 reserved[0x2]; + +	/* WARNING: only valid for first result element due to variable sizes +	 *          of result elements */ +	/* struct hv_24x7_result_element[@num_elements_returned] */ +	struct hv_24x7_result_element elements[]; +} __packed; + +struct hv_24x7_data_result_buffer { +	/* See versioning for request buffer */ +	__u8 interface_version; + +	__u8 num_results; +	__u8 reserved[0x1]; +	__u8 failing_request_ix; +	__be32 detailed_rc; +	__be64 cec_cfg_instance_id; +	__be64 catalog_version_num; +	__u8 reserved2[0x8]; +	/* WARNING: only valid for the first result due to variable sizes of +	 *	    results */ +	struct hv_24x7_result results[]; /* [@num_results] */ +} __packed; + +#endif diff --git a/arch/powerpc/perf/hv-common.c b/arch/powerpc/perf/hv-common.c new file mode 100644 index 00000000000..47e02b366f5 --- /dev/null +++ b/arch/powerpc/perf/hv-common.c @@ -0,0 +1,39 @@ +#include <asm/io.h> +#include <asm/hvcall.h> + +#include "hv-gpci.h" +#include "hv-common.h" + +unsigned long hv_perf_caps_get(struct hv_perf_caps *caps) +{ +	unsigned long r; +	struct p { +		struct hv_get_perf_counter_info_params params; +		struct cv_system_performance_capabilities caps; +	} __packed __aligned(sizeof(uint64_t)); + +	struct p arg = { +		.params = { +			.counter_request = cpu_to_be32( +					CIR_SYSTEM_PERFORMANCE_CAPABILITIES), +			.starting_index = cpu_to_be32(-1), +			.counter_info_version_in = 0, +		} +	}; + +	r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, +			       virt_to_phys(&arg), sizeof(arg)); + +	if (r) +		return r; + +	pr_devel("capability_mask: 0x%x\n", arg.caps.capability_mask); + +	caps->version = arg.params.counter_info_version_out; +	caps->collect_privileged = !!arg.caps.perf_collect_privileged; +	caps->ga = !!(arg.caps.capability_mask & CV_CM_GA); +	caps->expanded = !!(arg.caps.capability_mask & CV_CM_EXPANDED); +	caps->lab = !!(arg.caps.capability_mask & CV_CM_LAB); + +	return r; +} diff --git a/arch/powerpc/perf/hv-common.h b/arch/powerpc/perf/hv-common.h new file mode 100644 index 00000000000..5d79cecbd73 --- /dev/null +++ b/arch/powerpc/perf/hv-common.h @@ -0,0 +1,36 @@ +#ifndef LINUX_POWERPC_PERF_HV_COMMON_H_ +#define LINUX_POWERPC_PERF_HV_COMMON_H_ + +#include <linux/perf_event.h> +#include <linux/types.h> + +struct hv_perf_caps { +	u16 version; +	u16 collect_privileged:1, +	    ga:1, +	    expanded:1, +	    lab:1, +	    unused:12; +}; + +unsigned long hv_perf_caps_get(struct hv_perf_caps *caps); + + +#define EVENT_DEFINE_RANGE_FORMAT(name, attr_var, bit_start, bit_end)	\ +PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end);		\ +EVENT_DEFINE_RANGE(name, 
attr_var, bit_start, bit_end) + +#define EVENT_DEFINE_RANGE(name, attr_var, bit_start, bit_end)	\ +static u64 event_get_##name##_max(void)					\ +{									\ +	BUILD_BUG_ON((bit_start > bit_end)				\ +		    || (bit_end >= (sizeof(1ull) * 8)));		\ +	return (((1ull << (bit_end - bit_start)) - 1) << 1) + 1;	\ +}									\ +static u64 event_get_##name(struct perf_event *event)			\ +{									\ +	return (event->attr.attr_var >> (bit_start)) &			\ +		event_get_##name##_max();				\ +} + +#endif diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c new file mode 100644 index 00000000000..c9d399a2df8 --- /dev/null +++ b/arch/powerpc/perf/hv-gpci.c @@ -0,0 +1,294 @@ +/* + * Hypervisor supplied "gpci" ("get performance counter info") performance + * counter support + * + * Author: Cody P Schafer <cody@linux.vnet.ibm.com> + * Copyright 2014 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "hv-gpci: " fmt + +#include <linux/init.h> +#include <linux/perf_event.h> +#include <asm/firmware.h> +#include <asm/hvcall.h> +#include <asm/io.h> + +#include "hv-gpci.h" +#include "hv-common.h" + +/* + * Example usage: + *  perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8, + *		  secondary_index=0,starting_index=0xffffffff,request=0x10/' ... + */ + +/* u32 */ +EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31); +/* u32 */ +EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63); +/* u16 */ +EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15); +/* u8 */ +EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23); +/* u8, bytes of data (1-8) */ +EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31); +/* u32, byte offset */ +EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63); + +static struct attribute *format_attrs[] = { +	&format_attr_request.attr, +	&format_attr_starting_index.attr, +	&format_attr_secondary_index.attr, +	&format_attr_counter_info_version.attr, + +	&format_attr_offset.attr, +	&format_attr_length.attr, +	NULL, +}; + +static struct attribute_group format_group = { +	.name = "format", +	.attrs = format_attrs, +}; + +#define HV_CAPS_ATTR(_name, _format)				\ +static ssize_t _name##_show(struct device *dev,			\ +			    struct device_attribute *attr,	\ +			    char *page)				\ +{								\ +	struct hv_perf_caps caps;				\ +	unsigned long hret = hv_perf_caps_get(&caps);		\ +	if (hret)						\ +		return -EIO;					\ +								\ +	return sprintf(page, _format, caps._name);		\ +}								\ +static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name) + +static ssize_t kernel_version_show(struct device *dev, +				   struct device_attribute *attr, +				   char *page) +{ +	return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT); +} + +static DEVICE_ATTR_RO(kernel_version); +HV_CAPS_ATTR(version, "0x%x\n"); +HV_CAPS_ATTR(ga, "%d\n"); +HV_CAPS_ATTR(expanded, "%d\n"); +HV_CAPS_ATTR(lab, "%d\n"); +HV_CAPS_ATTR(collect_privileged, "%d\n"); + +static struct attribute *interface_attrs[] = { +	&dev_attr_kernel_version.attr, +	&hv_caps_attr_version.attr, +	&hv_caps_attr_ga.attr, +	&hv_caps_attr_expanded.attr, +	&hv_caps_attr_lab.attr, +	&hv_caps_attr_collect_privileged.attr, +	NULL, +}; + +static struct attribute_group interface_group = { +	.name = "interface", +	.attrs = interface_attrs, +}; + +static const struct 
attribute_group *attr_groups[] = { +	&format_group, +	&interface_group, +	NULL, +}; + +#define GPCI_MAX_DATA_BYTES \ +	(1024 - sizeof(struct hv_get_perf_counter_info_params)) + +static unsigned long single_gpci_request(u32 req, u32 starting_index, +		u16 secondary_index, u8 version_in, u32 offset, u8 length, +		u64 *value) +{ +	unsigned long ret; +	size_t i; +	u64 count; + +	struct { +		struct hv_get_perf_counter_info_params params; +		uint8_t bytes[GPCI_MAX_DATA_BYTES]; +	} __packed __aligned(sizeof(uint64_t)) arg = { +		.params = { +			.counter_request = cpu_to_be32(req), +			.starting_index = cpu_to_be32(starting_index), +			.secondary_index = cpu_to_be16(secondary_index), +			.counter_info_version_in = version_in, +		} +	}; + +	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, +			virt_to_phys(&arg), sizeof(arg)); +	if (ret) { +		pr_devel("hcall failed: 0x%lx\n", ret); +		return ret; +	} + +	/* +	 * we verify offset and length are within the zeroed buffer at event +	 * init. +	 */ +	count = 0; +	for (i = offset; i < offset + length; i++) +		count |= arg.bytes[i] << (i - offset); + +	*value = count; +	return ret; +} + +static u64 h_gpci_get_value(struct perf_event *event) +{ +	u64 count; +	unsigned long ret = single_gpci_request(event_get_request(event), +					event_get_starting_index(event), +					event_get_secondary_index(event), +					event_get_counter_info_version(event), +					event_get_offset(event), +					event_get_length(event), +					&count); +	if (ret) +		return 0; +	return count; +} + +static void h_gpci_event_update(struct perf_event *event) +{ +	s64 prev; +	u64 now = h_gpci_get_value(event); +	prev = local64_xchg(&event->hw.prev_count, now); +	local64_add(now - prev, &event->count); +} + +static void h_gpci_event_start(struct perf_event *event, int flags) +{ +	local64_set(&event->hw.prev_count, h_gpci_get_value(event)); +} + +static void h_gpci_event_stop(struct perf_event *event, int flags) +{ +	h_gpci_event_update(event); +} + +static int h_gpci_event_add(struct perf_event *event, int flags) +{ +	if (flags & PERF_EF_START) +		h_gpci_event_start(event, flags); + +	return 0; +} + +static int h_gpci_event_init(struct perf_event *event) +{ +	u64 count; +	u8 length; + +	/* Not our event */ +	if (event->attr.type != event->pmu->type) +		return -ENOENT; + +	/* config2 is unused */ +	if (event->attr.config2) { +		pr_devel("config2 set when reserved\n"); +		return -EINVAL; +	} + +	/* unsupported modes and filters */ +	if (event->attr.exclude_user   || +	    event->attr.exclude_kernel || +	    event->attr.exclude_hv     || +	    event->attr.exclude_idle   || +	    event->attr.exclude_host   || +	    event->attr.exclude_guest  || +	    is_sampling_event(event)) /* no sampling */ +		return -EINVAL; + +	/* no branch sampling */ +	if (has_branch_stack(event)) +		return -EOPNOTSUPP; + +	length = event_get_length(event); +	if (length < 1 || length > 8) { +		pr_devel("length invalid\n"); +		return -EINVAL; +	} + +	/* last byte within the buffer? */ +	if ((event_get_offset(event) + length) > GPCI_MAX_DATA_BYTES) { +		pr_devel("request outside of buffer: %zu > %zu\n", +				(size_t)event_get_offset(event) + length, +				GPCI_MAX_DATA_BYTES); +		return -EINVAL; +	} + +	/* check if the request works... 
*/ +	if (single_gpci_request(event_get_request(event), +				event_get_starting_index(event), +				event_get_secondary_index(event), +				event_get_counter_info_version(event), +				event_get_offset(event), +				length, +				&count)) { +		pr_devel("gpci hcall failed\n"); +		return -EINVAL; +	} + +	return 0; +} + +static int h_gpci_event_idx(struct perf_event *event) +{ +	return 0; +} + +static struct pmu h_gpci_pmu = { +	.task_ctx_nr = perf_invalid_context, + +	.name = "hv_gpci", +	.attr_groups = attr_groups, +	.event_init  = h_gpci_event_init, +	.add         = h_gpci_event_add, +	.del         = h_gpci_event_stop, +	.start       = h_gpci_event_start, +	.stop        = h_gpci_event_stop, +	.read        = h_gpci_event_update, +	.event_idx   = h_gpci_event_idx, +}; + +static int hv_gpci_init(void) +{ +	int r; +	unsigned long hret; +	struct hv_perf_caps caps; + +	if (!firmware_has_feature(FW_FEATURE_LPAR)) { +		pr_debug("not a virtualized system, not enabling\n"); +		return -ENODEV; +	} + +	hret = hv_perf_caps_get(&caps); +	if (hret) { +		pr_debug("could not obtain capabilities, not enabling, rc=%ld\n", +				hret); +		return -ENODEV; +	} + +	r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1); +	if (r) +		return r; + +	return 0; +} + +device_initcall(hv_gpci_init); diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h new file mode 100644 index 00000000000..b25f460c9cc --- /dev/null +++ b/arch/powerpc/perf/hv-gpci.h @@ -0,0 +1,73 @@ +#ifndef LINUX_POWERPC_PERF_HV_GPCI_H_ +#define LINUX_POWERPC_PERF_HV_GPCI_H_ + +#include <linux/types.h> + +/* From the document "H_GetPerformanceCounterInfo Interface" v1.07 */ + +/* H_GET_PERF_COUNTER_INFO argument */ +struct hv_get_perf_counter_info_params { +	__be32 counter_request; /* I */ +	__be32 starting_index;  /* IO */ +	__be16 secondary_index; /* IO */ +	__be16 returned_values; /* O */ +	__be32 detail_rc; /* O, only needed when called via *_norets() */ + +	/* +	 * O, size each of counter_value element in bytes, only set for version +	 * >= 0x3 +	 */ +	__be16 cv_element_size; + +	/* I, 0 (zero) for versions < 0x3 */ +	__u8 counter_info_version_in; + +	/* O, 0 (zero) if version < 0x3. Must be set to 0 when making hcall */ +	__u8 counter_info_version_out; +	__u8 reserved[0xC]; +	__u8 counter_value[]; +} __packed; + +/* + * counter info version => fw version/reference (spec version) + * + * 8 => power8 (1.07) + * [7 is skipped by spec 1.07] + * 6 => TLBIE (1.07) + * 5 => v7r7m0.phyp (1.05) + * [4 skipped] + * 3 => v7r6m0.phyp (?) + * [1,2 skipped] + * 0 => v7r{2,3,4}m0.phyp (?) + */ +#define COUNTER_INFO_VERSION_CURRENT 0x8 + +/* + * These determine the counter_value[] layout and the meaning of starting_index + * and secondary_index. + * + * Unless otherwise noted, @secondary_index is unused and ignored. 
+ */ +enum counter_info_requests { + +	/* GENERAL */ + +	/* @starting_index: must be -1 (to refer to the current partition) +	 */ +	CIR_SYSTEM_PERFORMANCE_CAPABILITIES = 0X40, +}; + +struct cv_system_performance_capabilities { +	/* If != 0, allowed to collect data from other partitions */ +	__u8 perf_collect_privileged; + +	/* These following are only valid if counter_info_version >= 0x3 */ +#define CV_CM_GA       (1 << 7) +#define CV_CM_EXPANDED (1 << 6) +#define CV_CM_LAB      (1 << 5) +	/* remaining bits are reserved */ +	__u8 capability_mask; +	__u8 reserved[0xE]; +} __packed; + +#endif diff --git a/arch/powerpc/perf/power7-events-list.h b/arch/powerpc/perf/power7-events-list.h index 687790a2c0b..64f13d9260a 100644 --- a/arch/powerpc/perf/power7-events-list.h +++ b/arch/powerpc/perf/power7-events-list.h @@ -546,3 +546,13 @@ EVENT(PM_MRK_DATA_FROM_RL2L3_SHR,             0x1d04c)  EVENT(PM_DTLB_MISS_16M,                       0x4c05e)  EVENT(PM_LSU1_LMQ_LHR_MERGE,                  0x0d09a)  EVENT(PM_IFU_FIN,                             0x40066) +EVENT(PM_1THRD_CON_RUN_INSTR,                 0x30062) +EVENT(PM_CMPLU_STALL_COUNT,                   0x4000B) +EVENT(PM_MEM0_PB_RD_CL,                       0x30083) +EVENT(PM_THRD_1_RUN_CYC,                      0x10060) +EVENT(PM_THRD_2_CONC_RUN_INSTR,               0x40062) +EVENT(PM_THRD_2_RUN_CYC,                      0x20060) +EVENT(PM_THRD_3_CONC_RUN_INST,                0x10062) +EVENT(PM_THRD_3_RUN_CYC,                      0x30060) +EVENT(PM_THRD_4_CONC_RUN_INST,                0x20062) +EVENT(PM_THRD_4_RUN_CYC,                      0x40060) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 2ee4a707f0d..639cd915658 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -10,6 +10,8 @@   * 2 of the License, or (at your option) any later version.   
*/ +#define pr_fmt(fmt)	"power8-pmu: " fmt +  #include <linux/kernel.h>  #include <linux/perf_event.h>  #include <asm/firmware.h> @@ -25,15 +27,48 @@  #define PM_BRU_FIN			0x10068  #define PM_BR_MPRED_CMPL		0x400f6 +/* All L1 D cache load references counted at finish, gated by reject */ +#define PM_LD_REF_L1			0x100ee +/* Load Missed L1 */ +#define PM_LD_MISS_L1			0x3e054 +/* Store Missed L1 */ +#define PM_ST_MISS_L1			0x300f0 +/* L1 cache data prefetches */ +#define PM_L1_PREF			0x0d8b8 +/* Instruction fetches from L1 */ +#define PM_INST_FROM_L1			0x04080 +/* Demand iCache Miss */ +#define PM_L1_ICACHE_MISS		0x200fd +/* Instruction Demand sectors wriittent into IL1 */ +#define PM_L1_DEMAND_WRITE		0x0408c +/* Instruction prefetch written into IL1 */ +#define PM_IC_PREF_WRITE		0x0408e +/* The data cache was reloaded from local core's L3 due to a demand load */ +#define PM_DATA_FROM_L3			0x4c042 +/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ +#define PM_DATA_FROM_L3MISS		0x300fe +/* All successful D-side store dispatches for this thread */ +#define PM_L2_ST			0x17080 +/* All successful D-side store dispatches for this thread that were L2 Miss */ +#define PM_L2_ST_MISS			0x17082 +/* Total HW L3 prefetches(Load+store) */ +#define PM_L3_PREF_ALL			0x4e052 +/* Data PTEG reload */ +#define PM_DTLB_MISS			0x300fc +/* ITLB Reloaded */ +#define PM_ITLB_MISS			0x400fc +  /*   * Raw event encoding for POWER8:   *   *        60        56        52        48        44        40        36        32   * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - *   |                                 [      thresh_cmp     ]   [  thresh_ctl   ] - *   |                                                                   | - *   *- EBB (Linux)                      thresh start/stop OR FAB match -* + *   | | [ ]                           [      thresh_cmp     ]   [  thresh_ctl   ] + *   | |  |                                                              | + *   | |  *- IFM (Linux)                 thresh start/stop OR FAB match -* + *   | *- BHRB (Linux) + *   *- EBB (Linux)   *   *        28        24        20        16        12         8         4         0   * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | @@ -83,9 +118,18 @@   *	MMCRA[57:59] = sample[0:2]	(RAND_SAMP_ELIG)   *	MMCRA[61:62] = sample[3:4]	(RAND_SAMP_MODE)   * + * if EBB and BHRB: + *	MMCRA[32:33] = IFM + *   */  #define EVENT_EBB_MASK		1ull +#define EVENT_EBB_SHIFT		PERF_EVENT_CONFIG_EBB_SHIFT +#define EVENT_BHRB_MASK		1ull +#define EVENT_BHRB_SHIFT	62 +#define EVENT_WANTS_BHRB	(EVENT_BHRB_MASK << EVENT_BHRB_SHIFT) +#define EVENT_IFM_MASK		3ull +#define EVENT_IFM_SHIFT		60  #define EVENT_THR_CMP_SHIFT	40	/* Threshold CMP value */  #define EVENT_THR_CMP_MASK	0x3ff  #define EVENT_THR_CTL_SHIFT	32	/* Threshold control value (start/stop) */ @@ -110,6 +154,12 @@  #define EVENT_IS_MARKED		(EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)  #define EVENT_PSEL_MASK		0xff	/* PMCxSEL value */ +/* Bits defined by Linux */ +#define EVENT_LINUX_MASK	\ +	((EVENT_EBB_MASK  << EVENT_EBB_SHIFT)			|	\ +	 (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT)			|	\ +	 (EVENT_IFM_MASK  << EVENT_IFM_SHIFT)) +  #define EVENT_VALID_MASK	\  	((EVENT_THRESH_MASK    << EVENT_THRESH_SHIFT)		|	\  	 (EVENT_SAMPLE_MASK    << EVENT_SAMPLE_SHIFT)		|	\ @@ -118,7 +168,7 @@  	 (EVENT_UNIT_MASK      << EVENT_UNIT_SHIFT)		|	\  	 (EVENT_COMBINE_MASK   << EVENT_COMBINE_SHIFT)		|	\  	 (EVENT_MARKED_MASK    << EVENT_MARKED_SHIFT)		|	\ -	 
(EVENT_EBB_MASK       << PERF_EVENT_CONFIG_EBB_SHIFT)	|	\ +	  EVENT_LINUX_MASK					|	\  	  EVENT_PSEL_MASK)  /* MMCRA IFM bits - POWER8 */ @@ -142,10 +192,11 @@   *   *        28        24        20        16        12         8         4         0   * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - *                   |   [ ]   [  sample ]   [     ]   [6] [5]   [4] [3]   [2] [1] - *              EBB -*    |                     | - *                        |                     |      Count of events for each PMC. - *      L1 I/D qualifier -*                     |        p1, p2, p3, p4, p5, p6. + *               [ ] |   [ ]   [  sample ]   [     ]   [6] [5]   [4] [3]   [2] [1] + *                |  |    |                     | + *      BHRB IFM -*  |    |                     |      Count of events for each PMC. + *              EBB -*    |                     |        p1, p2, p3, p4, p5, p6. + *      L1 I/D qualifier -*                     |   *                     nc - number of counters -*   *   * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints @@ -164,6 +215,9 @@  #define CNST_EBB_VAL(v)		(((v) & EVENT_EBB_MASK) << 24)  #define CNST_EBB_MASK		CNST_EBB_VAL(EVENT_EBB_MASK) +#define CNST_IFM_VAL(v)		(((v) & EVENT_IFM_MASK) << 25) +#define CNST_IFM_MASK		CNST_IFM_VAL(EVENT_IFM_MASK) +  #define CNST_L1_QUAL_VAL(v)	(((v) & 3) << 22)  #define CNST_L1_QUAL_MASK	CNST_L1_QUAL_VAL(3) @@ -199,6 +253,7 @@  #define MMCR1_UNIT_SHIFT(pmc)		(60 - (4 * ((pmc) - 1)))  #define MMCR1_COMBINE_SHIFT(pmc)	(35 - ((pmc) - 1))  #define MMCR1_PMCSEL_SHIFT(pmc)		(24 - (((pmc) - 1)) * 8) +#define MMCR1_FAB_SHIFT			36  #define MMCR1_DC_QUAL_SHIFT		47  #define MMCR1_IC_QUAL_SHIFT		46 @@ -209,6 +264,7 @@  #define MMCRA_THR_SEL_SHIFT		16  #define MMCRA_THR_CMP_SHIFT		32  #define MMCRA_SDAR_MODE_TLB		(1ull << 42) +#define MMCRA_IFM_SHIFT			30  static inline bool event_is_fab_match(u64 event) @@ -233,20 +289,22 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long  	pmc   = (event >> EVENT_PMC_SHIFT)        & EVENT_PMC_MASK;  	unit  = (event >> EVENT_UNIT_SHIFT)       & EVENT_UNIT_MASK;  	cache = (event >> EVENT_CACHE_SEL_SHIFT)  & EVENT_CACHE_SEL_MASK; -	ebb   = (event >> PERF_EVENT_CONFIG_EBB_SHIFT) & EVENT_EBB_MASK; - -	/* Clear the EBB bit in the event, so event checks work below */ -	event &= ~(EVENT_EBB_MASK << PERF_EVENT_CONFIG_EBB_SHIFT); +	ebb   = (event >> EVENT_EBB_SHIFT)        & EVENT_EBB_MASK;  	if (pmc) { +		u64 base_event; +  		if (pmc > 6)  			return -1; -		mask  |= CNST_PMC_MASK(pmc); -		value |= CNST_PMC_VAL(pmc); +		/* Ignore Linux defined bits when checking event below */ +		base_event = event & ~EVENT_LINUX_MASK; -		if (pmc >= 5 && event != 0x500fa && event != 0x600f4) +		if (pmc >= 5 && base_event != 0x500fa && base_event != 0x600f4)  			return -1; + +		mask  |= CNST_PMC_MASK(pmc); +		value |= CNST_PMC_VAL(pmc);  	}  	if (pmc <= 4) { @@ -267,9 +325,10 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long  		 * HV writable, and there is no API for guest kernels to modify  		 * it. The solution is for the hypervisor to initialise the  		 * field to zeroes, and for us to only ever allow events that -		 * have a cache selector of zero. +		 * have a cache selector of zero. The bank selector (bit 3) is +		 * irrelevant, as long as the rest of the value is 0.  		 
*/ -		if (cache) +		if (cache & 0x7)  			return -1;  	} else if (event & EVENT_IS_L1) { @@ -310,6 +369,15 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long  		/* EBB events must specify the PMC */  		return -1; +	if (event & EVENT_WANTS_BHRB) { +		if (!ebb) +			/* Only EBB events can request BHRB */ +			return -1; + +		mask  |= CNST_IFM_MASK; +		value |= CNST_IFM_VAL(event >> EVENT_IFM_SHIFT); +	} +  	/*  	 * All events must agree on EBB, either all request it or none.  	 * EBB events are pinned & exclusive, so this should never actually @@ -388,8 +456,8 @@ static int power8_compute_mmcr(u64 event[], int n_ev,  		 * the threshold bits are used for the match value.  		 */  		if (event_is_fab_match(event[i])) { -			mmcr1 |= (event[i] >> EVENT_THR_CTL_SHIFT) & -				  EVENT_THR_CTL_MASK; +			mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) & +				  EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT;  		} else {  			val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;  			mmcra |= val << MMCRA_THR_CTL_SHIFT; @@ -399,6 +467,11 @@ static int power8_compute_mmcr(u64 event[], int n_ev,  			mmcra |= val << MMCRA_THR_CMP_SHIFT;  		} +		if (event[i] & EVENT_WANTS_BHRB) { +			val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK; +			mmcra |= val << MMCRA_IFM_SHIFT; +		} +  		hwc[i] = pmc - 1;  	} @@ -556,6 +629,8 @@ static int power8_generic_events[] = {  	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,  	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BRU_FIN,  	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL, +	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1, +	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1,  };  static u64 power8_bhrb_filter_map(u64 branch_sample_type) @@ -595,6 +670,116 @@ static void power8_config_bhrb(u64 pmu_bhrb_filter)  	mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));  } +#define C(x)	PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. 
+ */ +static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +	[ C(L1D) ] = { +		[ C(OP_READ) ] = { +			[ C(RESULT_ACCESS) ] = PM_LD_REF_L1, +			[ C(RESULT_MISS)   ] = PM_LD_MISS_L1, +		}, +		[ C(OP_WRITE) ] = { +			[ C(RESULT_ACCESS) ] = 0, +			[ C(RESULT_MISS)   ] = PM_ST_MISS_L1, +		}, +		[ C(OP_PREFETCH) ] = { +			[ C(RESULT_ACCESS) ] = PM_L1_PREF, +			[ C(RESULT_MISS)   ] = 0, +		}, +	}, +	[ C(L1I) ] = { +		[ C(OP_READ) ] = { +			[ C(RESULT_ACCESS) ] = PM_INST_FROM_L1, +			[ C(RESULT_MISS)   ] = PM_L1_ICACHE_MISS, +		}, +		[ C(OP_WRITE) ] = { +			[ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE, +			[ C(RESULT_MISS)   ] = -1, +		}, +		[ C(OP_PREFETCH) ] = { +			[ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE, +			[ C(RESULT_MISS)   ] = 0, +		}, +	}, +	[ C(LL) ] = { +		[ C(OP_READ) ] = { +			[ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3, +			[ C(RESULT_MISS)   ] = PM_DATA_FROM_L3MISS, +		}, +		[ C(OP_WRITE) ] = { +			[ C(RESULT_ACCESS) ] = PM_L2_ST, +			[ C(RESULT_MISS)   ] = PM_L2_ST_MISS, +		}, +		[ C(OP_PREFETCH) ] = { +			[ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL, +			[ C(RESULT_MISS)   ] = 0, +		}, +	}, +	[ C(DTLB) ] = { +		[ C(OP_READ) ] = { +			[ C(RESULT_ACCESS) ] = 0, +			[ C(RESULT_MISS)   ] = PM_DTLB_MISS, +		}, +		[ C(OP_WRITE) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +		[ C(OP_PREFETCH) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +	}, +	[ C(ITLB) ] = { +		[ C(OP_READ) ] = { +			[ C(RESULT_ACCESS) ] = 0, +			[ C(RESULT_MISS)   ] = PM_ITLB_MISS, +		}, +		[ C(OP_WRITE) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +		[ C(OP_PREFETCH) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +	}, +	[ C(BPU) ] = { +		[ C(OP_READ) ] = { +			[ C(RESULT_ACCESS) ] = PM_BRU_FIN, +			[ C(RESULT_MISS)   ] = PM_BR_MPRED_CMPL, +		}, +		[ C(OP_WRITE) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +		[ C(OP_PREFETCH) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +	}, +	[ C(NODE) ] = { +		[ C(OP_READ) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +		[ C(OP_WRITE) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +		[ C(OP_PREFETCH) ] = { +			[ C(RESULT_ACCESS) ] = -1, +			[ C(RESULT_MISS)   ] = -1, +		}, +	}, +}; + +#undef C +  static struct power_pmu power8_pmu = {  	.name			= "POWER8",  	.n_counter		= 6, @@ -607,9 +792,10 @@ static struct power_pmu power8_pmu = {  	.get_constraint		= power8_get_constraint,  	.get_alternatives	= power8_get_alternatives,  	.disable_pmc		= power8_disable_pmc, -	.flags			= PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB, +	.flags			= PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S,  	.n_generic		= ARRAY_SIZE(power8_generic_events),  	.generic_events		= power8_generic_events, +	.cache_events		= &power8_cache_events,  	.attr_groups		= power8_pmu_attr_groups,  	.bhrb_nr		= 32,  }; @@ -629,6 +815,9 @@ static int __init init_power8_pmu(void)  	/* Tell userspace that EBB is supported */  	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB; +	if (cpu_has_feature(CPU_FTR_PMAO_BUG)) +		pr_info("PMAO restore workaround active.\n"); +  	return 0;  }  early_initcall(init_power8_pmu);  | 
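---

A minimal userspace sketch (not part of the patch) showing how the hv_24x7 PMU added above could be driven directly through perf_event_open(), mirroring the "Example usage" comment in hv-24x7.c. The config bit positions come from the EVENT_DEFINE_RANGE_FORMAT() lines in that file; lpar = 0xffff (-1 as a u16) selects the current partition per the hv_24x7_request description; the sysfs path is the standard location for a dynamically registered PMU type. Requires appropriate privileges on a POWER LPAR.

/*
 * Count one 24x7 counter: domain=2 (physical core), starting_index=0,
 * offset=8, lpar=current partition.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	struct perf_event_attr attr;
	unsigned long long count;
	long long type;
	FILE *f;
	int fd;

	/* perf_pmu_register() exports the dynamic PMU type via sysfs */
	f = fopen("/sys/bus/event_source/devices/hv_24x7/type", "r");
	if (!f || fscanf(f, "%lld", &type) != 1) {
		fprintf(stderr, "hv_24x7 PMU not present\n");
		return 1;
	}
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	/* config: domain bits 0-3, starting_index bits 16-31, offset bits 32-63 */
	attr.config  = 2ULL | (0ULL << 16) | (8ULL << 32);
	/* config1: lpar bits 0-15, -1 (as u16) = current partition only */
	attr.config1 = 0xffffULL;

	/* counting (non-sampling) event, system wide on CPU 0 */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	sleep(1);
	if (read(fd, &count, sizeof(count)) != sizeof(count)) {
		perror("read");
		return 1;
	}
	printf("hv_24x7 count: %llu\n", count);
	return 0;
}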