From d662ed26734473d4cb5f3d78cebfec8f9126e97c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 17:01:53 +1100 Subject: powerpc/perf_counter: Add perf_counter system call on powerpc ... with an empty/dummy asm/perf_counter.h so it builds. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/perf_counter.h | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 arch/powerpc/include/asm/perf_counter.h (limited to 'arch/powerpc/include/asm/perf_counter.h') diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h new file mode 100644 index 00000000000..59530ae1d53 --- /dev/null +++ b/arch/powerpc/include/asm/perf_counter.h @@ -0,0 +1,10 @@ +/* + * Performance counter support - PowerPC-specific definitions. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ -- cgit v1.2.3-18-g5258 From 4574910e5087085a1f330ff8373cee4503f5c77c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 20:21:55 +1100 Subject: powerpc/perf_counter: Add generic support for POWER-family PMU hardware This provides the architecture-specific functions needed to access PMU hardware on the 64-bit PowerPC processors. It has been designed for the IBM POWER family (POWER 4/4+/5/5+/6 and PPC970) but will hopefully also suit other 64-bit PowerPC machines (although probably not Cell given how different it is in this area). This doesn't include back-ends for any specific processors. This implements a system which allows back-ends to express the constraints that their hardware has on what events can be counted simultaneously. 
The constraints are expressed as a 64-bit mask + 64-bit value for each event, and the encoding is capable of expressing the constraints arising from having a set of multiplexers feeding an event bus, with some events being available through multiple multiplexer settings, such as we get on POWER4 and PPC970. Furthermore, the back-end can supply alternative event codes for each event, and the constraint checking code will try all possible combinations of alternative event codes to try to find a combination that will fit. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/perf_counter.h | 62 +++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'arch/powerpc/include/asm/perf_counter.h') diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 59530ae1d53..9d7ff6d7fb5 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -8,3 +8,65 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include <linux/types.h> + +#define MAX_HWCOUNTERS 8 +#define MAX_EVENT_ALTERNATIVES 8 + +/* + * This struct provides the constants and functions needed to + * describe the PMU on a particular POWER-family CPU. + */ +struct power_pmu { + int n_counter; + int max_alternatives; + u64 add_fields; + u64 test_adder; + int (*compute_mmcr)(unsigned int events[], int n_ev, + unsigned int hwc[], u64 mmcr[]); + int (*get_constraint)(unsigned int event, u64 *mskp, u64 *valp); + int (*get_alternatives)(unsigned int event, unsigned int alt[]); + void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); + int n_generic; + int *generic_events; +}; + +extern struct power_pmu *ppmu; + +/* + * The power_pmu.get_constraint function returns a 64-bit value and + * a 64-bit mask that express the constraints between this event and + * other events. 
+ * + * The value and mask are divided up into (non-overlapping) bitfields + * of three different types: + * + * Select field: this expresses the constraint that some set of bits + * in MMCR* needs to be set to a specific value for this event. For a + * select field, the mask contains 1s in every bit of the field, and + * the value contains a unique value for each possible setting of the + * MMCR* bits. The constraint checking code will ensure that two events + * that set the same field in their masks have the same value in their + * value dwords. + * + * Add field: this expresses the constraint that there can be at most + * N events in a particular class. A field of k bits can be used for + * N <= 2^(k-1) - 1. The mask has the most significant bit of the field + * set (and the other bits 0), and the value has only the least significant + * bit of the field set. In addition, the 'add_fields' and 'test_adder' + * in the struct power_pmu for this processor come into play. The + * add_fields value contains 1 in the LSB of the field, and the + * test_adder contains 2^(k-1) - 1 - N in the field. + * + * NAND field: this expresses the constraint that you may not have events + * in all of a set of classes. (For example, on PPC970, you can't select + * events from the FPU, ISU and IDU simultaneously, although any two are + * possible.) For N classes, the field is N+1 bits wide, and each class + * is assigned one bit from the least-significant N bits. The mask has + * only the most-significant bit set, and the value has only the bit + * for the event's class set. The test_adder has the least significant + * bit set in the field. + * + * If an event is not subject to the constraint expressed by a particular + * field, then it will have 0 in both the mask and value for that field. 
+ */ -- cgit v1.2.3-18-g5258 From ab7ef2e50a557af92f4f90689f51fadadafc16b2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Apr 2009 22:38:51 +1000 Subject: perf_counter: powerpc: allow use of limited-function counters POWER5+ and POWER6 have two hardware counters with limited functionality: PMC5 counts instructions completed in run state and PMC6 counts cycles in run state. (Run state is the state when a hardware RUN bit is 1; the idle task clears RUN while waiting for work to do and sets it when there is work to do.) These counters can't be written to by the kernel, can't generate interrupts, and don't obey the freeze conditions. That means we can only use them for per-task counters (where we know we'll always be in run state; we can't put a per-task counter on an idle task), and only if we don't want interrupts and we do want to count in all processor modes. Obviously some counters can't go on a limited hardware counter, but there are also situations where we can only put a counter on a limited hardware counter - if there are already counters on that exclude some processor modes and we want to put on a per-task cycle or instruction counter that doesn't exclude any processor mode, it could go on if it can use a limited hardware counter. To keep track of these constraints, this adds a flags argument to the processor-specific get_alternatives() functions, with three bits defined: one to say that we can accept alternative event codes that go on limited counters, one to say we only want alternatives on limited counters, and one to say that this is a per-task counter and therefore events that are gated by run state are equivalent to those that aren't (e.g. a "cycles" event is equivalent to a "cycles in run state" event). These flags are computed for each counter and stored in the counter->hw.counter_base field (slightly wonky name for what it does, but it was an existing unused field). 
Since the limited counters don't freeze when we freeze the other counters, we need some special handling to avoid getting skew between things counted on the limited counters and those counted on normal counters. To minimize this skew, if we are using any limited counters, we read PMC5 and PMC6 immediately after setting and clearing the freeze bit. This is done in a single asm in the new write_mmcr0() function. The code here is specific to PMC5 and PMC6 being the limited hardware counters. Being more general (e.g. having a bitmap of limited hardware counter numbers) would have meant more complex code to read the limited counters when freezing and unfreezing the normal counters, with conditional branches, which would have increased the skew. Since it isn't necessary for the code to be more general at this stage, it isn't. This also extends the back-ends for POWER5+ and POWER6 to be able to handle up to 6 counters rather than the 4 they previously handled. Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Robert Richter LKML-Reference: <18936.19035.163066.892208@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/perf_counter.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm/perf_counter.h') diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 9d7ff6d7fb5..56d66c38143 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -12,6 +12,7 @@ #define MAX_HWCOUNTERS 8 #define MAX_EVENT_ALTERNATIVES 8 +#define MAX_LIMITED_HWCOUNTERS 2 /* * This struct provides the constants and functions needed to @@ -25,14 +26,24 @@ struct power_pmu { int (*compute_mmcr)(unsigned int events[], int n_ev, unsigned int hwc[], u64 mmcr[]); int (*get_constraint)(unsigned int event, u64 *mskp, u64 *valp); - int (*get_alternatives)(unsigned int event, unsigned int alt[]); + int (*get_alternatives)(unsigned int 
event, unsigned int flags, + unsigned int alt[]); void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); + int (*limited_pmc_event)(unsigned int event); + int limited_pmc5_6; /* PMC5 and PMC6 have limited function */ int n_generic; int *generic_events; }; extern struct power_pmu *ppmu; +/* + * Values for flags to get_alternatives() + */ +#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */ +#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ +#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ + /* * The power_pmu.get_constraint function returns a 64-bit value and * a 64-bit mask that express the constraints between this event and -- cgit v1.2.3-18-g5258 From ef923214a4816c289e4af2d67a9ebb1a31e4ac61 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 14 May 2009 13:29:14 +1000 Subject: perf_counter: powerpc: use u64 for event codes internally Although the perf_counter API allows 63-bit raw event codes, internally in the powerpc back-end we had been using 32-bit event codes. This expands them to 64 bits so that we can add bits for specifying threshold start/stop events and instruction sampling modes later. This also corrects the return value of can_go_on_limited_pmc; we were returning an event code rather than just a 0/1 value in some circumstances. That didn't particularly matter while event codes were 32-bit, but now that event codes are 64-bit it might, so this fixes it. 
[ Impact: extend PowerPC perfcounter interfaces from u32 to u64 ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo LKML-Reference: <18955.36874.472452.353104@drongo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/perf_counter.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include/asm/perf_counter.h') diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 56d66c38143..ceea76a48e3 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -23,13 +23,13 @@ struct power_pmu { int max_alternatives; u64 add_fields; u64 test_adder; - int (*compute_mmcr)(unsigned int events[], int n_ev, + int (*compute_mmcr)(u64 events[], int n_ev, unsigned int hwc[], u64 mmcr[]); - int (*get_constraint)(unsigned int event, u64 *mskp, u64 *valp); - int (*get_alternatives)(unsigned int event, unsigned int flags, - unsigned int alt[]); + int (*get_constraint)(u64 event, u64 *mskp, u64 *valp); + int (*get_alternatives)(u64 event, unsigned int flags, + u64 alt[]); void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); - int (*limited_pmc_event)(unsigned int event); + int (*limited_pmc_event)(u64 event); int limited_pmc5_6; /* PMC5 and PMC6 have limited function */ int n_generic; int *generic_events; -- cgit v1.2.3-18-g5258 From 0bbd0d4be8d5d3676c126e06e3c75c16def00441 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 14 May 2009 13:31:48 +1000 Subject: perf_counter: powerpc: supply more precise information on counter overflow events This uses values from the MMCRA, SIAR and SDAR registers on powerpc to supply more precise information for overflow events, including a data address when PERF_RECORD_ADDR is specified. 
Since POWER6 uses different bit positions in MMCRA from earlier processors, this converts the struct power_pmu limited_pmc5_6 field, which only had 0/1 values, into a flags field and defines bit values for its previous use (PPMU_LIMITED_PMC5_6) and a new flag (PPMU_ALT_SIPR) to indicate that the processor uses the POWER6 bit positions rather than the earlier positions. It also adds definitions in reg.h for the new and old positions of the bit that indicates that the SIAR and SDAR values come from the same instruction. For the data address, the SDAR value is supplied if we are not doing instruction sampling. In that case there is no guarantee that the address given in the PERF_RECORD_ADDR subrecord will correspond to the instruction whose address is given in the PERF_RECORD_IP subrecord. If instruction sampling is enabled (e.g. because this counter is counting a marked instruction event), then we only supply the SDAR value for the PERF_RECORD_ADDR subrecord if it corresponds to the instruction whose address is in the PERF_RECORD_IP subrecord. Otherwise we supply 0. 
[ Impact: support more PMU hardware features on PowerPC ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo LKML-Reference: <18955.37028.48861.555309@drongo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/perf_counter.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm/perf_counter.h') diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index ceea76a48e3..1c60f0ca792 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -30,13 +30,19 @@ struct power_pmu { u64 alt[]); void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); int (*limited_pmc_event)(u64 event); - int limited_pmc5_6; /* PMC5 and PMC6 have limited function */ + u32 flags; int n_generic; int *generic_events; }; extern struct power_pmu *ppmu; +/* + * Values for power_pmu.flags + */ +#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */ +#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */ + /* * Values for flags to get_alternatives() */ @@ -44,6 +50,12 @@ extern struct power_pmu *ppmu; #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ +struct pt_regs; +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) + +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); + /* * The power_pmu.get_constraint function returns a 64-bit value and * a 64-bit mask that express the constraints between this event and -- cgit v1.2.3-18-g5258 From 106b506c3a8b74daa5751e83ed3e46438fcf9a52 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 11 Jun 2009 14:55:42 +1000 Subject: perf_counter: powerpc: Implement generalized cache events for POWER processors This adds tables of event codes for the generalized cache events for all the 
currently supported powerpc processors: POWER{4,5,5+,6,7} and PPC970*, plus powerpc-specific code to use these tables when a generalized cache event is requested. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <18992.36430.933526.742969@drongo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/perf_counter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/include/asm/perf_counter.h') diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 1c60f0ca792..cc7c887705b 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -33,6 +33,9 @@ struct power_pmu { u32 flags; int n_generic; int *generic_events; + int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; }; extern struct power_pmu *ppmu; -- cgit v1.2.3-18-g5258