1 files changed, 353 insertions, 36 deletions
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 29c6482890c..fe52db2eea6 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -24,7 +24,7 @@
 #define BHRB_MAX_ENTRIES	32
 #define BHRB_TARGET		0x0000000000000002
 #define BHRB_PREDICTION		0x0000000000000001
-#define BHRB_EA			0xFFFFFFFFFFFFFFFC
+#define BHRB_EA			0xFFFFFFFFFFFFFFFCUL
 
 struct cpu_hw_events {
 	int n_events;
@@ -75,6 +75,12 @@ static unsigned int freeze_events_kernel = MMCR0_FCS;
 
 #define MMCR0_FCHV		0
 #define MMCR0_PMCjCE		MMCR0_PMCnCE
+#define MMCR0_FC56		0
+#define MMCR0_PMAO		0
+#define MMCR0_EBE		0
+#define MMCR0_BHRBA		0
+#define MMCR0_PMCC		0
+#define MMCR0_PMCC_U6		0
 
 #define SPRN_MMCRA		SPRN_MMCR2
 #define MMCRA_SAMPLE_ENABLE	0
@@ -102,10 +108,20 @@ static inline int siar_valid(struct pt_regs *regs)
 	return 1;
 }
 
+static bool is_ebb_event(struct perf_event *event) { return false; }
+static int ebb_event_check(struct perf_event *event) { return 0; }
+static void ebb_event_add(struct perf_event *event) { }
+static void ebb_switch_out(unsigned long mmcr0) { }
+static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0)
+{
+	return mmcr0;
+}
+
 static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
 static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
 void power_pmu_flush_branch_stack(void) {}
 static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
+static void pmao_restore_workaround(bool ebb) { }
 #endif /* CONFIG_PPC32 */
 
 static bool regs_use_siar(struct pt_regs *regs)
@@ -462,13 +478,188 @@ void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 	return;
 }
 
-#endif /* CONFIG_PPC64 */
+static bool is_ebb_event(struct perf_event *event)
+{
+	/*
+	 * This could be a per-PMU callback, but we'd rather avoid the cost. We
+	 * check that the PMU supports EBB, meaning those that don't can still
+	 * use bit 63 of the event code for something else if they wish.
+	 */
+	return (ppmu->flags & PPMU_ARCH_207S) &&
+	       ((event->attr.config >> PERF_EVENT_CONFIG_EBB_SHIFT) & 1);
+}
 
-static void perf_event_interrupt(struct pt_regs *regs);
+static int ebb_event_check(struct perf_event *event)
+{
+	struct perf_event *leader = event->group_leader;
 
-void perf_event_print_debug(void)
+	/* Event and group leader must agree on EBB */
+	if (is_ebb_event(leader) != is_ebb_event(event))
+		return -EINVAL;
+
+	if (is_ebb_event(event)) {
+		if (!(event->attach_state & PERF_ATTACH_TASK))
+			return -EINVAL;
+
+		if (!leader->attr.pinned || !leader->attr.exclusive)
+			return -EINVAL;
+
+		if (event->attr.freq ||
+		    event->attr.inherit ||
+		    event->attr.sample_type ||
+		    event->attr.sample_period ||
+		    event->attr.enable_on_exec)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void ebb_event_add(struct perf_event *event)
+{
+	if (!is_ebb_event(event) || current->thread.used_ebb)
+		return;
+
+	/*
+	 * IFF this is the first time we've added an EBB event, set
+	 * PMXE in the user MMCR0 so we can detect when it's cleared by
+	 * userspace. We need this so that we can context switch while
+	 * userspace is in the EBB handler (where PMXE is 0).
+	 */
+	current->thread.used_ebb = 1;
+	current->thread.mmcr0 |= MMCR0_PMXE;
+}
+
+static void ebb_switch_out(unsigned long mmcr0)
+{
+	if (!(mmcr0 & MMCR0_EBE))
+		return;
+
+	current->thread.siar  = mfspr(SPRN_SIAR);
+	current->thread.sier  = mfspr(SPRN_SIER);
+	current->thread.sdar  = mfspr(SPRN_SDAR);
+	current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK;
+	current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK;
+}
+
+static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0)
+{
+	if (!ebb)
+		goto out;
+
+	/* Enable EBB and read/write to all 6 PMCs and BHRB for userspace */
+	mmcr0 |= MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC_U6;
+
+	/*
+	 * Add any bits from the user MMCR0, FC or PMAO. This is compatible
+	 * with pmao_restore_workaround() because we may add PMAO but we never
+	 * clear it here.
+	 */
+	mmcr0 |= current->thread.mmcr0;
+
+	/*
+	 * Be careful not to set PMXE if userspace had it cleared. This is also
+	 * compatible with pmao_restore_workaround() because it has already
+	 * cleared PMXE and we leave PMAO alone.
+	 */
+	if (!(current->thread.mmcr0 & MMCR0_PMXE))
+		mmcr0 &= ~MMCR0_PMXE;
+
+	mtspr(SPRN_SIAR, current->thread.siar);
+	mtspr(SPRN_SIER, current->thread.sier);
+	mtspr(SPRN_SDAR, current->thread.sdar);
+	mtspr(SPRN_MMCR2, current->thread.mmcr2);
+out:
+	return mmcr0;
+}
+
+static void pmao_restore_workaround(bool ebb)
 {
+	unsigned pmcs[6];
+
+	if (!cpu_has_feature(CPU_FTR_PMAO_BUG))
+		return;
+
+	/*
+	 * On POWER8E there is a hardware defect which affects the PMU context
+	 * switch logic, ie. power_pmu_disable/enable().
+	 *
+	 * When a counter overflows PMXE is cleared and FC/PMAO is set in MMCR0
+	 * by the hardware. Sometime later the actual PMU exception is
+	 * delivered.
+	 *
+	 * If we context switch, or simply disable/enable, the PMU prior to the
+	 * exception arriving, the exception will be lost when we clear PMAO.
+	 *
+	 * When we reenable the PMU, we will write the saved MMCR0 with PMAO
+	 * set, and this _should_ generate an exception. However because of the
+	 * defect no exception is generated when we write PMAO, and we get
+	 * stuck with no counters counting but no exception delivered.
+	 *
+	 * The workaround is to detect this case and tweak the hardware to
+	 * create another pending PMU exception.
+	 *
+	 * We do that by setting up PMC6 (cycles) for an imminent overflow and
+	 * enabling the PMU. That causes a new exception to be generated in the
+	 * chip, but we don't take it yet because we have interrupts hard
+	 * disabled. We then write back the PMU state as we want it to be seen
+	 * by the exception handler. When we reenable interrupts the exception
+	 * handler will be called and see the correct state.
+	 *
+	 * The logic is the same for EBB, except that the exception is gated by
+	 * us having interrupts hard disabled as well as the fact that we are
+	 * not in userspace. The exception is finally delivered when we return
+	 * to userspace.
+	 */
+
+	/* Only if PMAO is set and PMAO_SYNC is clear */
+	if ((current->thread.mmcr0 & (MMCR0_PMAO | MMCR0_PMAO_SYNC)) != MMCR0_PMAO)
+		return;
+
+	/* If we're doing EBB, only if BESCR[GE] is set */
+	if (ebb && !(current->thread.bescr & BESCR_GE))
+		return;
+
+	/*
+	 * We are already soft-disabled in power_pmu_enable(). We need to hard
+	 * enable to actually prevent the PMU exception from firing.
+	 */
+	hard_irq_disable();
+
+	/*
+	 * This is a bit gross, but we know we're on POWER8E and have 6 PMCs.
+	 * Using read/write_pmc() in a for loop adds 12 function calls and
+	 * almost doubles our code size.
+	 */
+	pmcs[0] = mfspr(SPRN_PMC1);
+	pmcs[1] = mfspr(SPRN_PMC2);
+	pmcs[2] = mfspr(SPRN_PMC3);
+	pmcs[3] = mfspr(SPRN_PMC4);
+	pmcs[4] = mfspr(SPRN_PMC5);
+	pmcs[5] = mfspr(SPRN_PMC6);
+
+	/* Ensure all freeze bits are unset */
+	mtspr(SPRN_MMCR2, 0);
+
+	/* Set up PMC6 to overflow in one cycle */
+	mtspr(SPRN_PMC6, 0x7FFFFFFE);
+
+	/* Enable exceptions and unfreeze PMC6 */
+	mtspr(SPRN_MMCR0, MMCR0_PMXE | MMCR0_PMCjCE | MMCR0_PMAO);
+
+	/* Now we need to refreeze and restore the PMCs */
+	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMAO);
+
+	mtspr(SPRN_PMC1, pmcs[0]);
+	mtspr(SPRN_PMC2, pmcs[1]);
+	mtspr(SPRN_PMC3, pmcs[2]);
+	mtspr(SPRN_PMC4, pmcs[3]);
+	mtspr(SPRN_PMC5, pmcs[4]);
+	mtspr(SPRN_PMC6, pmcs[5]);
 }
+#endif /* CONFIG_PPC64 */
+
+static void perf_event_interrupt(struct pt_regs *regs);
 
 /*
  * Read one performance monitor counter (PMC).
@@ -548,6 +739,57 @@ static void write_pmc(int idx, unsigned long val)
 	}
 }
 
+/* Called from sysrq_handle_showregs() */
+void perf_event_print_debug(void)
+{
+	unsigned long sdar, sier, flags;
+	u32 pmcs[MAX_HWEVENTS];
+	int i;
+
+	if (!ppmu->n_counter)
+		return;
+
+	local_irq_save(flags);
+
+	pr_info("CPU: %d PMU registers, ppmu = %s n_counters = %d",
+		 smp_processor_id(), ppmu->name, ppmu->n_counter);
+
+	for (i = 0; i < ppmu->n_counter; i++)
+		pmcs[i] = read_pmc(i + 1);
+
+	for (; i < MAX_HWEVENTS; i++)
+		pmcs[i] = 0xdeadbeef;
+
+	pr_info("PMC1:  %08x PMC2: %08x PMC3: %08x PMC4: %08x\n",
+		 pmcs[0], pmcs[1], pmcs[2], pmcs[3]);
+
+	if (ppmu->n_counter > 4)
+		pr_info("PMC5:  %08x PMC6: %08x PMC7: %08x PMC8: %08x\n",
+			 pmcs[4], pmcs[5], pmcs[6], pmcs[7]);
+
+	pr_info("MMCR0: %016lx MMCR1: %016lx MMCRA: %016lx\n",
+		mfspr(SPRN_MMCR0), mfspr(SPRN_MMCR1), mfspr(SPRN_MMCRA));
+
+	sdar = sier = 0;
+#ifdef CONFIG_PPC64
+	sdar = mfspr(SPRN_SDAR);
+
+	if (ppmu->flags & PPMU_HAS_SIER)
+		sier = mfspr(SPRN_SIER);
+
+	if (ppmu->flags & PPMU_ARCH_207S) {
+		pr_info("MMCR2: %016lx EBBHR: %016lx\n",
+			mfspr(SPRN_MMCR2), mfspr(SPRN_EBBHR));
+		pr_info("EBBRR: %016lx BESCR: %016lx\n",
+			mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR));
+	}
+#endif
+	pr_info("SIAR:  %016lx SDAR:  %016lx SIER:  %016lx\n",
+		mfspr(SPRN_SIAR), sdar, sier);
+
+	local_irq_restore(flags);
+}
+
 /*
  * Check if a set of events can all go on the PMU at once.
  * If they can't, this will look at alternative codes for the events
@@ -732,6 +974,13 @@ static void power_pmu_read(struct perf_event *event)
 
 	if (!event->hw.idx)
 		return;
+
+	if (is_ebb_event(event)) {
+		val = read_pmc(event->hw.idx);
+		local64_set(&event->hw.prev_count, val);
+		return;
+	}
+
 	/*
 	 * Performance monitor interrupts come even when interrupts
 	 * are soft-disabled, as long as interrupts are hard-enabled.
@@ -747,7 +996,22 @@ static void power_pmu_read(struct perf_event *event)
 	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
 	local64_add(delta, &event->count);
-	local64_sub(delta, &event->hw.period_left);
+
+	/*
+	 * A number of places program the PMC with (0x80000000 - period_left).
+	 * We never want period_left to be less than 1 because we will program
+	 * the PMC with a value >= 0x800000000 and an edge detected PMC will
+	 * roll around to 0 before taking an exception. We have seen this
+	 * on POWER8.
+	 *
+	 * To fix this, clamp the minimum value of period_left to 1.
+	 */
+	do {
+		prev = local64_read(&event->hw.period_left);
+		val = prev - delta;
+		if (val < 1)
+			val = 1;
+	} while (local64_cmpxchg(&event->hw.period_left, prev, val) != prev);
 }
 
 /*
@@ -852,7 +1116,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
 static void power_pmu_disable(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw;
-	unsigned long flags;
+	unsigned long flags, mmcr0, val;
 
 	if (!ppmu)
 		return;
@@ -860,9 +1124,6 @@ static void power_pmu_disable(struct pmu *pmu)
 	cpuhw = &__get_cpu_var(cpu_hw_events);
 
 	if (!cpuhw->disabled) {
-		cpuhw->disabled = 1;
-		cpuhw->n_added = 0;
-
 		/*
 		 * Check if we ever enabled the PMU on this cpu.
 		 */
@@ -872,6 +1133,22 @@ static void power_pmu_disable(struct pmu *pmu)
 		}
 
 		/*
+		 * Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56
+		 */
+		val  = mmcr0 = mfspr(SPRN_MMCR0);
+		val |= MMCR0_FC;
+		val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO |
+			 MMCR0_FC56);
+
+		/*
+		 * The barrier is to make sure the mtspr has been
+		 * executed and the PMU has frozen the events etc.
+		 * before we return.
+		 */
+		write_mmcr0(cpuhw, val);
+		mb();
+
+		/*
 		 * Disable instruction sampling if it was enabled
 		 */
 		if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
@@ -880,15 +1157,12 @@ static void power_pmu_disable(struct pmu *pmu)
 			mb();
 		}
 
-		/*
-		 * Set the 'freeze counters' bit.
-		 * The barrier is to make sure the mtspr has been
-		 * executed and the PMU has frozen the events
-		 * before we return.
-		 */
-		write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
-		mb();
+		cpuhw->disabled = 1;
+		cpuhw->n_added = 0;
+
+		ebb_switch_out(mmcr0);
 	}
+
 	local_irq_restore(flags);
 }
 
@@ -903,23 +1177,36 @@ static void power_pmu_enable(struct pmu *pmu)
 	struct cpu_hw_events *cpuhw;
 	unsigned long flags;
 	long i;
-	unsigned long val;
+	unsigned long val, mmcr0;
 	s64 left;
 	unsigned int hwc_index[MAX_HWEVENTS];
 	int n_lim;
 	int idx;
+	bool ebb;
 
 	if (!ppmu)
 		return;
 	local_irq_save(flags);
+
 	cpuhw = &__get_cpu_var(cpu_hw_events);
-	if (!cpuhw->disabled) {
-		local_irq_restore(flags);
-		return;
+	if (!cpuhw->disabled)
+		goto out;
+
+	if (cpuhw->n_events == 0) {
+		ppc_set_pmu_inuse(0);
+		goto out;
 	}
+
 	cpuhw->disabled = 0;
 
 	/*
+	 * EBB requires an exclusive group and all events must have the EBB
+	 * flag set, or not set, so we can just check a single event. Also we
+	 * know we have at least one event.
+	 */
+	ebb = is_ebb_event(cpuhw->event[0]);
+
+	/*
 	 * If we didn't change anything, or only removed events,
 	 * no need to recalculate MMCR* settings and reset the PMCs.
 	 * Just reenable the PMU with the current MMCR* settings
@@ -928,8 +1215,6 @@ static void power_pmu_enable(struct pmu *pmu)
 	if (!cpuhw->n_added) {
 		mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
 		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
-		if (cpuhw->n_events == 0)
-			ppc_set_pmu_inuse(0);
 		goto out_enable;
 	}
 
@@ -996,25 +1281,42 @@ static void power_pmu_enable(struct pmu *pmu)
 			++n_lim;
 			continue;
 		}
-		val = 0;
-		if (event->hw.sample_period) {
-			left = local64_read(&event->hw.period_left);
-			if (left < 0x80000000L)
-				val = 0x80000000L - left;
+
+		if (ebb)
+			val = local64_read(&event->hw.prev_count);
+		else {
+			val = 0;
+			if (event->hw.sample_period) {
+				left = local64_read(&event->hw.period_left);
+				if (left < 0x80000000L)
+					val = 0x80000000L - left;
+			}
+			local64_set(&event->hw.prev_count, val);
 		}
-		local64_set(&event->hw.prev_count, val);
+
 		event->hw.idx = idx;
 		if (event->hw.state & PERF_HES_STOPPED)
 			val = 0;
 		write_pmc(idx, val);
+
 		perf_event_update_userpage(event);
 	}
 	cpuhw->n_limited = n_lim;
 	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
 
  out_enable:
+	pmao_restore_workaround(ebb);
+
+	if (ppmu->flags & PPMU_ARCH_207S)
+		mtspr(SPRN_MMCR2, 0);
+
+	mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]);
+
 	mb();
-	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
+	if (cpuhw->bhrb_users)
+		ppmu->config_bhrb(cpuhw->bhrb_filter);
+
+	write_mmcr0(cpuhw, mmcr0);
 
 	/*
 	 * Enable instruction sampling if necessary
@@ -1025,8 +1327,6 @@ static void power_pmu_enable(struct pmu *pmu)
 	}
 
  out:
-	if (cpuhw->bhrb_users)
-		ppmu->config_bhrb(cpuhw->bhrb_filter);
 
 	local_irq_restore(flags);
 }
@@ -1112,13 +1412,18 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
 	event->hw.config = cpuhw->events[n0];
 
 nocheck:
+	ebb_event_add(event);
+
 	++cpuhw->n_events;
 	++cpuhw->n_added;
 
 	ret = 0;
  out:
-	if (has_branch_stack(event))
+	if (has_branch_stack(event)) {
 		power_pmu_bhrb_enable(event);
+		cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+					event->attr.branch_sample_type);
+	}
 
 	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
@@ -1409,7 +1714,7 @@ static int power_pmu_event_init(struct perf_event *event)
 
 	if (has_branch_stack(event)) {
 	        /* PMU has BHRB enabled */
-		if (!(ppmu->flags & PPMU_BHRB))
+		if (!(ppmu->flags & PPMU_ARCH_207S))
 			return -EOPNOTSUPP;
 	}
 
@@ -1472,6 +1777,11 @@ static int power_pmu_event_init(struct perf_event *event)
 		}
 	}
 
+	/* Extra checks for EBB */
+	err = ebb_event_check(event);
+	if (err)
+		return err;
+
 	/*
 	 * If this is in a group, check if it can go on with all the
 	 * other hardware events in the group.  We assume the event
@@ -1511,6 +1821,13 @@ static int power_pmu_event_init(struct perf_event *event)
 	local64_set(&event->hw.period_left, event->hw.last_period);
 
 	/*
+	 * For EBB events we just context switch the PMC value, we don't do any
+	 * of the sample_period logic. We use hw.prev_count for this.
+	 */
+	if (is_ebb_event(event))
+		local64_set(&event->hw.prev_count, 0);
+
+	/*
 	 * See if we need to reserve the PMU.
 	 * If no events are currently in use, then we have to take a
 	 * mutex to ensure that we don't race with another task doing
@@ -1786,7 +2103,7 @@ static void power_pmu_setup(int cpu)
 	cpuhw->mmcr[0] = MMCR0_FC;
 }
 
-static int __cpuinit
+static int
 power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 {
 	unsigned int cpu = (long)hcpu;
@@ -1803,7 +2120,7 @@ power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu
 	return NOTIFY_OK;
 }
 
-int __cpuinit register_power_pmu(struct power_pmu *pmu)
+int register_power_pmu(struct power_pmu *pmu)
 {
 	if (ppmu)
 		return -EBUSY;		/* something's already registered */