/*
* Performance events - AMD IBS
*
* Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
*
* For licensing details see kernel-base/COPYING
*/

#include <linux/perf_event.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/ptrace.h>
#include <linux/syscore_ops.h>

#include <asm/apic.h>

#include "perf_event.h"

static u32 ibs_caps;

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
#include <linux/kprobes.h>
#include <linux/hardirq.h>

#include <asm/nmi.h>
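
/*
* User-settable bits in the raw event config for each IBS flavor:
* randomization enable plus the max-count field for fetch sampling,
* and the max-count field alone for op sampling. The remaining bits
* of the control MSRs are set up by the driver itself.
*/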
#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT
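
/*
* IBS is started and stopped from both task context and the NMI
* handler, so the per-cpu state is kept as a bitmap: IBS_ENABLED is
* set while the event is scheduled on this cpu, IBS_STARTED while the
* hardware is counting, and IBS_STOPPING flags a stop that may still
* race with a pending NMI.
*/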
enum ibs_states {
IBS_ENABLED = 0,
IBS_STARTED = 1,
IBS_STOPPING = 2,
IBS_MAX_STATES,
};

struct cpu_perf_ibs {
struct perf_event *event;
unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
};
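
/*
* Per-flavor PMU descriptor: one instance each for IBS fetch and IBS
* op sampling. Besides the pmu itself it holds the control MSR, the
* valid config bits, the masks for the period count, enable and
* sample-valid bits within that MSR, the largest programmable period,
* the range of MSRs to read on a sample (offset_mask/offset_max), the
* per-cpu state, the sysfs format attributes and a get_count()
* callback that extracts the current period count from the control
* register value.
*/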
struct perf_ibs {
struct pmu pmu;
unsigned int msr;
u64 config_mask;
u64 cnt_mask;
u64 enable_mask;
u64 valid_mask;
u64 max_period;
unsigned long offset_mask[1];
int offset_max;
struct cpu_perf_ibs __percpu *pcpu;
struct attribute **format_attrs;
struct attribute_group format_group;
const struct attribute_group *attr_groups[2];
u64 (*get_count)(u64 config);
};
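
/*
* Raw sample buffer passed to perf: the union makes the first u32 of
* data[] alias caps, so the capability flags lead the buffer and the
* IBS register values in regs[] follow contiguously; size holds the
* number of payload bytes actually filled in.
*/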
struct perf_ibs_data {
u32 size;
union {
u32 data[0]; /* data buffer starts here */
u32 caps;
};
u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
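
/*
* Compute the next hardware period from the event's remaining software
* period, clamped to the [min, max] range that the IBS max-count field
* can hold, and return whether the software period has elapsed. With a
* (made-up) max of 0xfffff = 1048575 and 1500000 left, the next-to-last
* hw period becomes 451425 so that the final, sample-generating period
* is a full max period.
*/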
static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
s64 left = local64_read(&hwc->period_left);
s64 period = hwc->sample_period;
int overflow = 0;
/*
* If we are way outside a reasonable range then just skip forward:
*/
if (unlikely(left <= -period)) {
left = period;
local64_set(&hwc->period_left, left);
hwc->last_period = period;
overflow = 1;
}
if (unlikely(left < (s64)min)) {
left += period;
local64_set(&hwc->period_left, left);
hwc->last_period = period;
overflow = 1;
}
/*
* If the hw period that triggers the sw overflow is too short, the
* sample is taken while we are still handling the previous interrupt,
* which biases the results towards interrupt context. Thus we shorten
* the next-to-last period and set the last period to the max period.
*/
if (left > max) {
left -= max;
if (left > max)
left = max;
else if (left < min)
left = min;
}
*hw_period = (u64)left;
return overflow;
}
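
/*
* Fold the delta between two raw counter reads into the event count, a
* cmpxchg-based variant of the usual x86 counter update. The shift
* pair truncates both values to 'width' bits so that a counter wrap
* still yields the right delta: e.g. for a hypothetical 24-bit counter
* (shift = 40), prev = 0xfffffe and new = 0x000001 give delta = 3.
* A return of 0 means an NMI moved prev_count under us and the caller
* should retry with a fresh reading.
*/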
static int
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
{
struct hw_perf_event *hwc = &event->hw;
int shift = 64 - width;
u64 prev_raw_count;
u64 delta;
/*
* Careful: an NMI might modify the previous event value.
*
* Our tactic to handle this is to first atomically read and
* exchange a new raw count - then add that new-prev delta
* count to the generic event atomically:
*/
prev_raw_count = local64_read(&hwc->prev_count);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
return 0;
/*
* Now we have the new raw value and have updated the prev
* timestamp already. We can now calculate the elapsed delta
* (event-)time and add that to the generic event.
*
* Careful, not all hw sign-extends above the physical width
* of the count.
*/
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
local64_add(delta, &event->count);
local64_sub(delta, &hwc->period_left);
return 1;
}

static struct perf_ibs perf_ibs_fetch;
static struct perf_ibs perf_ibs_op;
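
/*
* The core assigns pmu type ids dynamically at registration, so map a
* type back to its IBS flavor by checking both registered pmus.
*/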
static struct perf_ibs *get_ibs_pmu(int type)
{
if (perf_ibs_fetch.pmu.type == type)
return &perf_ibs_fetch;
if (perf_ibs_op.pmu.type == type)
return &perf_ibs_op;
return NULL;
}

/*
* Use IBS for precise event sampling:
*
* perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count
* perf record -a -e r076:p ... # same as -e cpu-cycles:p
* perf record -a -e r0C1:p ... # use ibs op counting micro-ops
*
* IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
* MSRC001_1033) is used to select either cycle or micro-ops counting
* mode.
*
* The rip of IBS samples has skid 0. Thus, IBS supports precise
* levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
* rip is invalid when IBS was not able to record the rip correctly.
* We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
*/
static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
{
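/*
* Only precise levels 1 and 2 map onto IBS: precise_ip == 0 is not
* ours (-ENOENT lets the core try another pmu), higher levels are
* rejected with -EOPNOTSUPP.
*/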
switch (event->attr.precise_ip) {
case 0:
return -ENOENT;
case 1:
case 2:
break;
default:
return -EOPNOTSUPP;
}
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
switch (event->attr.config) {
case PERF_COUNT_HW_CPU_CYCLES:
*config = 0;
return 0;
}
break;
case PERF_TYPE_RAW: