/* Performance event support for sparc64.
*
* Copyright (C) 2009 David S. Miller <davem@davemloft.net>
*
* This code is based almost entirely upon the x86 perf event
* code, which is:
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2009 Jaswinder Singh Rajput
* Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
*/
#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>
#include <asm/cpudata.h>
#include <asm/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>
/* Sparc64 chips have two performance counters, 32-bits each, with
* overflow interrupts generated on transition from 0xffffffff to 0.
* The counters are accessed in one go using a 64-bit register.
*
* Both counters are controlled using a single control register. The
* only way to stop all sampling is to clear all of the context (user,
* supervisor, hypervisor) sampling enable bits. But these bits apply
* to both counters, thus the two counters can't be enabled/disabled
* individually.
*
* The control register has two event fields, one for each of the two
* counters. It's thus nearly impossible to have one counter going
* while keeping the other one stopped. Therefore it is possible to
* get overflow interrupts for counters not currently "in use" and
* that condition must be checked in the overflow interrupt handler.
*
* So we use a hack, in that we program inactive counters with the
* "sw_count0" and "sw_count1" events. These count how many times
* the instruction "sethi %hi(0xfc000), %g0" is executed. It's an
* unusual way to encode a NOP and therefore will not trigger in
* normal code.
*/
#define MAX_HWEVENTS 2
#define MAX_PERIOD ((1UL << 32) - 1)
#define PIC_UPPER_INDEX 0
#define PIC_LOWER_INDEX 1
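/* Illustrative sketch only (a hypothetical helper, not used by the code
 * below): assuming the upper counter occupies bits 63:32 of the shared
 * 64-bit counter register and the lower counter bits 31:0, extracting
 * one 32-bit count from a raw read could look like this.
 */
static inline u64 example_extract_counter(u64 pic, int idx)
{
	/* Shift the upper counter down, then keep the low 32 bits. */
	if (idx == PIC_UPPER_INDEX)
		pic >>= 32;
	return pic & 0xffffffff;
}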
/* Per-CPU performance counter state. */
struct cpu_hw_events {
	struct perf_event *events[MAX_HWEVENTS];	/* event on each counter */
	unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];	/* counters claimed */
	unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];	/* counters counting */
	u64 pcr;		/* cached perf control register value */
	int enabled;		/* non-zero while sampling is enabled */
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
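/* Usage sketch (illustrative only): code running on a CPU would grab
 * its own copy of the state above with something like
 *
 *	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 *
 * assuming the 2009-era __get_cpu_var() per-cpu accessor.
 */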
/* A generic perf event maps to a raw hardware encoding plus a mask of
 * which of the two counters is able to count it.
 */
struct perf_event_map {
	u16 encoding;	/* event encoding for the control register */
	u8 pic_mask;	/* PIC_UPPER, PIC_LOWER, or both */
#define PIC_NONE	0x00
#define PIC_UPPER	0x01
#define PIC_LOWER	0x02
};
static unsigned long perf_event_encode(const struct perf_event_map *pmap)
{
return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask;
}
static void perf_event_decode(unsigned long val, u16 *enc, u8 *msk)
{
*msk = val & 0xff;
*enc = val >> 16;
}
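/* Worked example: the { 0x0009, PIC_UPPER } entry used below packs to
 * (0x0009 << 16) | 0x01 == 0x00090001, and decoding that value gives
 * back enc == 0x0009 and msk == PIC_UPPER.
 */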
#define C(x) PERF_COUNT_HW_CACHE_##x
#define CACHE_OP_UNSUPPORTED	0xfffe	/* hardware cannot count this op */
#define CACHE_OP_NONSENSE	0xffff	/* op/result combination is meaningless */
typedef struct perf_event_map cache_map_t
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
/* Per-chip PMU description: the event tables for this CPU type plus
 * the layout of its performance control register.
 */
struct sparc_pmu {
	const struct perf_event_map *(*event_map)(int);
	const cache_map_t *cache_map;
	int max_events;		/* number of entries behind event_map */
	int upper_shift;	/* bit position of the upper counter's event field */
	int lower_shift;	/* bit position of the lower counter's event field */
	int event_mask;		/* width mask of a single event field */
	int hv_bit;		/* "count in hypervisor mode" enable bit */
	int irq_bit;		/* overflow interrupt enable bit(s) */
	int upper_nop;		/* "sw_count" encoding used to idle the upper counter */
	int lower_nop;		/* "sw_count" encoding used to idle the lower counter */
};
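/* Illustrative sketch only: given a pointer "pmu" to one of these
 * descriptors (a hypothetical local, not a symbol defined here),
 * programming an event encoding into the upper counter's field of the
 * control register might look like
 *
 *	pcr &= ~((u64) pmu->event_mask << pmu->upper_shift);
 *	pcr |= (u64) encoding << pmu->upper_shift;
 *
 * with upper_nop/lower_nop supplying the "sw_count" encodings used to
 * park a counter that has no active event, as described in the comment
 * at the top of this file.
 */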
static const struct perf_event_map ultra3_perfmon_event_map[] = {
[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
[PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
};
static const struct perf_event_map *ultra3_event_map(int event_id)
{
return &ultra3_perfmon_event_map[event_id];
}
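/* Usage sketch (illustrative only, with "sparc_pmu" assumed to be a
 * pointer naming the active PMU description): mapping a generic event
 * id and packing it into a single config value might look like
 *
 *	const struct perf_event_map *pmap;
 *	unsigned long config;
 *
 *	pmap = sparc_pmu->event_map(PERF_COUNT_HW_INSTRUCTIONS);
 *	config = perf_event_encode(pmap);
 */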
static const cache_map_t ultra3_cache_map = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER },
[C(RESULT_MISS)] = { 0x0a, PIC_UPPER },
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_NONSENSE },
		[C(RESULT_MISS)] = { CACHE_OP_NONSENSE },