aboutsummaryrefslogtreecommitdiff
path: root/arch/ia64/kernel/perfmon.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ia64/kernel/perfmon.c
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/ia64/kernel/perfmon.c')
-rw-r--r--arch/ia64/kernel/perfmon.c6676
1 files changed, 6676 insertions, 0 deletions
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
new file mode 100644
index 00000000000..71147be3279
--- /dev/null
+++ b/arch/ia64/kernel/perfmon.c
@@ -0,0 +1,6676 @@
+/*
+ * This file implements the perfmon-2 subsystem which is used
+ * to program the IA-64 Performance Monitoring Unit (PMU).
+ *
+ * The initial version of perfmon.c was written by
+ * Ganesh Venkitachalam, IBM Corp.
+ *
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
+ * David Mosberger, Hewlett Packard Co.
+ *
+ * Version Perfmon-2.x is a rewrite of perfmon-1.x
+ * by Stephane Eranian, Hewlett Packard Co.
+ *
+ * Copyright (C) 1999-2003, 2005 Hewlett Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * More information about perfmon available at:
+ * http://www.hpl.hp.com/research/linux/perfmon
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/list.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/vfs.h>
+#include <linux/pagemap.h>
+#include <linux/mount.h>
+#include <linux/version.h>
+#include <linux/bitops.h>
+
+#include <asm/errno.h>
+#include <asm/intrinsics.h>
+#include <asm/page.h>
+#include <asm/perfmon.h>
+#include <asm/processor.h>
+#include <asm/signal.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/delay.h>
+
+#ifdef CONFIG_PERFMON
+/*
+ * perfmon context state
+ */
+#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */
+#define PFM_CTX_LOADED 2 /* context is loaded onto a task */
+#define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */
+#define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */
+
+#define PFM_INVALID_ACTIVATION (~0UL)
+
+/*
+ * depth of message queue
+ */
+#define PFM_MAX_MSGS 32
+#define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
+
+/*
+ * type of a PMU register (bitmask).
+ * bitmask structure:
+ * bit0 : register implemented
+ * bit1 : end marker
+ * bit2-3 : reserved
+ * bit4 : pmc has pmc.pm
+ * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter
+ * bit6-7 : register type
+ * bit8-31: reserved
+ */
+#define PFM_REG_NOTIMPL 0x0 /* not implemented at all */
+#define PFM_REG_IMPL 0x1 /* register implemented */
+#define PFM_REG_END 0x2 /* end marker */
+#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
+#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
+#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */
+#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */
+#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
+
+#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END)
+#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END)
+
+#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)
+
+/* i assumed unsigned */
+#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
+#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))
+
+/* XXX: these assume that register i is implemented */
+#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
+#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
+#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR)
+#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL)
+
+#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value
+#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask
+#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0]
+#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0]
+
+#define PFM_NUM_IBRS IA64_NUM_DBG_REGS
+#define PFM_NUM_DBRS IA64_NUM_DBG_REGS
+
+#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0)
+#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling)
+#define PFM_CTX_TASK(h) (h)->ctx_task
+
+#define PMU_PMC_OI 5 /* position of pmc.oi bit */
+
+/* XXX: does not support more than 64 PMDs */
+#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
+#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
+
+#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)
+
+#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
+#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
+#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
+#define PFM_CODE_RR 0 /* requesting code range restriction */
+#define PFM_DATA_RR 1 /* requestion data range restriction */
+
+#define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v)
+#define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v)
+#define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info)
+
+#define RDEP(x) (1UL<<(x))
+
+/*
+ * context protection macros
+ * in SMP:
+ * - we need to protect against CPU concurrency (spin_lock)
+ * - we need to protect against PMU overflow interrupts (local_irq_disable)
+ * in UP:
+ * - we need to protect against PMU overflow interrupts (local_irq_disable)
+ *
+ * spin_lock_irqsave()/spin_lock_irqrestore():
+ * in SMP: local_irq_disable + spin_lock
+ * in UP : local_irq_disable
+ *
+ * spin_lock()/spin_lock():
+ * in UP : removed automatically
+ * in SMP: protect against context accesses from other CPU. interrupts
+ * are not masked. This is useful for the PMU interrupt handler
+ * because we know we will not get PMU concurrency in that code.
+ */
+#define PROTECT_CTX(c, f) \
+ do { \
+ DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
+ spin_lock_irqsave(&(c)->ctx_lock, f); \
+ DPRINT(("spinlocked ctx %p by [%d]\n", c, current->pid)); \
+ } while(0)
+
+#define UNPROTECT_CTX(c, f) \
+ do { \
+ DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
+ spin_unlock_irqrestore(&(c)->ctx_lock, f); \
+ } while(0)
+
+#define PROTECT_CTX_NOPRINT(c, f) \
+ do { \
+ spin_lock_irqsave(&(c)->ctx_lock, f); \
+ } while(0)
+
+
+#define UNPROTECT_CTX_NOPRINT(c, f) \
+ do { \
+ spin_unlock_irqrestore(&(c)->ctx_lock, f); \
+ } while(0)
+
+
+#define PROTECT_CTX_NOIRQ(c) \
+ do { \
+ spin_lock(&(c)->ctx_lock); \
+ } while(0)
+
+#define UNPROTECT_CTX_NOIRQ(c) \
+ do { \
+ spin_unlock(&(c)->ctx_lock); \
+ } while(0)
+
+
+#ifdef CONFIG_SMP
+
+#define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)
+#define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++
+#define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION()
+
+#else /* !CONFIG_SMP */
+#define SET_ACTIVATION(t) do {} while(0)
+#define GET_ACTIVATION(t) do {} while(0)
+#define INC_ACTIVATION(t) do {} while(0)
+#endif /* CONFIG_SMP */
+
+#define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
+#define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner)
+#define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx)
+
+#define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
+#define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)
+
+#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
+
+/*
+ * cmp0 must be the value of pmc0
+ */
+#define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL)
+
+#define PFMFS_MAGIC 0xa0b4d889
+
+/*
+ * debugging
+ */
+#define PFM_DEBUGGING 1
+#ifdef PFM_DEBUGGING
+#define DPRINT(a) \
+ do { \
+ if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+ } while (0)
+
+#define DPRINT_ovfl(a) \
+ do { \
+ if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+ } while (0)
+#endif
+
+/*
+ * 64-bit software counter structure
+ *
+ * the next_reset_type is applied to the next call to pfm_reset_regs()
+ */
+typedef struct {
+ unsigned long val; /* virtual 64bit counter value */
+ unsigned long lval; /* last reset value */
+ unsigned long long_reset; /* reset value on sampling overflow */
+ unsigned long short_reset; /* reset value on overflow */
+ unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */
+ unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */
+ unsigned long seed; /* seed for random-number generator */
+ unsigned long mask; /* mask for random-number generator */
+ unsigned int flags; /* notify/do not notify */
+ unsigned long eventid; /* overflow event identifier */
+} pfm_counter_t;
+
+/*
+ * context flags
+ */
+typedef struct {
+ unsigned int block:1; /* when 1, task will blocked on user notifications */
+ unsigned int system:1; /* do system wide monitoring */
+ unsigned int using_dbreg:1; /* using range restrictions (debug registers) */
+ unsigned int is_sampling:1; /* true if using a custom format */
+ unsigned int excl_idle:1; /* exclude idle task in system wide session */
+ unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */
+ unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */
+ unsigned int no_msg:1; /* no message sent on overflow */
+ unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */
+ unsigned int reserved:22;
+} pfm_context_flags_t;
+
+#define PFM_TRAP_REASON_NONE 0x0 /* default value */
+#define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */
+#define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */
+
+
+/*
+ * perfmon context: encapsulates all the state of a monitoring session
+ */
+
+typedef struct pfm_context {
+ spinlock_t ctx_lock; /* context protection */
+
+ pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */
+ unsigned int ctx_state; /* state: active/inactive (no bitfield) */
+
+ struct task_struct *ctx_task; /* task to which context is attached */
+
+ unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */
+
+ struct semaphore ctx_restart_sem; /* use for blocking notification mode */
+
+ unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */
+ unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */
+ unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */
+
+ unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */
+ unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */
+ unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */
+
+ unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */
+
+ unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */
+ unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */
+ unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */
+ unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */
+
+ pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
+
+ u64 ctx_saved_psr_up; /* only contains psr.up value */
+
+ unsigned long ctx_last_activation; /* context last activation number for last_cpu */
+ unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */
+ unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */
+
+ int ctx_fd; /* file descriptor used my this context */
+ pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */
+
+ pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */
+ void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */
+ unsigned long ctx_smpl_size; /* size of sampling buffer */
+ void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */
+
+ wait_queue_head_t ctx_msgq_wait;
+ pfm_msg_t ctx_msgq[PFM_MAX_MSGS];
+ int ctx_msgq_head;
+ int ctx_msgq_tail;
+ struct fasync_struct *ctx_async_queue;
+
+ wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */
+} pfm_context_t;
+
+/*
+ * magic number used to verify that structure is really
+ * a perfmon context
+ */
+#define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops)
+
+#define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context)
+
+#ifdef CONFIG_SMP
+#define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v)
+#define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu
+#else
+#define SET_LAST_CPU(ctx, v) do {} while(0)
+#define GET_LAST_CPU(ctx) do {} while(0)
+#endif
+
+
+#define ctx_fl_block ctx_flags.block
+#define ctx_fl_system ctx_flags.system
+#define ctx_fl_using_dbreg ctx_flags.using_dbreg
+#define ctx_fl_is_sampling ctx_flags.is_sampling
+#define ctx_fl_excl_idle ctx_flags.excl_idle
+#define ctx_fl_going_zombie ctx_flags.going_zombie
+#define ctx_fl_trap_reason ctx_flags.trap_reason
+#define ctx_fl_no_msg ctx_flags.no_msg
+#define ctx_fl_can_restart ctx_flags.can_restart
+
+#define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0);
+#define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking
+
+/*
+ * global information about all sessions
+ * mostly used to synchronize between system wide and per-process
+ */
+typedef struct {
+ spinlock_t pfs_lock; /* lock the structure */
+
+ unsigned int pfs_task_sessions; /* number of per task sessions */
+ unsigned int pfs_sys_sessions; /* number of per system wide sessions */
+ unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */
+ unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */
+ struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
+} pfm_session_t;
+
+/*
+ * information about a PMC or PMD.
+ * dep_pmd[]: a bitmask of dependent PMD registers
+ * dep_pmc[]: a bitmask of dependent PMC registers
+ */
+typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
+typedef struct {
+ unsigned int type;
+ int pm_pos;
+ unsigned long default_value; /* power-on default value */
+ unsigned long reserved_mask; /* bitmask of reserved bits */
+ pfm_reg_check_t read_check;
+ pfm_reg_check_t write_check;
+ unsigned long dep_pmd[4];
+ unsigned long dep_pmc[4];
+} pfm_reg_desc_t;
+
+/* assume cnum is a valid monitor */
+#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)
+
+/*
+ * This structure is initialized at boot time and contains
+ * a description of the PMU main characteristics.
+ *
+ * If the probe function is defined, detection is based
+ * on its return value:
+ * - 0 means recognized PMU
+ * - anything else means not supported
+ * When the probe function is not defined, then the pmu_family field
+ * is used and it must match the host CPU family such that:
+ * - cpu->family & config->pmu_family != 0
+ */
+typedef struct {
+ unsigned long ovfl_val; /* overflow value for counters */
+
+ pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */
+ pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */
+
+ unsigned int num_pmcs; /* number of PMCS: computed at init time */
+ unsigned int num_pmds; /* number of PMDS: computed at init time */
+ unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */
+ unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */
+
+ char *pmu_name; /* PMU family name */
+ unsigned int pmu_family; /* cpuid family pattern used to identify pmu */
+ unsigned int flags; /* pmu specific flags */
+ unsigned int num_ibrs; /* number of IBRS: computed at init time */
+ unsigned int num_dbrs; /* number of DBRS: computed at init time */
+ unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */
+ int (*probe)(void); /* customized probe routine */
+ unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */
+} pmu_config_t;
+/*
+ * PMU specific flags
+ */
+#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */
+
+/*
+ * debug register related type definitions
+ */
+typedef struct {
+ unsigned long ibr_mask:56;
+ unsigned long ibr_plm:4;
+ unsigned long ibr_ig:3;
+ unsigned long ibr_x:1;
+} ibr_mask_reg_t;
+
+typedef struct {
+ unsigned long dbr_mask:56;
+ unsigned long dbr_plm:4;
+ unsigned long dbr_ig:2;
+ unsigned long dbr_w:1;
+ unsigned long dbr_r:1;
+} dbr_mask_reg_t;
+
+typedef union {
+ unsigned long val;
+ ibr_mask_reg_t ibr;
+ dbr_mask_reg_t dbr;
+} dbreg_t;
+
+
+/*
+ * perfmon command descriptions
+ */
+typedef struct {
+ int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+ char *cmd_name;
+ int cmd_flags;
+ unsigned int cmd_narg;
+ size_t cmd_argsize;
+ int (*cmd_getsize)(void *arg, size_t *sz);
+} pfm_cmd_desc_t;
+
+#define PFM_CMD_FD 0x01 /* command requires a file descriptor */
+#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */
+#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */
+#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */
+
+
+#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name
+#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
+#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
+#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
+#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)
+
+#define PFM_CMD_ARG_MANY -1 /* cannot be zero */
+
+typedef struct {
+ int debug; /* turn on/off debugging via syslog */
+ int debug_ovfl; /* turn on/off debug printk in overflow handler */
+ int fastctxsw; /* turn on/off fast (unsecure) ctxsw */
+ int expert_mode; /* turn on/off value checking */
+ int debug_pfm_read;
+} pfm_sysctl_t;
+
+typedef struct {
+ unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
+ unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */
+ unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
+ unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */
+ unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing ovfl interrupts */
+ unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */
+ unsigned long pfm_smpl_handler_calls;
+ unsigned long pfm_smpl_handler_cycles;
+ char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
+} pfm_stats_t;
+
+/*
+ * perfmon internal variables
+ */
+static pfm_stats_t pfm_stats[NR_CPUS];
+static pfm_session_t pfm_sessions; /* global sessions information */
+
+static struct proc_dir_entry *perfmon_dir;
+static pfm_uuid_t pfm_null_uuid = {0,};
+
+static spinlock_t pfm_buffer_fmt_lock;
+static LIST_HEAD(pfm_buffer_fmt_list);
+
+static pmu_config_t *pmu_conf;
+
+/* sysctl() controls */
+static pfm_sysctl_t pfm_sysctl;
+int pfm_debug_var;
+
+static ctl_table pfm_ctl_table[]={
+ {1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
+ {2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
+ {3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
+ {4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
+ { 0, },
+};
+static ctl_table pfm_sysctl_dir[] = {
+ {1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
+ {0,},
+};
+static ctl_table pfm_sysctl_root[] = {
+ {1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
+ {0,},
+};
+static struct ctl_table_header *pfm_sysctl_header;
+
+static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+static int pfm_flush(struct file *filp);
+
+#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v)
+#define pfm_get_cpu_data(a,b) per_cpu(a, b)
+
+static inline void
+pfm_put_task(struct task_struct *task)
+{
+ if (task != current) put_task_struct(task);
+}
+
+static inline void
+pfm_set_task_notify(struct task_struct *task)
+{
+ struct thread_info *info;
+
+ info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
+ set_bit(TIF_NOTIFY_RESUME, &info->flags);
+}
+
+static inline void
+pfm_clear_task_notify(void)
+{
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+}
+
+static inline void
+pfm_reserve_page(unsigned long a)
+{
+ SetPageReserved(vmalloc_to_page((void *)a));
+}
+static inline void
+pfm_unreserve_page(unsigned long a)
+{
+ ClearPageReserved(vmalloc_to_page((void*)a));
+}
+
+static inline unsigned long
+pfm_protect_ctx_ctxsw(pfm_context_t *x)
+{
+ spin_lock(&(x)->ctx_lock);
+ return 0UL;
+}
+
+static inline unsigned long
+pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
+{
+ spin_unlock(&(x)->ctx_lock);
+}
+
+static inline unsigned int
+pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
+{
+ return do_munmap(mm, addr, len);
+}
+
+static inline unsigned long
+pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
+{
+ return get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
+
+static struct super_block *
+pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
+{
+ return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC);
+}
+
+static struct file_system_type pfm_fs_type = {
+ .name = "pfmfs",
+ .get_sb = pfmfs_get_sb,
+ .kill_sb = kill_anon_super,
+};
+
+DEFINE_PER_CPU(unsigned long, pfm_syst_info);
+DEFINE_PER_CPU(struct task_struct *, pmu_owner);
+DEFINE_PER_CPU(pfm_context_t *, pmu_ctx);
+DEFINE_PER_CPU(unsigned long, pmu_activation_number);
+
+
+/* forward declaration */
+static struct file_operations pfm_file_ops;
+
+/*
+ * forward declarations
+ */
+#ifndef CONFIG_SMP
+static void pfm_lazy_save_regs (struct task_struct *ta);
+#endif
+
+void dump_pmu_state(const char *);
+static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
+
+#include "perfmon_itanium.h"
+#include "perfmon_mckinley.h"
+#include "perfmon_generic.h"
+
+static pmu_config_t *pmu_confs[]={
+ &pmu_conf_mck,
+ &pmu_conf_ita,
+ &pmu_conf_gen, /* must be last */
+ NULL
+};
+
+
+static int pfm_end_notify_user(pfm_context_t *ctx);
+
+static inline void
+pfm_clear_psr_pp(void)
+{
+ ia64_rsm(IA64_PSR_PP);
+ ia64_srlz_i();
+}
+
+static inline void
+pfm_set_psr_pp(void)
+{
+ ia64_ssm(IA64_PSR_PP);
+ ia64_srlz_i();
+}
+
+static inline void
+pfm_clear_psr_up(void)
+{
+ ia64_rsm(IA64_PSR_UP);
+ ia64_srlz_i();
+}
+
+static inline void
+pfm_set_psr_up(void)
+{
+ ia64_ssm(IA64_PSR_UP);
+ ia64_srlz_i();
+}
+
+static inline unsigned long
+pfm_get_psr(void)
+{
+ unsigned long tmp;
+ tmp = ia64_getreg(_IA64_REG_PSR);
+ ia64_srlz_i();
+ return tmp;
+}
+
+static inline void
+pfm_set_psr_l(unsigned long val)
+{
+ ia64_setreg(_IA64_REG_PSR_L, val);
+ ia64_srlz_i();
+}
+
+static inline void
+pfm_freeze_pmu(void)
+{
+ ia64_set_pmc(0,1UL);
+ ia64_srlz_d();
+}
+
+static inline void
+pfm_unfreeze_pmu(void)
+{
+ ia64_set_pmc(0,0UL);
+ ia64_srlz_d();
+}
+
+static inline void
+pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
+{
+ int i;
+
+ for (i=0; i < nibrs; i++) {
+ ia64_set_ibr(i, ibrs[i]);
+ ia64_dv_serialize_instruction();
+ }
+ ia64_srlz_i();
+}
+
+static inline void
+pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
+{
+ int i;
+
+ for (i=0; i < ndbrs; i++) {
+ ia64_set_dbr(i, dbrs[i]);
+ ia64_dv_serialize_data();
+ }
+ ia64_srlz_d();
+}
+
+/*
+ * PMD[i] must be a counter. no check is made
+ */
+static inline unsigned long
+pfm_read_soft_counter(pfm_context_t *ctx, int i)
+{
+ return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val);
+}
+
+/*
+ * PMD[i] must be a counter. no check is made
+ */
+static inline void
+pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
+{
+ unsigned long ovfl_val = pmu_conf->ovfl_val;
+
+ ctx->ctx_pmds[i].val = val & ~ovfl_val;
+ /*
+ * writing to unimplemented part is ignore, so we do not need to
+ * mask off top part
+ */
+ ia64_set_pmd(i, val & ovfl_val);
+}
+
+static pfm_msg_t *
+pfm_get_new_msg(pfm_context_t *ctx)
+{
+ int idx, next;
+
+ next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;
+
+ DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
+ if (next == ctx->ctx_msgq_head) return NULL;
+
+ idx = ctx->ctx_msgq_tail;
+ ctx->ctx_msgq_tail = next;
+
+ DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));
+
+ return ctx->ctx_msgq+idx;
+}
+
+static pfm_msg_t *
+pfm_get_next_msg(pfm_context_t *ctx)
+{
+ pfm_msg_t *msg;
+
+ DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
+
+ if (PFM_CTXQ_EMPTY(ctx)) return NULL;
+
+ /*
+ * get oldest message
+ */
+ msg = ctx->ctx_msgq+ctx->ctx_msgq_head;
+
+ /*
+ * and move forward
+ */
+ ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS;
+
+ DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type));
+
+ return msg;
+}
+
+static void
+pfm_reset_msgq(pfm_context_t *ctx)
+{
+ ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
+ DPRINT(("ctx=%p msgq reset\n", ctx));
+}
+
+static void *
+pfm_rvmalloc(unsigned long size)
+{
+ void *mem;
+ unsigned long addr;
+
+ size = PAGE_ALIGN(size);
+ mem = vmalloc(size);
+ if (mem) {
+ //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
+ memset(mem, 0, size);
+ addr = (unsigned long)mem;
+ while (size > 0) {
+ pfm_reserve_page(addr);
+ addr+=PAGE_SIZE;
+ size-=PAGE_SIZE;
+ }
+ }
+ return mem;
+}
+
+static void
+pfm_rvfree(void *mem, unsigned long size)
+{
+ unsigned long addr;
+
+ if (mem) {
+ DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
+ addr = (unsigned long) mem;
+ while ((long) size > 0) {
+ pfm_unreserve_page(addr);
+ addr+=PAGE_SIZE;
+ size-=PAGE_SIZE;
+ }
+ vfree(mem);
+ }
+ return;
+}
+
+static pfm_context_t *
+pfm_context_alloc(void)
+{
+ pfm_context_t *ctx;
+
+ /*
+ * allocate context descriptor
+ * must be able to free with interrupts disabled
+ */
+ ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
+ if (ctx) {
+ memset(ctx, 0, sizeof(pfm_context_t));
+ DPRINT(("alloc ctx @%p\n", ctx));
+ }
+ return ctx;
+}
+
+static void
+pfm_context_free(pfm_context_t *ctx)
+{
+ if (ctx) {
+ DPRINT(("free ctx @%p\n", ctx));
+ kfree(ctx);
+ }
+}
+
+static void
+pfm_mask_monitoring(struct task_struct *task)
+{
+ pfm_context_t *ctx = PFM_GET_CTX(task);
+ struct thread_struct *th = &task->thread;
+ unsigned long mask, val, ovfl_mask;
+ int i;
+
+ DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
+
+ ovfl_mask = pmu_conf->ovfl_val;
+ /*
+ * monitoring can only be masked as a result of a valid
+ * counter overflow. In UP, it means that the PMU still
+ * has an owner. Note that the owner can be different
+ * from the current task. However the PMU state belongs
+ * to the owner.
+ * In SMP, a valid overflow only happens when task is
+ * current. Therefore if we come here, we know that
+ * the PMU state belongs to the current task, therefore
+ * we can access the live registers.
+ *
+ * So in both cases, the live register contains the owner's
+ * state. We can ONLY touch the PMU registers and NOT the PSR.
+ *
+ * As a consequence to this call, the thread->pmds[] array
+ * contains stale information which must be ignored
+ * when context is reloaded AND monitoring is active (see
+ * pfm_restart).
+ */
+ mask = ctx->ctx_used_pmds[0];
+ for (i = 0; mask; i++, mask>>=1) {
+ /* skip non used pmds */
+ if ((mask & 0x1) == 0) continue;
+ val = ia64_get_pmd(i);
+
+ if (PMD_IS_COUNTING(i)) {
+ /*
+ * we rebuild the full 64 bit value of the counter
+ */
+ ctx->ctx_pmds[i].val += (val & ovfl_mask);
+ } else {
+ ctx->ctx_pmds[i].val = val;
+ }
+ DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
+ i,
+ ctx->ctx_pmds[i].val,
+ val & ovfl_mask));
+ }
+ /*
+ * mask monitoring by setting the privilege level to 0
+ * we cannot use psr.pp/psr.up for this, it is controlled by
+ * the user
+ *
+ * if task is current, modify actual registers, otherwise modify
+ * thread save state, i.e., what will be restored in pfm_load_regs()
+ */
+ mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
+ for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
+ if ((mask & 0x1) == 0UL) continue;
+ ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
+ th->pmcs[i] &= ~0xfUL;
+ DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
+ }
+ /*
+ * make all of this visible
+ */
+ ia64_srlz_d();
+}
+
+/*
+ * must always be done with task == current
+ *
+ * context must be in MASKED state when calling
+ */
+static void
+pfm_restore_monitoring(struct task_struct *task)
+{
+ pfm_context_t *ctx = PFM_GET_CTX(task);
+ struct thread_struct *th = &task->thread;
+ unsigned long mask, ovfl_mask;
+ unsigned long psr, val;
+ int i, is_system;
+
+ is_system = ctx->ctx_fl_system;
+ ovfl_mask = pmu_conf->ovfl_val;
+
+ if (task != current) {
+ printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
+ return;
+ }
+ if (ctx->ctx_state != PFM_CTX_MASKED) {
+ printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
+ task->pid, current->pid, ctx->ctx_state);
+ return;
+ }
+ psr = pfm_get_psr();
+ /*
+ * monitoring is masked via the PMC.
+ * As we restore their value, we do not want each counter to
+ * restart right away. We stop monitoring using the PSR,
+ * restore the PMC (and PMD) and then re-establish the psr
+ * as it was. Note that there can be no pending overflow at
+ * this point, because monitoring was MASKED.
+ *
+ * system-wide session are pinned and self-monitoring
+ */
+ if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
+ /* disable dcr pp */
+ ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
+ pfm_clear_psr_pp();
+ } else {
+ pfm_clear_psr_up();
+ }
+ /*
+ * first, we restore the PMD
+ */
+ mask = ctx->ctx_used_pmds[0];
+ for (i = 0; mask; i++, mask>>=1) {
+ /* skip non used pmds */
+ if ((mask & 0x1) == 0) continue;
+
+ if (PMD_IS_COUNTING(i)) {
+ /*
+ * we split the 64bit value according to
+ * counter width
+ */
+ val = ctx->ctx_pmds[i].val & ovfl_mask;
+ ctx->ctx_pmds[i].val &= ~ovfl_mask;
+ } else {
+ val = ctx->ctx_pmds[i].val;
+ }
+ ia64_set_pmd(i, val);
+
+ DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
+ i,
+ ctx->ctx_pmds[i].val,
+ val));
+ }
+ /*
+ * restore the PMCs
+ */
+ mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
+ for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
+ if ((mask & 0x1) == 0UL) continue;
+ th->pmcs[i] = ctx->ctx_pmcs[i];
+ ia64_set_pmc(i, th->pmcs[i]);
+ DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
+ }
+ ia64_srlz_d();
+
+ /*
+ * must restore DBR/IBR because could be modified while masked
+ * XXX: need to optimize
+ */
+ if (ctx->ctx_fl_using_dbreg) {
+ pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
+ pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
+ }
+
+ /*
+ * now restore PSR
+ */
+ if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
+ /* enable dcr pp */
+ ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
+ ia64_srlz_i();
+ }
+ pfm_set_psr_l(psr);
+}
+
+static inline void
+pfm_save_pmds(unsigned long *pmds, unsigned long mask)
+{
+ int i;
+
+ ia64_srlz_d();
+
+ for (i=0; mask; i++, mask>>=1) {
+ if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
+ }
+}
+
+/*
+ * reload from thread state (used for ctxw only)
+ */
+static inline void
+pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
+{
+ int i;
+ unsigned long val, ovfl_val = pmu_conf->ovfl_val;
+
+ for (i=0; mask; i++, mask>>=1) {
+ if ((mask & 0x1) == 0) continue;
+ val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
+ ia64_set_pmd(i, val);
+ }
+ ia64_srlz_d();
+}
+
+/*
+ * propagate PMD from context to thread-state
+ */
+static inline void
+pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
+{
+ struct thread_struct *thread = &task->thread;
+ unsigned long ovfl_val = pmu_conf->ovfl_val;
+ unsigned long mask = ctx->ctx_all_pmds[0];
+ unsigned long val;
+ int i;
+
+ DPRINT(("mask=0x%lx\n", mask));
+
+ for (i=0; mask; i++, mask>>=1) {
+
+ val = ctx->ctx_pmds[i].val;
+
+ /*
+ * We break up the 64 bit value into 2 pieces
+ * the lower bits go to the machine state in the
+ * thread (will be reloaded on ctxsw in).
+ * The upper part stays in the soft-counter.
+ */
+ if (PMD_IS_COUNTING(i)) {
+ ctx->ctx_pmds[i].val = val & ~ovfl_val;
+ val &= ovfl_val;
+ }
+ thread->pmds[i] = val;
+
+ DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
+ i,
+ th