diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ia64/kernel/perfmon.c |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/ia64/kernel/perfmon.c')
-rw-r--r-- | arch/ia64/kernel/perfmon.c | 6676 |
1 files changed, 6676 insertions, 0 deletions
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c new file mode 100644 index 00000000000..71147be3279 --- /dev/null +++ b/arch/ia64/kernel/perfmon.c @@ -0,0 +1,6676 @@ +/* + * This file implements the perfmon-2 subsystem which is used + * to program the IA-64 Performance Monitoring Unit (PMU). + * + * The initial version of perfmon.c was written by + * Ganesh Venkitachalam, IBM Corp. + * + * Then it was modified for perfmon-1.x by Stephane Eranian and + * David Mosberger, Hewlett Packard Co. + * + * Version Perfmon-2.x is a rewrite of perfmon-1.x + * by Stephane Eranian, Hewlett Packard Co. + * + * Copyright (C) 1999-2003, 2005 Hewlett Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * More information about perfmon available at: + * http://www.hpl.hp.com/research/linux/perfmon + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/smp_lock.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/sysctl.h> +#include <linux/list.h> +#include <linux/file.h> +#include <linux/poll.h> +#include <linux/vfs.h> +#include <linux/pagemap.h> +#include <linux/mount.h> +#include <linux/version.h> +#include <linux/bitops.h> + +#include <asm/errno.h> +#include <asm/intrinsics.h> +#include <asm/page.h> +#include <asm/perfmon.h> +#include <asm/processor.h> +#include <asm/signal.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/delay.h> + +#ifdef CONFIG_PERFMON +/* + * perfmon context state + */ +#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */ +#define PFM_CTX_LOADED 2 /* context is loaded onto a task */ +#define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */ +#define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */ + +#define PFM_INVALID_ACTIVATION (~0UL) + +/* + * depth of message queue + */ +#define PFM_MAX_MSGS 32 +#define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail) + +/* + * type of a PMU register (bitmask). + * bitmask structure: + * bit0 : register implemented + * bit1 : end marker + * bit2-3 : reserved + * bit4 : pmc has pmc.pm + * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter + * bit6-7 : register type + * bit8-31: reserved + */ +#define PFM_REG_NOTIMPL 0x0 /* not implemented at all */ +#define PFM_REG_IMPL 0x1 /* register implemented */ +#define PFM_REG_END 0x2 /* end marker */ +#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */ +#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */ +#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */ +#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */ +#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */ + +#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END) +#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END) + +#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY) + +/* i assumed unsigned */ +#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL)) +#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL)) + +/* XXX: these assume that register i is implemented */ +#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) +#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) +#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR) +#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL) + +#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value +#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask +#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0] +#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0] + +#define PFM_NUM_IBRS IA64_NUM_DBG_REGS +#define PFM_NUM_DBRS IA64_NUM_DBG_REGS + +#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0) +#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling) +#define PFM_CTX_TASK(h) (h)->ctx_task + +#define PMU_PMC_OI 5 /* position of pmc.oi bit */ + +/* XXX: does not support more than 64 PMDs */ +#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask) +#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL) + +#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask) + +#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64) +#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64) +#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1) +#define PFM_CODE_RR 0 /* requesting code range restriction */ +#define PFM_DATA_RR 1 /* requestion data range restriction */ + +#define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v) +#define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v) +#define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info) + +#define RDEP(x) (1UL<<(x)) + +/* + * context protection macros + * in SMP: + * - we need to protect against CPU concurrency (spin_lock) + * - we need to protect against PMU overflow interrupts (local_irq_disable) + * in UP: + * - we need to protect against PMU overflow interrupts (local_irq_disable) + * + * spin_lock_irqsave()/spin_lock_irqrestore(): + * in SMP: local_irq_disable + spin_lock + * in UP : local_irq_disable + * + * spin_lock()/spin_lock(): + * in UP : removed automatically + * in SMP: protect against context accesses from other CPU. interrupts + * are not masked. This is useful for the PMU interrupt handler + * because we know we will not get PMU concurrency in that code. + */ +#define PROTECT_CTX(c, f) \ + do { \ + DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \ + spin_lock_irqsave(&(c)->ctx_lock, f); \ + DPRINT(("spinlocked ctx %p by [%d]\n", c, current->pid)); \ + } while(0) + +#define UNPROTECT_CTX(c, f) \ + do { \ + DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \ + spin_unlock_irqrestore(&(c)->ctx_lock, f); \ + } while(0) + +#define PROTECT_CTX_NOPRINT(c, f) \ + do { \ + spin_lock_irqsave(&(c)->ctx_lock, f); \ + } while(0) + + +#define UNPROTECT_CTX_NOPRINT(c, f) \ + do { \ + spin_unlock_irqrestore(&(c)->ctx_lock, f); \ + } while(0) + + +#define PROTECT_CTX_NOIRQ(c) \ + do { \ + spin_lock(&(c)->ctx_lock); \ + } while(0) + +#define UNPROTECT_CTX_NOIRQ(c) \ + do { \ + spin_unlock(&(c)->ctx_lock); \ + } while(0) + + +#ifdef CONFIG_SMP + +#define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number) +#define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++ +#define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION() + +#else /* !CONFIG_SMP */ +#define SET_ACTIVATION(t) do {} while(0) +#define GET_ACTIVATION(t) do {} while(0) +#define INC_ACTIVATION(t) do {} while(0) +#endif /* CONFIG_SMP */ + +#define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0) +#define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner) +#define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx) + +#define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g) +#define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g) + +#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0) + +/* + * cmp0 must be the value of pmc0 + */ +#define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL) + +#define PFMFS_MAGIC 0xa0b4d889 + +/* + * debugging + */ +#define PFM_DEBUGGING 1 +#ifdef PFM_DEBUGGING +#define DPRINT(a) \ + do { \ + if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \ + } while (0) + +#define DPRINT_ovfl(a) \ + do { \ + if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \ + } while (0) +#endif + +/* + * 64-bit software counter structure + * + * the next_reset_type is applied to the next call to pfm_reset_regs() + */ +typedef struct { + unsigned long val; /* virtual 64bit counter value */ + unsigned long lval; /* last reset value */ + unsigned long long_reset; /* reset value on sampling overflow */ + unsigned long short_reset; /* reset value on overflow */ + unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */ + unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */ + unsigned long seed; /* seed for random-number generator */ + unsigned long mask; /* mask for random-number generator */ + unsigned int flags; /* notify/do not notify */ + unsigned long eventid; /* overflow event identifier */ +} pfm_counter_t; + +/* + * context flags + */ +typedef struct { + unsigned int block:1; /* when 1, task will blocked on user notifications */ + unsigned int system:1; /* do system wide monitoring */ + unsigned int using_dbreg:1; /* using range restrictions (debug registers) */ + unsigned int is_sampling:1; /* true if using a custom format */ + unsigned int excl_idle:1; /* exclude idle task in system wide session */ + unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */ + unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */ + unsigned int no_msg:1; /* no message sent on overflow */ + unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */ + unsigned int reserved:22; +} pfm_context_flags_t; + +#define PFM_TRAP_REASON_NONE 0x0 /* default value */ +#define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */ +#define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */ + + +/* + * perfmon context: encapsulates all the state of a monitoring session + */ + +typedef struct pfm_context { + spinlock_t ctx_lock; /* context protection */ + + pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */ + unsigned int ctx_state; /* state: active/inactive (no bitfield) */ + + struct task_struct *ctx_task; /* task to which context is attached */ + + unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */ + + struct semaphore ctx_restart_sem; /* use for blocking notification mode */ + + unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */ + unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */ + unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */ + + unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */ + unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ + unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ + + unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */ + + unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ + unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ + unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ + unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ + + pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */ + + u64 ctx_saved_psr_up; /* only contains psr.up value */ + + unsigned long ctx_last_activation; /* context last activation number for last_cpu */ + unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */ + unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */ + + int ctx_fd; /* file descriptor used my this context */ + pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */ + + pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */ + void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */ + unsigned long ctx_smpl_size; /* size of sampling buffer */ + void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */ + + wait_queue_head_t ctx_msgq_wait; + pfm_msg_t ctx_msgq[PFM_MAX_MSGS]; + int ctx_msgq_head; + int ctx_msgq_tail; + struct fasync_struct *ctx_async_queue; + + wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */ +} pfm_context_t; + +/* + * magic number used to verify that structure is really + * a perfmon context + */ +#define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops) + +#define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context) + +#ifdef CONFIG_SMP +#define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v) +#define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu +#else +#define SET_LAST_CPU(ctx, v) do {} while(0) +#define GET_LAST_CPU(ctx) do {} while(0) +#endif + + +#define ctx_fl_block ctx_flags.block +#define ctx_fl_system ctx_flags.system +#define ctx_fl_using_dbreg ctx_flags.using_dbreg +#define ctx_fl_is_sampling ctx_flags.is_sampling +#define ctx_fl_excl_idle ctx_flags.excl_idle +#define ctx_fl_going_zombie ctx_flags.going_zombie +#define ctx_fl_trap_reason ctx_flags.trap_reason +#define ctx_fl_no_msg ctx_flags.no_msg +#define ctx_fl_can_restart ctx_flags.can_restart + +#define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0); +#define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking + +/* + * global information about all sessions + * mostly used to synchronize between system wide and per-process + */ +typedef struct { + spinlock_t pfs_lock; /* lock the structure */ + + unsigned int pfs_task_sessions; /* number of per task sessions */ + unsigned int pfs_sys_sessions; /* number of per system wide sessions */ + unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */ + unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */ + struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */ +} pfm_session_t; + +/* + * information about a PMC or PMD. + * dep_pmd[]: a bitmask of dependent PMD registers + * dep_pmc[]: a bitmask of dependent PMC registers + */ +typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); +typedef struct { + unsigned int type; + int pm_pos; + unsigned long default_value; /* power-on default value */ + unsigned long reserved_mask; /* bitmask of reserved bits */ + pfm_reg_check_t read_check; + pfm_reg_check_t write_check; + unsigned long dep_pmd[4]; + unsigned long dep_pmc[4]; +} pfm_reg_desc_t; + +/* assume cnum is a valid monitor */ +#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1) + +/* + * This structure is initialized at boot time and contains + * a description of the PMU main characteristics. + * + * If the probe function is defined, detection is based + * on its return value: + * - 0 means recognized PMU + * - anything else means not supported + * When the probe function is not defined, then the pmu_family field + * is used and it must match the host CPU family such that: + * - cpu->family & config->pmu_family != 0 + */ +typedef struct { + unsigned long ovfl_val; /* overflow value for counters */ + + pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */ + pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */ + + unsigned int num_pmcs; /* number of PMCS: computed at init time */ + unsigned int num_pmds; /* number of PMDS: computed at init time */ + unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */ + unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ + + char *pmu_name; /* PMU family name */ + unsigned int pmu_family; /* cpuid family pattern used to identify pmu */ + unsigned int flags; /* pmu specific flags */ + unsigned int num_ibrs; /* number of IBRS: computed at init time */ + unsigned int num_dbrs; /* number of DBRS: computed at init time */ + unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */ + int (*probe)(void); /* customized probe routine */ + unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */ +} pmu_config_t; +/* + * PMU specific flags + */ +#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */ + +/* + * debug register related type definitions + */ +typedef struct { + unsigned long ibr_mask:56; + unsigned long ibr_plm:4; + unsigned long ibr_ig:3; + unsigned long ibr_x:1; +} ibr_mask_reg_t; + +typedef struct { + unsigned long dbr_mask:56; + unsigned long dbr_plm:4; + unsigned long dbr_ig:2; + unsigned long dbr_w:1; + unsigned long dbr_r:1; +} dbr_mask_reg_t; + +typedef union { + unsigned long val; + ibr_mask_reg_t ibr; + dbr_mask_reg_t dbr; +} dbreg_t; + + +/* + * perfmon command descriptions + */ +typedef struct { + int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); + char *cmd_name; + int cmd_flags; + unsigned int cmd_narg; + size_t cmd_argsize; + int (*cmd_getsize)(void *arg, size_t *sz); +} pfm_cmd_desc_t; + +#define PFM_CMD_FD 0x01 /* command requires a file descriptor */ +#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */ +#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */ +#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */ + + +#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name +#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ) +#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW) +#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD) +#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP) + +#define PFM_CMD_ARG_MANY -1 /* cannot be zero */ + +typedef struct { + int debug; /* turn on/off debugging via syslog */ + int debug_ovfl; /* turn on/off debug printk in overflow handler */ + int fastctxsw; /* turn on/off fast (unsecure) ctxsw */ + int expert_mode; /* turn on/off value checking */ + int debug_pfm_read; +} pfm_sysctl_t; + +typedef struct { + unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */ + unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */ + unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */ + unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */ + unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing ovfl interrupts */ + unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */ + unsigned long pfm_smpl_handler_calls; + unsigned long pfm_smpl_handler_cycles; + char pad[SMP_CACHE_BYTES] ____cacheline_aligned; +} pfm_stats_t; + +/* + * perfmon internal variables + */ +static pfm_stats_t pfm_stats[NR_CPUS]; +static pfm_session_t pfm_sessions; /* global sessions information */ + +static struct proc_dir_entry *perfmon_dir; +static pfm_uuid_t pfm_null_uuid = {0,}; + +static spinlock_t pfm_buffer_fmt_lock; +static LIST_HEAD(pfm_buffer_fmt_list); + +static pmu_config_t *pmu_conf; + +/* sysctl() controls */ +static pfm_sysctl_t pfm_sysctl; +int pfm_debug_var; + +static ctl_table pfm_ctl_table[]={ + {1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,}, + {2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,}, + {3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,}, + {4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,}, + { 0, }, +}; +static ctl_table pfm_sysctl_dir[] = { + {1, "perfmon", NULL, 0, 0755, pfm_ctl_table, }, + {0,}, +}; +static ctl_table pfm_sysctl_root[] = { + {1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, }, + {0,}, +}; +static struct ctl_table_header *pfm_sysctl_header; + +static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); +static int pfm_flush(struct file *filp); + +#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v) +#define pfm_get_cpu_data(a,b) per_cpu(a, b) + +static inline void +pfm_put_task(struct task_struct *task) +{ + if (task != current) put_task_struct(task); +} + +static inline void +pfm_set_task_notify(struct task_struct *task) +{ + struct thread_info *info; + + info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE); + set_bit(TIF_NOTIFY_RESUME, &info->flags); +} + +static inline void +pfm_clear_task_notify(void) +{ + clear_thread_flag(TIF_NOTIFY_RESUME); +} + +static inline void +pfm_reserve_page(unsigned long a) +{ + SetPageReserved(vmalloc_to_page((void *)a)); +} +static inline void +pfm_unreserve_page(unsigned long a) +{ + ClearPageReserved(vmalloc_to_page((void*)a)); +} + +static inline unsigned long +pfm_protect_ctx_ctxsw(pfm_context_t *x) +{ + spin_lock(&(x)->ctx_lock); + return 0UL; +} + +static inline unsigned long +pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) +{ + spin_unlock(&(x)->ctx_lock); +} + +static inline unsigned int +pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct) +{ + return do_munmap(mm, addr, len); +} + +static inline unsigned long +pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec) +{ + return get_unmapped_area(file, addr, len, pgoff, flags); +} + + +static struct super_block * +pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) +{ + return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC); +} + +static struct file_system_type pfm_fs_type = { + .name = "pfmfs", + .get_sb = pfmfs_get_sb, + .kill_sb = kill_anon_super, +}; + +DEFINE_PER_CPU(unsigned long, pfm_syst_info); +DEFINE_PER_CPU(struct task_struct *, pmu_owner); +DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); +DEFINE_PER_CPU(unsigned long, pmu_activation_number); + + +/* forward declaration */ +static struct file_operations pfm_file_ops; + +/* + * forward declarations + */ +#ifndef CONFIG_SMP +static void pfm_lazy_save_regs (struct task_struct *ta); +#endif + +void dump_pmu_state(const char *); +static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); + +#include "perfmon_itanium.h" +#include "perfmon_mckinley.h" +#include "perfmon_generic.h" + +static pmu_config_t *pmu_confs[]={ + &pmu_conf_mck, + &pmu_conf_ita, + &pmu_conf_gen, /* must be last */ + NULL +}; + + +static int pfm_end_notify_user(pfm_context_t *ctx); + +static inline void +pfm_clear_psr_pp(void) +{ + ia64_rsm(IA64_PSR_PP); + ia64_srlz_i(); +} + +static inline void +pfm_set_psr_pp(void) +{ + ia64_ssm(IA64_PSR_PP); + ia64_srlz_i(); +} + +static inline void +pfm_clear_psr_up(void) +{ + ia64_rsm(IA64_PSR_UP); + ia64_srlz_i(); +} + +static inline void +pfm_set_psr_up(void) +{ + ia64_ssm(IA64_PSR_UP); + ia64_srlz_i(); +} + +static inline unsigned long +pfm_get_psr(void) +{ + unsigned long tmp; + tmp = ia64_getreg(_IA64_REG_PSR); + ia64_srlz_i(); + return tmp; +} + +static inline void +pfm_set_psr_l(unsigned long val) +{ + ia64_setreg(_IA64_REG_PSR_L, val); + ia64_srlz_i(); +} + +static inline void +pfm_freeze_pmu(void) +{ + ia64_set_pmc(0,1UL); + ia64_srlz_d(); +} + +static inline void +pfm_unfreeze_pmu(void) +{ + ia64_set_pmc(0,0UL); + ia64_srlz_d(); +} + +static inline void +pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs) +{ + int i; + + for (i=0; i < nibrs; i++) { + ia64_set_ibr(i, ibrs[i]); + ia64_dv_serialize_instruction(); + } + ia64_srlz_i(); +} + +static inline void +pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs) +{ + int i; + + for (i=0; i < ndbrs; i++) { + ia64_set_dbr(i, dbrs[i]); + ia64_dv_serialize_data(); + } + ia64_srlz_d(); +} + +/* + * PMD[i] must be a counter. no check is made + */ +static inline unsigned long +pfm_read_soft_counter(pfm_context_t *ctx, int i) +{ + return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val); +} + +/* + * PMD[i] must be a counter. no check is made + */ +static inline void +pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val) +{ + unsigned long ovfl_val = pmu_conf->ovfl_val; + + ctx->ctx_pmds[i].val = val & ~ovfl_val; + /* + * writing to unimplemented part is ignore, so we do not need to + * mask off top part + */ + ia64_set_pmd(i, val & ovfl_val); +} + +static pfm_msg_t * +pfm_get_new_msg(pfm_context_t *ctx) +{ + int idx, next; + + next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS; + + DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); + if (next == ctx->ctx_msgq_head) return NULL; + + idx = ctx->ctx_msgq_tail; + ctx->ctx_msgq_tail = next; + + DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx)); + + return ctx->ctx_msgq+idx; +} + +static pfm_msg_t * +pfm_get_next_msg(pfm_context_t *ctx) +{ + pfm_msg_t *msg; + + DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); + + if (PFM_CTXQ_EMPTY(ctx)) return NULL; + + /* + * get oldest message + */ + msg = ctx->ctx_msgq+ctx->ctx_msgq_head; + + /* + * and move forward + */ + ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS; + + DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type)); + + return msg; +} + +static void +pfm_reset_msgq(pfm_context_t *ctx) +{ + ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; + DPRINT(("ctx=%p msgq reset\n", ctx)); +} + +static void * +pfm_rvmalloc(unsigned long size) +{ + void *mem; + unsigned long addr; + + size = PAGE_ALIGN(size); + mem = vmalloc(size); + if (mem) { + //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem); + memset(mem, 0, size); + addr = (unsigned long)mem; + while (size > 0) { + pfm_reserve_page(addr); + addr+=PAGE_SIZE; + size-=PAGE_SIZE; + } + } + return mem; +} + +static void +pfm_rvfree(void *mem, unsigned long size) +{ + unsigned long addr; + + if (mem) { + DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size)); + addr = (unsigned long) mem; + while ((long) size > 0) { + pfm_unreserve_page(addr); + addr+=PAGE_SIZE; + size-=PAGE_SIZE; + } + vfree(mem); + } + return; +} + +static pfm_context_t * +pfm_context_alloc(void) +{ + pfm_context_t *ctx; + + /* + * allocate context descriptor + * must be able to free with interrupts disabled + */ + ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL); + if (ctx) { + memset(ctx, 0, sizeof(pfm_context_t)); + DPRINT(("alloc ctx @%p\n", ctx)); + } + return ctx; +} + +static void +pfm_context_free(pfm_context_t *ctx) +{ + if (ctx) { + DPRINT(("free ctx @%p\n", ctx)); + kfree(ctx); + } +} + +static void +pfm_mask_monitoring(struct task_struct *task) +{ + pfm_context_t *ctx = PFM_GET_CTX(task); + struct thread_struct *th = &task->thread; + unsigned long mask, val, ovfl_mask; + int i; + + DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid)); + + ovfl_mask = pmu_conf->ovfl_val; + /* + * monitoring can only be masked as a result of a valid + * counter overflow. In UP, it means that the PMU still + * has an owner. Note that the owner can be different + * from the current task. However the PMU state belongs + * to the owner. + * In SMP, a valid overflow only happens when task is + * current. Therefore if we come here, we know that + * the PMU state belongs to the current task, therefore + * we can access the live registers. + * + * So in both cases, the live register contains the owner's + * state. We can ONLY touch the PMU registers and NOT the PSR. + * + * As a consequence to this call, the thread->pmds[] array + * contains stale information which must be ignored + * when context is reloaded AND monitoring is active (see + * pfm_restart). + */ + mask = ctx->ctx_used_pmds[0]; + for (i = 0; mask; i++, mask>>=1) { + /* skip non used pmds */ + if ((mask & 0x1) == 0) continue; + val = ia64_get_pmd(i); + + if (PMD_IS_COUNTING(i)) { + /* + * we rebuild the full 64 bit value of the counter + */ + ctx->ctx_pmds[i].val += (val & ovfl_mask); + } else { + ctx->ctx_pmds[i].val = val; + } + DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", + i, + ctx->ctx_pmds[i].val, + val & ovfl_mask)); + } + /* + * mask monitoring by setting the privilege level to 0 + * we cannot use psr.pp/psr.up for this, it is controlled by + * the user + * + * if task is current, modify actual registers, otherwise modify + * thread save state, i.e., what will be restored in pfm_load_regs() + */ + mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; + for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { + if ((mask & 0x1) == 0UL) continue; + ia64_set_pmc(i, th->pmcs[i] & ~0xfUL); + th->pmcs[i] &= ~0xfUL; + DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i])); + } + /* + * make all of this visible + */ + ia64_srlz_d(); +} + +/* + * must always be done with task == current + * + * context must be in MASKED state when calling + */ +static void +pfm_restore_monitoring(struct task_struct *task) +{ + pfm_context_t *ctx = PFM_GET_CTX(task); + struct thread_struct *th = &task->thread; + unsigned long mask, ovfl_mask; + unsigned long psr, val; + int i, is_system; + + is_system = ctx->ctx_fl_system; + ovfl_mask = pmu_conf->ovfl_val; + + if (task != current) { + printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid); + return; + } + if (ctx->ctx_state != PFM_CTX_MASKED) { + printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__, + task->pid, current->pid, ctx->ctx_state); + return; + } + psr = pfm_get_psr(); + /* + * monitoring is masked via the PMC. + * As we restore their value, we do not want each counter to + * restart right away. We stop monitoring using the PSR, + * restore the PMC (and PMD) and then re-establish the psr + * as it was. Note that there can be no pending overflow at + * this point, because monitoring was MASKED. + * + * system-wide session are pinned and self-monitoring + */ + if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { + /* disable dcr pp */ + ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); + pfm_clear_psr_pp(); + } else { + pfm_clear_psr_up(); + } + /* + * first, we restore the PMD + */ + mask = ctx->ctx_used_pmds[0]; + for (i = 0; mask; i++, mask>>=1) { + /* skip non used pmds */ + if ((mask & 0x1) == 0) continue; + + if (PMD_IS_COUNTING(i)) { + /* + * we split the 64bit value according to + * counter width + */ + val = ctx->ctx_pmds[i].val & ovfl_mask; + ctx->ctx_pmds[i].val &= ~ovfl_mask; + } else { + val = ctx->ctx_pmds[i].val; + } + ia64_set_pmd(i, val); + + DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", + i, + ctx->ctx_pmds[i].val, + val)); + } + /* + * restore the PMCs + */ + mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; + for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { + if ((mask & 0x1) == 0UL) continue; + th->pmcs[i] = ctx->ctx_pmcs[i]; + ia64_set_pmc(i, th->pmcs[i]); + DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i])); + } + ia64_srlz_d(); + + /* + * must restore DBR/IBR because could be modified while masked + * XXX: need to optimize + */ + if (ctx->ctx_fl_using_dbreg) { + pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); + pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); + } + + /* + * now restore PSR + */ + if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { + /* enable dcr pp */ + ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); + ia64_srlz_i(); + } + pfm_set_psr_l(psr); +} + +static inline void +pfm_save_pmds(unsigned long *pmds, unsigned long mask) +{ + int i; + + ia64_srlz_d(); + + for (i=0; mask; i++, mask>>=1) { + if (mask & 0x1) pmds[i] = ia64_get_pmd(i); + } +} + +/* + * reload from thread state (used for ctxw only) + */ +static inline void +pfm_restore_pmds(unsigned long *pmds, unsigned long mask) +{ + int i; + unsigned long val, ovfl_val = pmu_conf->ovfl_val; + + for (i=0; mask; i++, mask>>=1) { + if ((mask & 0x1) == 0) continue; + val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i]; + ia64_set_pmd(i, val); + } + ia64_srlz_d(); +} + +/* + * propagate PMD from context to thread-state + */ +static inline void +pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) +{ + struct thread_struct *thread = &task->thread; + unsigned long ovfl_val = pmu_conf->ovfl_val; + unsigned long mask = ctx->ctx_all_pmds[0]; + unsigned long val; + int i; + + DPRINT(("mask=0x%lx\n", mask)); + + for (i=0; mask; i++, mask>>=1) { + + val = ctx->ctx_pmds[i].val; + + /* + * We break up the 64 bit value into 2 pieces + * the lower bits go to the machine state in the + * thread (will be reloaded on ctxsw in). + * The upper part stays in the soft-counter. + */ + if (PMD_IS_COUNTING(i)) { + ctx->ctx_pmds[i].val = val & ~ovfl_val; + val &= ovfl_val; + } + thread->pmds[i] = val; + + DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", + i, + th |