diff options
-rw-r--r-- | arch/x86/include/asm/ds.h | 241 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 13 | ||||
-rw-r--r-- | arch/x86/include/asm/ptrace.h | 36 | ||||
-rw-r--r-- | arch/x86/include/asm/thread_info.h | 5 | ||||
-rw-r--r-- | arch/x86/kernel/apic.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/ds.c | 859 | ||||
-rw-r--r-- | arch/x86/kernel/irq_64.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 59 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 50 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 415 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux_32.lds.S | 1 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux_64.lds.S | 1 | ||||
-rw-r--r-- | include/asm-generic/vmlinux.lds.h | 10 | ||||
-rw-r--r-- | include/linux/ftrace.h | 11 | ||||
-rw-r--r-- | include/linux/sched.h | 1 | ||||
-rw-r--r-- | include/trace/sched.h | 4 | ||||
-rw-r--r-- | kernel/sched.c | 3 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 4 | ||||
-rw-r--r-- | kernel/trace/Makefile | 2 | ||||
-rw-r--r-- | kernel/trace/trace.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace.h | 14 | ||||
-rw-r--r-- | kernel/trace/trace_functions_graph.c | 66 | ||||
-rw-r--r-- | kernel/trace/trace_hw_branches.c (renamed from kernel/trace/trace_bts.c) | 163 | ||||
-rwxr-xr-x | scripts/recordmcount.pl | 1 |
25 files changed, 962 insertions, 1009 deletions
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 99b6c39774a..ee0ea3a96c1 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -6,13 +6,13 @@ * precise-event based sampling (PEBS). * * It manages: - * - per-thread and per-cpu allocation of BTS and PEBS + * - DS and BTS hardware configuration * - buffer overflow handling (to be done) * - buffer access * - * It assumes: - * - get_task_struct on all traced tasks - * - current is allowed to trace tasks + * It does not do: + * - security checking (is the caller allowed to trace the task) + * - buffer allocation (memory accounting) * * * Copyright (C) 2007-2008 Intel Corporation. @@ -31,6 +31,7 @@ #ifdef CONFIG_X86_DS struct task_struct; +struct ds_context; struct ds_tracer; struct bts_tracer; struct pebs_tracer; @@ -38,6 +39,38 @@ struct pebs_tracer; typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); + +/* + * A list of features plus corresponding macros to talk about them in + * the ds_request function's flags parameter. + * + * We use the enum to index an array of corresponding control bits; + * we use the macro to index a flags bit-vector. + */ +enum ds_feature { + dsf_bts = 0, + dsf_bts_kernel, +#define BTS_KERNEL (1 << dsf_bts_kernel) + /* trace kernel-mode branches */ + + dsf_bts_user, +#define BTS_USER (1 << dsf_bts_user) + /* trace user-mode branches */ + + dsf_bts_overflow, + dsf_bts_max, + dsf_pebs = dsf_bts_max, + + dsf_pebs_max, + dsf_ctl_max = dsf_pebs_max, + dsf_bts_timestamps = dsf_ctl_max, +#define BTS_TIMESTAMPS (1 << dsf_bts_timestamps) + /* add timestamps into BTS trace */ + +#define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS) +}; + + /* * Request BTS or PEBS * @@ -58,92 +91,135 @@ typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); * NULL if cyclic buffer requested * th: the interrupt threshold in records from the end of the buffer; * -1 if no interrupt threshold is requested. + * flags: a bit-mask of the above flags */ extern struct bts_tracer *ds_request_bts(struct task_struct *task, void *base, size_t size, - bts_ovfl_callback_t ovfl, size_t th); + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags); extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, void *base, size_t size, pebs_ovfl_callback_t ovfl, - size_t th); + size_t th, unsigned int flags); /* * Release BTS or PEBS resources - * - * Returns 0 on success; -Eerrno otherwise + * Suspend and resume BTS or PEBS tracing * * tracer: the tracer handle returned from ds_request_~() */ -extern int ds_release_bts(struct bts_tracer *tracer); -extern int ds_release_pebs(struct pebs_tracer *tracer); +extern void ds_release_bts(struct bts_tracer *tracer); +extern void ds_suspend_bts(struct bts_tracer *tracer); +extern void ds_resume_bts(struct bts_tracer *tracer); +extern void ds_release_pebs(struct pebs_tracer *tracer); +extern void ds_suspend_pebs(struct pebs_tracer *tracer); +extern void ds_resume_pebs(struct pebs_tracer *tracer); + /* - * Get the (array) index of the write pointer. - * (assuming an array of BTS/PEBS records) - * - * Returns 0 on success; -Eerrno on error + * The raw DS buffer state as it is used for BTS and PEBS recording. * - * tracer: the tracer handle returned from ds_request_~() - * pos (out): will hold the result + * This is the low-level, arch-dependent interface for working + * directly on the raw trace data. */ -extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos); -extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos); +struct ds_trace { + /* the number of bts/pebs records */ + size_t n; + /* the size of a bts/pebs record in bytes */ + size_t size; + /* pointers into the raw buffer: + - to the first entry */ + void *begin; + /* - one beyond the last entry */ + void *end; + /* - one beyond the newest entry */ + void *top; + /* - the interrupt threshold */ + void *ith; + /* flags given on ds_request() */ + unsigned int flags; +}; /* - * Get the (array) index one record beyond the end of the array. - * (assuming an array of BTS/PEBS records) - * - * Returns 0 on success; -Eerrno on error - * - * tracer: the tracer handle returned from ds_request_~() - * pos (out): will hold the result + * An arch-independent view on branch trace data. */ -extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos); -extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos); +enum bts_qualifier { + bts_invalid, +#define BTS_INVALID bts_invalid + + bts_branch, +#define BTS_BRANCH bts_branch + + bts_task_arrives, +#define BTS_TASK_ARRIVES bts_task_arrives + + bts_task_departs, +#define BTS_TASK_DEPARTS bts_task_departs + + bts_qual_bit_size = 4, + bts_qual_max = (1 << bts_qual_bit_size), +}; + +struct bts_struct { + __u64 qualifier; + union { + /* BTS_BRANCH */ + struct { + __u64 from; + __u64 to; + } lbr; + /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ + struct { + __u64 jiffies; + pid_t pid; + } timestamp; + } variant; +}; + /* - * Provide a pointer to the BTS/PEBS record at parameter index. - * (assuming an array of BTS/PEBS records) - * - * The pointer points directly into the buffer. The user is - * responsible for copying the record. - * - * Returns the size of a single record on success; -Eerrno on error + * The BTS state. * - * tracer: the tracer handle returned from ds_request_~() - * index: the index of the requested record - * record (out): pointer to the requested record + * This gives access to the raw DS state and adds functions to provide + * an arch-independent view of the BTS data. */ -extern int ds_access_bts(struct bts_tracer *tracer, - size_t index, const void **record); -extern int ds_access_pebs(struct pebs_tracer *tracer, - size_t index, const void **record); +struct bts_trace { + struct ds_trace ds; + + int (*read)(struct bts_tracer *tracer, const void *at, + struct bts_struct *out); + int (*write)(struct bts_tracer *tracer, const struct bts_struct *in); +}; + /* - * Write one or more BTS/PEBS records at the write pointer index and - * advance the write pointer. + * The PEBS state. * - * If size is not a multiple of the record size, trailing bytes are - * zeroed out. - * - * May result in one or more overflow notifications. - * - * If called during overflow handling, that is, with index >= - * interrupt threshold, the write will wrap around. + * This gives access to the raw DS state and the PEBS-specific counter + * reset value. + */ +struct pebs_trace { + struct ds_trace ds; + + /* the PEBS reset value */ + unsigned long long reset_value; +}; + + +/* + * Read the BTS or PEBS trace. * - * An overflow notification is given if and when the interrupt - * threshold is reached during or after the write. + * Returns a view on the trace collected for the parameter tracer. * - * Returns the number of bytes written or -Eerrno. + * The view remains valid as long as the traced task is not running or + * the tracer is suspended. + * Writes into the trace buffer are not reflected. * * tracer: the tracer handle returned from ds_request_~() - * buffer: the buffer to write - * size: the size of the buffer */ -extern int ds_write_bts(struct bts_tracer *tracer, - const void *buffer, size_t size); -extern int ds_write_pebs(struct pebs_tracer *tracer, - const void *buffer, size_t size); +extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer); +extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer); + /* * Reset the write pointer of the BTS/PEBS buffer. @@ -156,27 +232,6 @@ extern int ds_reset_bts(struct bts_tracer *tracer); extern int ds_reset_pebs(struct pebs_tracer *tracer); /* - * Clear the BTS/PEBS buffer and reset the write pointer. - * The entire buffer will be zeroed out. - * - * Returns 0 on success; -Eerrno on error - * - * tracer: the tracer handle returned from ds_request_~() - */ -extern int ds_clear_bts(struct bts_tracer *tracer); -extern int ds_clear_pebs(struct pebs_tracer *tracer); - -/* - * Provide the PEBS counter reset value. - * - * Returns 0 on success; -Eerrno on error - * - * tracer: the tracer handle returned from ds_request_pebs() - * value (out): the counter reset value - */ -extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value); - -/* * Set the PEBS counter reset value. * * Returns 0 on success; -Eerrno on error @@ -192,35 +247,17 @@ extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); struct cpuinfo_x86; extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); - - /* - * The DS context - part of struct thread_struct. + * Context switch work */ -#define MAX_SIZEOF_DS (12 * 8) - -struct ds_context { - /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ - unsigned char ds[MAX_SIZEOF_DS]; - /* the owner of the BTS and PEBS configuration, respectively */ - struct ds_tracer *owner[2]; - /* use count */ - unsigned long count; - /* a pointer to the context location inside the thread_struct - * or the per_cpu context array */ - struct ds_context **this; - /* a pointer to the task owning this context, or NULL, if the - * context is owned by a cpu */ - struct task_struct *task; -}; - -/* called by exit_thread() to free leftover contexts */ -extern void ds_free(struct ds_context *context); +extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); #else /* CONFIG_X86_DS */ struct cpuinfo_x86; static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} +static inline void ds_switch_to(struct task_struct *prev, + struct task_struct *next) {} #endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5ca01e38326..aa5914f8e50 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -752,6 +752,19 @@ extern void switch_to_new_gdt(void); extern void cpu_init(void); extern void init_gdt(int cpu); +static inline unsigned long get_debugctlmsr(void) +{ + unsigned long debugctlmsr = 0; + +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return 0; +#endif + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); + + return debugctlmsr; +} + static inline void update_debugctlmsr(unsigned long debugctlmsr) { #ifndef CONFIG_X86_DEBUGCTLMSR diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index eefb0594b05..fbf74421591 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -6,7 +6,6 @@ #include <asm/processor-flags.h> #ifdef __KERNEL__ -#include <asm/ds.h> /* the DS BTS struct is used for ptrace too */ #include <asm/segment.h> #endif @@ -128,34 +127,6 @@ struct pt_regs { #endif /* !__i386__ */ -#ifdef CONFIG_X86_PTRACE_BTS -/* a branch trace record entry - * - * In order to unify the interface between various processor versions, - * we use the below data structure for all processors. - */ -enum bts_qualifier { - BTS_INVALID = 0, - BTS_BRANCH, - BTS_TASK_ARRIVES, - BTS_TASK_DEPARTS -}; - -struct bts_struct { - __u64 qualifier; - union { - /* BTS_BRANCH */ - struct { - __u64 from_ip; - __u64 to_ip; - } lbr; - /* BTS_TASK_ARRIVES or - BTS_TASK_DEPARTS */ - __u64 jiffies; - } variant; -}; -#endif /* CONFIG_X86_PTRACE_BTS */ - #ifdef __KERNEL__ #include <linux/init.h> @@ -163,13 +134,6 @@ struct bts_struct { struct cpuinfo_x86; struct task_struct; -#ifdef CONFIG_X86_PTRACE_BTS -extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *); -extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); -#else -#define ptrace_bts_init_intel(config) do {} while (0) -#endif /* CONFIG_X86_PTRACE_BTS */ - extern unsigned long profile_pc(struct pt_regs *regs); extern unsigned long diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 0921b4018c1..bf8113d16a3 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,7 +93,6 @@ struct thread_info { #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ -#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -115,7 +114,6 @@ struct thread_info { #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) -#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ @@ -141,8 +139,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ - _TIF_NOTSC) + (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 16f94879b52..b946ac19753 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/dmi.h> #include <linux/dmar.h> +#include <linux/ftrace.h> #include <asm/atomic.h> #include <asm/smp.h> @@ -800,7 +801,7 @@ static void local_apic_timer_interrupt(void) * [ if a single-CPU system runs an SMP kernel then we call the local * interrupt as well. Thus we cannot inline the local irq ... ] */ -void smp_apic_timer_interrupt(struct pt_regs *regs) +void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 816f27f289b..cd413d9a021 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -11,7 +11,6 @@ #include <asm/pgtable.h> #include <asm/msr.h> #include <asm/uaccess.h> -#include <asm/ptrace.h> #include <asm/ds.h> #include <asm/bugs.h> @@ -309,9 +308,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_P3); #endif - if (cpu_has_bts) - ptrace_bts_init_intel(c); - detect_extended_topology(c); if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { /* diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 19a8c2c0389..dc1e7123ea4 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -6,13 +6,13 @@ * precise-event based sampling (PEBS). * * It manages: - * - per-thread and per-cpu allocation of BTS and PEBS + * - DS and BTS hardware configuration * - buffer overflow handling (to be done) * - buffer access * - * It assumes: - * - get_task_struct on all traced tasks - * - current is allowed to trace tasks + * It does not do: + * - security checking (is the caller allowed to trace the task) + * - buffer allocation (memory accounting) * * * Copyright (C) 2007-2008 Intel Corporation. @@ -34,15 +34,30 @@ * The configuration for a particular DS hardware implementation. */ struct ds_configuration { - /* the size of the DS structure in bytes */ - unsigned char sizeof_ds; - /* the size of one pointer-typed field in the DS structure in bytes; - this covers the first 8 fields related to buffer management. */ + /* the name of the configuration */ + const char *name; + /* the size of one pointer-typed field in the DS structure and + in the BTS and PEBS buffers in bytes; + this covers the first 8 DS fields related to buffer management. */ unsigned char sizeof_field; /* the size of a BTS/PEBS record in bytes */ unsigned char sizeof_rec[2]; + /* a series of bit-masks to control various features indexed + * by enum ds_feature */ + unsigned long ctl[dsf_ctl_max]; }; -static struct ds_configuration ds_cfg; +static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); + +#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) + +#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ +#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ +#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ + +#define BTS_CONTROL \ + (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ + ds_cfg.ctl[dsf_bts_overflow]) + /* * A BTS or PEBS tracer. @@ -61,6 +76,8 @@ struct ds_tracer { struct bts_tracer { /* the common DS part */ struct ds_tracer ds; + /* the trace including the DS configuration */ + struct bts_trace trace; /* buffer overflow notification function */ bts_ovfl_callback_t ovfl; }; @@ -68,6 +85,8 @@ struct bts_tracer { struct pebs_tracer { /* the common DS part */ struct ds_tracer ds; + /* the trace including the DS configuration */ + struct pebs_trace trace; /* buffer overflow notification function */ pebs_ovfl_callback_t ovfl; }; @@ -134,13 +153,11 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, (*(unsigned long *)base) = value; } -#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ - /* * Locking is done only for allocating BTS or PEBS resources. */ -static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); +static DEFINE_SPINLOCK(ds_lock); /* @@ -156,27 +173,32 @@ static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); * >0 number of per-thread tracers * <0 number of per-cpu tracers * - * The below functions to get and put tracers and to check the - * allocation type require the ds_lock to be held by the caller. - * * Tracers essentially gives the number of ds contexts for a certain * type of allocation. */ -static long tracers; +static atomic_t tracers = ATOMIC_INIT(0); static inline void get_tracer(struct task_struct *task) { - tracers += (task ? 1 : -1); + if (task) + atomic_inc(&tracers); + else + atomic_dec(&tracers); } static inline void put_tracer(struct task_struct *task) { - tracers -= (task ? 1 : -1); + if (task) + atomic_dec(&tracers); + else + atomic_inc(&tracers); } static inline int check_tracer(struct task_struct *task) { - return (task ? (tracers >= 0) : (tracers <= 0)); + return task ? + (atomic_read(&tracers) >= 0) : + (atomic_read(&tracers) <= 0); } @@ -190,14 +212,30 @@ static inline int check_tracer(struct task_struct *task) * Contexts are use-counted. They are allocated on first access and * deallocated when the last user puts the context. */ -static DEFINE_PER_CPU(struct ds_context *, system_context); +struct ds_context { + /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ + unsigned char ds[MAX_SIZEOF_DS]; + /* the owner of the BTS and PEBS configuration, respectively */ + struct bts_tracer *bts_master; + struct pebs_tracer *pebs_master; + /* use count */ + unsigned long count; + /* a pointer to the context location inside the thread_struct + * or the per_cpu context array */ + struct ds_context **this; + /* a pointer to the task owning this context, or NULL, if the + * context is owned by a cpu */ + struct task_struct *task; +}; + +static DEFINE_PER_CPU(struct ds_context *, system_context_array); -#define this_system_context per_cpu(system_context, smp_processor_id()) +#define system_context per_cpu(system_context_array, smp_processor_id()) -static inline struct ds_context *ds_get_context(struct task_struct *task) +static struct ds_context *ds_get_context(struct task_struct *task) { struct ds_context **p_context = - (task ? &task->thread.ds_ctx : &this_system_context); + (task ? &task->thread.ds_ctx : &system_context); struct ds_context *context = *p_context; unsigned long irq; @@ -225,10 +263,22 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); } + + context->count++; + + spin_unlock_irqrestore(&ds_lock, irq); + } else { + spin_lock_irqsave(&ds_lock, irq); + + context = *p_context; + if (context) + context->count++; + spin_unlock_irqrestore(&ds_lock, irq); - } - context->count++; + if (!context) + context = ds_get_context(task); + } return context; } @@ -242,8 +292,10 @@ static inline void ds_put_context(struct ds_context *context) spin_lock_irqsave(&ds_lock, irq); - if (--context->count) - goto out; + if (--context->count) { + spin_unlock_irqrestore(&ds_lock, irq); + return; + } *(context->this) = NULL; @@ -253,14 +305,14 @@ static inline void ds_put_context(struct ds_context *context) if (!context->task || (context->task == current)) wrmsrl(MSR_IA32_DS_AREA, 0); - kfree(context); - out: spin_unlock_irqrestore(&ds_lock, irq); + + kfree(context); } /* - * Handle a buffer overflow + * Call the tracer's callback on a buffer overflow. * * context: the ds context * qual: the buffer type @@ -268,30 +320,244 @@ static inline void ds_put_context(struct ds_context *context) static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) { switch (qual) { - case ds_bts: { - struct bts_tracer *tracer = - container_of(context->owner[qual], - struct bts_tracer, ds); - if (tracer->ovfl) - tracer->ovfl(tracer); - } + case ds_bts: + if (context->bts_master && + context->bts_master->ovfl) + context->bts_master->ovfl(context->bts_master); + break; + case ds_pebs: + if (context->pebs_master && + context->pebs_master->ovfl) + context->pebs_master->ovfl(context->pebs_master); break; - case ds_pebs: { - struct pebs_tracer *tracer = - container_of(context->owner[qual], - struct pebs_tracer, ds); - if (tracer->ovfl) - tracer->ovfl(tracer); } +} + + +/* + * Write raw data into the BTS or PEBS buffer. + * + * The remainder of any partially written record is zeroed out. + * + * context: the DS context + * qual: the buffer type + * record: the data to write + * size: the size of the data + */ +static int ds_write(struct ds_context *context, enum ds_qualifier qual, + const void *record, size_t size) +{ + int bytes_written = 0; + + if (!record) + return -EINVAL; + + while (size) { + unsigned long base, index, end, write_end, int_th; + unsigned long write_size, adj_write_size; + + /* + * write as much as possible without producing an + * overflow interrupt. + * + * interrupt_threshold must either be + * - bigger than absolute_maximum or + * - point to a record between buffer_base and absolute_maximum + * + * index points to a valid record. + */ + base = ds_get(context->ds, qual, ds_buffer_base); + index = ds_get(context->ds, qual, ds_index); + end = ds_get(context->ds, qual, ds_absolute_maximum); + int_th = ds_get(context->ds, qual, ds_interrupt_threshold); + + write_end = min(end, int_th); + + /* if we are already beyond the interrupt threshold, + * we fill the entire buffer */ + if (write_end <= index) + write_end = end; + + if (write_end <= index) + break; + + write_size = min((unsigned long) size, write_end - index); + memcpy((void *)index, record, write_size); + + record = (const char *)record + write_size; + size -= write_size; + bytes_written += write_size; + + adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; + adj_write_size *= ds_cfg.sizeof_rec[qual]; + + /* zero out trailing bytes */ + memset((char *)index + write_size, 0, + adj_write_size - write_size); + index += adj_write_size; + + if (index >= end) + index = base; + ds_set(context->ds, qual, ds_index, index); + + if (index >= int_th) + ds_overflow(context, qual); + } + + return bytes_written; +} + + +/* + * Branch Trace Store (BTS) uses the following format. Different + * architectures vary in the size of those fields. + * - source linear address + * - destination linear address + * - flags + * + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. + * + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * + * In order to store additional information in the BTS buffer, we use + * a special source address to indicate that the record requires + * special interpretation. + * + * Netburst indicated via a bit in the flags field whether the branch + * was predicted; this is ignored. + * + * We use two levels of abstraction: + * - the raw data level defined here + * - an arch-independent level defined in ds.h + */ + +enum bts_field { + bts_from, + bts_to, + bts_flags, + + bts_qual = bts_from, + bts_jiffies = bts_to, + bts_pid = bts_flags, + + bts_qual_mask = (bts_qual_max - 1), + bts_escape = ((unsigned long)-1 & ~bts_qual_mask) +}; + +static inline unsigned long bts_get(const char *base, enum bts_field field) +{ + base += (ds_cfg.sizeof_field * field); + return *(unsigned long *)base; +} + +static inline void bts_set(char *base, enum bts_field field, unsigned long val) +{ + base += (ds_cfg.sizeof_field * field);; + (*(unsigned long *)base) = val; +} + + +/* + * The raw BTS data is architecture dependent. + * + * For higher-level users, we give an arch-independent view. + * - ds.h defines struct bts_struct + * - bts_read translates one raw bts record into a bts_struct + * - bts_write translates one bts_struct into the raw format and + * writes it into the top of the parameter tracer's buffer. + * + * return: bytes read/written on success; -Eerrno, otherwise + */ +static int bts_read(struct bts_tracer *tracer, const void *at, + struct bts_struct *out) +{ + if (!tracer) + return -EINVAL; + + if (at < tracer->trace.ds.begin) + return -EINVAL; + + if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) + return -EINVAL; + + memset(out, 0, sizeof(*out)); + if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { + out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); + out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); + out->variant.timestamp.pid = bts_get(at, bts_pid); + } else { + out->qualifier = bts_branch; + out->variant.lbr.from = bts_get(at, bts_from); + out->variant.lbr.to = bts_get(at, bts_to); + } + + return ds_cfg.sizeof_rec[ds_bts]; +} + +static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) +{ + unsigned char raw[MAX_SIZEOF_BTS]; + + if (!tracer) + return -EINVAL; + + if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) + return -EOVERFLOW; + + switch (in->qualifier) { + case bts_invalid: + bts_set(raw, bts_from, 0); + bts_set(raw, bts_to, 0); + bts_set(raw, bts_flags, 0); + break; + case bts_branch: + bts_set(raw, bts_from, in->variant.lbr.from); + bts_set(raw, bts_to, in->variant.lbr.to); + bts_set(raw, bts_flags, 0); + break; + case bts_task_arrives: + case bts_task_departs: + bts_set(raw, bts_qual, (bts_escape | in->qualifier)); |