diff options
Diffstat (limited to 'arch/x86/oprofile')
| -rw-r--r-- | arch/x86/oprofile/Makefile | 3 | ||||
| -rw-r--r-- | arch/x86/oprofile/backtrace.c | 99 | ||||
| -rw-r--r-- | arch/x86/oprofile/init.c | 25 | ||||
| -rw-r--r-- | arch/x86/oprofile/nmi_int.c | 727 | ||||
| -rw-r--r-- | arch/x86/oprofile/nmi_timer_int.c | 69 | ||||
| -rw-r--r-- | arch/x86/oprofile/op_counter.h | 3 | ||||
| -rw-r--r-- | arch/x86/oprofile/op_model_amd.c | 727 | ||||
| -rw-r--r-- | arch/x86/oprofile/op_model_p4.c | 128 | ||||
| -rw-r--r-- | arch/x86/oprofile/op_model_ppro.c | 182 | ||||
| -rw-r--r-- | arch/x86/oprofile/op_x86_model.h | 78 |
10 files changed, 1160 insertions, 881 deletions
diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile index 446902b2a6b..1599f568f0e 100644 --- a/arch/x86/oprofile/Makefile +++ b/arch/x86/oprofile/Makefile @@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprof.o cpu_buffer.o buffer_sync.o \ event_buffer.o oprofile_files.o \ oprofilefs.o oprofile_stats.o \ - timer_int.o ) + timer_int.o nmi_timer_int.o ) oprofile-y := $(DRIVER_OBJS) init.o backtrace.o oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ op_model_ppro.o op_model_p4.o -oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 04df67f8a7b..5d04be5efb6 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -11,21 +11,12 @@ #include <linux/oprofile.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/compat.h> +#include <linux/uaccess.h> + #include <asm/ptrace.h> -#include <asm/uaccess.h> #include <asm/stacktrace.h> -static void backtrace_warning_symbol(void *data, char *msg, - unsigned long symbol) -{ - /* Ignore warnings */ -} - -static void backtrace_warning(void *data, char *msg) -{ - /* Ignore warnings */ -} - static int backtrace_stack(void *data, char *name) { /* Yes, we want all stacks */ @@ -41,50 +32,96 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) } static struct stacktrace_ops backtrace_ops = { - .warning = backtrace_warning, - .warning_symbol = backtrace_warning_symbol, - .stack = backtrace_stack, - .address = backtrace_address, + .stack = backtrace_stack, + .address = backtrace_address, + .walk_stack = print_context_stack, }; -struct frame_head { - struct frame_head *bp; - unsigned long ret; -} __attribute__((packed)); - -static struct frame_head *dump_user_backtrace(struct frame_head *head) +#ifdef CONFIG_COMPAT +static struct stack_frame_ia32 * +dump_user_backtrace_32(struct stack_frame_ia32 *head) { - struct frame_head bufhead[2]; + /* Also check accessibility of one struct frame_head beyond: */ + struct stack_frame_ia32 bufhead[2]; + struct stack_frame_ia32 *fp; + unsigned long bytes; - /* Also check accessibility of one struct frame_head beyond */ - if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) + bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); + if (bytes != 0) return NULL; - if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) + + fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); + + oprofile_add_trace(bufhead[0].return_address); + + /* frame pointers should strictly progress back up the stack + * (towards higher addresses) */ + if (head >= fp) return NULL; - oprofile_add_trace(bufhead[0].ret); + return fp; +} + +static inline int +x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) +{ + struct stack_frame_ia32 *head; + + /* User process is IA32 */ + if (!current || !test_thread_flag(TIF_IA32)) + return 0; + + head = (struct stack_frame_ia32 *) regs->bp; + while (depth-- && head) + head = dump_user_backtrace_32(head); + + return 1; +} + +#else +static inline int +x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) +{ + return 0; +} +#endif /* CONFIG_COMPAT */ + +static struct stack_frame *dump_user_backtrace(struct stack_frame *head) +{ + /* Also check accessibility of one struct frame_head beyond: */ + struct stack_frame bufhead[2]; + unsigned long bytes; + + bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead)); + if (bytes != 0) + return NULL; + + oprofile_add_trace(bufhead[0].return_address); /* frame pointers should strictly progress back up the stack * (towards higher addresses) */ - if (head >= bufhead[0].bp) + if (head >= bufhead[0].next_frame) return NULL; - return bufhead[0].bp; + return bufhead[0].next_frame; } void x86_backtrace(struct pt_regs * const regs, unsigned int depth) { - struct frame_head *head = (struct frame_head *)frame_pointer(regs); - unsigned long stack = kernel_trap_sp(regs); + struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); if (!user_mode_vm(regs)) { + unsigned long stack = kernel_stack_pointer(regs); if (depth) dump_trace(NULL, regs, (unsigned long *)stack, 0, &backtrace_ops, &depth); return; } + if (x86_backtrace_32(regs, depth)) + return; + while (depth-- && head) head = dump_user_backtrace(head); } diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c index cdfe4c54dec..9e138d00ad3 100644 --- a/arch/x86/oprofile/init.c +++ b/arch/x86/oprofile/init.c @@ -16,34 +16,23 @@ * with the NMI mode driver. */ +#ifdef CONFIG_X86_LOCAL_APIC extern int op_nmi_init(struct oprofile_operations *ops); -extern int op_nmi_timer_init(struct oprofile_operations *ops); extern void op_nmi_exit(void); -extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); +#else +static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; } +static void op_nmi_exit(void) { } +#endif +extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); int __init oprofile_arch_init(struct oprofile_operations *ops) { - int ret; - - ret = -ENODEV; - -#ifdef CONFIG_X86_LOCAL_APIC - ret = op_nmi_init(ops); -#endif -#ifdef CONFIG_X86_IO_APIC - if (ret < 0) - ret = op_nmi_timer_init(ops); -#endif ops->backtrace = x86_backtrace; - - return ret; + return op_nmi_init(ops); } - void oprofile_arch_exit(void) { -#ifdef CONFIG_X86_LOCAL_APIC op_nmi_exit(); -#endif } diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 202864ad49a..379e8bd0dee 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -1,18 +1,21 @@ /** * @file nmi_int.c * - * @remark Copyright 2002-2008 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon <levon@movementarian.org> * @author Robert Richter <robert.richter@amd.com> + * @author Barry Kasindorf <barry.kasindorf@amd.com> + * @author Jason Yeh <jason.yeh@amd.com> + * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> */ #include <linux/init.h> #include <linux/notifier.h> #include <linux/smp.h> #include <linux/oprofile.h> -#include <linux/sysdev.h> +#include <linux/syscore_ops.h> #include <linux/slab.h> #include <linux/moduleparam.h> #include <linux/kdebug.h> @@ -24,63 +27,279 @@ #include "op_counter.h" #include "op_x86_model.h" -static struct op_x86_model_spec const *model; +static struct op_x86_model_spec *model; static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); static DEFINE_PER_CPU(unsigned long, saved_lvtpc); -/* 0 == registered but off, 1 == registered and on */ -static int nmi_enabled = 0; +/* must be protected with get_online_cpus()/put_online_cpus(): */ +static int nmi_enabled; +static int ctr_running; -static int profile_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) +struct op_counter_config counter_config[OP_MAX_COUNTER]; + +/* common functions */ + +u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, + struct op_counter_config *counter_config) { - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - int cpu = smp_processor_id(); + u64 val = 0; + u16 event = (u16)counter_config->event; + + val |= ARCH_PERFMON_EVENTSEL_INT; + val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0; + val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0; + val |= (counter_config->unit_mask & 0xFF) << 8; + counter_config->extra &= (ARCH_PERFMON_EVENTSEL_INV | + ARCH_PERFMON_EVENTSEL_EDGE | + ARCH_PERFMON_EVENTSEL_CMASK); + val |= counter_config->extra; + event &= model->event_mask ? model->event_mask : 0xFF; + val |= event & 0xFF; + val |= (u64)(event & 0x0F00) << 24; + + return val; +} - switch (val) { - case DIE_NMI: - if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu))) - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; + +static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs) +{ + if (ctr_running) + model->check_ctrs(regs, &__get_cpu_var(cpu_msrs)); + else if (!nmi_enabled) + return NMI_DONE; + else + model->stop(&__get_cpu_var(cpu_msrs)); + return NMI_HANDLED; } static void nmi_cpu_save_registers(struct op_msrs *msrs) { - unsigned int const nr_ctrs = model->num_counters; - unsigned int const nr_ctrls = model->num_controls; struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; unsigned int i; - for (i = 0; i < nr_ctrs; ++i) { - if (counters[i].addr) { - rdmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); - } + for (i = 0; i < model->num_counters; ++i) { + if (counters[i].addr) + rdmsrl(counters[i].addr, counters[i].saved); } - for (i = 0; i < nr_ctrls; ++i) { - if (controls[i].addr) { - rdmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); + for (i = 0; i < model->num_controls; ++i) { + if (controls[i].addr) + rdmsrl(controls[i].addr, controls[i].saved); + } +} + +static void nmi_cpu_start(void *dummy) +{ + struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); + if (!msrs->controls) + WARN_ON_ONCE(1); + else + model->start(msrs); +} + +static int nmi_start(void) +{ + get_online_cpus(); + ctr_running = 1; + /* make ctr_running visible to the nmi handler: */ + smp_mb(); + on_each_cpu(nmi_cpu_start, NULL, 1); + put_online_cpus(); + return 0; +} + +static void nmi_cpu_stop(void *dummy) +{ + struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); + if (!msrs->controls) + WARN_ON_ONCE(1); + else + model->stop(msrs); +} + +static void nmi_stop(void) +{ + get_online_cpus(); + on_each_cpu(nmi_cpu_stop, NULL, 1); + ctr_running = 0; + put_online_cpus(); +} + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static DEFINE_PER_CPU(int, switch_index); + +static inline int has_mux(void) +{ + return !!model->switch_ctrl; +} + +inline int op_x86_phys_to_virt(int phys) +{ + return __this_cpu_read(switch_index) + phys; +} + +inline int op_x86_virt_to_phys(int virt) +{ + return virt % model->num_counters; +} + +static void nmi_shutdown_mux(void) +{ + int i; + + if (!has_mux()) + return; + + for_each_possible_cpu(i) { + kfree(per_cpu(cpu_msrs, i).multiplex); + per_cpu(cpu_msrs, i).multiplex = NULL; + per_cpu(switch_index, i) = 0; + } +} + +static int nmi_setup_mux(void) +{ + size_t multiplex_size = + sizeof(struct op_msr) * model->num_virt_counters; + int i; + + if (!has_mux()) + return 1; + + for_each_possible_cpu(i) { + per_cpu(cpu_msrs, i).multiplex = + kzalloc(multiplex_size, GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).multiplex) + return 0; + } + + return 1; +} + +static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) +{ + int i; + struct op_msr *multiplex = msrs->multiplex; + + if (!has_mux()) + return; + + for (i = 0; i < model->num_virt_counters; ++i) { + if (counter_config[i].enabled) { + multiplex[i].saved = -(u64)counter_config[i].count; + } else { + multiplex[i].saved = 0; } } + + per_cpu(switch_index, cpu) = 0; +} + +static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) +{ + struct op_msr *counters = msrs->counters; + struct op_msr *multiplex = msrs->multiplex; + int i; + + for (i = 0; i < model->num_counters; ++i) { + int virt = op_x86_phys_to_virt(i); + if (counters[i].addr) + rdmsrl(counters[i].addr, multiplex[virt].saved); + } } -static void nmi_save_registers(void *dummy) +static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) +{ + struct op_msr *counters = msrs->counters; + struct op_msr *multiplex = msrs->multiplex; + int i; + + for (i = 0; i < model->num_counters; ++i) { + int virt = op_x86_phys_to_virt(i); + if (counters[i].addr) + wrmsrl(counters[i].addr, multiplex[virt].saved); + } +} + +static void nmi_cpu_switch(void *dummy) { int cpu = smp_processor_id(); + int si = per_cpu(switch_index, cpu); struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); - nmi_cpu_save_registers(msrs); + + nmi_cpu_stop(NULL); + nmi_cpu_save_mpx_registers(msrs); + + /* move to next set */ + si += model->num_counters; + if ((si >= model->num_virt_counters) || (counter_config[si].count == 0)) + per_cpu(switch_index, cpu) = 0; + else + per_cpu(switch_index, cpu) = si; + + model->switch_ctrl(model, msrs); + nmi_cpu_restore_mpx_registers(msrs); + + nmi_cpu_start(NULL); +} + + +/* + * Quick check to see if multiplexing is necessary. + * The check should be sufficient since counters are used + * in ordre. + */ +static int nmi_multiplex_on(void) +{ + return counter_config[model->num_counters].count ? 0 : -EINVAL; +} + +static int nmi_switch_event(void) +{ + if (!has_mux()) + return -ENOSYS; /* not implemented */ + if (nmi_multiplex_on() < 0) + return -EINVAL; /* not necessary */ + + get_online_cpus(); + if (ctr_running) + on_each_cpu(nmi_cpu_switch, NULL, 1); + put_online_cpus(); + + return 0; } +static inline void mux_init(struct oprofile_operations *ops) +{ + if (has_mux()) + ops->switch_events = nmi_switch_event; +} + +static void mux_clone(int cpu) +{ + if (!has_mux()) + return; + + memcpy(per_cpu(cpu_msrs, cpu).multiplex, + per_cpu(cpu_msrs, 0).multiplex, + sizeof(struct op_msr) * model->num_virt_counters); +} + +#else + +inline int op_x86_phys_to_virt(int phys) { return phys; } +inline int op_x86_virt_to_phys(int virt) { return virt; } +static inline void nmi_shutdown_mux(void) { } +static inline int nmi_setup_mux(void) { return 1; } +static inline void +nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } +static inline void mux_init(struct oprofile_operations *ops) { } +static void mux_clone(int cpu) { } + +#endif + static void free_msrs(void) { int i; @@ -90,113 +309,63 @@ static void free_msrs(void) kfree(per_cpu(cpu_msrs, i).controls); per_cpu(cpu_msrs, i).controls = NULL; } + nmi_shutdown_mux(); } static int allocate_msrs(void) { - int success = 1; size_t controls_size = sizeof(struct op_msr) * model->num_controls; size_t counters_size = sizeof(struct op_msr) * model->num_counters; int i; for_each_possible_cpu(i) { - per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, - GFP_KERNEL); - if (!per_cpu(cpu_msrs, i).counters) { - success = 0; - break; - } - per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, - GFP_KERNEL); - if (!per_cpu(cpu_msrs, i).controls) { - success = 0; - break; - } + per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, + GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).counters) + goto fail; + per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, + GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).controls) + goto fail; } - if (!success) - free_msrs(); + if (!nmi_setup_mux()) + goto fail; + + return 1; - return success; +fail: + free_msrs(); + return 0; } static void nmi_cpu_setup(void *dummy) { int cpu = smp_processor_id(); struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); - spin_lock(&oprofilefs_lock); - model->setup_ctrs(msrs); - spin_unlock(&oprofilefs_lock); + nmi_cpu_save_registers(msrs); + raw_spin_lock(&oprofilefs_lock); + model->setup_ctrs(model, msrs); + nmi_cpu_setup_mux(cpu, msrs); + raw_spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); } -static struct notifier_block profile_exceptions_nb = { - .notifier_call = profile_exceptions_notify, - .next = NULL, - .priority = 0 -}; - -static int nmi_setup(void) -{ - int err = 0; - int cpu; - - if (!allocate_msrs()) - return -ENOMEM; - - err = register_die_notifier(&profile_exceptions_nb); - if (err) { - free_msrs(); - return err; - } - - /* We need to serialize save and setup for HT because the subset - * of msrs are distinct for save and setup operations - */ - - /* Assume saved/restored counters are the same on all CPUs */ - model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); - for_each_possible_cpu(cpu) { - if (cpu != 0) { - memcpy(per_cpu(cpu_msrs, cpu).counters, - per_cpu(cpu_msrs, 0).counters, - sizeof(struct op_msr) * model->num_counters); - - memcpy(per_cpu(cpu_msrs, cpu).controls, - per_cpu(cpu_msrs, 0).controls, - sizeof(struct op_msr) * model->num_controls); - } - - } - on_each_cpu(nmi_save_registers, NULL, 1); - on_each_cpu(nmi_cpu_setup, NULL, 1); - nmi_enabled = 1; - return 0; -} - -static void nmi_restore_registers(struct op_msrs *msrs) +static void nmi_cpu_restore_registers(struct op_msrs *msrs) { - unsigned int const nr_ctrs = model->num_counters; - unsigned int const nr_ctrls = model->num_controls; struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; unsigned int i; - for (i = 0; i < nr_ctrls; ++i) { - if (controls[i].addr) { - wrmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); - } + for (i = 0; i < model->num_controls; ++i) { + if (controls[i].addr) + wrmsrl(controls[i].addr, controls[i].saved); } - for (i = 0; i < nr_ctrs; ++i) { - if (counters[i].addr) { - wrmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); - } + for (i = 0; i < model->num_counters; ++i) { + if (counters[i].addr) + wrmsrl(counters[i].addr, counters[i].saved); } } @@ -204,7 +373,7 @@ static void nmi_cpu_shutdown(void *dummy) { unsigned int v; int cpu = smp_processor_id(); - struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); /* restoring APIC_LVTPC can trigger an apic error because the delivery * mode and vector nr combination can be illegal. That's by design: on @@ -215,52 +384,30 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); - nmi_restore_registers(msrs); + nmi_cpu_restore_registers(msrs); } -static void nmi_shutdown(void) +static void nmi_cpu_up(void *dummy) { - struct op_msrs *msrs; - - nmi_enabled = 0; - on_each_cpu(nmi_cpu_shutdown, NULL, 1); - unregister_die_notifier(&profile_exceptions_nb); - msrs = &get_cpu_var(cpu_msrs); - model->shutdown(msrs); - free_msrs(); - put_cpu_var(cpu_msrs); + if (nmi_enabled) + nmi_cpu_setup(dummy); + if (ctr_running) + nmi_cpu_start(dummy); } -static void nmi_cpu_start(void *dummy) +static void nmi_cpu_down(void *dummy) { - struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); - model->start(msrs); -} - -static int nmi_start(void) -{ - on_each_cpu(nmi_cpu_start, NULL, 1); - return 0; + if (ctr_running) + nmi_cpu_stop(dummy); + if (nmi_enabled) + nmi_cpu_shutdown(dummy); } -static void nmi_cpu_stop(void *dummy) -{ - struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); - model->stop(msrs); -} - -static void nmi_stop(void) -{ - on_each_cpu(nmi_cpu_stop, NULL, 1); -} - -struct op_counter_config counter_config[OP_MAX_COUNTER]; - -static int nmi_create_files(struct super_block *sb, struct dentry *root) +static int nmi_create_files(struct dentry *root) { unsigned int i; - for (i = 0; i < model->num_counters; ++i) { + for (i = 0; i < model->num_virt_counters; ++i) { struct dentry *dir; char buf[4]; @@ -269,23 +416,23 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) * NOTE: assumes 1:1 mapping here (that counters are organized * sequentially in their struct assignment). */ - if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) + if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i))) continue; snprintf(buf, sizeof(buf), "%d", i); - dir = oprofilefs_mkdir(sb, root, buf); - oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); - oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); - oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); - oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); - oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); - oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); + dir = oprofilefs_mkdir(root, buf); + oprofilefs_create_ulong(dir, "enabled", &counter_config[i].enabled); + oprofilefs_create_ulong(dir, "event", &counter_config[i].event); + oprofilefs_create_ulong(dir, "count", &counter_config[i].count); + oprofilefs_create_ulong(dir, "unit_mask", &counter_config[i].unit_mask); + oprofilefs_create_ulong(dir, "kernel", &counter_config[i].kernel); + oprofilefs_create_ulong(dir, "user", &counter_config[i].user); + oprofilefs_create_ulong(dir, "extra", &counter_config[i].extra); } return 0; } -#ifdef CONFIG_SMP static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, void *data) { @@ -293,10 +440,10 @@ static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, switch (action) { case CPU_DOWN_FAILED: case CPU_ONLINE: - smp_call_function_single(cpu, nmi_cpu_start, NULL, 0); + smp_call_function_single(cpu, nmi_cpu_up, NULL, 0); break; case CPU_DOWN_PREPARE: - smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1); + smp_call_function_single(cpu, nmi_cpu_down, NULL, 1); break; } return NOTIFY_DONE; @@ -305,11 +452,95 @@ static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, static struct notifier_block oprofile_cpu_nb = { .notifier_call = oprofile_cpu_notifier }; -#endif + +static int nmi_setup(void) +{ + int err = 0; + int cpu; + + if (!allocate_msrs()) + return -ENOMEM; + + /* We need to serialize save and setup for HT because the subset + * of msrs are distinct for save and setup operations + */ + + /* Assume saved/restored counters are the same on all CPUs */ + err = model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); + if (err) + goto fail; + + for_each_possible_cpu(cpu) { + if (!cpu) + continue; + + memcpy(per_cpu(cpu_msrs, cpu).counters, + per_cpu(cpu_msrs, 0).counters, + sizeof(struct op_msr) * model->num_counters); + + memcpy(per_cpu(cpu_msrs, cpu).controls, + per_cpu(cpu_msrs, 0).controls, + sizeof(struct op_msr) * model->num_controls); + + mux_clone(cpu); + } + + nmi_enabled = 0; + ctr_running = 0; + /* make variables visible to the nmi handler: */ + smp_mb(); + err = register_nmi_handler(NMI_LOCAL, profile_exceptions_notify, + 0, "oprofile"); + if (err) + goto fail; + + cpu_notifier_register_begin(); + + /* Use get/put_online_cpus() to protect 'nmi_enabled' */ + get_online_cpus(); + nmi_enabled = 1; + /* make nmi_enabled visible to the nmi handler: */ + smp_mb(); + on_each_cpu(nmi_cpu_setup, NULL, 1); + __register_cpu_notifier(&oprofile_cpu_nb); + put_online_cpus(); + + cpu_notifier_register_done(); + + return 0; +fail: + free_msrs(); + return err; +} + +static void nmi_shutdown(void) +{ + struct op_msrs *msrs; + + cpu_notifier_register_begin(); + + /* Use get/put_online_cpus() to protect 'nmi_enabled' & 'ctr_running' */ + get_online_cpus(); + on_each_cpu(nmi_cpu_shutdown, NULL, 1); + nmi_enabled = 0; + ctr_running = 0; + __unregister_cpu_notifier(&oprofile_cpu_nb); + put_online_cpus(); + + cpu_notifier_register_done(); + + /* make variables visible to the nmi handler: */ + smp_mb(); + unregister_nmi_handler(NMI_LOCAL, "oprofile"); + msrs = &get_cpu_var(cpu_msrs); + model->shutdown(msrs); + free_msrs(); + put_cpu_var(cpu_msrs); +} #ifdef CONFIG_PM -static int nmi_suspend(struct sys_device *dev, pm_message_t state) +static int nmi_suspend(void) { /* Only one CPU left, just stop that one */ if (nmi_enabled == 1) @@ -317,53 +548,39 @@ static int nmi_suspend(struct sys_device *dev, pm_message_t state) return 0; } -static int nmi_resume(struct sys_device *dev) +static void nmi_resume(void) { if (nmi_enabled == 1) nmi_cpu_start(NULL); - return 0; } -static struct sysdev_class oprofile_sysclass = { - .name = "oprofile", +static struct syscore_ops oprofile_syscore_ops = { .resume = nmi_resume, .suspend = nmi_suspend, }; -static struct sys_device device_oprofile = { - .id = 0, - .cls = &oprofile_sysclass, -}; - -static int __init init_sysfs(void) +static void __init init_suspend_resume(void) { - int error; - - error = sysdev_class_register(&oprofile_sysclass); - if (!error) - error = sysdev_register(&device_oprofile); - return error; + register_syscore_ops(&oprofile_syscore_ops); } -static void exit_sysfs(void) +static void exit_suspend_resume(void) { - sysdev_unregister(&device_oprofile); - sysdev_class_unregister(&oprofile_sysclass); + unregister_syscore_ops(&oprofile_syscore_ops); } #else -#define init_sysfs() do { } while (0) -#define exit_sysfs() do { } while (0) -#endif /* CONFIG_PM */ -static int p4force; -module_param(p4force, int, 0); +static inline void init_suspend_resume(void) { } +static inline void exit_suspend_resume(void) { } + +#endif /* CONFIG_PM */ static int __init p4_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; - if (!p4force && (cpu_model > 6 || cpu_model == 5)) + if (cpu_model > 6 || cpu_model == 5) return 0; #ifndef CONFIG_SMP @@ -389,10 +606,50 @@ static int __init p4_init(char **cpu_type) return 0; } +enum __force_cpu_type { + reserved = 0, /* do not force */ + timer, + arch_perfmon, +}; + +static int force_cpu_type; + +static int set_cpu_type(const char *str, struct kernel_param *kp) +{ + if (!strcmp(str, "timer")) { + force_cpu_type = timer; + printk(KERN_INFO "oprofile: forcing NMI timer mode\n"); + } else if (!strcmp(str, "arch_perfmon")) { + force_cpu_type = arch_perfmon; + printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); + } else { + force_cpu_type = 0; + } + + return 0; +} +module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); + static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; + struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ + if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon) + return 0; + + /* + * Documentation on identifying Intel processors by CPU family + * and model can be found in the Intel Software Developer's + * Manuals (SDM): + * + * http://www.intel.com/products/processor/manuals/ + * + * As of May 2010 the documentation for this was in the: + * "Intel 64 and IA-32 Architectures Software Developer's + * Manual Volume 3B: System Programming Guide", "Table B-1 + * CPUID Signature Values of DisplayFamily_DisplayModel". + */ switch (cpu_model) { case 0 ... 2: *cpu_type = "i386/ppro"; @@ -411,31 +668,30 @@ static int __init ppro_init(char **cpu_type) case 14: *cpu_type = "i386/core"; break; - case 15: case 23: + case 0x0f: + case 0x16: + case 0x17: + case 0x1d: *cpu_type = "i386/core_2"; break; + case 0x1a: + case 0x1e: + case 0x2e: + spec = &op_arch_perfmon_spec; + *cpu_type = "i386/core_i7"; + break; + case 0x1c: + *cpu_type = "i386/atom"; + break; default: /* Unknown */ return 0; } - model = &op_ppro_spec; - return 1; -} - -static int __init arch_perfmon_init(char **cpu_type) -{ - if (!cpu_has_arch_perfmon) - return 0; - *cpu_type = "i386/arch_perfmon"; - model = &op_arch_perfmon_spec; - arch_perfmon_setup_counters(); + model = spec; return 1; } -/* in order to get sysfs right */ -static int using_nmi; - int __init op_nmi_init(struct oprofile_operations *ops) { __u8 vendor = boot_cpu_data.x86_vendor; @@ -446,32 +702,43 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (!cpu_has_apic) return -ENODEV; + if (force_cpu_type == timer) + return -ENODEV; + switch (vendor) { case X86_VENDOR_AMD: /* Needs to be at least an Athlon (or hammer in 32bit mode) */ switch (family) { - default: - return -ENODEV; case 6: - model = &op_amd_spec; cpu_type = "i386/athlon"; break; case 0xf: - model = &op_amd_spec; - /* Actually it could be i386/hammer too, but give - user space an consistent name. */ + /* + * Actually it could be i386/hammer too, but + * give user space an consistent name. + */ cpu_type = "x86-64/hammer"; break; case 0x10: - model = &op_amd_spec; cpu_type = "x86-64/family10"; break; case 0x11: - model = &op_amd_spec; cpu_type = "x86-64/family11h"; break; + case 0x12: + cpu_type = "x86-64/family12h"; + break; + case 0x14: + cpu_type = "x86-64/family14h"; + break; + case 0x15: + cpu_type = "x86-64/family15h"; + break; + default: + return -ENODEV; } + model = &op_amd_spec; break; case X86_VENDOR_INTEL: @@ -490,44 +757,46 @@ int __init op_nmi_init(struct oprofile_operations *ops) break; } - if (!cpu_type && !arch_perfmon_init(&cpu_type)) + if (cpu_type) + break; + + if (!cpu_has_arch_perfmon) return -ENODEV; + + /* use arch perfmon as fallback */ + cpu_type = "i386/arch_perfmon"; + model = &op_arch_perfmon_spec; break; default: return -ENODEV; } -#ifdef CONFIG_SMP - register_cpu_notifier(&oprofile_cpu_nb); -#endif /* default values, can be overwritten by model */ - ops->create_files = nmi_create_files; - ops->setup = nmi_setup; - ops->shutdown = nmi_shutdown; - ops->start = nmi_start; - ops->stop = nmi_stop; - ops->cpu_type = cpu_type; + ops->create_files = nmi_create_files; + ops->setup = nmi_setup; + ops->shutdown = nmi_shutdown; + ops->start = nmi_start; + ops->stop = nmi_stop; + ops->cpu_type = cpu_type; if (model->init) ret = model->init(ops); if (ret) return ret; - init_sysfs(); - using_nmi = 1; + if (!model->num_virt_counters) + model->num_virt_counters = model->num_counters; + + mux_init(ops); + + init_suspend_resume(); + printk(KERN_INFO "oprofile: using NMI interrupt.\n"); return 0; } void op_nmi_exit(void) { - if (using_nmi) { - exit_sysfs(); -#ifdef CONFIG_SMP - unregister_cpu_notifier(&oprofile_cpu_nb); -#endif - } - if (model->exit) - model->exit(); + exit_suspend_resume(); } diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c deleted file mode 100644 index e3ecb71b579..00000000000 --- a/arch/x86/oprofile/nmi_timer_int.c +++ /dev/null @@ -1,69 +0,0 @@ -/** - * @file nmi_timer_int.c - * - * @remark Copyright 2003 OProfile authors - * @remark Read the file COPYING - * - * @author Zwane Mwaikambo <zwane@linuxpower.ca> - */ - -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/errno.h> -#include <linux/oprofile.h> -#include <linux/rcupdate.h> -#include <linux/kdebug.h> - -#include <asm/nmi.h> -#include <asm/apic.h> -#include <asm/ptrace.h> - -static int profile_timer_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - switch (val) { - case DIE_NMI: - oprofile_add_sample(args->regs, 0); - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; -} - -static struct notifier_block profile_timer_exceptions_nb = { - .notifier_call = profile_timer_exceptions_notify, - .next = NULL, - .priority = 0 -}; - -static int timer_start(void) -{ - if (register_die_notifier(&profile_timer_exceptions_nb)) - return 1; - return 0; -} - - -static void timer_stop(void) -{ - unregister_die_notifier(&profile_timer_exceptions_nb); - synchronize_sched(); /* Allow already-started NMIs to complete. */ -} - - -int __init op_nmi_timer_init(struct oprofile_operations *ops) -{ - if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0)) - return -ENODEV; - - ops->start = timer_start; - ops->stop = timer_stop; - ops->cpu_type = "timer"; - printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); - return 0; -} diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 91b6a116165..0b7b7b179cb 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -10,7 +10,7 @@ #ifndef OP_COUNTER_H #define OP_COUNTER_H -#define OP_MAX_COUNTER 8 +#define OP_MAX_COUNTER 32 /* Per-perfctr configuration as set via * oprofilefs. @@ -22,6 +22,7 @@ struct op_counter_config { unsigned long kernel; unsigned long user; unsigned long unit_mask; + unsigned long extra; }; extern struct op_counter_config counter_config[]; diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 8fdf06e4edf..50d86c0e9ba 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -9,496 +9,535 @@ * @author Philippe Elie * @author Graydon Hoare * @author Robert Richter <robert.richter@amd.com> - * @author Barry Kasindorf + * @author Barry Kasindorf <barry.kasindorf@amd.com> + * @author Jason Yeh <jason.yeh@amd.com> + * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> */ #include <linux/oprofile.h> #include <linux/device.h> #include <linux/pci.h> +#include <linux/percpu.h> #include <asm/ptrace.h> #include <asm/msr.h> #include <asm/nmi.h> +#include <asm/apic.h> +#include <asm/processor.h> +#include <asm/cpufeature.h> #include "op_x86_model.h" #include "op_counter.h" -#define NUM_COUNTERS 4 -#define NUM_CONTROLS 4 - -#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) -#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0) -#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) - -#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) -#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) -#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) -#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) -#define CTRL_CLEAR_LO(x) (x &= (1<<21)) -#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) -#define CTRL_SET_ENABLE(val) (val |= 1<<20) -#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) -#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) -#define CTRL_SET_UM(val, m) (val |= (m << 8)) -#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) -#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf)) -#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) -#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) - -static unsigned long reset_value[NUM_COUNTERS]; - -#ifdef CONFIG_OPROFILE_IBS - -/* IbsFetchCtl bits/masks */ -#define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ -#define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ -#define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */ - -/*IbsOpCtl bits */ -#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ -#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ - -#define IBS_FETCH_SIZE 6 -#define IBS_OP_SIZE 12 - -static int has_ibs; /* AMD Family10h and later */ - -struct op_ibs_config { +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +#define NUM_VIRT_COUNTERS 32 +#else +#define NUM_VIRT_COUNTERS 0 +#endif + +#define OP_EVENT_MASK 0x0FFF +#define OP_CTR_OVERFLOW (1ULL<<31) + +#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) + +static int num_counters; +static unsigned long reset_value[OP_MAX_COUNTER]; + +#define IBS_FETCH_SIZE 6 +#define IBS_OP_SIZE 12 + +static u32 ibs_caps; + +struct ibs_config { unsigned long op_enabled; unsigned long fetch_enabled; unsigned long max_cnt_fetch; unsigned long max_cnt_op; unsigned long rand_en; unsigned long dispatched_ops; + unsigned long branch_target; }; -static struct op_ibs_config ibs_config; +struct ibs_state { + u64 ibs_op_ctl; + int branch_target; + unsigned long sample_size; +}; -#endif +static struct ibs_config ibs_config; +static struct ibs_state ibs_state; -/* functions for op_amd_spec */ +/* + * IBS randomization macros + */ +#define IBS_RANDOM_BITS 12 +#define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1) +#define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5)) -static void op_amd_fill_in_addresses(struct op_msrs * const msrs) +/* + * 16-bit Linear Feedback Shift Register (LFSR) + * + * 16 14 13 11 + * Feedback polynomial = X + X + X + X + 1 + */ +static unsigned int lfsr_random(void) { - int i; + static unsigned int lfsr_value = 0xF00D; + unsigned int bit; - for (i = 0; i < NUM_COUNTERS; i++) { - if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) - msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; - else - msrs->counters[i].addr = 0; - } + /* Compute next bit to shift in */ + bit = ((lfsr_value >> 0) ^ + (lfsr_value >> 2) ^ + (lfsr_value >> 3) ^ + (lfsr_value >> 5)) & 0x0001; - for (i = 0; i < NUM_CONTROLS; i++) { - if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) - msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; - else - msrs->controls[i].addr = 0; - } -} + /* Advance to next register value */ + lfsr_value = (lfsr_value >> 1) | (bit << 15); + return lfsr_value; +} -static void op_amd_setup_ctrs(struct op_msrs const * const msrs) +/* + * IBS software randomization + * + * The IBS periodic op counter is randomized in software. The lower 12 + * bits of the 20 bit counter are randomized. IbsOpCurCnt is + * initialized with a 12 bit random value. + */ +static inline u64 op_amd_randomize_ibs_op(u64 val) { - unsigned int low, high; - int i; - - /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { - if (unlikely(!CTRL_IS_RESERVED(msrs, i))) - continue; - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR_LO(low); - CTRL_CLEAR_HI(high); - CTRL_WRITE(low, high, msrs, i); - } - - /* avoid a false detection of ctr overflows in NMI handler */ - for (i = 0; i < NUM_COUNTERS; ++i) { - if (unlikely(!CTR_IS_RESERVED(msrs, i))) - continue; - CTR_WRITE(1, msrs, i); - } - - /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { - if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { - reset_value[i] = counter_config[i].count; - - CTR_WRITE(counter_config[i].count, msrs, i); - - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR_LO(low); - CTRL_CLEAR_HI(high); - CTRL_SET_ENABLE(low); - CTRL_SET_USR(low, counter_config[i].user); - CTRL_SET_KERN(low, counter_config[i].kernel); - CTRL_SET_UM(low, counter_config[i].unit_mask); - CTRL_SET_EVENT_LOW(low, counter_config[i].event); - CTRL_SET_EVENT_HIGH(high, counter_config[i].event); - CTRL_SET_HOST_ONLY(high, 0); - CTRL_SET_GUEST_ONLY(high, 0); - - CTRL_WRITE(low, high, msrs, i); - } else { - reset_value[i] = 0; - } - } + unsigned int random = lfsr_random(); + + if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) + /* + * Work around if the hw can not write to IbsOpCurCnt + * + * Randomize the lower 8 bits of the 16 bit + * IbsOpMaxCnt [15:0] value in the range of -128 to + * +127 by adding/subtracting an offset to the + * maximum count (IbsOpMaxCnt). + * + * To avoid over or underflows and protect upper bits + * starting at bit 16, the initial value for + * IbsOpMaxCnt must fit in the range from 0x0081 to + * 0xff80. + */ + val += (s8)(random >> 4); + else + val |= (u64)(random & IBS_RANDOM_MASK) << 32; + + return val; } -#ifdef CONFIG_OPROFILE_IBS - -static inline int +static inline void op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { - u32 low, high; - u64 msr; + u64 val, ctl; struct op_entry entry; - if (!has_ibs) - return 1; + if (!ibs_caps) + return; if (ibs_config.fetch_enabled) { - rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); - if (high & IBS_FETCH_HIGH_VALID_BIT) { - rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); - oprofile_write_reserve(&entry, regs, msr, + rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); + if (ctl & IBS_FETCH_VAL) { + rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); + oprofile_write_reserve(&entry, regs, val, IBS_FETCH_CODE, IBS_FETCH_SIZE); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - oprofile_add_data(&entry, low); - oprofile_add_data(&entry, high); - rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); + oprofile_add_data64(&entry, val); + oprofile_add_data64(&entry, ctl); + rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); + oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ - high &= ~IBS_FETCH_HIGH_VALID_BIT; - high |= IBS_FETCH_HIGH_ENABLE; - low &= IBS_FETCH_LOW_MAX_CNT_MASK; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT); + ctl |= IBS_FETCH_ENABLE; + wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); } } if (ibs_config.op_enabled) { - rdmsr(MSR_AMD64_IBSOPCTL, low, high); - if (low & IBS_OP_LOW_VALID_BIT) { - rdmsrl(MSR_AMD64_IBSOPRIP, msr); - oprofile_write_reserve(&entry, regs, msr, - IBS_OP_CODE, IBS_OP_SIZE); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSOPDATA, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSOPDATA2, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSOPDATA3, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSDCLINAD, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); + rdmsrl(MSR_AMD64_IBSOPCTL, ctl); + if (ctl & IBS_OP_VAL) { + rdmsrl(MSR_AMD64_IBSOPRIP, val); + oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, + ibs_state.sample_size); + oprofile_add_data64(&entry, val); + rdmsrl(MSR_AMD64_IBSOPDATA, val); + oprofile_add_data64(&entry, val); + rdmsrl(MSR_AMD64_IBSOPDATA2, val); + oprofile_add_data64(&entry, val); + rdmsrl(MSR_AMD64_IBSOPDATA3, val); + oprofile_add_data64(&entry, val); + rdmsrl(MSR_AMD64_IBSDCLINAD, val); + oprofile_add_data64(&entry, val); + rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); + oprofile_add_data64(&entry, val); + if (ibs_state.branch_target) { + rdmsrl(MSR_AMD64_IBSBRTARGET, val); + oprofile_add_data(&entry, (unsigned long)val); + } oprofile_write_commit(&entry); /* reenable the IRQ */ - high = 0; - low &= ~IBS_OP_LOW_VALID_BIT; - low |= IBS_OP_LOW_ENABLE; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); + ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl); + wrmsrl(MSR_AMD64_IBSOPCTL, ctl); } } - - return 1; } -#endif - -static int op_amd_check_ctrs(struct pt_regs * const regs, - struct op_msrs const * const msrs) +static inline void op_amd_start_ibs(void) { - unsigned int low, high; - int i; + u64 val; - for (i = 0 ; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) - continue; - CTR_READ(low, high, msrs, i); - if (CTR_OVERFLOWED(low)) { - oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], msrs, i); + if (!ibs_caps) + return; + + memset(&ibs_state, 0, sizeof(ibs_state)); + + /* + * Note: Since the max count settings may out of range we + * write back the actual used values so that userland can read + * it. + */ + + if (ibs_config.fetch_enabled) { + val = ibs_config.max_cnt_fetch >> 4; + val = min(val, IBS_FETCH_MAX_CNT); + ibs_config.max_cnt_fetch = val << 4; + val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; + val |= IBS_FETCH_ENABLE; + wrmsrl(MSR_AMD64_IBSFETCHCTL, val); + } + + if (ibs_config.op_enabled) { + val = ibs_config.max_cnt_op >> 4; + if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { + /* + * IbsOpCurCnt not supported. See + * op_amd_randomize_ibs_op() for details. + */ + val = clamp(val, 0x0081ULL, 0xFF80ULL); + ibs_config.max_cnt_op = val << 4; + } else { + /* + * The start value is randomized with a + * positive offset, we need to compensate it + * with the half of the randomized range. Also + * avoid underflows. + */ + val += IBS_RANDOM_MAXCNT_OFFSET; + if (ibs_caps & IBS_CAPS_OPCNTEXT) + val = min(val, IBS_OP_MAX_CNT_EXT); + else + val = min(val, IBS_OP_MAX_CNT); + ibs_config.max_cnt_op = + (val - IBS_RANDOM_MAXCNT_OFFSET) << 4; + } + val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT); + val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; + val |= IBS_OP_ENABLE; + ibs_state.ibs_op_ctl = val; + ibs_state.sample_size = IBS_OP_SIZE; + if (ibs_config.branch_target) { + ibs_state.branch_target = 1; + ibs_state.sample_size++; } + val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl); + wrmsrl(MSR_AMD64_IBSOPCTL, val); } +} -#ifdef CONFIG_OPROFILE_IBS - op_amd_handle_ibs(regs, msrs); -#endif +static void op_amd_stop_ibs(void) +{ + if (!ibs_caps) + return; - /* See op_model_ppro.c */ - return 1; + if (ibs_config.fetch_enabled) + /* clear max count and enable */ + wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); + + if (ibs_config.op_enabled) + /* clear max count and enable */ + wrmsrl(MSR_AMD64_IBSOPCTL, 0); } -static void op_amd_start(struct op_msrs const * const msrs) +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (reset_value[i]) { - CTRL_READ(low, high, msrs, i); - CTRL_SET_ACTIVE(low); - CTRL_WRITE(low, high, msrs, i); - } - } -#ifdef CONFIG_OPROFILE_IBS - if (has_ibs && ibs_config.fetch_enabled) { - low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; - high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ - + IBS_FETCH_HIGH_ENABLE; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + /* enable active counters */ + for (i = 0; i < num_counters; ++i) { + int virt = op_x86_phys_to_virt(i); + if (!reset_value[virt]) + continue; + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[virt]); + wrmsrl(msrs->controls[i].addr, val); } +} - if (has_ibs && ibs_config.op_enabled) { - low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) - + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ - + IBS_OP_LOW_ENABLE; - high = 0; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); - } #endif -} +/* functions for op_amd_spec */ -static void op_amd_stop(struct op_msrs const * const msrs) +static void op_amd_shutdown(struct op_msrs const * const msrs) { - unsigned int low, high; int i; - /* - * Subtle: stop on all counters to avoid race with setting our - * pm callback - */ - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (!reset_value[i]) + for (i = 0; i < num_counters; ++i) { + if (!msrs->counters[i].addr) continue; - CTRL_READ(low, high, msrs, i); - CTRL_SET_INACTIVE(low); - CTRL_WRITE(low, high, msrs, i); + release_perfctr_nmi(MSR_K7_PERFCTR0 + i); + release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } +} -#ifdef CONFIG_OPROFILE_IBS - if (has_ibs && ibs_config.fetch_enabled) { - /* clear max count and enable */ - low = 0; - high = 0; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); - } +static int op_amd_fill_in_addresses(struct op_msrs * const msrs) +{ + int i; - if (has_ibs && ibs_config.op_enabled) { - /* clear max count and enable */ - low = 0; - high = 0; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); + for (i = 0; i < num_counters; i++) { + if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) + goto fail; + if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { + release_perfctr_nmi(MSR_K7_PERFCTR0 + i); + goto fail; + } + /* both registers must be reserved */ + if (num_counters == AMD64_NUM_COUNTERS_CORE) { + msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); + msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); + } else { + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; + msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; + } + continue; + fail: + if (!counter_config[i].enabled) + continue; + op_x86_warn_reserved(i); + op_amd_shutdown(msrs); + return -EBUSY; } -#endif + + return 0; } -static void op_amd_shutdown(struct op_msrs const * const msrs) +static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) { + u64 val; int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (CTR_IS_RESERVED(msrs, i)) - release_perfctr_nmi(MSR_K7_PERFCTR0 + i); - } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { - if (CTRL_IS_RESERVED(msrs, i)) - release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); + /* setup reset_value */ + for (i = 0; i < OP_MAX_COUNTER; ++i) { + if (counter_config[i].enabled + && msrs->counters[op_x86_virt_to_phys(i)].addr) + reset_value[i] = counter_config[i].count; + else + reset_value[i] = 0; } -} -#ifdef CONFIG_OPROFILE_IBS + /* clear all counters */ + for (i = 0; i < num_counters; ++i) { + if (!msrs->controls[i].addr) + continue; + rdmsrl(msrs->controls[i].addr, val); + if (val & ARCH_PERFMON_EVENTSEL_ENABLE) + op_x86_warn_in_use(i); + val &= model->reserved; + wrmsrl(msrs->controls[i].addr, val); + /* + * avoid a false detection of ctr overflows in NMI + * handler + */ + wrmsrl(msrs->counters[i].addr, -1LL); + } -static u8 ibs_eilvt_off; + /* enable active counters */ + for (i = 0; i < num_counters; ++i) { + int virt = op_x86_phys_to_virt(i); + if (!reset_value[virt]) + continue; -static inline void apic_init_ibs_nmi_per_cpu(void *arg) -{ - ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); -} + /* setup counter registers */ + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); -static inline void apic_clear_ibs_nmi_per_cpu(void *arg) -{ - setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); + /* setup control registers */ + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[virt]); + wrmsrl(msrs->controls[i].addr, val); + } } -static int init_ibs_nmi(void) +static int op_amd_check_ctrs(struct pt_regs * const regs, + struct op_msrs const * const msrs) { -#define IBSCTL_LVTOFFSETVAL (1 << 8) -#define IBSCTL 0x1cc - struct pci_dev *cpu_cfg; - int nodes; - u32 value = 0; - - /* per CPU setup */ - on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1); - - nodes = 0; - cpu_cfg = NULL; - do { - cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_AMD_10H_NB_MISC, - cpu_cfg); - if (!cpu_cfg) - break; - ++nodes; - pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off - | IBSCTL_LVTOFFSETVAL); - pci_read_config_dword(cpu_cfg, IBSCTL, &value); - if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { - pci_dev_put(cpu_cfg); - printk(KERN_DEBUG "Failed to setup IBS LVT offset, " - "IBSCTL = 0x%08x", value); - return 1; - } - } while (1); + u64 val; + int i; - if (!nodes) { - printk(KERN_DEBUG "No CPU node configured for IBS"); - return 1; + for (i = 0; i < num_counters; ++i) { + int virt = op_x86_phys_to_virt(i); + if (!reset_value[virt]) + continue; + rdmsrl(msrs->counters[i].addr, val); + /* bit is clear if overflowed: */ + if (val & OP_CTR_OVERFLOW) + continue; + oprofile_add_sample(regs, virt); + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); } -#ifdef CONFIG_NUMA - /* Sanity check */ - /* Works only for 64bit with proper numa implementation. */ - if (nodes != num_possible_nodes()) { - printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, " - "found: %d, expected %d", - nodes, num_possible_nodes()); - return 1; - } -#endif - return 0; + op_amd_handle_ibs(regs, msrs); + + /* See op_model_ppro.c */ + return 1; } -/* uninitialize the APIC for the IBS interrupts if needed */ -static void clear_ibs_nmi(void) +static void op_amd_start(struct op_msrs const * const msrs) { - if (has_ibs) - on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); + u64 val; + int i; + + for (i = 0; i < num_counters; ++i) { + if (!reset_value[op_x86_phys_to_virt(i)]) + continue; + rdmsrl(msrs->controls[i].addr, val); + val |= ARCH_PERFMON_EVENTSEL_ENABLE; + wrmsrl(msrs->controls[i].addr, val); + } + + op_amd_start_ibs(); } -/* initialize the APIC for the IBS interrupts if available */ -static void ibs_init(void) +static void op_amd_stop(struct op_msrs const * const msrs) { - has_ibs = boot_cpu_has(X86_FEATURE_IBS); - - if (!has_ibs) - return; + u64 val; + int i; - if (init_ibs_nmi()) { - has_ibs = 0; - return; + /* + * Subtle: stop on all counters to avoid race with setting our + * pm callback + */ + for (i = 0; i < num_counters; ++i) { + if (!reset_value[op_x86_phys_to_virt(i)]) + continue; + rdmsrl(msrs->controls[i].addr, val); + val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + wrmsrl(msrs->controls[i].addr, val); } - printk(KERN_INFO "oprofile: AMD IBS detected\n"); + op_amd_stop_ibs(); } -static void ibs_exit(void) +/* + * check and reserve APIC extended interrupt LVT offset for IBS if + * available + */ + +static void init_ibs(void) { - if (!has_ibs) + ibs_caps = get_ibs_caps(); + + if (!ibs_caps) return; - clear_ibs_nmi(); + printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); } -static int (*create_arch_files)(struct super_block *sb, struct dentry *root); +static int (*create_arch_files)(struct dentry *root); -static int setup_ibs_files(struct super_block *sb, struct dentry *root) +static int setup_ibs_files(struct dentry *root) { struct dentry *dir; int ret = 0; /* architecture specific files */ if (create_arch_files) - ret = create_arch_files(sb, root); + ret = create_arch_files(root); if (ret) return ret; - if (!has_ibs) + if (!ibs_caps) return ret; /* model specific files */ /* setup some reasonable defaults */ + memset(&ibs_config, 0, sizeof(ibs_config)); ibs_config.max_cnt_fetch = 250000; - ibs_config.fetch_enabled = 0; ibs_config.max_cnt_op = 250000; - ibs_config.op_enabled = 0; - ibs_config.dispatched_ops = 1; - - dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); - oprofilefs_create_ulong(sb, dir, "enable", - &ibs_config.fetch_enabled); - oprofilefs_create_ulong(sb, dir, "max_count", - &ibs_config.max_cnt_fetch); - oprofilefs_create_ulong(sb, dir, "rand_enable", - &ibs_config.rand_en); - - dir = oprofilefs_mkdir(sb, root, "ibs_op"); - oprofilefs_create_ulong(sb, dir, "enable", - &ibs_config.op_enabled); - oprofilefs_create_ulong(sb, dir, "max_count", - &ibs_config.max_cnt_op); - oprofilefs_create_ulong(sb, dir, "dispatched_ops", - &ibs_config.dispatched_ops); + + if (ibs_caps & IBS_CAPS_FETCHSAM) { + dir = oprofilefs_mkdir(root, "ibs_fetch"); + oprofilefs_create_ulong(dir, "enable", + &ibs_config.fetch_enabled); + oprofilefs_create_ulong(dir, "max_count", + &ibs_config.max_cnt_fetch); + oprofilefs_create_ulong(dir, "rand_enable", + &ibs_config.rand_en); + } + + if (ibs_caps & IBS_CAPS_OPSAM) { + dir = oprofilefs_mkdir(root, "ibs_op"); + oprofilefs_create_ulong(dir, "enable", + &ibs_config.op_enabled); + oprofilefs_create_ulong(dir, "max_count", + &ibs_config.max_cnt_op); + if (ibs_caps & IBS_CAPS_OPCNT) + oprofilefs_create_ulong(dir, "dispatched_ops", + &ibs_config.dispatched_ops); + if (ibs_caps & IBS_CAPS_BRNTRGT) + oprofilefs_create_ulong(dir, "branch_target", + &ibs_config.branch_target); + } return 0; } +struct op_x86_model_spec op_amd_spec; + static int op_amd_init(struct oprofile_operations *ops) { - ibs_init(); + init_ibs(); create_arch_files = ops->create_files; ops->create_files = setup_ibs_files; - return 0; -} -static void op_amd_exit(void) -{ - ibs_exit(); -} - -#else + if (boot_cpu_data.x86 == 0x15) { + num_counters = AMD64_NUM_COUNTERS_CORE; + } else { + num_counters = AMD64_NUM_COUNTERS; + } -/* no IBS support */ + op_amd_spec.num_counters = num_counters; + op_amd_spec.num_controls = num_counters; + op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS); -static int op_amd_init(struct oprofile_operations *ops) -{ return 0; } -static void op_amd_exit(void) {} - -#endif /* CONFIG_OPROFILE_IBS */ - -struct op_x86_model_spec const op_amd_spec = { +struct op_x86_model_spec op_amd_spec = { + /* num_counters/num_controls filled in at runtime */ + .reserved = MSR_AMD_EVENTSEL_RESERVED, + .event_mask = OP_EVENT_MASK, .init = op_amd_init, - .exit = op_amd_exit, - .num_counters = NUM_COUNTERS, - .num_controls = NUM_CONTROLS, .fill_in_addresses = &op_amd_fill_in_addresses, .setup_ctrs = &op_amd_setup_ctrs, .check_ctrs = &op_amd_check_ctrs, .start = &op_amd_start, .stop = &op_amd_stop, - .shutdown = &op_amd_shutdown + .shutdown = &op_amd_shutdown, +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + .switch_ctrl = &op_mux_switch_ctrl, +#endif }; diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 4c4a51c90bc..98ab13058f8 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -11,7 +11,7 @@ #include <linux/oprofile.h> #include <linux/smp.h> #include <linux/ptrace.h> -#include <linux/nmi.h> +#include <asm/nmi.h> #include <asm/msr.h> #include <asm/fixmap.h> #include <asm/apic.h> @@ -32,6 +32,8 @@ #define NUM_CCCRS_HT2 9 #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) +#define OP_CTR_OVERFLOW (1ULL<<31) + static unsigned int num_counters = NUM_COUNTERS_NON_HT; static unsigned int num_controls = NUM_CONTROLS_NON_HT; @@ -48,7 +50,7 @@ static inline void setup_num_counters(void) #endif } -static int inline addr_increment(void) +static inline int addr_increment(void) { #ifdef CONFIG_SMP return smp_num_siblings == 2 ? 2 : 1; @@ -350,8 +352,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) -#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) -#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) #define CCCR_RESERVED_BITS 0x38030FFF #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) @@ -361,17 +361,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) -#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) -#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) -#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0) -#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0) -#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) - /* this assigns a "stagger" to the current CPU, which is used throughout the code in this module as an extra array offset, to select the "even" @@ -380,7 +372,7 @@ static unsigned int get_stagger(void) { #ifdef CONFIG_SMP int cpu = smp_processor_id(); - return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu))); + return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); #endif return 0; } @@ -393,8 +385,26 @@ static unsigned int get_stagger(void) static unsigned long reset_value[NUM_COUNTERS_NON_HT]; +static void p4_shutdown(struct op_msrs const * const msrs) +{ + int i; -static void p4_fill_in_addresses(struct op_msrs * const msrs) + for (i = 0; i < num_counters; ++i) { + if (msrs->counters[i].addr) + release_perfctr_nmi(msrs->counters[i].addr); + } + /* + * some of the control registers are specially reserved in + * conjunction with the counter registers (hence the starting offset). + * This saves a few bits. + */ + for (i = num_counters; i < num_controls; ++i) { + if (msrs->controls[i].addr) + release_evntsel_nmi(msrs->controls[i].addr); + } +} + +static int p4_fill_in_addresses(struct op_msrs * const msrs) { unsigned int i; unsigned int addr, cccraddr, stag; @@ -402,12 +412,6 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) setup_num_counters(); stag = get_stagger(); - /* initialize some registers */ - for (i = 0; i < num_counters; ++i) - msrs->counters[i].addr = 0; - for (i = 0; i < num_controls; ++i) - msrs->controls[i].addr = 0; - /* the counter & cccr registers we pay attention to */ for (i = 0; i < num_counters; ++i) { addr = p4_counters[VIRT_CTR(stag, i)].counter_address; @@ -482,6 +486,18 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; } } + + for (i = 0; i < num_counters; ++i) { + if (!counter_config[i].enabled) + continue; + if (msrs->controls[i].addr) + continue; + op_x86_warn_reserved(i); + p4_shutdown(msrs); + return -EBUSY; + } + + return 0; } @@ -515,7 +531,7 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) if (ev->bindings[i].virt_counter & counter_bit) { /* modify ESCR */ - ESCR_READ(escr, high, ev, i); + rdmsr(ev->bindings[i].escr_address, escr, high); ESCR_CLEAR(escr); if (stag == 0) { ESCR_SET_USR_0(escr, counter_config[ctr].user); @@ -526,10 +542,11 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) } ESCR_SET_EVENT_SELECT(escr, ev->event_select); ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); - ESCR_WRITE(escr, high, ev, i); + wrmsr(ev->bindings[i].escr_address, escr, high); /* modify CCCR */ - CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); + rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, + cccr, high); CCCR_CLEAR(cccr); CCCR_SET_REQUIRED_BITS(cccr); CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); @@ -537,7 +554,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) CCCR_SET_PMI_OVF_0(cccr); else CCCR_SET_PMI_OVF_1(cccr); - CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); + wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, + cccr, high); return; } } @@ -548,7 +566,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) } -static void p4_setup_ctrs(struct op_msrs const * const msrs) +static void p4_setup_ctrs(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) { unsigned int i; unsigned int low, high; @@ -563,8 +582,8 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) } /* clear the cccrs we will use */ - for (i = 0 ; i < num_counters ; i++) { - if (unlikely(!CTRL_IS_RESERVED(msrs, i))) + for (i = 0; i < num_counters; i++) { + if (unlikely(!msrs->controls[i].addr)) continue; rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_CLEAR(low); @@ -574,17 +593,18 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) /* clear all escrs (including those outside our concern) */ for (i = num_counters; i < num_controls; i++) { - if (unlikely(!CTRL_IS_RESERVED(msrs, i))) + if (unlikely(!msrs->controls[i].addr)) continue; wrmsr(msrs->controls[i].addr, 0, 0); } /* setup all counters */ - for (i = 0 ; i < num_counters ; ++i) { - if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) { + for (i = 0; i < num_counters; ++i) { + if (counter_config[i].enabled && msrs->controls[i].addr) { reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); - CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); + wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address, + -(u64)counter_config[i].count); } else { reset_value[i] = 0; } @@ -624,14 +644,16 @@ static int p4_check_ctrs(struct pt_regs * const regs, real = VIRT_CTR(stag, i); - CCCR_READ(low, high, real); - CTR_READ(ctr, high, real); - if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { + rdmsr(p4_counters[real].cccr_address, low, high); + rdmsr(p4_counters[real].counter_address, ctr, high); + if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) { oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], real); + wrmsrl(p4_counters[real].counter_address, + -(u64)reset_value[i]); CCCR_CLEAR_OVF(low); - CCCR_WRITE(low, high, real); - CTR_WRITE(reset_value[i], real); + wrmsr(p4_counters[real].cccr_address, low, high); + wrmsrl(p4_counters[real].counter_address, + -(u64)reset_value[i]); } } @@ -653,9 +675,9 @@ static void p4_start(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; - CCCR_READ(low, high, VIRT_CTR(stag, i)); + rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_SET_ENABLE(low); - CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); } } @@ -670,34 +692,14 @@ static void p4_stop(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; - CCCR_READ(low, high, VIRT_CTR(stag, i)); + rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_SET_DISABLE(low); - CCCR_WRITE(low, high, VIRT_CTR(stag, i)); - } -} - -static void p4_shutdown(struct op_msrs const * const msrs) -{ - int i; - - for (i = 0 ; i < num_counters ; ++i) { - if (CTR_IS_RESERVED(msrs, i)) - release_perfctr_nmi(msrs->counters[i].addr); - } - /* - * some of the control registers are specially reserved in - * conjunction with the counter registers (hence the starting offset). - * This saves a few bits. - */ - for (i = num_counters ; i < num_controls ; ++i) { - if (CTRL_IS_RESERVED(msrs, i)) - release_evntsel_nmi(msrs->controls[i].addr); + wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); } } - #ifdef CONFIG_SMP -struct op_x86_model_spec const op_p4_ht2_spec = { +struct op_x86_model_spec op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, .fill_in_addresses = &p4_fill_in_addresses, @@ -709,7 +711,7 @@ struct op_x86_model_spec const op_p4_ht2_spec = { }; #endif -struct op_x86_model_spec const op_p4_spec = { +struct op_x86_model_spec op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, .fill_in_addresses = &p4_fill_in_addresses, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 10131fbdaad..d90528ea541 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -10,6 +10,7 @@ * @author Philippe Elie * @author Graydon Hoare * @author Andi Kleen + * @author Robert Richter <robert.richter@amd.com> */ #include <linux/oprofile.h> @@ -18,7 +19,6 @@ #include <asm/msr.h> #include <asm/apic.h> #include <asm/nmi.h> -#include <asm/intel_arch_perfmon.h> #include "op_x86_model.h" #include "op_counter.h" @@ -26,55 +26,55 @@ static int num_counters = 2; static int counter_width = 32; -#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1)))) +#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21)) -#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) -#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) -#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) -#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) -#define CTRL_CLEAR(x) (x &= (1<<21)) -#define CTRL_SET_ENABLE(val) (val |= 1<<20) -#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) -#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) -#define CTRL_SET_UM(val, m) (val |= (m << 8)) -#define CTRL_SET_EVENT(val, e) (val |= e) +static u64 reset_value[OP_MAX_COUNTER]; -static u64 *reset_value; - -static void ppro_fill_in_addresses(struct op_msrs * const msrs) +static void ppro_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0; i < num_counters; i++) { - if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) - msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; - else - msrs->counters[i].addr = 0; + for (i = 0; i < num_counters; ++i) { + if (!msrs->counters[i].addr) + continue; + release_perfctr_nmi(MSR_P6_PERFCTR0 + i); + release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); } +} + +static int ppro_fill_in_addresses(struct op_msrs * const msrs) +{ + int i; for (i = 0; i < num_counters; i++) { - if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) - msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; - else - msrs->controls[i].addr = 0; + if (!reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) + goto fail; + if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) { + release_perfctr_nmi(MSR_P6_PERFCTR0 + i); + goto fail; + } + /* both registers must be reserved */ + msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; + msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; + continue; + fail: + if (!counter_config[i].enabled) + continue; + op_x86_warn_reserved(i); + ppro_shutdown(msrs); + return -EBUSY; } + + return 0; } -static void ppro_setup_ctrs(struct op_msrs const * const msrs) +static void ppro_setup_ctrs(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; - if (!reset_value) { - reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, - GFP_ATOMIC); - if (!reset_value) - return; - } - if (cpu_has_arch_perfmon) { union cpuid10_eax eax; eax.full = cpuid_eax(0xa); @@ -84,8 +84,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) * counter width: */ if (!(eax.split.version_id == 0 && - current_cpu_data.x86 == 6 && - current_cpu_data.x86_model == 15)) { + __this_cpu_read(cpu_info.x86) == 6 && + __this_cpu_read(cpu_info.x86_model) == 15)) { if (counter_width < eax.split.bit_width) counter_width = eax.split.bit_width; @@ -93,36 +93,30 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) } /* clear all counters */ - for (i = 0 ; i < num_counters; ++i) { - if (unlikely(!CTRL_IS_RESERVED(msrs, i))) - continue; - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR(low); - CTRL_WRITE(low, high, msrs, i); - } - - /* avoid a false detection of ctr overflows in NMI handler */ for (i = 0; i < num_counters; ++i) { - if (unlikely(!CTR_IS_RESERVED(msrs, i))) + if (!msrs->controls[i].addr) continue; + rdmsrl(msrs->controls[i].addr, val); + if (val & ARCH_PERFMON_EVENTSEL_ENABLE) + op_x86_warn_in_use(i); + val &= model->reserved; + wrmsrl(msrs->controls[i].addr, val); + /* + * avoid a false detection of ctr overflows in NMI * + * handler + */ wrmsrl(msrs->counters[i].addr, -1LL); } /* enable active counters */ for (i = 0; i < num_counters; ++i) { - if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { + if (counter_config[i].enabled && msrs->counters[i].addr) { reset_value[i] = counter_config[i].count; - wrmsrl(msrs->counters[i].addr, -reset_value[i]); - - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR(low); - CTRL_SET_ENABLE(low); - CTRL_SET_USR(low, counter_config[i].user); - CTRL_SET_KERN(low, counter_config[i].kernel); - CTRL_SET_UM(low, counter_config[i].unit_mask); - CTRL_SET_EVENT(low, counter_config[i].event); - CTRL_WRITE(low, high, msrs, i); + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[i]); + wrmsrl(msrs->controls[i].addr, val); } else { reset_value[i] = 0; } @@ -136,14 +130,14 @@ static int ppro_check_ctrs(struct pt_regs * const regs, u64 val; int i; - for (i = 0 ; i < num_counters; ++i) { + for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; rdmsrl(msrs->counters[i].addr, val); - if (CTR_OVERFLOWED(val)) { - oprofile_add_sample(regs, i); - wrmsrl(msrs->counters[i].addr, -reset_value[i]); - } + if (val & (1ULL << (counter_width - 1))) + continue; + oprofile_add_sample(regs, i); + wrmsrl(msrs->counters[i].addr, -reset_value[i]); } /* Only P6 based Pentium M need to re-unmask the apic vector but it @@ -163,16 +157,14 @@ static int ppro_check_ctrs(struct pt_regs * const regs, static void ppro_start(struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; - if (!reset_value) - return; for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { - CTRL_READ(low, high, msrs, i); - CTRL_SET_ACTIVE(low); - CTRL_WRITE(low, high, msrs, i); + rdmsrl(msrs->controls[i].addr, val); + val |= ARCH_PERFMON_EVENTSEL_ENABLE; + wrmsrl(msrs->controls[i].addr, val); } } } @@ -180,42 +172,22 @@ static void ppro_start(struct op_msrs const * const msrs) static void ppro_stop(struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; - if (!reset_value) - return; for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; - CTRL_READ(low, high, msrs, i); - CTRL_SET_INACTIVE(low); - CTRL_WRITE(low, high, msrs, i); + rdmsrl(msrs->controls[i].addr, val); + val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + wrmsrl(msrs->controls[i].addr, val); } } -static void ppro_shutdown(struct op_msrs const * const msrs) -{ - int i; - - for (i = 0 ; i < num_counters ; ++i) { - if (CTR_IS_RESERVED(msrs, i)) - release_perfctr_nmi(MSR_P6_PERFCTR0 + i); - } - for (i = 0 ; i < num_counters ; ++i) { - if (CTRL_IS_RESERVED(msrs, i)) - release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); - } - if (reset_value) { - kfree(reset_value); - reset_value = NULL; - } -} - - struct op_x86_model_spec op_ppro_spec = { - .num_counters = 2, /* can be overriden */ - .num_controls = 2, /* dito */ + .num_counters = 2, + .num_controls = 2, + .reserved = MSR_PPRO_EVENTSEL_RESERVED, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, .check_ctrs = &ppro_check_ctrs, @@ -233,29 +205,35 @@ struct op_x86_model_spec op_ppro_spec = { * the specific CPU. */ -void arch_perfmon_setup_counters(void) +static void arch_perfmon_setup_counters(void) { union cpuid10_eax eax; eax.full = cpuid_eax(0xa); /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ - if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && - current_cpu_data.x86_model == 15) { + if (eax.split.version_id == 0 && __this_cpu_read(cpu_info.x86) == 6 && + __this_cpu_read(cpu_info.x86_model) == 15) { eax.split.version_id = 2; eax.split.num_counters = 2; eax.split.bit_width = 40; } - num_counters = eax.split.num_counters; + num_counters = min((int)eax.split.num_counters, OP_MAX_COUNTER); op_arch_perfmon_spec.num_counters = num_counters; op_arch_perfmon_spec.num_controls = num_counters; - op_ppro_spec.num_counters = num_counters; - op_ppro_spec.num_controls = num_counters; +} + +static int arch_perfmon_init(struct oprofile_operations *ignore) +{ + arch_perfmon_setup_counters(); + return 0; } struct op_x86_model_spec op_arch_perfmon_spec = { + .reserved = MSR_PPRO_EVENTSEL_RESERVED, + .init = &arch_perfmon_init, /* num_counters/num_controls filled in at runtime */ .fill_in_addresses = &ppro_fill_in_addresses, /* user space does the cpuid check for available events */ diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 825e79064d6..71e8a67337e 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -6,51 +6,85 @@ * @remark Read the file COPYING * * @author Graydon Hoare + * @author Robert Richter <robert.richter@amd.com> */ #ifndef OP_X86_MODEL_H #define OP_X86_MODEL_H -struct op_saved_msr { - unsigned int high; - unsigned int low; -}; +#include <asm/types.h> +#include <asm/perf_event.h> struct op_msr { - unsigned long addr; - struct op_saved_msr saved; + unsigned long addr; + u64 saved; }; struct op_msrs { struct op_msr *counters; struct op_msr *controls; + struct op_msr *multiplex; }; struct pt_regs; +struct oprofile_operations; + /* The model vtable abstracts the differences between * various x86 CPU models' perfctr support. */ struct op_x86_model_spec { - int (*init)(struct oprofile_operations *ops); - void (*exit)(void); - unsigned int num_counters; - unsigned int num_controls; - void (*fill_in_addresses)(struct op_msrs * const msrs); - void (*setup_ctrs)(struct op_msrs const * const msrs); - int (*check_ctrs)(struct pt_regs * const regs, - struct op_msrs const * const msrs); - void (*start)(struct op_msrs const * const msrs); - void (*stop)(struct op_msrs const * const msrs); - void (*shutdown)(struct op_msrs const * const msrs); + unsigned int num_counters; + unsigned int num_controls; + unsigned int num_virt_counters; + u64 reserved; + u16 event_mask; + int (*init)(struct oprofile_operations *ops); + int (*fill_in_addresses)(struct op_msrs * const msrs); + void (*setup_ctrs)(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs); + int (*check_ctrs)(struct pt_regs * const regs, + struct op_msrs const * const msrs); + void (*start)(struct op_msrs const * const msrs); + void (*stop)(struct op_msrs const * const msrs); + void (*shutdown)(struct op_msrs const * const msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + void (*switch_ctrl)(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs); +#endif }; +struct op_counter_config; + +static inline void op_x86_warn_in_use(int counter) +{ + /* + * The warning indicates an already running counter. If + * oprofile doesn't collect data, then try using a different + * performance counter on your platform to monitor the desired + * event. Delete counter #%d from the desired event by editing + * the /usr/share/oprofile/%s/<cpu>/events file. If the event + * cannot be monitored by any other counter, contact your + * hardware or BIOS vendor. + */ + pr_warning("oprofile: counter #%d on cpu #%d may already be used\n", + counter, smp_processor_id()); +} + +static inline void op_x86_warn_reserved(int counter) +{ + pr_warning("oprofile: counter #%d is already reserved\n", counter); +} + +extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, + struct op_counter_config *counter_config); +extern int op_x86_phys_to_virt(int phys); +extern int op_x86_virt_to_phys(int virt); + extern struct op_x86_model_spec op_ppro_spec; -extern struct op_x86_model_spec const op_p4_spec; -extern struct op_x86_model_spec const op_p4_ht2_spec; -extern struct op_x86_model_spec const op_amd_spec; +extern struct op_x86_model_spec op_p4_spec; +extern struct op_x86_model_spec op_p4_ht2_spec; +extern struct op_x86_model_spec op_amd_spec; extern struct op_x86_model_spec op_arch_perfmon_spec; -extern void arch_perfmon_setup_counters(void); - #endif /* OP_X86_MODEL_H */ |
