diff options
Diffstat (limited to 'arch')
68 files changed, 1572 insertions, 1411 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index fe48fc7a3eb..53d7f619a1b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI subsystem. Also has support for calculating CPU cycle events to determine how many clock cycles in a given period. +config HAVE_ARCH_JUMP_LABEL + bool + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index b9647bb66d1..d04ccd73af4 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -9,6 +9,7 @@ config ALPHA select HAVE_IDE select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_DMA_ATTRS help diff --git a/arch/alpha/include/asm/perf_event.h b/arch/alpha/include/asm/perf_event.h index 4157cd3c44a..fe792ca818f 100644 --- a/arch/alpha/include/asm/perf_event.h +++ b/arch/alpha/include/asm/perf_event.h @@ -1,11 +1,6 @@ #ifndef __ASM_ALPHA_PERF_EVENT_H #define __ASM_ALPHA_PERF_EVENT_H -/* Alpha only supports software events through this interface. */ -extern void set_perf_event_pending(void); - -#define PERF_EVENT_INDEX_OFFSET 0 - #ifdef CONFIG_PERF_EVENTS extern void init_hw_perf_events(void); #else diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 85d8e4f58c8..1cc49683fb6 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -307,7 +307,7 @@ again: new_raw_count) != prev_raw_count) goto again; - delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; + delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; /* It is possible on very rare occasions that the PMC has overflowed * but the interrupt is yet to come. Detect and fix this situation. @@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) struct hw_perf_event *hwc = &pe->hw; int idx = hwc->idx; - if (cpuc->current_idx[j] != PMC_NO_INDEX) { - cpuc->idx_mask |= (1<<cpuc->current_idx[j]); - continue; + if (cpuc->current_idx[j] == PMC_NO_INDEX) { + alpha_perf_event_set_period(pe, hwc, idx); + cpuc->current_idx[j] = idx; } - alpha_perf_event_set_period(pe, hwc, idx); - cpuc->current_idx[j] = idx; - cpuc->idx_mask |= (1<<cpuc->current_idx[j]); + if (!(hwc->state & PERF_HES_STOPPED)) + cpuc->idx_mask |= (1<<cpuc->current_idx[j]); } cpuc->config = cpuc->event[0]->hw.config_base; } @@ -420,12 +419,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) * - this function is called from outside this module via the pmu struct * returned from perf event initialisation. */ -static int alpha_pmu_enable(struct perf_event *event) +static int alpha_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; int n0; int ret; - unsigned long flags; + unsigned long irq_flags; /* * The Sparc code has the IRQ disable first followed by the perf @@ -435,8 +435,8 @@ static int alpha_pmu_enable(struct perf_event *event) * nevertheless we disable the PMCs first to enable a potential * final PMI to occur before we disable interrupts. */ - perf_disable(); - local_irq_save(flags); + perf_pmu_disable(event->pmu); + local_irq_save(irq_flags); /* Default to error to be returned */ ret = -EAGAIN; @@ -455,8 +455,12 @@ static int alpha_pmu_enable(struct perf_event *event) } } - local_irq_restore(flags); - perf_enable(); + hwc->state = PERF_HES_UPTODATE; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_STOPPED; + + local_irq_restore(irq_flags); + perf_pmu_enable(event->pmu); return ret; } @@ -467,15 +471,15 @@ static int alpha_pmu_enable(struct perf_event *event) * - this function is called from outside this module via the pmu struct * returned from perf event initialisation. */ -static void alpha_pmu_disable(struct perf_event *event) +static void alpha_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - unsigned long flags; + unsigned long irq_flags; int j; - perf_disable(); - local_irq_save(flags); + perf_pmu_disable(event->pmu); + local_irq_save(irq_flags); for (j = 0; j < cpuc->n_events; j++) { if (event == cpuc->event[j]) { @@ -501,8 +505,8 @@ static void alpha_pmu_disable(struct perf_event *event) } } - local_irq_restore(flags); - perf_enable(); + local_irq_restore(irq_flags); + perf_pmu_enable(event->pmu); } @@ -514,13 +518,44 @@ static void alpha_pmu_read(struct perf_event *event) } -static void alpha_pmu_unthrottle(struct perf_event *event) +static void alpha_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!(hwc->state & PERF_HES_STOPPED)) { + cpuc->idx_mask &= ~(1UL<<hwc->idx); + hwc->state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + alpha_perf_event_update(event, hwc, hwc->idx, 0); + hwc->state |= PERF_HES_UPTODATE; + } + + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx)); +} + + +static void alpha_pmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + alpha_perf_event_set_period(event, hwc, hwc->idx); + } + + hwc->state = 0; + cpuc->idx_mask |= 1UL<<hwc->idx; - wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); } @@ -642,39 +677,36 @@ static int __hw_perf_event_init(struct perf_event *event) return 0; } -static const struct pmu pmu = { - .enable = alpha_pmu_enable, - .disable = alpha_pmu_disable, - .read = alpha_pmu_read, - .unthrottle = alpha_pmu_unthrottle, -}; - - /* * Main entry point to initialise a HW performance event. */ -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int alpha_pmu_event_init(struct perf_event *event) { int err; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!alpha_pmu) - return ERR_PTR(-ENODEV); + return -ENODEV; /* Do the real initialisation work. */ err = __hw_perf_event_init(event); - if (err) - return ERR_PTR(err); - - return &pmu; + return err; } - - /* * Main entry point - enable HW performance counters. */ -void hw_perf_enable(void) +static void alpha_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -700,7 +732,7 @@ void hw_perf_enable(void) * Main entry point - disable HW performance counters. */ -void hw_perf_disable(void) +static void alpha_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -713,6 +745,17 @@ void hw_perf_disable(void) wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); } +static struct pmu pmu = { + .pmu_enable = alpha_pmu_enable, + .pmu_disable = alpha_pmu_disable, + .event_init = alpha_pmu_event_init, + .add = alpha_pmu_add, + .del = alpha_pmu_del, + .start = alpha_pmu_start, + .stop = alpha_pmu_stop, + .read = alpha_pmu_read, +}; + /* * Main entry point - don't know when this is called but it @@ -766,7 +809,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); /* la_ptr is the counter that overflowed. */ - if (unlikely(la_ptr >= perf_max_events)) { + if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) { /* This should never occur! */ irq_err_count++; pr_warning("PMI: silly index %ld\n", la_ptr); @@ -807,7 +850,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, /* Interrupts coming too quickly; "throttle" the * counter, i.e., disable it for a little while. */ - cpuc->idx_mask &= ~(1UL<<idx); + alpha_pmu_stop(event, 0); } } wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); @@ -837,6 +880,7 @@ void __init init_hw_perf_events(void) /* And set up PMU specification */ alpha_pmu = &ev67_pmu; - perf_max_events = alpha_pmu->num_pmcs; + + perf_pmu_register(&pmu); } diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index 396af1799ea..0f1d8493cfc 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -41,7 +41,7 @@ #include <linux/init.h> #include <linux/bcd.h> #include <linux/profile.h> -#include <linux/perf_event.h> +#include <linux/irq_work.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -83,25 +83,25 @@ static struct { unsigned long est_cycle_freq; -#ifdef CONFIG_PERF_EVENTS +#ifdef CONFIG_IRQ_WORK -DEFINE_PER_CPU(u8, perf_event_pending); +DEFINE_PER_CPU(u8, irq_work_pending); -#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 -#define test_perf_event_pending() __get_cpu_var(perf_event_pending) -#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 +#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 +#define test_irq_work_pending() __get_cpu_var(irq_work_pending) +#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 -void set_perf_event_pending(void) +void set_irq_work_pending(void) { - set_perf_event_pending_flag(); + set_irq_work_pending_flag(); } -#else /* CONFIG_PERF_EVENTS */ +#else /* CONFIG_IRQ_WORK */ -#define test_perf_event_pending() 0 -#define clear_perf_event_pending() +#define test_irq_work_pending() 0 +#define clear_irq_work_pending() -#endif /* CONFIG_PERF_EVENTS */ +#endif /* CONFIG_IRQ_WORK */ static inline __u32 rpcc(void) @@ -191,9 +191,9 @@ irqreturn_t timer_interrupt(int irq, void *dev) write_sequnlock(&xtime_lock); - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); } #ifndef CONFIG_SMP diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9c26ba7244f..9103904b3da 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -23,6 +23,7 @@ config ARM select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO select HAVE_KERNEL_LZMA + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select HAVE_REGS_AND_STACK_ACCESS_API diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index b5799a3b711..c4aa4e8c6af 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -12,18 +12,6 @@ #ifndef __ARM_PERF_EVENT_H__ #define __ARM_PERF_EVENT_H__ -/* - * NOP: on *most* (read: all supported) ARM platforms, the performance - * counter interrupts are regular interrupts and not an NMI. This - * means that when we receive the interrupt we can call - * perf_event_do_pending() that handles all of the work with - * interrupts disabled. - */ -static inline void -set_perf_event_pending(void) -{ -} - /* ARM performance counters start from 1 (in the cp15 accesses) so use the * same indexes here for consistency. */ #define PERF_EVENT_INDEX_OFFSET 1 diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index ecbb0288e5d..49643b1467e 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -123,6 +123,12 @@ armpmu_get_max_events(void) } EXPORT_SYMBOL_GPL(armpmu_get_max_events); +int perf_num_counters(void) +{ + return armpmu_get_max_events(); +} +EXPORT_SYMBOL_GPL(perf_num_counters); + #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) \ @@ -221,46 +227,56 @@ again: } static void -armpmu_disable(struct perf_event *event) +armpmu_read(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - WARN_ON(idx < 0); - - clear_bit(idx, cpuc->active_mask); - armpmu->disable(hwc, idx); - - barrier(); - armpmu_event_update(event, hwc, idx); - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + /* Don't read disabled counters! */ + if (hwc->idx < 0) + return; - perf_event_update_userpage(event); + armpmu_event_update(event, hwc, hwc->idx); } static void -armpmu_read(struct perf_event *event) +armpmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - /* Don't read disabled counters! */ - if (hwc->idx < 0) + if (!armpmu) return; - armpmu_event_update(event, hwc, hwc->idx); + /* + * ARM pmu always has to update the counter, so ignore + * PERF_EF_UPDATE, see comments in armpmu_start(). + */ + if (!(hwc->state & PERF_HES_STOPPED)) { + armpmu->disable(hwc, hwc->idx); + barrier(); /* why? */ + armpmu_event_update(event, hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } } static void -armpmu_unthrottle(struct perf_event *event) +armpmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; + if (!armpmu) + return; + + /* + * ARM pmu always has to reprogram the period, so ignore + * PERF_EF_RELOAD, see the comment below. + */ + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; /* * Set the period again. Some counters can't be stopped, so when we - * were throttled we simply disabled the IRQ source and the counter + * were stopped we simply disabled the IRQ source and the counter * may have been left counting. If we don't do this step then we may * get an interrupt too soon or *way* too late if the overflow has * happened since disabling. @@ -269,14 +285,33 @@ armpmu_unthrottle(struct perf_event *event) armpmu->enable(hwc, hwc->idx); } +static void +armpmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + WARN_ON(idx < 0); + + clear_bit(idx, cpuc->active_mask); + armpmu_stop(event, PERF_EF_UPDATE); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + static int -armpmu_enable(struct perf_event *event) +armpmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx; int err = 0; + perf_pmu_disable(event->pmu); + /* If we don't have a space for the counter then finish early. */ idx = armpmu->get_event_idx(cpuc, hwc); if (idx < 0) { @@ -293,25 +328,19 @@ armpmu_enable(struct perf_event *event) cpuc->events[idx] = event; set_bit(idx, cpuc->active_mask); - /* Set the period for the event. */ - armpmu_event_set_period(event, hwc, idx); - - /* Enable the event. */ - armpmu->enable(hwc, idx); + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + armpmu_start(event, PERF_EF_RELOAD); /* Propagate our changes to the userspace mapping. */ perf_event_update_userpage(event); out: + perf_pmu_enable(event->pmu); return err; } -static struct pmu pmu = { - .enable = armpmu_enable, - .disable = armpmu_disable, - .unthrottle = armpmu_unthrottle, - .read = armpmu_read, -}; +static struct pmu pmu; static int validate_event(struct cpu_hw_events *cpuc, @@ -491,20 +520,29 @@ __hw_perf_event_init(struct perf_event *event) return err; } -const struct pmu * -hw_perf_event_init(struct perf_event *event) +static int armpmu_event_init(struct perf_event *event) { int err = 0; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!armpmu) - return ERR_PTR(-ENODEV); + return -ENODEV; event->destroy = hw_perf_event_destroy; if (!atomic_inc_not_zero(&active_events)) { - if (atomic_read(&active_events) > perf_max_events) { + if (atomic_read(&active_events) > armpmu->num_events) { atomic_dec(&active_events); - return ERR_PTR(-ENOSPC); + return -ENOSPC; } mutex_lock(&pmu_reserve_mutex); @@ -518,17 +556,16 @@ hw_perf_event_init(struct perf_event *event) } if (err) - return ERR_PTR(err); + return err; err = __hw_perf_event_init(event); if (err) hw_perf_event_destroy(event); - return err ? ERR_PTR(err) : &pmu; + return err; } -void -hw_perf_enable(void) +static void armpmu_enable(struct pmu *pmu) { /* Enable all of the perf events on hardware. */ int idx; @@ -549,13 +586,23 @@ hw_perf_enable(void) armpmu->start(); } -void -hw_perf_disable(void) +static void armpmu_disable(struct pmu *pmu) { if (armpmu) armpmu->stop(); } +static struct pmu pmu = { + .pmu_enable = armpmu_enable, + .pmu_disable = armpmu_disable, + .event_init = armpmu_event_init, + .add = armpmu_add, + .del = armpmu_del, + .start = armpmu_start, + .stop = armpmu_stop, + .read = armpmu_read, +}; + /* * ARMv6 Performance counter handling code. * @@ -1045,7 +1092,7 @@ armv6pmu_handle_irq(int irq_num, * platforms that can have the PMU interrupts raised as an NMI, this * will not work. */ - perf_event_do_pending(); + irq_work_run(); return IRQ_HANDLED; } @@ -2021,7 +2068,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) * platforms that can have the PMU interrupts raised as an NMI, this * will not work. */ - perf_event_do_pending(); + irq_work_run(); return IRQ_HANDLED; } @@ -2389,7 +2436,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev) armpmu->disable(hwc, idx); } - perf_event_do_pending(); + irq_work_run(); /* * Re-enable the PMU. @@ -2716,7 +2763,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev) armpmu->disable(hwc, idx); } - perf_event_do_pending(); + irq_work_run(); /* * Re-enable the PMU. @@ -2933,14 +2980,12 @@ init_hw_perf_events(void) armpmu = &armv6pmu; memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, sizeof(armv6_perf_cache_map)); - perf_max_events = armv6pmu.num_events; break; case 0xB020: /* ARM11mpcore */ armpmu = &armv6mpcore_pmu; memcpy(armpmu_perf_cache_map, armv6mpcore_perf_cache_map, sizeof(armv6mpcore_perf_cache_map)); - perf_max_events = armv6mpcore_pmu.num_events; break; case 0xC080: /* Cortex-A8 */ armv7pmu.id = ARM_PERF_PMU_ID_CA8; @@ -2952,7 +2997,6 @@ init_hw_perf_events(void) /* Reset PMNC and read the nb of CNTx counters supported */ armv7pmu.num_events = armv7_reset_read_pmnc(); - perf_max_events = armv7pmu.num_events; break; case 0xC090: /* Cortex-A9 */ armv7pmu.id = ARM_PERF_PMU_ID_CA9; @@ -2964,7 +3008,6 @@ init_hw_perf_events(void) /* Reset PMNC and read the nb of CNTx counters supported */ armv7pmu.num_events = armv7_reset_read_pmnc(); - perf_max_events = armv7pmu.num_events; break; } /* Intel CPUs [xscale]. */ @@ -2975,13 +3018,11 @@ init_hw_perf_events(void) armpmu = &xscale1pmu; memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, sizeof(xscale_perf_cache_map)); - perf_max_events = xscale1pmu.num_events; break; case 2: armpmu = &xscale2pmu; memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, sizeof(xscale_perf_cache_map)); - perf_max_events = xscale2pmu.num_events; break; } } @@ -2991,9 +3032,10 @@ init_hw_perf_events(void) arm_pmu_names[armpmu->id], armpmu->num_events); } else { pr_info("no hardware support available\n"); - perf_max_events = -1; } + perf_pmu_register(&pmu); + return 0; } arch_initcall(init_hw_perf_events); @@ -3001,13 +3043,6 @@ arch_initcall(init_hw_perf_events); /* * Callchain handling code. */ -static inline void -callchain_store(struct perf_callchain_entry *entry, - u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} /* * The registers we're interested in are at the end of the variable @@ -3039,7 +3074,7 @@ user_backtrace(struct frame_tail *tail, if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail))) return NULL; - callchain_store(entry, buftail.lr); + perf_callchain_store(entry, buftail.lr); /* * Frame pointers should strictly progress back up the stack @@ -3051,16 +3086,11 @@ user_backtrace(struct frame_tail *tail, return buftail.fp - 1; } -static void -perf_callchain_user(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct frame_tail *tail; - callchain_store(entry, PERF_CONTEXT_USER); - - if (!user_mode(regs)) - regs = task_pt_regs(current); tail = (struct frame_tail *)regs->ARM_fp - 1; @@ -3078,56 +3108,18 @@ callchain_trace(struct stackframe *fr, void *data) { struct perf_callchain_entry *entry = data; - callchain_store(entry, fr->pc); + perf_callchain_store(entry, fr->pc); return 0; } -static void -perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stackframe fr; - callchain_store(entry, PERF_CONTEXT_KERNEL); fr.fp = regs->ARM_fp; fr.sp = regs->ARM_sp; fr.lr = regs->ARM_lr; fr.pc = regs->ARM_pc; walk_stackframe(&fr, callchain_trace, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, - struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!current || !current->pid) - return; - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); - -struct perf_callchain_entry * -perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - perf_do_callchain(regs, entry); - return entry; -} diff --git a/arch/arm/oprofile/Makefile b/arch/arm/oprofile/Makefile index e666eafed15..b2215c61cdf 100644 --- a/arch/arm/oprofile/Makefile +++ b/arch/arm/oprofile/Makefile @@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) +ifeq ($(CONFIG_HW_PERF_EVENTS),y) +DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o) +endif + oprofile-y := $(DRIVER_OBJS) common.o diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index 72e09eb642d..8aa974491df 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c @@ -25,139 +25,10 @@ #include <asm/ptrace.h> #ifdef CONFIG_HW_PERF_EVENTS -/* - * Per performance monitor configuration as set via oprofilefs. - */ -struct op_counter_config { - unsigned long count; - unsigned long enabled; - unsigned long event; - unsigned long unit_mask; - unsigned long kernel; - unsigned long user; - struct perf_event_attr attr; -}; - -static int op_arm_enabled; -static DEFINE_MUTEX(op_arm_mutex); - -static struct op_counter_config *counter_config; -static struct perf_event **perf_events[nr_cpumask_bits]; -static int perf_num_counters; - -/* - * Overflow callback for oprofile. - */ -static void op_overflow_handler(struct perf_event *event, int unused, - struct perf_sample_data *data, struct pt_regs *regs) +char *op_name_from_perf_id(void) { - int id; - u32 cpu = smp_processor_id(); - - for (id = 0; id < perf_num_counters; ++id) - if (perf_events[cpu][id] == event) - break; - - if (id != perf_num_counters) - oprofile_add_sample(regs, id); - else - pr_warning("oprofile: ignoring spurious overflow " - "on cpu %u\n", cpu); -} - -/* - * Called by op_arm_setup to create perf attributes to mirror the oprofile - * settings in counter_config. Attributes are created as `pinned' events and - * so are permanently scheduled on the PMU. - */ -static void op_perf_setup(void) -{ - int i; - u32 size = sizeof(struct perf_event_attr); - struct perf_event_attr *attr; - - for (i = 0; i < perf_num_counters; ++i) { - attr = &counter_config[i].attr; - memset(attr, 0, size); - attr->type = PERF_TYPE_RAW; - attr->size = size; - attr->config = counter_config[i].event; - attr->sample_period = counter_config[i].count; - attr->pinned = 1; - } -} - -static int op_create_counter(int cpu, int event) -{ - int ret = 0; - struct perf_event *pevent; - - if (!counter_config[event].enabled || (perf_events[cpu][event] != NULL)) - return ret; - - pevent = perf_event_create_kernel_counter(&counter_config[event].attr, - cpu, -1, - op_overflow_handler); - - if (IS_ERR(pevent)) { - ret = PTR_ERR(pevent); - } else if (pevent->state != PERF_EVENT_STATE_ACTIVE) { - perf_event_release_kernel(pevent); - pr_warning("oprofile: failed to enable event %d " - "on CPU %d\n", event, cpu); - ret = -EBUSY; - } else { - perf_events[cpu][event] = pevent; - } - - return ret; -} + enum arm_perf_pmu_ids id = armpmu_get_pmu_id(); -static void op_destroy_counter(int cpu, int event) -{ - struct perf_event *pevent = perf_events[cpu][event]; - - if (pevent) { - perf_event_release_kernel(pevent); - perf_events[cpu][event] = NULL; - } -} - -/* - * Called by op_arm_start to create active perf events based on the - * perviously configured attributes. - */ -static int op_perf_start(void) -{ - int cpu, event, ret = 0; - - for_each_online_cpu(cpu) { - for (event = 0; event < perf_num_counters; ++event) { - ret = op_create_counter(cpu, event); - if (ret) - goto out; - } - } - -out: - return ret; -} - -/* - * Called by op_arm_stop at the end of a profiling run. - */ -static void op_perf_stop(void) -{ - int cpu, event; - - for_each_online_cpu(cpu) - for (event = 0; event < perf_num_counters; ++event) - op_destroy_counter(cpu, event); -} - - -static char *op_name_from_perf_id(enum arm_perf_pmu_ids id) -{ switch (id) { case ARM_PERF_PMU_ID_XSCALE1: return "arm/xscale1"; @@ -176,116 +47,6 @@ static char *op_name_from_perf_id(enum arm_perf_pmu_ids id) } } -static int op_arm_create_files(struct super_block *sb, struct dentry *root) -{ - unsigned int i; - - for (i = 0; i < perf_num_counters; i++) { - struct dentry *dir; - char buf[4]; - - snprintf(buf, sizeof buf, "%d", i); - dir = oprofilefs_mkdir(sb, root, buf); - oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); - oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); - oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); - oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); - oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); - oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); - } - - return 0; -} - -static int op_arm_setup(void) -{ - spin_lock(&oprofilefs_lock); - op_perf_setup(); - spin_unlock(&oprofilefs_lock); - return 0; -} - -static int op_arm_start(void) -{ - int ret = -EBUSY; - - mutex_lock(&op_arm_mutex); - if (!op_arm_enabled) { - ret = 0; - op_perf_start(); - op_arm_enabled = 1; - } - mutex_unlock(&op_arm_mutex); - return ret; -} - -static void op_arm_stop(void) -{ - mutex_lock(&op_arm_mutex); - if (op_arm_enabled) - op_perf_stop(); - op_arm_enabled = 0; - mutex_unlock(&op_arm_mutex); -} - -#ifdef CONFIG_PM -static int op_arm_suspend(struct platform_device *dev, pm_message_t state) -{ - mutex_lock(&op_arm_mutex); - if (op_arm_enabled) - op_perf_stop(); - mutex_unlock(&op_arm_mutex); - return 0; -} - -static int op_arm_resume(struct platform_device *dev) -{ - mutex_lock(&op_arm_mutex); - if (op_arm_enabled && op_perf_start()) - op_arm_enabled = 0; - mutex_unlock(&op_arm_mutex); - return 0; -} - -static struct platform_driver oprofile_driver = { - .driver = { - .name = "arm-oprofile", - }, - .resume = op_arm_resume, - .suspend = op_arm_suspend, -}; - -static struct platform_device *oprofile_pdev; - -static int __init init_driverfs(void) -{ - int ret; - - ret = platform_driver_register(&oprofile_driver); - if (ret) - goto out; - - oprofile_pdev = platform_device_register_simple( - oprofile_driver.driver.name, 0, NULL, 0); - if (IS_ERR(oprofile_pdev)) { - ret = PTR_ERR(oprofile_pdev); - platform_driver_unregister(&oprofile_driver); - } - -out: - return ret; -} - -static void exit_driverfs(void) -{ - platform_device_unregister(oprofile_pdev); - platform_driver_unregister(&oprofile_driver); -} -#else -static int __init init_driverfs(void) { return 0; } -#define exit_driverfs() do { } while (0) -#endif /* CONFIG_PM */ - static int report_trace(struct stackframe *frame, void *d) { unsigned int *depth = d; @@ -350,74 +111,14 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth) int __init oprofile_arch_init(struct oprofile_operations *ops) { - int cpu, ret = 0; - - perf_num_counters = armpmu_get_max_events(); - - counter_config = kcalloc(perf_num_counters, - sizeof(struct op_counter_config), GFP_KERNEL); - - if (!counter_config) { - pr_info("oprofile: failed to allocate %d " - "counters\n", perf_num_counters); - return -ENOMEM; - } - - ret = init_driverfs(); - if (ret) { - kfree(counter_config); - counter_config = NULL; - return ret; - } - - for_each_possible_cpu(cpu) { - perf_events[cpu] = kcalloc(perf_num_counters, - sizeof(struct perf_event *), GFP_KERNEL); - if (!perf_events[cpu]) { - pr_info("oprofile: failed to allocate %d perf events " - "for cpu %d\n", perf_num_counters, cpu); - while (--cpu >= 0) - kfree(perf_events[cpu]); - return -ENOMEM; - } - } - ops->backtrace = arm_backtrace; - ops->create_files = op_arm_create_files; - ops->setup = op_arm_setup; - ops->start = op_arm_start; - ops->stop = op_arm_stop; - ops->shutdown = op_arm_stop; - ops->cpu_type = op_name_from_perf_id(armpmu_get_pmu_id()); - - if (!ops->cpu_type) - ret = -ENODEV; - else - pr_info("oprofile: using %s\n", ops->cpu_type); - return ret; + return oprofile_perf_init(ops); } -void oprofile_arch_exit(void) +void __exit oprofile_arch_exit(void) { - int cpu, id; - struct perf_event *event; - - if (*perf_events) { - for_each_possible_cpu(cpu) { - for (id = 0; id < perf_num_counters; ++id) { - event = perf_events[cpu][id]; - if (event != NULL) - perf_event_release_kernel(event); - } - kfree(perf_events[cpu]); - } - } - - if (counter_config) { - kfree(counter_config); - exit_driverfs(); - } + oprofile_perf_exit(); } #else int __init oprofile_arch_init(struct oprofile_operations *ops) @@ -425,5 +126,5 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) pr_info("oprofile: hardware counters not available\n"); return -ENODEV; } -void oprofile_arch_exit(void) {} +void __exit oprofile_arch_exit(void) {} #endif /* CONFIG_HW_PERF_EVENTS */ diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 16399bd2499..0f2417df632 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -7,6 +7,7 @@ config FRV default y select HAVE_IDE select HAVE_ARCH_TRACEHOOK + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS config ZONE_DMA diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile index f4709756d0d..4ff2fb1e6b1 100644 --- a/arch/frv/lib/Makefile +++ b/arch/frv/lib/Makefile @@ -5,4 +5,4 @@ lib-y := \ __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \ checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \ - outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o + outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o diff --git a/arch/frv/lib/perf_event.c b/arch/frv/lib/perf_event.c deleted file mode 100644 index 9ac5acfd2e9..00000000000 --- a/arch/frv/lib/perf_event.c +++ /dev/null @@ -1,19 +0,0 @@ -/* Performance event handling - * - * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#include <linux/perf_event.h> - -/* - * mark the performance event as pending - */ -void set_perf_event_pending(void) -{ -} diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h index d514cd9edb4..8fb7d33a661 100644 --- a/arch/ia64/include/asm/hardirq.h +++ b/arch/ia64/include/asm/hardirq.h @@ -6,12 +6,6 @@ * David Mosberger-Tang <davidm@hpl.hp.com> */ - -#include <linux/threads.h> -#include <linux/irq.h> - -#include <asm/processor.h> - /* * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure. */ @@ -20,6 +14,11 @@ #define local_softirq_pending() (local_cpu_data->softirq_pending) +#include <linux/threads.h> +#include <linux/irq.h> + +#include <asm/processor.h> + extern void __iomem *ipi_base_addr; void ack_bad_irq(unsigned int irq); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 907417d187e..79a04a9394d 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -16,6 +16,7 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT help diff --git a/arch/parisc/include/asm/perf_event.h b/arch/parisc/include/asm/perf_event.h index cc146427d8f..1e0fd8ba6c0 100644 --- a/arch/parisc/include/asm/perf_event.h +++ b/arch/parisc/include/asm/perf_event.h @@ -1,7 +1,6 @@ #ifndef __ASM_PARISC_PERF_EVENT_H #define __ASM_PARISC_PERF_EVENT_H -/* parisc only supports software events through this interface. */ -static inline void set_perf_event_pending(void) { } +/* Empty, just to avoid compiling error */ #endif /* __ASM_PARISC_PERF_EVENT_H */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 631e5a0fb6a..4b1e521d966 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -138,6 +138,7 @@ config PPC select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 1ff6662f7fa..9b287fdd8ea 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -129,7 +129,7 @@ struct paca_struct { u8 soft_enabled; /* irq soft-enable flag */ u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 io_sync; /* writel() needs spin_unlock sync */ - u8 perf_event_pending; /* PM interrupt while soft-disabled */ + u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index 95ad9dad298..d05ae4204bb 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -23,18 +23,6 @@ #include "ppc32.h" #endif -/* - * Store another value in a callchain_entry. - */ -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - unsigned int nr = entry->nr; - - if (nr < PERF_MAX_STACK_DEPTH) { - entry->ip[nr] = ip; - entry->nr = nr + 1; - } -} /* * Is sp valid as the address of the next kernel stack frame after prev_sp? @@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; } -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->nip); + perf_callchain_store(entry, regs->nip); if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) return; @@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; level = 0; - callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); } else { if (level == 0) @@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, ++level; } - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); if (!valid_next_sp(next_sp, sp)) return; sp = next_sp; @@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp) puc == (unsigned long) &sf->uc; } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); for (;;) { fp = (unsigned long __user *) sp; @@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs, read_user_stack_64(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } @@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) return __get_user_inatomic(*ret, ptr); } -static inline void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static inline void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { } @@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp, return mctx->mc_gregs; } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned int sp, next_sp; unsigned int next_ip; @@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); while (entry->nr < PERF_MAX_STACK_DEPTH) { fp = (unsigned int __user *) (unsigned long) sp; @@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs, read_user_stack_32(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } } -/* - * Since we can't get PMU interrupts inside a PMU interrupt handler, - * we don't need separate irq and nmi entries here. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - if (current_is_64bit()) - perf_callchain_user_64(regs, entry); - else - perf_callchain_user_32(regs, entry); - } - - return entry; + if (current_is_64bit()) + perf_callchain_user_64(entry, regs); + else + perf_callchain_user_32(entry, regs); } diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index d301a30445e..3129c855933 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + if (!event->hw.idx) return; /* @@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +568,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void power_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -672,6 +675,8 @@ void hw_perf_enable(void) } local64_set(&event->hw.prev_count, val); event->hw.idx = idx; + if (event->hw.state & PERF_HES_STOPPED) + val = 0; write_pmc(idx, val); perf_event_update_userpage(event); } @@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count, * re-enable the PMU in order to get hw_perf_enable to do the * actual work of reconfiguring the PMU. */ -static int power_pmu_enable(struct perf_event *event) +static int power_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -735,7 +740,7 @@ static int power_pmu_enable(struct perf_event *event) int ret = -EAGAIN; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); /* * Add the event to the list (if there is room) @@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event) cpuhw->events[n0] = event->hw.config; cpuhw->flags[n0] = event->hw.event_base; + if (!(ef_flags & PERF_EF_START)) + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed @@ -769,7 +777,7 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -777,14 +785,14 @@ nocheck: /* * Remove a event from the PMU. */ -static void power_pmu_disable(struct perf_event *event) +static void power_pmu_del(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; long i; unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); @@ -821,34 +829,60 @@ static void power_pmu_disable(struct perf_event *event) cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } /* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. + * POWER-PMU does not support disabling individual counters, hence + * program their cycle counter to their max value and ignore the interrupts. */ -static void power_pmu_unthrottle(struct perf_event *event) + +static void power_pmu_start(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + s64 left; + + if (!event->hw.idx || !event->hw.sample_period) + return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void power_pmu_stop(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; if (!event->hw.idx || !event->hw.sample_period) return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); + power_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -857,10 +891,11 @@ static void power_pmu_unthrottle(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -void power_pmu_start_txn(const struct pmu *pmu) +void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -870,11 +905,12 @@ void power_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -void power_pmu_cancel_txn(const struct pmu *pmu) +void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); } /* @@ -882,7 +918,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -int power_pmu_commit_txn(const struct pmu *pmu) +int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; @@ -901,19 +937,10 @@ int power_pmu_commit_txn(const struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); return 0; } -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, - .start_txn = power_pmu_start_txn, - .cancel_txn = power_pmu_cancel_txn, - .commit_txn = power_pmu_commit_txn, -}; - /* * Return 1 if we might be able to put event on a limited PMC, * or 0 if not. @@ -1014,7 +1041,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int power_pmu_event_init(struct perf_event *event) { u64 ev; unsigned long flags; @@ -1026,25 +1053,27 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) struct cpu_hw_events *cpuhw; if (!ppmu) - return ERR_PTR(-ENXIO); + return -ENOENT; + switch (event->attr.type) { case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: ev = event->attr.config; break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } + event->hw.config_base = ev; event->hw.idx = 0; @@ -1063,7 +1092,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) * XXX we should check if the task is an idle task. */ flags = 0; - if (event->ctx->task) + if (event->attach_state & PERF_ATTACH_TASK) flags |= PPMU_ONLY_COUNT_RUN; /* @@ -1081,7 +1110,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) */ ev = normal_pmc_alternative(ev, flags); if (!ev) - return ERR_PTR(-EINVAL); + return -EINVAL; } } @@ -1095,19 +1124,19 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, ctrs, events, cflags); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } events[n] = ev; ctrs[n] = event; cflags[n] = flags; if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); + return -EINVAL; cpuhw = &get_cpu_var(cpu_hw_events); err = power_check_constraints(cpuhw, events, cflags, n + 1); put_cpu_var(cpu_hw_events); if (err) - return ERR_PTR(-EINVAL); + return -EINVAL; event->hw.config = events[n]; event->hw.event_base = cflags[n]; @@ -1132,11 +1161,23 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &power_pmu; + return err; } +struct pmu power_pmu = { + .pmu_enable = power_pmu_enable, + .pmu_disable = power_pmu_disable, + .event_init = power_pmu_event_init, + .add = power_pmu_add, + .del = power_pmu_del, + .start = power_pmu_start, + .stop = power_pmu_stop, + .read = power_pmu_read, + .start_txn = power_pmu_start_txn, + .cancel_txn = power_pmu_cancel_txn, + .commit_txn = power_pmu_commit_txn, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -1149,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -1171,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -1183,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + power_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } /* @@ -1342,6 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_pmu_register(&power_pmu); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 1ba45471ae4..7ecca59ddf7 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + /* * Performance monitor interrupts come even when interrupts * are soft-disabled, as long as interrupts are hard-enabled. @@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void fsl_emb_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +219,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void fsl_emb_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -/* perf must be disabled, context locked on entry */ -static int fsl_emb_pmu_enable(struct perf_event *event) +/* context locked on entry */ +static int fsl_emb_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int ret = -EAGAIN; @@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; + perf_pmu_disable(event->pmu); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event) val = 0x80000000L - left; } local64_set(&event->hw.prev_count, val); + + if (!(flags & PERF_EF_START)) { + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + val = 0; + } + write_pmc(i, val); perf_event_update_userpage(event); @@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); + perf_pmu_enable(event->pmu); return ret; } -/* perf must be disabled, context locked on entry */ -static void fsl_emb_pmu_disable(struct perf_event *event) +/* context locked on entry */ +static void fsl_emb_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; + perf_pmu_disable(event->pmu); if (i < 0) goto out; @@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: + perf_pmu_enable(event->pmu); put_cpu_var(cpu_hw_events); } -/* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. - * - * Context is locked on entry, but perf is not disabled. - */ -static void fsl_emb_pmu_unthrottle(struct perf_event *event) +static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; + s64 left; if (event->hw.idx < 0 || !event->hw.sample_period) return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + local_irq_save(flags); - perf_disable(); - fsl_emb_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } -static struct pmu fsl_emb_pmu = { - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, - .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, -}; +static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + fsl_emb_pmu_read(event); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} /* * Release the PMU if this is the last perf_event. @@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int fsl_emb_pmu_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; @@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: @@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } event->hw.config = ppmu->xlate_event(ev); if (!(event->hw.config & FSL_EMB_EVENT_VALID)) - return ERR_PTR(-EINVAL); + return -EINVAL; /* * If this is in a group, check if it can go on with all the @@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, events); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { @@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) } if (num_restricted >= ppmu->n_restricted) - return ERR_PTR(-EINVAL); + return -EINVAL; } event->hw.idx = -1; @@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) if (event->attr.exclude_kernel) event->hw.config_base |= PMLCA_FCS; if (event->attr.exclude_idle) - return ERR_PTR(-ENOTSUPP); + return -ENOTSUPP; event->hw.last_period = event->hw.sample_period; local64_set(&event->hw.period_left, event->hw.last_period); @@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &fsl_emb_pmu; + return err; } +static struct pmu fsl_emb_pmu = { + .pmu_enable = fsl_emb_pmu_enable, + .pmu_disable = fsl_emb_pmu_disable, + .event_init = fsl_emb_pmu_event_init, + .add = fsl_emb_pmu_add, + .del = fsl_emb_pmu_del, + .start = fsl_emb_pmu_start, + .stop = fsl_emb_pmu_stop, + .read = fsl_emb_pmu_read, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_sample_data_init(&data, 0); data.period = event->hw.last_period; - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + fsl_emb_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } static void perf_event_interrupt(struct pt_regs *regs) @@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); + perf_pmu_register(&fsl_emb_pmu); + return 0; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 8533b3b83f5..54888eb10c3 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,7 +53,7 @@ #include <linux/posix-timers.h> #include <linux/irq.h> #include <linux/delay.h> -#include <linux/perf_event.h> +#include <linux/irq_work.h> #include <asm/trace.h> #include <asm/io.h> @@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#ifdef CONFIG_PERF_EVENTS +#ifdef CONFIG_IRQ_WORK /* * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... */ #ifdef CONFIG_PPC64 -static inline unsigned long test_perf_event_pending(void) +static inline unsigned long test_irq_work_pending(void) { unsigned long x; asm volatile("lbz %0,%1(13)" : "=r" (x) - : "i" (offsetof(struct paca_struct, perf_event_pending))); + : "i" (offsetof(struct paca_struct, irq_work_pending))); return x; } -static inline void set_perf_event_pending_flag(void) +static inline void set_irq_work_pending_flag(void) { asm volatile("stb %0,%1(13)" : : "r" (1), - "i" (offsetof(struct paca_struct, perf_event_pending))); + "i" (offsetof(struct paca_struct, irq_work_pending))); } -static inline void clear_perf_event_pending(void) +static inline void clear_irq_work_pending(void) { asm volatile("stb %0,%1(13)" : : "r" (0), - "i" (offsetof(struct paca_struct, perf_event_pending))); + "i" (offsetof(struct paca_struct, irq_work_pending))); } #else /* 32-bit */ -DEFINE_PER_CPU(u8, perf_event_pending); +DEFINE_PER_CPU(u8, irq_work_pending); -#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 -#define test_perf_event_pending() __get_cpu_var(perf_event_pending) -#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 +#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 +#define test_irq_work_pending() __get_cpu_var(irq_work_pending) +#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 #endif /* 32 vs 64 bit */ -void set_perf_event_pending(void) +void set_irq_work_pending(void) { preempt_disable(); - set_perf_event_pending_flag(); + set_irq_work_pending_flag(); set_dec(1); preempt_enable(); } -#else /* CONFIG_PERF_EVENTS */ +#else /* CONFIG_IRQ_WORK */ -#define test_perf_event_pending() 0 -#define clear_perf_event_pending() +#define test_irq_work_pending() 0 +#define clear_irq_work_pending() -#endif /* CONFIG_PERF_EVENTS */ +#endif /* CONFIG_IRQ_WORK */ /* * For iSeries shared processors, we have to let the hypervisor @@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs) calculate_steal_time(); - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); } #ifdef CONFIG_PPC_ISERIES diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f0777a47e3a..958f0dadead 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -95,6 +95,7 @@ config S390 select HAVE_KVM if 64BIT select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 498bc389238..881d94590ae 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -12,10 +12,6 @@ #ifndef __ASM_HARDIRQ_H #define __ASM_HARDIRQ_H -#include <linux/threads.h> -#include <linux/sched.h> -#include <linux/cache.h> -#include <linux/interrupt.h> #include <asm/lowcore.h> #define local_softirq_pending() (S390_lowcore.softirq_pending) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 3840cbe7763..a75f168d271 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -4,7 +4,6 @@ * Copyright 2009 Martin Schwidefsky, IBM Corporation. */ -static inline void set_perf_event_pending(void) {} -static inline void clear_perf_event_pending(void) {} +/* Empty, just to avoid compiling error */ #define PERF_EVENT_INDEX_OFFSET 0 diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 33990fa95af..35b6879628a 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -16,6 +16,7 @@ config SUPERH select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select HAVE_KERNEL_GZIP @@ -249,6 +250,11 @@ config ARCH_SHMOBILE select PM select PM_RUNTIME +config CPU_HAS_PMU + depends on CPU_SH4 || CPU_SH4A + default y + bool + if SUPERH32 choice @@ -738,6 +744,14 @@ config GUSA_RB LLSC, this should be more efficient than the other alternative of disabling interrupts around the atomic sequence. +config HW_PERF_EVENTS + bool "Enable hardware performance counter support for perf events" + depends on PERF_EVENTS && CPU_HAS_PMU + default y + help + Enable hardware performance counter support for perf events. If + disabled, perf events will use software events only. + source "drivers/sh/Kconfig" endmenu diff --git a/arch/sh/include/asm/perf_event.h b/arch/sh/include/asm/perf_event.h index 3d0c9f36d15..14308bed7ea 100644 --- a/arch/sh/include/asm/perf_event.h +++ b/arch/sh/include/asm/perf_event.h @@ -26,11 +26,4 @@ extern int register_sh_pmu(struct sh_pmu *); extern int reserve_pmc_hardware(void); extern void release_pmc_hardware(void); -static inline void set_perf_event_pending(void) -{ - /* Nothing to see here, move along. */ -} - -#define PERF_EVENT_INDEX_OFFSET 0 - #endif /* __ASM_SH_PERF_EVENT_H */ diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index a9dd3abde28..d5ca1ef50fa 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -14,11 +14,6 @@ #include <asm/unwinder.h> #include <asm/ptrace.h> -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} static void callchain_warning(void *data, char *msg) { @@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable) struct perf_callchain_entry *entry = data; if (reliable) - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops callchain_ops = { @@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = { .address = callchain_address, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->pc); + perf_callchain_store(entry, regs->pc); unwind_stack(NULL, regs, NULL, &callchain_ops, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (is_user && current->state != TASK_RUNNING) - return; - - /* - * Only the kernel side is implemented for now. - */ - if (!is_user) - perf_callchain_kernel(regs, entry); -} - -/* - * No need for separate IRQ and NMI entries. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 7a3dc356725..5a4b3343565 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -59,6 +59,24 @@ static inline int sh_pmu_initialized(void) return !!sh_pmu; } +const char *perf_pmu_name(void) +{ + if (!sh_pmu) + return NULL; + + return sh_pmu->name; +} +EXPORT_SYMBOL_GPL(perf_pmu_name); + +int perf_num_counters(void) +{ + if (!sh_pmu) + return 0; + + return sh_pmu->num_events; +} +EXPORT_SYMBOL_GPL(perf_num_counters); + /* * Release the PMU if this is the last perf_event. */ @@ -206,50 +224,80 @@ again: local64_add(delta, &event->count); } -static void sh_pmu_disable(struct perf_event *event) +static void sh_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - clear_bit(idx, cpuc->active_mask); - sh_pmu->disable(hwc, idx); + if (!(event->hw.state & PERF_HES_STOPPED)) { + sh_pmu->disable(hwc, idx); + cpuc->events[idx] = NULL; + event->hw.state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { + sh_perf_event_update(event, &event->hw, idx); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static void sh_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); - barrier(); + cpuc->events[idx] = event; + event->hw.state = 0; + sh_pmu->enable(hwc, idx); +} - sh_perf_event_update(event, &event->hw, idx); +static void sh_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + sh_pmu_stop(event, PERF_EF_UPDATE); + __clear_bit(event->hw.idx, cpuc->used_mask); perf_event_update_userpage(event); } -static int sh_pmu_enable(struct perf_event *event) +static int sh_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + int ret = -EAGAIN; + + perf_pmu_disable(event->pmu); - if (test_and_set_bit(idx, cpuc->used_mask)) { + if (__test_and_set_bit(idx, cpuc->used_mask)) { idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); if (idx == sh_pmu->num_events) - return -EAGAIN; + goto out; - set_bit(idx, cpuc->used_mask); + __set_bit(idx, cpuc->used_mask); hwc->idx = idx; } sh_pmu->disable(hwc, idx); - cpuc->events[idx] = event; - set_bit(idx, cpuc->active_mask); - - sh_pmu->enable(hwc, idx); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (flags & PERF_EF_START) + sh_pmu_start(event, PERF_EF_RELOAD); perf_event_update_userpage(event); - - return 0; + ret = 0; +out: + perf_pmu_enable(event->pmu); + return ret; } static void sh_pmu_read(struct perf_event *event) @@ -257,24 +305,56 @@ static void sh_pmu_read(struct perf_event *event) sh_perf_event_update(event, &event->hw, event->hw.idx); } -static const struct pmu pmu = { - .enable = sh_pmu_enable, - .disable = sh_pmu_disable, - .read = sh_pmu_read, -}; - -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int sh_pmu_event_init(struct perf_event *event) { - int err = __hw_perf_event_init(event); + int err; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_HARDWARE: + err = __hw_perf_event_init(event); + break; + + default: + return -ENOENT; + } + if (unlikely(err)) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; +} + +static void sh_pmu_enable(struct pmu *pmu) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->enable_all(); +} + +static void sh_pmu_disable(struct pmu *pmu) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->disable_all(); } +static struct pmu pmu = { + .pmu_enable = sh_pmu_enable, + .pmu_disable = sh_pmu_disable, + .event_init = sh_pmu_event_init, + .add = sh_pmu_add, + .del = sh_pmu_del, + .start = sh_pmu_start, + .stop = sh_pmu_stop, + .read = sh_pmu_read, +}; + static void sh_pmu_setup(int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); @@ -299,32 +379,17 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -void hw_perf_enable(void) -{ - if (!sh_pmu_initialized()) - return; - - sh_pmu->enable_all(); -} - -void hw_perf_disable(void) -{ - if (!sh_pmu_initialized()) - return; - - sh_pmu->disable_all(); -} - -int __cpuinit register_sh_pmu(struct sh_pmu *pmu) +int __cpuinit register_sh_pmu(struct sh_pmu *_pmu) { if (sh_pmu) return -EBUSY; - sh_pmu = pmu; + sh_pmu = _pmu; - pr_info("Performance Events: %s support registered\n", pmu->name); + pr_info("Performance Events: %s support registered\n", _pmu->name); - WARN_ON(pmu->num_events > MAX_HWEVENTS); + WARN_ON(_pmu->num_events > MAX_HWEVENTS); + perf_pmu_register(&pmu); perf_cpu_notifier(sh_pmu_notifier); return 0; } diff --git a/arch/sh/oprofile/Makefile b/arch/sh/oprofile/Makefile index 4886c5c1786..e85aae73e3d 100644 --- a/arch/sh/oprofile/Makefile +++ b/arch/sh/oprofile/Makefile @@ -6,4 +6,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) +ifeq ($(CONFIG_HW_PERF_EVENTS),y) +DRIVER_OBJS += $(addprefix ../../../drivers/oprofile/, oprofile_perf.o) +endif + oprofile-y := $(DRIVER_OBJS) common.o backtrace.o diff --git a/arch/sh/oprofile/common.c b/arch/sh/oprofile/common.c index ac604937f3e..e10d89376f9 100644 --- a/arch/sh/oprofile/common.c +++ b/arch/sh/oprofile/common.c @@ -17,114 +17,45 @@ #include <linux/init.h> #include <linux/errno.h> #include <linux/smp.h> +#include <linux/perf_event.h> #include <asm/processor.h> -#include "op_impl.h" - -static struct op_sh_model *model; - -static struct op_counter_config ctr[20]; +#ifdef CONFIG_HW_PERF_EVENTS extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth); -static int op_sh_setup(void) -{ - /* Pre-compute the values to stuff in the hardware registers. */ - model->reg_setup(ctr); - - /* Configure the registers on all cpus. */ - on_each_cpu(model->cpu_setup, NULL, 1); - - return 0; -} - -static int op_sh_create_files(struct super_block *sb, struct dentry *root) +char *op_name_from_perf_id(void) { - int i, ret = 0; + const char *pmu; + char buf[20]; + int size; - for (i = 0; i < model->num_counters; i++) { - struct dentry *dir; - char buf[4]; + pmu = perf_pmu_name(); + if (!pmu) + return NULL; - snprintf(buf, sizeof(buf), "%d", i); - dir = oprofilefs_mkdir(sb, root, buf); + size = snprintf(buf, sizeof(buf), "sh/%s", pmu); + if (size > -1 && size < sizeof(buf)) + return buf; - ret |= oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled); - ret |= oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event); - ret |= oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel); - ret |= oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user); - - if (model->create_files) - ret |= model->create_files(sb, dir); - else - ret |= oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count); - - /* Dummy entries */ - ret |= oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask); - } - - return ret; + return NULL; } -static int op_sh_start(void) +int __init oprofile_arch_init(struct oprofile_operations *ops) { - /* Enable performance monitoring for all counters. */ - on_each_cpu(model->cpu_start, NULL, 1); + ops->backtrace = sh_backtrace; - return 0; + return oprofile_perf_init(ops); } -static void op_sh_stop(void) +void __exit oprofile_arch_exit(void) { - /* Disable performance monitoring for all counters. */ - on_each_cpu(model->cpu_stop, NULL, 1); + oprofile_perf_exit(); } - +#else int __init oprofile_arch_init(struct oprofile_operations *ops) { - struct op_sh_model *lmodel = NULL; - int ret; - - /* - * Always assign the backtrace op. If the counter initialization - * fails, we fall back to the timer which will still make use of - * this. - */ - ops->backtrace = sh_backtrace; - - /* - * XXX - * - * All of the SH7750/SH-4A counters have been converted to perf, - * this infrastructure hook is left for other users until they've - * had a chance to convert over, at which point all of this - * will be deleted. - */ - - if (!lmodel) - return -ENODEV; - if (!(current_cpu_data.flags & CPU_HAS_PERF_COUNTER)) - return -ENODEV; - - ret = lmodel->init(); - if (unlikely(ret != 0)) - return ret; - - model = lmodel; - - ops->setup = op_sh_setup; - ops->create_files = op_sh_create_files; - ops->start = op_sh_start; - ops->stop = op_sh_stop; - ops->cpu_type = lmodel->cpu_type; - - printk(KERN_INFO "oprofile: using %s performance monitoring.\n", - lmodel->cpu_type); - - return 0; -} - -void oprofile_arch_exit(void) -{ - if (model && model->exit) - model->exit(); + pr_info("oprofile: hardware counters not available\n"); + return -ENODEV; } +void __exit oprofile_arch_exit(void) {} +#endif /* CONFIG_HW_PERF_EVENTS */ diff --git a/arch/sh/oprofile/op_impl.h b/arch/sh/oprofile/op_impl.h deleted file mode 100644 index 1244479ceb2..00000000000 --- a/arch/sh/oprofile/op_impl.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef __OP_IMPL_H -#define __OP_IMPL_H - -/* Per-counter configuration as set via oprofilefs. */ -struct op_counter_config { - unsigned long enabled; - unsigned long event; - - unsigned long count; - - /* Dummy values for userspace tool compliance */ - unsigned long kernel; - unsigned long user; - unsigned long unit_mask; -}; - -/* Per-architecture configury and hooks. */ -struct op_sh_model { - void (*reg_setup)(struct op_counter_config *); - int (*create_files)(struct super_block *sb, struct dentry *dir); - void (*cpu_setup)(void *dummy); - int (*init)(void); - void (*exit)(void); - void (*cpu_start)(void *args); - void (*cpu_stop)(void *args); - char *cpu_type; - unsigned char num_counters; -}; - -/* arch/sh/oprofile/common.c */ -extern void sh_backtrace(struct pt_regs * const regs, unsigned int depth); - -#endif /* __OP_IMPL_H */ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 491e9d6de19..3e9d31401fb 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -26,10 +26,12 @@ config SPARC select ARCH_WANT_OPTIONAL_GPIOLIB select RTC_CLASS select RTC_DRV_M48T59 + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG + select HAVE_ARCH_JUMP_LABEL config SPARC32 def_bool !64BIT @@ -53,6 +55,7 @@ config SPARC64 select RTC_DRV_BQ4802 select RTC_DRV_SUN4V select RTC_DRV_STARFIRE + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h new file mode 100644 index 00000000000..62e66d7b2fb --- /dev/null +++ b/arch/sparc/include/asm/jump_label.h @@ -0,0 +1,32 @@ +#ifndef _ASM_SPARC_JUMP_LABEL_H +#define _ASM_SPARC_JUMP_LABEL_H + +#ifdef __KERNEL__ + +#include <linux/types.h> +#include <asm/system.h> + +#define JUMP_LABEL_NOP_SIZE 4 + +#define JUMP_LABEL(key, label) \ + do { \ + asm goto("1:\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + ".pushsection __jump_table, \"a\"\n\t"\ + ".word 1b, %l[" #label "], %c0\n\t" \ + ".popsection \n\t" \ + : : "i" (key) : : label);\ + } while (0) + +#endif /* __KERNEL__ */ + +typedef u32 jump_label_t; + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h index 727af70646c..6e8bfa1786d 100644 --- a/arch/sparc/include/asm/perf_event.h +++ b/arch/sparc/include/asm/perf_event.h @@ -1,10 +1,6 @@ #ifndef __ASM_SPARC_PERF_EVENT_H #define __ASM_SPARC_PERF_EVENT_H -extern void set_perf_event_pending(void); - -#define PERF_EVENT_INDEX_OFFSET 0 - #ifdef CONFIG_PERF_EVENTS #include <asm/ptrace.h> diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 0c2dc1f24a9..599398fbbc7 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -119,3 +119,5 @@ obj-$(CONFIG_COMPAT) += $(audit--y) pc--$(CONFIG_PERF_EVENTS) := perf_event.o obj-$(CONFIG_SPARC64) += $(pc--y) + +obj-$(CONFIG_SPARC64) += jump_label.o diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c new file mode 100644 index 00000000000..ea2dafc93d7 --- /dev/null +++ b/arch/sparc/kernel/jump_label.c @@ -0,0 +1,47 @@ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/cpu.h> + +#include <linux/jump_label.h> +#include <linux/memory.h> + +#ifdef HAVE_JUMP_LABEL + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + u32 val; + u32 *insn = (u32 *) (unsigned long) entry->code; + + if (type == JUMP_LABEL_ENABLE) { + s32 off = (s32)entry->target - (s32)entry->code; + +#ifdef CONFIG_SPARC64 + /* ba,pt %xcc, . + (off << 2) */ + val = 0x10680000 | ((u32) off >> 2); +#else + /* ba . + (off << 2) */ + val = 0x10800000 | ((u32) off >> 2); +#endif + } else { + val = 0x01000000; + } + + get_online_cpus(); + mutex_lock(&text_mutex); + *insn = val; + flushi(insn); + mutex_unlock(&text_mutex); + put_online_cpus(); +} + +void arch_jump_label_text_poke_early(jump_label_t addr) +{ + u32 *insn_p = (u32 *) (unsigned long) addr; + + *insn_p = 0x01000000; + flushi(insn_p); +} + +#endif diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index f848aadf54d..ee3c7dde8d9 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -18,6 +18,9 @@ #include <asm/spitfire.h> #ifdef CONFIG_SPARC64 + +#include <linux/jump_label.h> + static void *module_map(unsigned long size) { struct vm_struct *area; @@ -227,6 +230,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { + /* make jump label nops */ + jump_label_apply_nops(me); + /* Cheetah's I-cache is fully coherent. */ if (tlb_type == spitfire) { unsigned long va; diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index c4a6a50b484..b87873c0e8e 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -7,7 +7,7 @@ #include <linux/init.h> #include <linux/irq.h> -#include <linux/perf_event.h> +#include <linux/irq_work.h> #include <linux/ftrace.h> #include <asm/pil.h> @@ -43,14 +43,14 @@ void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs) old_regs = set_irq_regs(regs); irq_enter(); -#ifdef CONFIG_PERF_EVENTS - perf_event_do_pending(); +#ifdef CONFIG_IRQ_WORK + irq_work_run(); #endif irq_exit(); set_irq_regs(old_regs); } -void set_perf_event_pending(void) +void arch_irq_work_raise(void) { set_softint(1 << PIL_DEFERRED_PCR_WORK); } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 6318e622cfb..0d6deb55a2a 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) enc = perf_event_get_enc(cpuc->events[i]); pcr &= ~mask_for_index(idx); - pcr |= event_encoding(enc, idx); + if (hwc->state & PERF_HES_STOPPED) + pcr |= nop_for_index(idx); + else + pcr |= event_encoding(enc, idx); } out: return pcr; } -void hw_perf_enable(void) +static void sparc_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 pcr; @@ -691,7 +694,7 @@ void hw_perf_enable(void) pcr_ops->write(cpuc->pcr); } -void hw_perf_disable(void) +static void sparc_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; @@ -710,19 +713,65 @@ void hw_perf_disable(void) pcr_ops->write(cpuc->pcr); } -static void sparc_pmu_disable(struct perf_event *event) +static int active_event_index(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + int i; + + for (i = 0; i < cpuc->n_events; i++) { + if (cpuc->event[i] == event) + break; + } + BUG_ON(i == cpuc->n_events); + return cpuc->current_idx[i]; +} + +static void sparc_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = active_event_index(cpuc, event); + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + sparc_perf_event_set_period(event, &event->hw, idx); + } + + event->hw.state = 0; + + sparc_pmu_enable_event(cpuc, &event->hw, idx); +} + +static void sparc_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = active_event_index(cpuc, event); + + if (!(event->hw.state & PERF_HES_STOPPED)) { + sparc_pmu_disable_event(cpuc, &event->hw, idx); + event->hw.state |= PERF_HES_STOPPED; + } + + if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) { + sparc_perf_event_update(event, &event->hw, idx); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static void sparc_pmu_del(struct perf_event *event, int _flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; unsigned long flags; int i; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event[i]) { - int idx = cpuc->current_idx[i]; + /* Absorb the final count and turn off the + * event. + */ + sparc_pmu_stop(event, PERF_EF_UPDATE); /* Shift remaining entries down into * the existing slot. @@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event) cpuc->current_idx[i]; } - /* Absorb the final count and turn off the - * event. - */ - sparc_pmu_disable_event(cpuc, hwc, idx); - barrier(); - sparc_perf_event_update(event, hwc, idx); - perf_event_update_userpage(event); cpuc->n_events--; @@ -748,23 +790,10 @@ static void sparc_pmu_disable(struct perf_event *event) } } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } -static int active_event_index(struct cpu_hw_events *cpuc, - struct perf_event *event) -{ - int i; - - for (i = 0; i < cpuc->n_events; i++) { - if (cpuc->event[i] == event) - break; - } - BUG_ON(i == cpuc->n_events); - return cpuc->current_idx[i]; -} - static void sparc_pmu_read(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event) sparc_perf_event_update(event, hwc, idx); } -static void sparc_pmu_unthrottle(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx = active_event_index(cpuc, event); - struct hw_perf_event *hwc = &event->hw; - - sparc_pmu_enable_event(cpuc, hwc, idx); -} - static atomic_t active_events = ATOMIC_INIT(0); static DEFINE_MUTEX(pmc_grab_mutex); @@ -877,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts, if (!n_ev) return 0; - if (n_ev > perf_max_events) + if (n_ev > MAX_HWEVENTS) return -1; msk0 = perf_event_get_msk(events[0]); @@ -984,23 +1004,27 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -static int sparc_pmu_enable(struct perf_event *event) +static int sparc_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n0, ret = -EAGAIN; unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); n0 = cpuc->n_events; - if (n0 >= perf_max_events) + if (n0 >= MAX_HWEVENTS) goto out; cpuc->event[n0] = event; cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; + event->hw.state = PERF_HES_UPTODATE; + if (!(ef_flags & PERF_EF_START)) + event->hw.state |= PERF_HES_STOPPED; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed @@ -1020,12 +1044,12 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } -static int __hw_perf_event_init(struct perf_event *event) +static int sparc_pmu_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; struct perf_event *evts[MAX_HWEVENTS]; @@ -1038,22 +1062,33 @@ static int __hw_perf_event_init(struct perf_event *event) if (atomic_read(&nmi_active) < 0) return -ENODEV; - pmap = NULL; - if (attr->type == PERF_TYPE_HARDWARE) { + switch (attr->type) { + case PERF_TYPE_HARDWARE: if (attr->config >= sparc_pmu->max_events) return -EINVAL; pmap = sparc_pmu->event_map(attr->config); - } else if (attr->type == PERF_TYPE_HW_CACHE) { + break; + + case PERF_TYPE_HW_CACHE: pmap = sparc_map_cache_event(attr->config); if (IS_ERR(pmap)) return PTR_ERR(pmap); - } else if (attr->type != PERF_TYPE_RAW) - return -EOPNOTSUPP; + break; + + case PERF_TYPE_RAW: + pmap = NULL; + break; + + default: + return -ENOENT; + + } if (pmap) { hwc->event_base = perf_event_encode(pmap); } else { - /* User gives us "(encoding << 16) | pic_mask" for + /* + * User gives us "(encoding << 16) | pic_mask" for * PERF_TYPE_RAW events. */ hwc->event_base = attr->config; @@ -1071,7 +1106,7 @@ static int __hw_perf_event_init(struct perf_event *event) n = 0; if (event->group_leader != event) { n = collect_events(event->group_leader, - perf_max_events - 1, + MAX_HWEVENTS - 1, evts, events, current_idx_dmy); if (n < 0) return -EINVAL; @@ -1107,10 +1142,11 @@ static int __hw_perf_event_init(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void sparc_pmu_start_txn(const struct pmu *pmu) +static void sparc_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; } @@ -1119,11 +1155,12 @@ static void sparc_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -static void sparc_pmu_cancel_txn(const struct pmu *pmu) +static void sparc_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); } /* @@ -1131,7 +1168,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -static int sparc_pmu_commit_txn(const struct pmu *pmu) +static int sparc_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n; @@ -1147,28 +1184,24 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu) return -EAGAIN; cpuc->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); return 0; } -static const struct pmu pmu = { - .enable = sparc_pmu_enable, - .disable = sparc_pmu_disable, +static struct pmu pmu = { + .pmu_enable = sparc_pmu_enable, + .pmu_disable = sparc_pmu_disable, + .event_init = sparc_pmu_event_init, + .add = sparc_pmu_add, + .del = sparc_pmu_del, + .start = sparc_pmu_start, + .stop = sparc_pmu_stop, .read = sparc_pmu_read, - .unthrottle = sparc_pmu_unthrottle, .start_txn = sparc_pmu_start_txn, .cancel_txn = sparc_pmu_cancel_txn, .commit_txn = sparc_pmu_commit_txn, }; -const struct pmu *hw_perf_event_init(struct perf_event *event) -{ - int err = __hw_perf_event_init(event); - - if (err) - return ERR_PTR(err); - return &pmu; -} - void perf_event_print_debug(void) { unsigned long flags; @@ -1244,7 +1277,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, continue; if (perf_event_overflow(event, 1, &data, regs)) - sparc_pmu_disable_event(cpuc, hwc, idx); + sparc_pmu_stop(event, 0); } return NOTIFY_STOP; @@ -1285,28 +1318,21 @@ void __init init_hw_perf_events(void) pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); - /* All sparc64 PMUs currently have 2 events. */ - perf_max_events = 2; - + perf_pmu_register(&pmu); register_die_notifier(&perf_event_nmi_notifier); } -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} - -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ksp, fp; #ifdef CONFIG_FUNCTION_GRAPH_TRACER int graph = 0; #endif - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->tpc); + stack_trace_flush(); + + perf_callchain_store(entry, regs->tpc); ksp = regs->u_regs[UREG_I6]; fp = ksp + STACK_BIAS; @@ -1330,13 +1356,13 @@ static void perf_callchain_kernel(struct pt_regs *regs, pc = sf->callers_pc; fp = (unsigned long)sf->fp + STACK_BIAS; } - callchain_store(entry, pc); + perf_callchain_store(entry, pc); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((pc + 8UL) == (unsigned long) &return_to_handler) { int index = current->curr_ret_stack; if (current->ret_stack && index >= graph) { pc = current->ret_stack[index - graph].ret; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); graph++; } } @@ -1344,13 +1370,12 @@ static void perf_callchain_kernel(struct pt_regs *regs, } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] + STACK_BIAS; do { @@ -1363,17 +1388,16 @@ static void perf_callchain_user_64(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp + STACK_BIAS; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; do { @@ -1386,34 +1410,16 @@ static void perf_callchain_user_32(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } -/* Like powerpc we can't get PMU interrupts within the PMU handler, - * so no need for separate NMI and IRQ chains as on x86. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - if (!user_mode(regs)) { - stack_trace_flush(); - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - if (regs) { - flushw_user(); - if (test_thread_flag(TIF_32BIT)) - perf_callchain_user_32(regs, entry); - else - perf_callchain_user_64(regs, entry); - } - return entry; + flushw_user(); + if (test_thread_flag(TIF_32BIT)) + perf_callchain_user_32(entry, regs); + else + perf_callchain_user_64(entry, regs); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cea0cd9a316..fd227d6b8d9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -25,6 +25,7 @@ config X86 select HAVE_IDE select HAVE_OPROFILE select HAVE_PERF_EVENTS if (!M386 && !M486) + select HAVE_IRQ_WORK select HAVE_IOREMAP_PROT select HAVE_KPROBES select ARCH_WANT_OPTIONAL_GPIOLIB @@ -33,6 +34,7 @@ config X86 select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER @@ -59,6 +61,8 @@ config X86 select ANON_INODES select HAVE_ARCH_KMEMCHECK select HAVE_USER_RETURN_NOTIFIER + select HAVE_ARCH_JUMP_LABEL + select HAVE_TEXT_POKE_SMP config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) @@ -2125,6 +2129,10 @@ config HAVE_ATOMIC_IOMAP def_bool y depends on X86_32 +config HAVE_TEXT_POKE_SMP + bool + select STOP_MACHINE if SMP + source "net/Kconfig" source "drivers/Kconfig" diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index bc6abb7bc7e..76561d20ea2 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/stddef.h> #include <linux/stringify.h> +#include <linux/jump_label.h> #include <asm/asm.h> /* @@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define __parainstructions_end NULL #endif +extern void *text_poke_early(void *addr, const void *opcode, size_t len); + /* * Clear and restore the kernel write-protection flag on the local CPU. * Allows the kernel to edit read-only pages. @@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_smp(void *addr, const void *opcode, size_t len); +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) +#define IDEAL_NOP_SIZE_5 5 +extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; +extern void arch_init_ideal_nop5(void); +#else +static inline void arch_init_ideal_nop5(void) {} +#endif + #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 8e8ec663a98..b8e96a18676 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) -#ifdef CONFIG_PERF_EVENTS -BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) +#ifdef CONFIG_IRQ_WORK +BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR) #endif #ifdef CONFIG_X86_THERMAL_VECTOR diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index aeab29aee61..55e4de613f0 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -14,7 +14,7 @@ typedef struct { #endif unsigned int x86_platform_ipis; /* arch dependent */ unsigned int apic_perf_irqs; - unsigned int apic_pending_irqs; + unsigned int apic_irq_work_irqs; #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 46c0fe05f23..3a54a1ca1a0 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -29,7 +29,7 @@ extern void apic_timer_interrupt(void); extern void x86_platform_ipi(void); extern void error_interrupt(void); -extern void perf_pending_interrupt(void); +extern void irq_work_interrupt(void); extern void spurious_interrupt(void); extern void thermal_interrupt(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index e2ca3009255..6af0894dafb 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -114,9 +114,9 @@ #define X86_PLATFORM_IPI_VECTOR 0xed /* - * Performance monitoring pending work vector: + * IRQ work vector: */ -#define LOCAL_PENDING_VECTOR 0xec +#define IRQ_WORK_VECTOR 0xec #define UV_BAU_MESSAGE 0xea diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h new file mode 100644 index 00000000000..f52d42e8058 --- /dev/null +++ b/arch/x86/include/asm/jump_label.h @@ -0,0 +1,37 @@ +#ifndef _ASM_X86_JUMP_LABEL_H +#define _ASM_X86_JUMP_LABEL_H + +#ifdef __KERNEL__ + +#include <linux/types.h> +#include <asm/nops.h> + +#define JUMP_LABEL_NOP_SIZE 5 + +# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" + +# define JUMP_LABEL(key, label) \ + do { \ + asm goto("1:" \ + JUMP_LABEL_INITIAL_NOP \ + ".pushsection __jump_table, \"a\" \n\t"\ + _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \ + ".popsection \n\t" \ + : : "i" (key) : : label); \ + } while (0) + +#endif /* __KERNEL__ */ + +#ifdef CONFIG_X86_64 +typedef u64 jump_label_t; +#else +typedef u32 jump_label_t; +#endif + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index def500776b1..a70cd216be5 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -36,19 +36,6 @@ #define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) #define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) -/* Non HT mask */ -#define P4_ESCR_MASK \ - (P4_ESCR_EVENT_MASK | \ - P4_ESCR_EVENTMASK_MASK | \ - P4_ESCR_TAG_MASK | \ - P4_ESCR_TAG_ENABLE | \ - P4_ESCR_T0_OS | \ - P4_ESCR_T0_USR) - -/* HT mask */ -#define P4_ESCR_MASK_HT \ - (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR) - #define P4_CCCR_OVF 0x80000000U #define P4_CCCR_CASCADE 0x40000000U #define P4_CCCR_OVF_PMI_T0 0x04000000U @@ -70,23 +57,6 @@ #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) -/* Non HT mask */ -#define P4_CCCR_MASK \ - (P4_CCCR_OVF | \ - P4_CCCR_CASCADE | \ - P4_CCCR_OVF_PMI_T0 | \ - P4_CCCR_FORCE_OVF | \ - P4_CCCR_EDGE | \ - P4_CCCR_THRESHOLD_MASK | \ - P4_CCCR_COMPLEMENT | \ - P4_CCCR_COMPARE | \ - P4_CCCR_ESCR_SELECT_MASK | \ - P4_CCCR_ENABLE) - -/* HT mask */ -#define P4_CCCR_MASK_HT \ - (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY) - #define P4_GEN_ESCR_EMASK(class, name, bit) \ class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) #define P4_ESCR_EMASK_BIT(class, name) class##__##name @@ -127,6 +97,28 @@ #define P4_CONFIG_HT_SHIFT 63 #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) +/* + * The bits we allow to pass for RAW events + */ +#define P4_CONFIG_MASK_ESCR \ + P4_ESCR_EVENT_MASK | \ + P4_ESCR_EVENTMASK_MASK | \ + P4_ESCR_TAG_MASK | \ + P4_ESCR_TAG_ENABLE + +#define P4_CONFIG_MASK_CCCR \ + P4_CCCR_EDGE | \ + P4_CCCR_THRESHOLD_MASK | \ + P4_CCCR_COMPLEMENT | \ + P4_CCCR_COMPARE | \ + P4_CCCR_THREAD_ANY | \ + P4_CCCR_RESERVED + +/* some dangerous bits are reserved for kernel internals */ +#define P4_CONFIG_MASK \ + (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \ + (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR)) + static inline bool p4_is_event_cascaded(u64 config) { u32 cccr = p4_config_unpack_cccr(config); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index fedf32a8c3e..7490bf8d145 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -34,7 +34,8 @@ GCOV_PROFILE_paravirt.o := n obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o ldt.o dumpstack.o -obj-y += setup.o x86_init.o i8259.o irqinit.o +obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o +obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f65ab8b014c..a36bb90aef5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; -static void *text_poke_early(void *addr, const void *opcode, size_t len); +void *text_poke_early(void *addr, const void *opcode, size_t len); /* Replace instructions with better alternatives for this CPU type. This runs before SMP is initialized to avoid SMP problems with @@ -522,7 +522,7 @@ void __init alternative_instructions(void) * instructions. And on the local CPU you need to be protected again NMI or MCE * handlers seeing an inconsistent instruction while you patch. */ -static void *__init_or_module text_poke_early(void *addr, const void *opcode, +void *__init_or_module text_poke_early(void *addr, const void *opcode, size_t len) { unsigned long flags; @@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) tpp.len = len; atomic_set(&stop_machine_first, 1); wrote_text = 0; - stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); + /* Use __stop_machine() because the caller already got online_cpus. */ + __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); return addr; } +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) + +unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; + +void __init arch_init_ideal_nop5(void) +{ + extern const unsigned char ftrace_test_p6nop[]; + extern const unsigned char ftrace_test_nop5[]; + extern const unsigned char ftrace_test_jmp[]; + int faulted = 0; + + /* + * There is no good nop for all x86 archs. + * We will default to using the P6_NOP5, but first we + * will test to make sure that the nop will actually + * work on this CPU. If it faults, we will then + * go to a lesser efficient 5 byte nop. If that fails + * we then just use a jmp as our nop. This isn't the most + * efficient nop, but we can not use a multi part nop + * since we would then risk being preempted in the middle + * of that nop, and if we enabled tracing then, it might + * cause a system crash. + * + * TODO: check the cpuid to determine the best nop. + */ + asm volatile ( + "ftrace_test_jmp:" + "jmp ftrace_test_p6nop\n" + "nop\n" + "nop\n" + "nop\n" /* 2 byte jmp + 3 bytes */ + "ftrace_test_p6nop:" + P6_NOP5 + "jmp 1f\n" + "ftrace_test_nop5:" + ".byte 0x66,0x66,0x66,0x66,0x90\n" + "1:" + ".section .fixup, \"ax\"\n" + "2: movl $1, %0\n" + " jmp ftrace_test_nop5\n" + "3: movl $2, %0\n" + " jmp 1b\n" + ".previous\n" + _ASM_EXTABLE(ftrace_test_p6nop, 2b) + _ASM_EXTABLE(ftrace_test_nop5, 3b) + : "=r"(faulted) : "0" (faulted)); + + switch (faulted) { + case 0: + pr_info("converting mcount calls to 0f 1f 44 00 00\n"); + memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5); + break; + case 1: + pr_info("converting mcount calls to 66 66 66 66 90\n"); + memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5); + break; + case 2: + pr_info("converting mcount calls to jmp . + 5\n"); + memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5); + break; + } + +} +#endif diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 03a5b0385ad..fe73c1844a9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -531,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event) /* * Setup the hardware configuration for a given attr_type */ -static int __hw_perf_event_init(struct perf_event *event) +static int __x86_pmu_event_init(struct perf_event *event) { int err; @@ -584,7 +584,7 @@ static void x86_pmu_disable_all(void) } } -void hw_perf_disable(void) +static void x86_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -619,7 +619,7 @@ static void x86_pmu_enable_all(int added) } } -static const struct pmu pmu; +static struct pmu pmu; static inline int is_x86_event(struct perf_event *event) { @@ -801,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, hwc->last_tag == cpuc->tags[i]; } -static int x86_pmu_start(struct perf_event *event); -static void x86_pmu_stop(struct perf_event *event); +static void x86_pmu_start(struct perf_event *event, int flags); +static void x86_pmu_stop(struct perf_event *event, int flags); -void hw_perf_enable(void) +static void x86_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct perf_event *event; @@ -840,7 +840,14 @@ void hw_perf_enable(void) match_prev_assignment(hwc, cpuc, i)) continue; - x86_pmu_stop(event); + /* + * Ensure we don't accidentally enable a stopped + * counter simply because we rescheduled. + */ + if (hwc->state & PERF_HES_STOPPED) + hwc->state |= PERF_HES_ARCH; + + x86_pmu_stop(event, PERF_EF_UPDATE); } for (i = 0; i < cpuc->n_events; i++) { @@ -852,7 +859,10 @@ void hw_perf_enable(void) else if (i < n_running) continue; - x86_pmu_start(event); + if (hwc->state & PERF_HES_ARCH) + continue; + + x86_pmu_start(event, PERF_EF_RELOAD); } cpuc->n_added = 0; perf_events_lapic_init(); @@ -953,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event) } /* - * activate a single event + * Add a single event to the PMU. * * The event is added to the group of enabled events * but only if it can be scehduled with existing events. - * - * Called with PMU disabled. If successful and return value 1, - * then guaranteed to call perf_enable() and hw_perf_enable() */ -static int x86_pmu_enable(struct perf_event *event) +static int x86_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc; @@ -970,58 +977,67 @@ static int x86_pmu_enable(struct perf_event *event) hwc = &event->hw; + perf_pmu_disable(event->pmu); n0 = cpuc->n_events; - n = collect_events(cpuc, event, false); - if (n < 0) - return n; + ret = n = collect_events(cpuc, event, false); + if (ret < 0) + goto out; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed - * at commit time(->commit_txn) as a whole + * at commit time (->commit_txn) as a whole */ if (cpuc->group_flag & PERF_EVENT_TXN) - goto out; + goto done_collect; ret = x86_pmu.schedule_events(cpuc, n, assign); if (ret) - return ret; + goto out; /* * copy new assignment, now we know it is possible * will be used by hw_perf_enable() */ memcpy(cpuc->assign, assign, n*sizeof(int)); -out: +done_collect: cpuc->n_events = n; cpuc->n_added += n - n0; cpuc->n_txn += n - n0; - return 0; + ret = 0; +out: + perf_pmu_enable(event->pmu); + return ret; } -static int x86_pmu_start(struct perf_event *event) +static void x86_pmu_start(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx = event->hw.idx; - if (idx == -1) - return -EAGAIN; + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + x86_perf_event_set_period(event); + } + + event->hw.state = 0; - x86_perf_event_set_period(event); cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); __set_bit(idx, cpuc->running); x86_pmu.enable(event); perf_event_update_userpage(event); - - return 0; -} - -static void x86_pmu_unthrottle(struct perf_event *event) -{ - int ret = x86_pmu_start(event); - WARN_ON_ONCE(ret); } void perf_event_print_debug(void) @@ -1078,27 +1094,29 @@ void perf_event_print_debug(void) local_irq_restore(flags); } -static void x86_pmu_stop(struct perf_event *event) +static void x86_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - if (!__test_and_clear_bit(idx, cpuc->active_mask)) - return; - - x86_pmu.disable(event); - - /* - * Drain the remaining delta count out of a event - * that we are disabling: - */ - x86_perf_event_update(event); + if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { + x86_pmu.disable(event); + cpuc->events[hwc->idx] = NULL; + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + } - cpuc->events[idx] = NULL; + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + x86_perf_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } } -static void x86_pmu_disable(struct perf_event *event) +static void x86_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int i; @@ -1111,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event) if (cpuc->group_flag & PERF_EVENT_TXN) return; - x86_pmu_stop(event); + x86_pmu_stop(event, PERF_EF_UPDATE); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { @@ -1134,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) struct perf_sample_data data; struct cpu_hw_events *cpuc; struct perf_event *event; - struct hw_perf_event *hwc; int idx, handled = 0; u64 val; @@ -1155,7 +1172,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) } event = cpuc->events[idx]; - hwc = &event->hw; val = x86_perf_event_update(event); if (val & (1ULL << (x86_pmu.cntval_bits - 1))) @@ -1171,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) continue; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } if (handled) @@ -1180,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) return handled; } -void smp_perf_pending_interrupt(struct pt_regs *regs) -{ - irq_enter(); - ack_APIC_irq(); - inc_irq_stat(apic_pending_irqs); - perf_event_do_pending(); - irq_exit(); -} - -void set_perf_event_pending(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - if (!x86_pmu.apic || !x86_pmu_initialized()) - return; - - apic->send_IPI_self(LOCAL_PENDING_VECTOR); -#endif -} - void perf_events_lapic_init(void) { if (!x86_pmu.apic || !x86_pmu_initialized()) @@ -1388,7 +1385,6 @@ void __init init_hw_perf_events(void) x86_pmu.num_counters = X86_PMC_MAX_GENERIC; } x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; - perf_max_events = x86_pmu.num_counters; if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", @@ -1424,6 +1420,7 @@ void __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); + perf_pmu_register(&pmu); perf_cpu_notifier(x86_pmu_notifier); } @@ -1437,10 +1434,11 @@ static inline void x86_pmu_read(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void x86_pmu_start_txn(const struct pmu *pmu) +static void x86_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + perf_pmu_disable(pmu); cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1450,7 +1448,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -static void x86_pmu_cancel_txn(const struct pmu *pmu) +static void x86_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1460,6 +1458,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) */ cpuc->n_added -= cpuc->n_txn; cpuc->n_events -= cpuc->n_txn; + perf_pmu_enable(pmu); } /* @@ -1467,7 +1466,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -static int x86_pmu_commit_txn(const struct pmu *pmu) +static int x86_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int assign[X86_PMC_IDX_MAX]; @@ -1489,22 +1488,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) memcpy(cpuc->assign, assign, n*sizeof(int)); cpuc->group_flag &= ~PERF_EVENT_TXN; - + perf_pmu_enable(pmu); return 0; } -static const struct pmu pmu = { - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, - .start = x86_pmu_start, - .stop = x86_pmu_stop, - .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, - .start_txn = x86_pmu_start_txn, - .cancel_txn = x86_pmu_cancel_txn, - .commit_txn = x86_pmu_commit_txn, -}; - /* * validate that we can schedule this event */ @@ -1579,12 +1566,22 @@ out: return ret; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +int x86_pmu_event_init(struct perf_event *event) { - const struct pmu *tmp; + struct pmu *tmp; int err; - err = __hw_perf_event_init(event); + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + + err = __x86_pmu_event_init(event); if (!err) { /* * we temporarily connect event to its pmu @@ -1604,26 +1601,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) if (err) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; } -/* - * callchain support - */ +static struct pmu pmu = { + .pmu_enable = x86_pmu_enable, + .pmu_disable = x86_pmu_disable, -static inline -void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} + .event_init = x86_pmu_event_init, -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); + .add = x86_pmu_add, + .del = x86_pmu_del, + .start = x86_pmu_start, + .stop = x86_pmu_stop, + .read = x86_pmu_read, + .start_txn = x86_pmu_start_txn, + .cancel_txn = x86_pmu_cancel_txn, + .commit_txn = x86_pmu_commit_txn, +}; + +/* + * callchain support + */ static void backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) @@ -1645,7 +1647,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops backtrace_ops = { @@ -1656,11 +1658,15 @@ static const struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack_bp, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->ip); + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } + + perf_callchain_store(entry, regs->ip); dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } @@ -1689,7 +1695,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (fp < compat_ptr(regs->sp)) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = compat_ptr(frame.next_frame); } return 1; @@ -1702,19 +1708,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) } #endif -static void -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stack_frame frame; const void __user *fp; - if (!user_mode(regs)) - regs = task_pt_regs(current); + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } fp = (void __user *)regs->bp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->ip); + perf_callchain_store(entry, regs->ip); if (perf_callchain_user32(regs, entry)) return; @@ -1731,52 +1738,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) if ((unsigned long)fp < regs->sp) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = frame.next_frame; } } -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry; - - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* TODO: We don't support guest os callchain now */ - return NULL; - } - - if (in_nmi()) - entry = &__get_cpu_var(pmc_nmi_entry); - else - entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} - unsigned long perf_instruction_pointer(struct pt_regs *regs) { unsigned long ip; diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index c2897b7b4a3..46d58448c3a 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids [ C(DTLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ + [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0, @@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids [ C(ITLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ - [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ + [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ee05c90012d..c8f5c088cad 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) struct cpu_hw_events *cpuc; int bit, loops; u64 status; - int handled = 0; + int handled; perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); intel_pmu_disable_all(); - intel_pmu_drain_bts_buffer(); + handled = intel_pmu_drain_bts_buffer(); status = intel_pmu_get_status(); if (!status) { intel_pmu_enable_all(0); - return 0; + return handled; } loops = 0; @@ -763,7 +763,7 @@ again: data.period = event->hw.last_period; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 18018d1311c..4977f9c400e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void) update_debugctlmsr(debugctlmsr); } -static void intel_pmu_drain_bts_buffer(void) +static int intel_pmu_drain_bts_buffer(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void) struct pt_regs regs; if (!event) - return; + return 0; if (!ds) - return; + return 0; at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; top = (struct bts_record *)(unsigned long)ds->bts_index; if (top <= at) - return; + return 0; ds->bts_index = ds->bts_buffer_base; @@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void) perf_prepare_sample(&header, &data, event, ®s); if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) - return; + return 1; for (; at < top; at++) { data.ip = at->from; @@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void) /* There's new data available. */ event->hw.interrupts++; event->pending_kill = POLL_IN; + return 1; } /* @@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.flags &= ~PERF_EFLAGS_EXACT; if (perf_event_overflow(event, 1, &data, ®s)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 24901517399..81400b93e69 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -18,6 +18,8 @@ struct p4_event_bind { unsigned int opcode; /* Event code and ESCR selector */ unsigned int escr_msr[2]; /* ESCR MSR for this event */ + unsigned int escr_emask; /* valid ESCR EventMask bits */ + unsigned int shared; /* event is shared across threads */ char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ }; @@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = { [P4_EVENT_TC_DELIVER_MODE] = { .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID), + .shared = 1, .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_BPU_FETCH_REQUEST] = { .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_ITLB_REFERENCE] = { .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) | + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_MEMORY_CANCEL] = { .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) | + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_MEMORY_COMPLETE] = { .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) | + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_LOAD_PORT_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_STORE_PORT_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_MOB_LOAD_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_PAGE_WALK_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS), + .shared = 1, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BSQ_CACHE_REFERENCE] = { .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_IOQ_ALLOCATION] = { .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH), .cntr = { {2, -1, -1}, {3, -1, -1} }, }, [P4_EVENT_FSB_DATA_ACTIVITY] = { .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER), + .shared = 1, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2), .cntr = { {0, -1, -1}, {1, -1, -1} }, }, [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2), .cntr = { {2, -1, -1}, {3, -1, -1} }, }, [P4_EVENT_SSE_INPUT_ASSIST] = { .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_PACKED_SP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_PACKED_DP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_SCALAR_SP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_SCALAR_DP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_64BIT_MMX_UOP] = { .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_128BIT_MMX_UOP] = { .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_X87_FP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_TC_MISC] = { .opcode = P4_OPCODE(P4_EVENT_TC_MISC), .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_GLOBAL_POWER_EVENTS] = { .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_TC_MS_XFER] = { .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_UOP_QUEUE_WRITES] = { .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_RETIRED_BRANCH_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_RESOURCE_STALL] = { .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_WC_BUFFER] = { .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) | + P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_B2B_CYCLES] = { .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BNR] = { .opcode = P4_OPCODE(P4_EVENT_BNR), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_SNOOP] = { .opcode = P4_OPCODE(P4_EVENT_SNOOP), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_RESPONSE] = { .opcode = P4_OPCODE(P4_EVENT_RESPONSE), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_FRONT_END_EVENT] = { .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_EXECUTION_EVENT] = { .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_REPLAY_EVENT] = { .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_INSTR_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_UOPS_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_UOP_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_BRANCH_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_MISPRED_BRANCH_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_X87_ASSIST] = { .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_MACHINE_CLEAR] = { .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) | + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) | + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_INSTR_COMPLETED] = { .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, }; @@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event) return config; } +/* check cpu model specifics */ +static bool p4_event_match_cpu_model(unsigned int event_idx) +{ + /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */ + if (event_idx == P4_EVENT_INSTR_COMPLETED) { + if (boot_cpu_data.x86_model != 3 && + boot_cpu_data.x86_model != 4 && + boot_cpu_data.x86_model != 6) + return false; + } + + /* + * For info + * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2 + */ + + return true; +} + static int p4_validate_raw_event(struct perf_event *event) { - unsigned int v; + unsigned int v, emask; - /* user data may have out-of-bound event index */ + /* User data may have out-of-bound event index */ v = p4_config_unpack_event(event->attr.config); - if (v >= ARRAY_SIZE(p4_event_bind_map)) { - pr_warning("P4 PMU: Unknown event code: %d\n", v); + if (v >= ARRAY_SIZE(p4_event_bind_map)) + return -EINVAL; + + /* It may be unsupported: */ + if (!p4_event_match_cpu_model(v)) return -EINVAL; + + /* + * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as + * in Architectural Performance Monitoring, it means not + * on _which_ logical cpu to count but rather _when_, ie it + * depends on logical cpu state -- count event if one cpu active, + * none, both or any, so we just allow user to pass any value + * desired. + * + * In turn we always set Tx_OS/Tx_USR bits bound to logical + * cpu without their propagation to another cpu + */ + + /* + * if an event is shared accross the logical threads + * the user needs special permissions to be able to use it + */ + if (p4_event_bind_map[v].shared) { + if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) + return -EACCES; } + /* ESCR EventMask bits may be invalid */ + emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK; + if (emask & ~p4_event_bind_map[v].escr_emask) + return -EINVAL; + /* - * it may have some screwed PEBS bits + * it may have some invalid PEBS bits */ - if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { - pr_warning("P4 PMU: PEBS are not supported yet\n"); + if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) return -EINVAL; - } + v = p4_config_unpack_metric(event->attr.config); - if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { - pr_warning("P4 PMU: Unknown metric code: %d\n", v); + if (v >= ARRAY_SIZE(p4_pebs_bind_map)) return -EINVAL; - } return 0; } @@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event) if (event->attr.type == PERF_TYPE_RAW) { + /* + * Clear bits we reserve to be managed by kernel itself + * and never allowed from a user space + */ + event->attr.config &= P4_CONFIG_MASK; + rc = p4_validate_raw_event(event); if (rc) goto out; /* - * We don't control raw events so it's up to the caller - * to pass sane values (and we don't count the thread number - * on HT machine but allow HT-compatible specifics to be - * passed on) - * * Note that for RAW events we allow user to use P4_CCCR_RESERVED * bits since we keep additional info here (for cache events and etc) - * - * XXX: HT wide things should check perf_paranoid_cpu() && - * CAP_SYS_ADMIN */ - event->hw.config |= event->attr.config & - (p4_config_pack_escr(P4_ESCR_MASK_HT) | - p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); - - event->hw.config &= ~P4_CCCR_FORCE_OVF; + event->hw.config |= event->attr.config; } rc = x86_setup_perfctr(event); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 17be5ec7cbb..c375c79065f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1023,9 +1023,9 @@ apicinterrupt ERROR_APIC_VECTOR \ apicinterrupt SPURIOUS_APIC_VECTOR \ spurious_interrupt smp_spurious_interrupt -#ifdef CONFIG_PERF_EVENTS -apicinterrupt LOCAL_PENDING_VECTOR \ - perf_pending_interrupt smp_perf_pending_interrupt +#ifdef CONFIG_IRQ_WORK +apicinterrupt IRQ_WORK_VECTOR \ + irq_work_interrupt smp_irq_work_interrupt #endif /* diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index cd37469b54e..3afb33f14d2 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) return mod_code_status; } - - - -static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; - static unsigned char *ftrace_nop_replace(void) { - return ftrace_nop; + return ideal_nop5; } static int @@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int __init ftrace_dyn_arch_init(void *data) { - extern const unsigned char ftrace_test_p6nop[]; - extern const unsigned char ftrace_test_nop5[]; - extern const unsigned char ftrace_test_jmp[]; - int faulted = 0; - - /* - * There is no good nop for all x86 archs. - * We will default to using the P6_NOP5, but first we - * will test to make sure that the nop will actually - * work on this CPU. If it faults, we will then - * go to a lesser efficient 5 byte nop. If that fails - * we then just use a jmp as our nop. This isn't the most - * efficient nop, but we can not use a multi part nop - * since we would then risk being preempted in the middle - * of that nop, and if we enabled tracing then, it might - * cause a system crash. - * - * TODO: check the cpuid to determine the best nop. - */ - asm volatile ( - "ftrace_test_jmp:" - "jmp ftrace_test_p6nop\n" - "nop\n" - "nop\n" - "nop\n" /* 2 byte jmp + 3 bytes */ - "ftrace_test_p6nop:" - P6_NOP5 - "jmp 1f\n" - "ftrace_test_nop5:" - ".byte 0x66,0x66,0x66,0x66,0x90\n" - "1:" - ".section .fixup, \"ax\"\n" - "2: movl $1, %0\n" - " jmp ftrace_test_nop5\n" - "3: movl $2, %0\n" - " jmp 1b\n" - ".previous\n" - _ASM_EXTABLE(ftrace_test_p6nop, 2b) - _ASM_EXTABLE(ftrace_test_nop5, 3b) - : "=r"(faulted) : "0" (faulted)); - - switch (faulted) { - case 0: - pr_info("converting mcount calls to 0f 1f 44 00 00\n"); - memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); - break; - case 1: - pr_info("converting mcount calls to 66 66 66 66 90\n"); - memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); - break; - case 2: - pr_info("converting mcount calls to jmp . + 5\n"); - memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); - break; - } - /* The return code is retured via data */ *(unsigned long *)data = 0; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 91fd0c70a18..44edb03fc9e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); seq_printf(p, " Performance monitoring interrupts\n"); - seq_printf(p, "%*s: ", prec, "PND"); + seq_printf(p, "%*s: ", prec, "IWI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); - seq_printf(p, " Performance pending work\n"); + seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); + seq_printf(p, " IRQ work interrupts\n"); #endif if (x86_platform_ipi_callback) { seq_printf(p, "%*s: ", prec, "PLT"); @@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->apic_timer_irqs; sum += irq_stats(cpu)->irq_spurious_count; sum += irq_stats(cpu)->apic_perf_irqs; - sum += irq_stats(cpu)->apic_pending_irqs; + sum += irq_stats(cpu)->apic_irq_work_irqs; #endif if (x86_platform_ipi_callback) sum += irq_stats(cpu)->x86_platform_ipis; diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c new file mode 100644 index 00000000000..ca8f703a1e7 --- /dev/null +++ b/arch/x86/kernel/irq_work.c @@ -0,0 +1,30 @@ +/* + * x86 specific code for irq_work + * + * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + */ + +#include <linux/kernel.h> +#include <linux/irq_work.h> +#include <linux/hardirq.h> +#include <asm/apic.h> + +void smp_irq_work_interrupt(struct pt_regs *regs) +{ + irq_enter(); + ack_APIC_irq(); + inc_irq_stat(apic_irq_work_irqs); + irq_work_run(); + irq_exit(); +} + +void arch_irq_work_raise(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + if (!cpu_has_apic) + return; + + apic->send_IPI_self(IRQ_WORK_VECTOR); + apic_wait_icr_idle(); +#endif +} diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 990ae7cfc57..713969b9266 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -224,9 +224,9 @@ static void __init apic_intr_init(void) alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - /* Performance monitoring interrupts: */ -# ifdef CONFIG_PERF_EVENTS - alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); + /* IRQ work interrupts: */ +# ifdef CONFIG_IRQ_WORK + alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt); # endif #endif diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c new file mode 100644 index 00000000000..961b6b30ba9 --- /dev/null +++ b/arch/x86/kernel/jump_label.c @@ -0,0 +1,50 @@ +/* + * jump label x86 support + * + * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> + * + */ +#include <linux/jump_label.h> +#include <linux/memory.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/jhash.h> +#include <linux/cpu.h> +#include <asm/kprobes.h> +#include <asm/alternative.h> + +#ifdef HAVE_JUMP_LABEL + +union jump_code_union { + char code[JUMP_LABEL_NOP_SIZE]; + struct { + char jump; + int offset; + } __attribute__((packed)); +}; + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + union jump_code_union code; + + if (type == JUMP_LABEL_ENABLE) { + code.jump = 0xe9; + code.offset = entry->target - + (entry->code + JUMP_LABEL_NOP_SIZE); + } else + memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE); + get_online_cpus(); + mutex_lock(&text_mutex); + text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); + mutex_unlock(&text_mutex); + put_online_cpus(); +} + +void arch_jump_label_text_poke_early(jump_label_t addr) +{ + text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE); +} + +#endif diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 770ebfb349e..1cbd54c0df9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) return 0; } -/* Dummy buffers for kallsyms_lookup */ -static char __dummy_buf[KSYM_NAME_LEN]; - /* Check if paddr is at an instruction boundary */ static int __kprobes can_probe(unsigned long paddr) { @@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr) struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; - if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) + if (!kallsyms_lookup_size_offset(paddr, NULL, &offset)) return 0; /* Decode instructions */ @@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, *(unsigned long *)addr = val; } -void __kprobes kprobes_optinsn_template_holder(void) +static void __used __kprobes kprobes_optinsn_template_holder(void) { asm volatile ( ".global optprobe_template_entry\n" @@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) } /* Check whether the address range is reserved */ if (ftrace_text_reserved(src, src + len - 1) || - alternatives_text_reserved(src, src + len - 1)) + alternatives_text_reserved(src, src + len - 1) || + jump_label_text_reserved(src, src + len - 1)) return -EBUSY; return len; @@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr) unsigned long addr, size = 0, offset = 0; struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; - /* Dummy buffers for lookup_symbol_attrs */ - static char __dummy_buf[KSYM_NAME_LEN]; /* Lookup symbol including addr */ - if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) + if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) return 0; /* Check there is enough space for a relative jump. */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 1c355c55096..8f295609173 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -239,6 +239,9 @@ int module_finalize(const Elf_Ehdr *hdr, apply_paravirt(pseg, pseg + para->sh_size); } + /* make jump label nops */ + jump_label_apply_nops(me); + return 0; } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c3a4fbb2b99..00e167870f7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -112,6 +112,7 @@ #include <asm/numa_64.h> #endif #include <asm/mce.h> +#include <asm/alternative.h> /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -726,6 +727,7 @@ void __init setup_arch(char **cmdline_p) { int acpi = 0; int k8 = 0; + unsigned long flags; #ifdef CONFIG_X86_32 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); @@ -1071,6 +1073,10 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.banner(); mcheck_init(); + + local_irq_save(flags); + arch_init_ideal_nop5(); + local_irq_restore(flags); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 4c4508e8a20..a24c6cfdccc 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -251,6 +251,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; + WARN_ON_ONCE(in_nmi()); + /* * Synchronize this task's top level page-table * with the 'reference' page table. @@ -369,6 +371,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; + WARN_ON_ONCE(in_nmi()); + /* * Copy kernel mappings over when needed. This can also * happen within a race in page table update. In the later diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index b3b531a4f8e..d87dd6d042d 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, if (!pte) return false; + WARN_ON_ONCE(in_nmi()); + if (error_code & 2) kmemcheck_access(regs, address, KMEMCHECK_WRITE); else diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 3855096c59b..2d49d4e19a3 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -14,6 +14,7 @@ #include <asm/ptrace.h> #include <asm/uaccess.h> #include <asm/stacktrace.h> +#include <linux/compat.h> static void backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) @@ -48,14 +49,12 @@ static struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack, }; -struct frame_head { - struct frame_head *bp; - unsigned long ret; -} __attribute__((packed)); - -static struct frame_head *dump_user_backtrace(struct frame_head *head) +#ifdef CONFIG_COMPAT +static struct stack_frame_ia32 * +dump_user_backtrace_32(struct stack_frame_ia32 *head) { - struct frame_head bufhead[2]; + struct stack_frame_ia32 bufhead[2]; + struct stack_frame_ia32 *fp; /* Also check accessibility of one struct frame_head beyond */ if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) @@ -63,20 +62,66 @@ static struct frame_head *dump_user_backtrace(struct frame_head *head) if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) return NULL; - oprofile_add_trace(bufhead[0].ret); + fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); + + oprofile_add_trace(bufhead[0].return_address); + + /* frame pointers should strictly progress back up the stack + * (towards higher addresses) */ + if (head >= fp) + return NULL; + + return fp; +} + +static inline int +x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) +{ + struct stack_frame_ia32 *head; + + /* User process is 32-bit */ + if (!current || !test_thread_flag(TIF_IA32)) + return 0; + + head = (struct stack_frame_ia32 *) regs->bp; + while (depth-- && head) + head = dump_user_backtrace_32(head); + + return 1; +} + +#else +static inline int +x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) +{ + return 0; +} +#endif /* CONFIG_COMPAT */ + +static struct stack_frame *dump_user_backtrace(struct stack_frame *head) +{ + struct stack_frame bufhead[2]; + + /* Also check accessibility of one struct stack_frame beyond */ + if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) + return NULL; + if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) + return NULL; + + oprofile_add_trace(bufhead[0].return_address); /* frame pointers should strictly progress back up the stack * (towards higher addresses) */ - if (head >= bufhead[0].bp) + if (head >= bufhead[0].next_frame) return NULL; - return bufhead[0].bp; + return bufhead[0].next_frame; } void x86_backtrace(struct pt_regs * const regs, unsigned int depth) { - struct frame_head *head = (struct frame_head *)frame_pointer(regs); + struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); if (!user_mode_vm(regs)) { unsigned long stack = kernel_stack_pointer(regs); @@ -86,6 +131,9 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) return; } + if (x86_backtrace_32(regs, depth)) + return; + while (depth-- && head) head = dump_user_backtrace(head); } diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index f1575c9a257..bd1489c3ce0 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -695,9 +695,6 @@ static int __init ppro_init(char **cpu_type) return 1; } -/* in order to get sysfs right */ -static int using_nmi; - int __init op_nmi_init(struct oprofile_operations *ops) { __u8 vendor = boot_cpu_data.x86_vendor; @@ -705,8 +702,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) char *cpu_type = NULL; int ret = 0; - using_nmi = 0; - if (!cpu_has_apic) return -ENODEV; @@ -790,13 +785,11 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (ret) return ret; - using_nmi = 1; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); return 0; } void op_nmi_exit(void) { - if (using_nmi) - exit_sysfs(); + exit_sysfs(); } |