Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel_ds.c')
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_ds.c	232
1 file changed, 139 insertions, 93 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 63438aad177..696ade311de 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -12,6 +12,7 @@
 #define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
 #define PEBS_BUFFER_SIZE	PAGE_SIZE
+#define PEBS_FIXUP_SIZE		PAGE_SIZE
 
 /*
  * pebs_record_32 for p4 and core not supported
 
@@ -107,15 +108,31 @@ static u64 precise_store_data(u64 status)
 	return val;
 }
 
-static u64 precise_store_data_hsw(u64 status)
+static u64 precise_store_data_hsw(struct perf_event *event, u64 status)
 {
 	union perf_mem_data_src dse;
+	u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK;
 
 	dse.val = 0;
 	dse.mem_op = PERF_MEM_OP_STORE;
 	dse.mem_lvl = PERF_MEM_LVL_NA;
+
+	/*
+	 * L1 info only valid for following events:
+	 *
+	 * MEM_UOPS_RETIRED.STLB_MISS_STORES
+	 * MEM_UOPS_RETIRED.LOCK_STORES
+	 * MEM_UOPS_RETIRED.SPLIT_STORES
+	 * MEM_UOPS_RETIRED.ALL_STORES
+	 */
+	if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0)
+		return dse.mem_lvl;
+
 	if (status & 1)
-		dse.mem_lvl = PERF_MEM_LVL_L1;
+		dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+	else
+		dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
+
 	/* Nothing else supported. Sorry. */
 	return dse.val;
 }
@@ -182,18 +199,32 @@ struct pebs_record_nhm {
  * Same as pebs_record_nhm, with two additional fields.
  */
 struct pebs_record_hsw {
-	struct pebs_record_nhm nhm;
-	/*
-	 * Real IP of the event. In the Intel documentation this
-	 * is called eventingrip.
-	 */
-	u64 real_ip;
-	/*
-	 * TSX tuning information field: abort cycles and abort flags.
-	 */
-	u64 tsx_tuning;
+	u64 flags, ip;
+	u64 ax, bx, cx, dx;
+	u64 si, di, bp, sp;
+	u64 r8,  r9,  r10, r11;
+	u64 r12, r13, r14, r15;
+	u64 status, dla, dse, lat;
+	u64 real_ip, tsx_tuning;
+};
+
+union hsw_tsx_tuning {
+	struct {
+		u32 cycles_last_block     : 32,
+		    hle_abort		  : 1,
+		    rtm_abort		  : 1,
+		    instruction_abort     : 1,
+		    non_instruction_abort : 1,
+		    retry		  : 1,
+		    data_conflict	  : 1,
+		    capacity_writes	  : 1,
+		    capacity_reads	  : 1;
+	};
+	u64	    value;
 };
 
+#define PEBS_HSW_TSX_FLAGS	0xff00000000ULL
+
 void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -214,12 +245,14 @@ void fini_debug_store_on_cpu(int cpu)
 	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 }
 
+static DEFINE_PER_CPU(void *, insn_buffer);
+
 static int alloc_pebs_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 	int node = cpu_to_node(cpu);
 	int max, thresh = 1; /* always use a single PEBS record */
-	void *buffer;
+	void *buffer, *ibuffer;
 
 	if (!x86_pmu.pebs)
 		return 0;
@@ -228,6 +261,19 @@ static int alloc_pebs_buffer(int cpu)
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
+	/*
+	 * HSW+ already provides us the eventing ip; no need to allocate this
+	 * buffer then.
+	 */
+	if (x86_pmu.intel_cap.pebs_format < 2) {
+		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+		if (!ibuffer) {
+			kfree(buffer);
+			return -ENOMEM;
+		}
+		per_cpu(insn_buffer, cpu) = ibuffer;
+	}
+
 	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
 
 	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
@@ -248,6 +294,9 @@ static void release_pebs_buffer(int cpu)
 	if (!ds || !x86_pmu.pebs)
 		return;
 
+	kfree(per_cpu(insn_buffer, cpu));
+	per_cpu(insn_buffer, cpu) = NULL;
+
 	kfree((void *)(unsigned long)ds->pebs_buffer_base);
 	ds->pebs_buffer_base = 0;
 }
@@ -262,9 +311,11 @@ static int alloc_bts_buffer(int cpu)
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node);
-	if (unlikely(!buffer))
+	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	if (unlikely(!buffer)) {
+		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
+	}
 
 	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
 	thresh = max / 16;
@@ -584,6 +635,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+	INTEL_EVENT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
 	INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
 	EVENT_CONSTRAINT_END
 };
@@ -714,6 +766,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	unsigned long old_to, to = cpuc->lbr_entries[0].to;
 	unsigned long ip = regs->ip;
 	int is_64bit = 0;
+	void *kaddr;
 
 	/*
 	 * We don't need to fixup if the PEBS assist is fault like
@@ -737,7 +790,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	 * unsigned math, either ip is before the start (impossible) or
 	 * the basic block is larger than 1 page (sanity)
 	 */
-	if ((ip - to) > PAGE_SIZE)
+	if ((ip - to) > PEBS_FIXUP_SIZE)
 		return 0;
 
 	/*
@@ -748,29 +801,33 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 		return 1;
 	}
 
+	if (!kernel_ip(ip)) {
+		int size, bytes;
+		u8 *buf = this_cpu_read(insn_buffer);
+
+		size = ip - to; /* Must fit our buffer, see above */
+		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+		if (bytes != 0)
+			return 0;
+
+		kaddr = buf;
+	} else {
+		kaddr = (void *)to;
+	}
+
 	do {
 		struct insn insn;
-		u8 buf[MAX_INSN_SIZE];
-		void *kaddr;
 
 		old_to = to;
-		if (!kernel_ip(ip)) {
-			int bytes, size = MAX_INSN_SIZE;
-
-			bytes = copy_from_user_nmi(buf, (void __user *)to, size);
-			if (bytes != size)
-				return 0;
-
-			kaddr = buf;
-		} else
-			kaddr = (void *)to;
 
 #ifdef CONFIG_X86_64
 		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
 #endif
 		insn_init(&insn, kaddr, is_64bit);
 		insn_get_length(&insn);
+
 		to += insn.length;
+		kaddr += insn.length;
 	} while (to < ip);
 
 	if (to == ip) {
@@ -785,16 +842,34 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	return 0;
 }
 
+static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
+{
+	if (pebs->tsx_tuning) {
+		union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+		return tsx.cycles_last_block;
+	}
+	return 0;
+}
+
+static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
+{
+	u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
+
+	/* For RTM XABORTs also log the abort code from AX */
+	if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
+		txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+	return txn;
+}
+
 static void __intel_pmu_pebs_event(struct perf_event *event,
 				   struct pt_regs *iregs, void *__pebs)
 {
 	/*
-	 * We cast to pebs_record_nhm to get the load latency data
-	 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
+	 * We cast to the biggest pebs_record but are careful not to
+	 * unconditionally access the 'extra' entries.
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct pebs_record_nhm *pebs = __pebs;
-	struct pebs_record_hsw *pebs_hsw = __pebs;
+	struct pebs_record_hsw *pebs = __pebs;
 	struct perf_sample_data data;
 	struct pt_regs regs;
 	u64 sample_type;
@@ -830,7 +905,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 				data.data_src.val = load_latency_data(pebs->dse);
 			else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
 				data.data_src.val =
-					precise_store_data_hsw(pebs->dse);
+					precise_store_data_hsw(event, pebs->dse);
 			else
 				data.data_src.val = precise_store_data(pebs->dse);
 		}
@@ -853,7 +928,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	regs.sp = pebs->sp;
 
 	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-		regs.ip = pebs_hsw->real_ip;
+		regs.ip = pebs->real_ip;
 		regs.flags |= PERF_EFLAGS_EXACT;
 	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
 		regs.flags |= PERF_EFLAGS_EXACT;
@@ -861,9 +936,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
 	if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
-		x86_pmu.intel_cap.pebs_format >= 1)
+	    x86_pmu.intel_cap.pebs_format >= 1)
 		data.addr = pebs->dla;
 
+	if (x86_pmu.intel_cap.pebs_format >= 2) {
+		/* Only set the TSX weight when no memory weight. */
+		if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll)
+			data.weight = intel_hsw_weight(pebs);
+
+		if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION)
+			data.txn = intel_hsw_transaction(pebs);
+	}
+
 	if (has_branch_stack(event))
 		data.br_stack = &cpuc->lbr_stack;
 
@@ -912,17 +996,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	__intel_pmu_pebs_event(event, iregs, at);
 }
 
-static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
-					void *top)
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
 	struct perf_event *event = NULL;
+	void *at, *top;
 	u64 status = 0;
 	int bit;
 
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
 	ds->pebs_index = ds->pebs_buffer_base;
 
+	if (unlikely(at > top))
+		return;
+
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
+		  "Unexpected number of pebs records %ld\n",
+		  (long)(top - at) / x86_pmu.pebs_record_size);
+
 	for (; at < top; at += x86_pmu.pebs_record_size) {
 		struct pebs_record_nhm *p = at;
 
@@ -950,61 +1051,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
 	}
 }
 
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct debug_store *ds = cpuc->ds;
-	struct pebs_record_nhm *at, *top;
-	int n;
-
-	if (!x86_pmu.pebs_active)
-		return;
-
-	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
-
-	ds->pebs_index = ds->pebs_buffer_base;
-
-	n = top - at;
-	if (n <= 0)
-		return;
-
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events,
-		  "Unexpected number of pebs records %d\n", n);
-
-	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
-static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct debug_store *ds = cpuc->ds;
-	struct pebs_record_hsw *at, *top;
-	int n;
-
-	if (!x86_pmu.pebs_active)
-		return;
-
-	at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
-	top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
-
-	n = top - at;
-	if (n <= 0)
-		return;
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events,
-		  "Unexpected number of pebs records %d\n", n);
-
-	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
 /*
  * BTS, PEBS probe and setup
  */
@@ -1039,7 +1085,7 @@ void intel_ds_init(void)
 		case 2:
 			pr_cont("PEBS fmt2%c, ", pebs_type);
 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
-			x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			break;
 
 		default:
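Note (not part of the commit): the new hsw_tsx_tuning union and the intel_hsw_weight()/intel_hsw_transaction() helpers above split the raw PEBS tsx_tuning field into a 32-bit cycle count (low 32 bits) and an abort-flag byte selected by PEBS_HSW_TSX_FLAGS (bits 32..39). A minimal user-space C sketch of that decoding, using an invented sample value, could look like this:

#include <stdint.h>
#include <stdio.h>

/* Same mask as PEBS_HSW_TSX_FLAGS in the patch: bits 32..39 of tsx_tuning. */
#define PEBS_HSW_TSX_FLAGS	0xff00000000ULL

int main(void)
{
	/* Invented tsx_tuning value: 1234 cycles, two abort-flag bits set. */
	uint64_t tsx_tuning = (0x12ULL << 32) | 1234;

	/* Low 32 bits: cycles of the last TSX block (what intel_hsw_weight() reports). */
	uint32_t cycles = (uint32_t)tsx_tuning;

	/* Bits 32..39: abort flags (what intel_hsw_transaction() shifts down). */
	uint64_t flags = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;

	printf("cycles_last_block = %u\n", cycles);
	printf("abort flags       = 0x%02llx\n", (unsigned long long)flags);
	return 0;
}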
