diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/perf_event.h | 24 | ||||
-rw-r--r-- | include/linux/sched.h | 3 | ||||
-rw-r--r-- | include/trace/events/sched.h | 22 | ||||
-rw-r--r-- | include/trace/ftrace.h | 33 | ||||
-rw-r--r-- | include/uapi/linux/perf_event.h | 123 |
5 files changed, 141 insertions, 64 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c43f6eabad5..4019d82c3d0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -64,30 +64,6 @@ struct perf_raw_record { }; /* - * single taken branch record layout: - * - * from: source instruction (may not always be a branch insn) - * to: branch target - * mispred: branch target was mispredicted - * predicted: branch target was predicted - * - * support for mispred, predicted is optional. In case it - * is not supported mispred = predicted = 0. - * - * in_tx: running in a hardware transaction - * abort: aborting a hardware transaction - */ -struct perf_branch_entry { - __u64 from; - __u64 to; - __u64 mispred:1, /* target mispredicted */ - predicted:1,/* target predicted */ - in_tx:1, /* in transaction */ - abort:1, /* transaction abort */ - reserved:60; -}; - -/* * branch stack layout: * nr: number of taken branches stored in entries[] * diff --git a/include/linux/sched.h b/include/linux/sched.h index 078066daffd..f79ced71943 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1034,6 +1034,9 @@ struct task_struct { #ifdef CONFIG_SMP struct llist_node wake_entry; int on_cpu; + struct task_struct *last_wakee; + unsigned long wakee_flips; + unsigned long wakee_flip_decay_ts; #endif int on_rq; diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index e5586caff67..2e7d9947a10 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -57,7 +57,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, TP_PROTO(struct task_struct *p, int success), - TP_ARGS(p, success), + TP_ARGS(__perf_task(p), success), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -73,9 +73,6 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, __entry->prio = p->prio; __entry->success = success; __entry->target_cpu = task_cpu(p); - ) - TP_perf_assign( - __perf_task(p); ), TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", @@ -313,7 +310,7 @@ DECLARE_EVENT_CLASS(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), - TP_ARGS(tsk, delay), + TP_ARGS(__perf_task(tsk), __perf_count(delay)), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -325,10 +322,6 @@ DECLARE_EVENT_CLASS(sched_stat_template, memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); - __perf_task(tsk); ), TP_printk("comm=%s pid=%d delay=%Lu [ns]", @@ -372,11 +365,11 @@ DEFINE_EVENT(sched_stat_template, sched_stat_blocked, * Tracepoint for accounting runtime (time the task is executing * on a CPU). */ -TRACE_EVENT(sched_stat_runtime, +DECLARE_EVENT_CLASS(sched_stat_runtime, TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), - TP_ARGS(tsk, runtime, vruntime), + TP_ARGS(tsk, __perf_count(runtime), vruntime), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -390,9 +383,6 @@ TRACE_EVENT(sched_stat_runtime, __entry->pid = tsk->pid; __entry->runtime = runtime; __entry->vruntime = vruntime; - ) - TP_perf_assign( - __perf_count(runtime); ), TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]", @@ -401,6 +391,10 @@ TRACE_EVENT(sched_stat_runtime, (unsigned long long)__entry->vruntime) ); +DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime, + TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), + TP_ARGS(tsk, runtime, vruntime)); + /* * Tracepoint for showing priority inheritance modifying a tasks * priority. diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 41a6643e213..5c7ab17cbb0 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -507,8 +507,14 @@ static inline notrace int ftrace_get_offsets_##call( \ #undef TP_fast_assign #define TP_fast_assign(args...) args -#undef TP_perf_assign -#define TP_perf_assign(args...) +#undef __perf_addr +#define __perf_addr(a) (a) + +#undef __perf_count +#define __perf_count(c) (c) + +#undef __perf_task +#define __perf_task(t) (t) #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -636,16 +642,13 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call #define __get_str(field) (char *)__get_dynamic_array(field) #undef __perf_addr -#define __perf_addr(a) __addr = (a) +#define __perf_addr(a) (__addr = (a)) #undef __perf_count -#define __perf_count(c) __count = (c) +#define __perf_count(c) (__count = (c)) #undef __perf_task -#define __perf_task(t) __task = (t) - -#undef TP_perf_assign -#define TP_perf_assign(args...) args +#define __perf_task(t) (__task = (t)) #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ @@ -663,15 +666,20 @@ perf_trace_##call(void *__data, proto) \ int __data_size; \ int rctx; \ \ - perf_fetch_caller_regs(&__regs); \ - \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ + \ + head = this_cpu_ptr(event_call->perf_events); \ + if (__builtin_constant_p(!__task) && !__task && \ + hlist_empty(head)) \ + return; \ + \ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ sizeof(u64)); \ __entry_size -= sizeof(u32); \ \ - entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare( \ - __entry_size, event_call->event.type, &__regs, &rctx); \ + perf_fetch_caller_regs(&__regs); \ + entry = perf_trace_buf_prepare(__entry_size, \ + event_call->event.type, &__regs, &rctx); \ if (!entry) \ return; \ \ @@ -679,7 +687,6 @@ perf_trace_##call(void *__data, proto) \ \ { assign; } \ \ - head = this_cpu_ptr(event_call->perf_events); \ perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ __count, &__regs, head, __task); \ } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 0b1df41691e..ca1d90bcb74 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -109,6 +109,7 @@ enum perf_sw_ids { PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, PERF_COUNT_SW_EMULATION_FAULTS = 8, + PERF_COUNT_SW_DUMMY = 9, PERF_COUNT_SW_MAX, /* non-ABI */ }; @@ -134,8 +135,9 @@ enum perf_event_sample_format { PERF_SAMPLE_STACK_USER = 1U << 13, PERF_SAMPLE_WEIGHT = 1U << 14, PERF_SAMPLE_DATA_SRC = 1U << 15, + PERF_SAMPLE_IDENTIFIER = 1U << 16, - PERF_SAMPLE_MAX = 1U << 16, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 17, /* non-ABI */ }; /* @@ -275,8 +277,9 @@ struct perf_event_attr { exclude_callchain_kernel : 1, /* exclude kernel callchains */ exclude_callchain_user : 1, /* exclude user callchains */ + mmap2 : 1, /* include mmap with inode data */ - __reserved_1 : 41; + __reserved_1 : 40; union { __u32 wakeup_events; /* wakeup every n events */ @@ -321,6 +324,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR('$', 7, u64 *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, @@ -375,9 +379,12 @@ struct perf_event_mmap_page { __u64 time_running; /* time event on cpu */ union { __u64 capabilities; - __u64 cap_usr_time : 1, - cap_usr_rdpmc : 1, - cap_____res : 62; + struct { + __u64 cap_usr_time : 1, + cap_usr_rdpmc : 1, + cap_usr_time_zero : 1, + cap_____res : 61; + }; }; /* @@ -418,12 +425,29 @@ struct perf_event_mmap_page { __u16 time_shift; __u32 time_mult; __u64 time_offset; + /* + * If cap_usr_time_zero, the hardware clock (e.g. TSC) can be calculated + * from sample timestamps. + * + * time = timestamp - time_zero; + * quot = time / time_mult; + * rem = time % time_mult; + * cyc = (quot << time_shift) + (rem << time_shift) / time_mult; + * + * And vice versa: + * + * quot = cyc >> time_shift; + * rem = cyc & ((1 << time_shift) - 1); + * timestamp = time_zero + quot * time_mult + + * ((rem * time_mult) >> time_shift); + */ + __u64 time_zero; /* * Hole for extension of the self monitor capabilities */ - __u64 __reserved[120]; /* align to 1k */ + __u64 __reserved[119]; /* align to 1k */ /* * Control data for the mmap() data buffer. @@ -471,13 +495,28 @@ enum perf_event_type { /* * If perf_event_attr.sample_id_all is set then all event types will * have the sample_type selected fields related to where/when - * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID) - * described in PERF_RECORD_SAMPLE below, it will be stashed just after - * the perf_event_header and the fields already present for the existing - * fields, i.e. at the end of the payload. That way a newer perf.data - * file will be supported by older perf tools, with these new optional - * fields being ignored. + * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU, + * IDENTIFIER) described in PERF_RECORD_SAMPLE below, it will be stashed + * just after the perf_event_header and the fields already present for + * the existing fields, i.e. at the end of the payload. That way a newer + * perf.data file will be supported by older perf tools, with these new + * optional fields being ignored. + * + * struct sample_id { + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u64 id; } && PERF_SAMPLE_IDENTIFIER + * } && perf_event_attr::sample_id_all * + * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. The + * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed + * relative to header.size. + */ + + /* * The MMAP events record the PROT_EXEC mappings so that we can * correlate userspace IPs to code. They have the following structure: * @@ -498,6 +537,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 id; * u64 lost; + * struct sample_id sample_id; * }; */ PERF_RECORD_LOST = 2, @@ -508,6 +548,7 @@ enum perf_event_type { * * u32 pid, tid; * char comm[]; + * struct sample_id sample_id; * }; */ PERF_RECORD_COMM = 3, @@ -518,6 +559,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; + * struct sample_id sample_id; * }; */ PERF_RECORD_EXIT = 4, @@ -528,6 +570,7 @@ enum perf_event_type { * u64 time; * u64 id; * u64 stream_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_THROTTLE = 5, @@ -539,6 +582,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; + * struct sample_id sample_id; * }; */ PERF_RECORD_FORK = 7, @@ -549,6 +593,7 @@ enum perf_event_type { * u32 pid, tid; * * struct read_format values; + * struct sample_id sample_id; * }; */ PERF_RECORD_READ = 8, @@ -557,6 +602,13 @@ enum perf_event_type { * struct { * struct perf_event_header header; * + * # + * # Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. + * # The advantage of PERF_SAMPLE_IDENTIFIER is that its position + * # is fixed relative to header. + * # + * + * { u64 id; } && PERF_SAMPLE_IDENTIFIER * { u64 ip; } && PERF_SAMPLE_IP * { u32 pid, tid; } && PERF_SAMPLE_TID * { u64 time; } && PERF_SAMPLE_TIME @@ -596,11 +648,32 @@ enum perf_event_type { * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * * { u64 weight; } && PERF_SAMPLE_WEIGHT - * { u64 data_src; } && PERF_SAMPLE_DATA_SRC + * { u64 data_src; } && PERF_SAMPLE_DATA_SRC * }; */ PERF_RECORD_SAMPLE = 9, + /* + * The MMAP2 records are an augmented version of MMAP, they add + * maj, min, ino numbers to be used to uniquely identify each mapping + * + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * u32 maj; + * u32 min; + * u64 ino; + * u64 ino_generation; + * char filename[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_MMAP2 = 10, + PERF_RECORD_MAX, /* non-ABI */ }; @@ -685,4 +758,28 @@ union perf_mem_data_src { #define PERF_MEM_S(a, s) \ (((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) +/* + * single taken branch record layout: + * + * from: source instruction (may not always be a branch insn) + * to: branch target + * mispred: branch target was mispredicted + * predicted: branch target was predicted + * + * support for mispred, predicted is optional. In case it + * is not supported mispred = predicted = 0. + * + * in_tx: running in a hardware transaction + * abort: aborting a hardware transaction + */ +struct perf_branch_entry { + __u64 from; + __u64 to; + __u64 mispred:1, /* target mispredicted */ + predicted:1,/* target predicted */ + in_tx:1, /* in transaction */ + abort:1, /* transaction abort */ + reserved:60; +}; + #endif /* _UAPI_LINUX_PERF_EVENT_H */ |