about | summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r-- include/linux/perf_event.h 24
-rw-r--r-- include/linux/sched.h 3
-rw-r--r-- include/trace/events/sched.h 22
-rw-r--r-- include/trace/ftrace.h 33
-rw-r--r-- include/uapi/linux/perf_event.h 123
5 files changed, 141 insertions, 64 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c43f6eabad5..4019d82c3d0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -64,30 +64,6 @@ struct perf_raw_record {
};
/*
- * single taken branch record layout:
- *
- * from: source instruction (may not always be a branch insn)
- * to: branch target
- * mispred: branch target was mispredicted
- * predicted: branch target was predicted
- *
- * support for mispred, predicted is optional. In case it
- * is not supported mispred = predicted = 0.
- *
- * in_tx: running in a hardware transaction
- * abort: aborting a hardware transaction
- */
-struct perf_branch_entry {
- __u64 from;
- __u64 to;
- __u64 mispred:1, /* target mispredicted */
- predicted:1,/* target predicted */
- in_tx:1, /* in transaction */
- abort:1, /* transaction abort */
- reserved:60;
-};
-
-/*
* branch stack layout:
* nr: number of taken branches stored in entries[]
*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 078066daffd..f79ced71943 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1034,6 +1034,9 @@ struct task_struct {
#ifdef CONFIG_SMP
struct llist_node wake_entry;
int on_cpu;
+ struct task_struct *last_wakee;
+ unsigned long wakee_flips;
+ unsigned long wakee_flip_decay_ts;
#endif
int on_rq;
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index e5586caff67..2e7d9947a10 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -57,7 +57,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
TP_PROTO(struct task_struct *p, int success),
- TP_ARGS(p, success),
+ TP_ARGS(__perf_task(p), success),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
@@ -73,9 +73,6 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
__entry->prio = p->prio;
__entry->success = success;
__entry->target_cpu = task_cpu(p);
- )
- TP_perf_assign(
- __perf_task(p);
),
TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
@@ -313,7 +310,7 @@ DECLARE_EVENT_CLASS(sched_stat_template,
TP_PROTO(struct task_struct *tsk, u64 delay),
- TP_ARGS(tsk, delay),
+ TP_ARGS(__perf_task(tsk), __perf_count(delay)),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
@@ -325,10 +322,6 @@ DECLARE_EVENT_CLASS(sched_stat_template,
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->delay = delay;
- )
- TP_perf_assign(
- __perf_count(delay);
- __perf_task(tsk);
),
TP_printk("comm=%s pid=%d delay=%Lu [ns]",
@@ -372,11 +365,11 @@ DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
* Tracepoint for accounting runtime (time the task is executing
* on a CPU).
*/
-TRACE_EVENT(sched_stat_runtime,
+DECLARE_EVENT_CLASS(sched_stat_runtime,
TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
- TP_ARGS(tsk, runtime, vruntime),
+ TP_ARGS(tsk, __perf_count(runtime), vruntime),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
@@ -390,9 +383,6 @@ TRACE_EVENT(sched_stat_runtime,
__entry->pid = tsk->pid;
__entry->runtime = runtime;
__entry->vruntime = vruntime;
- )
- TP_perf_assign(
- __perf_count(runtime);
),
TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
@@ -401,6 +391,10 @@ TRACE_EVENT(sched_stat_runtime,
(unsigned long long)__entry->vruntime)
);
+DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
+ TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
+ TP_ARGS(tsk, runtime, vruntime));
+
/*
* Tracepoint for showing priority inheritance modifying a tasks
* priority.
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 41a6643e213..5c7ab17cbb0 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -507,8 +507,14 @@ static inline notrace int ftrace_get_offsets_##call( \
#undef TP_fast_assign
#define TP_fast_assign(args...) args
-#undef TP_perf_assign
-#define TP_perf_assign(args...)
+#undef __perf_addr
+#define __perf_addr(a) (a)
+
+#undef __perf_count
+#define __perf_count(c) (c)
+
+#undef __perf_task
+#define __perf_task(t) (t)
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
@@ -636,16 +642,13 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
#define __get_str(field) (char *)__get_dynamic_array(field)
#undef __perf_addr
-#define __perf_addr(a) __addr = (a)
+#define __perf_addr(a) (__addr = (a))
#undef __perf_count
-#define __perf_count(c) __count = (c)
+#define __perf_count(c) (__count = (c))
#undef __perf_task
-#define __perf_task(t) __task = (t)
-
-#undef TP_perf_assign
-#define TP_perf_assign(args...) args
+#define __perf_task(t) (__task = (t))
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
@@ -663,15 +666,20 @@ perf_trace_##call(void *__data, proto) \
int __data_size; \
int rctx; \
\
- perf_fetch_caller_regs(&__regs); \
- \
__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
+ \
+ head = this_cpu_ptr(event_call->perf_events); \
+ if (__builtin_constant_p(!__task) && !__task && \
+ hlist_empty(head)) \
+ return; \
+ \
__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
sizeof(u64)); \
__entry_size -= sizeof(u32); \
\
- entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare( \
- __entry_size, event_call->event.type, &__regs, &rctx); \
+ perf_fetch_caller_regs(&__regs); \
+ entry = perf_trace_buf_prepare(__entry_size, \
+ event_call->event.type, &__regs, &rctx); \
if (!entry) \
return; \
\
@@ -679,7 +687,6 @@ perf_trace_##call(void *__data, proto) \
\
{ assign; } \
\
- head = this_cpu_ptr(event_call->perf_events); \
perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
__count, &__regs, head, __task); \
}
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 0b1df41691e..ca1d90bcb74 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -109,6 +109,7 @@ enum perf_sw_ids {
PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
PERF_COUNT_SW_EMULATION_FAULTS = 8,
+ PERF_COUNT_SW_DUMMY = 9,
PERF_COUNT_SW_MAX, /* non-ABI */
};
@@ -134,8 +135,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_STACK_USER = 1U << 13,
PERF_SAMPLE_WEIGHT = 1U << 14,
PERF_SAMPLE_DATA_SRC = 1U << 15,
+ PERF_SAMPLE_IDENTIFIER = 1U << 16,
- PERF_SAMPLE_MAX = 1U << 16, /* non-ABI */
+ PERF_SAMPLE_MAX = 1U << 17, /* non-ABI */
};
/*
@@ -275,8 +277,9 @@ struct perf_event_attr {
exclude_callchain_kernel : 1, /* exclude kernel callchains */
exclude_callchain_user : 1, /* exclude user callchains */
+ mmap2 : 1, /* include mmap with inode data */
- __reserved_1 : 41;
+ __reserved_1 : 40;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -321,6 +324,7 @@ struct perf_event_attr {
#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
+#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) /* UAPI: use exported __u64, not kernel-internal u64 */
enum perf_event_ioc_flags {
PERF_IOC_FLAG_GROUP = 1U << 0,
@@ -375,9 +379,12 @@ struct perf_event_mmap_page {
__u64 time_running; /* time event on cpu */
union {
__u64 capabilities;
- __u64 cap_usr_time : 1,
- cap_usr_rdpmc : 1,
- cap_____res : 62;
+ struct {
+ __u64 cap_usr_time : 1,
+ cap_usr_rdpmc : 1,
+ cap_usr_time_zero : 1,
+ cap_____res : 61;
+ };
};
/*
@@ -418,12 +425,29 @@ struct perf_event_mmap_page {
__u16 time_shift;
__u32 time_mult;
__u64 time_offset;
+ /*
+ * If cap_usr_time_zero, the hardware clock (e.g. TSC) can be calculated
+ * from sample timestamps.
+ *
+ * time = timestamp - time_zero;
+ * quot = time / time_mult;
+ * rem = time % time_mult;
+ * cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
+ *
+ * And vice versa:
+ *
+ * quot = cyc >> time_shift;
+ * rem = cyc & ((1 << time_shift) - 1);
+ * timestamp = time_zero + quot * time_mult +
+ * ((rem * time_mult) >> time_shift);
+ */
+ __u64 time_zero;
/*
* Hole for extension of the self monitor capabilities
*/
- __u64 __reserved[120]; /* align to 1k */
+ __u64 __reserved[119]; /* align to 1k */
/*
* Control data for the mmap() data buffer.
@@ -471,13 +495,28 @@ enum perf_event_type {
/*
* If perf_event_attr.sample_id_all is set then all event types will
* have the sample_type selected fields related to where/when
- * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID)
- * described in PERF_RECORD_SAMPLE below, it will be stashed just after
- * the perf_event_header and the fields already present for the existing
- * fields, i.e. at the end of the payload. That way a newer perf.data
- * file will be supported by older perf tools, with these new optional
- * fields being ignored.
+ * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU,
+ * IDENTIFIER) described in PERF_RECORD_SAMPLE below, it will be stashed
+ * just after the perf_event_header and the fields already present for
+ * the existing fields, i.e. at the end of the payload. That way a newer
+ * perf.data file will be supported by older perf tools, with these new
+ * optional fields being ignored.
+ *
+ * struct sample_id {
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u64 id; } && PERF_SAMPLE_IDENTIFIER
+ * } && perf_event_attr::sample_id_all
*
+ * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. The
+ * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed
+ * relative to header.size.
+ */
+
+ /*
* The MMAP events record the PROT_EXEC mappings so that we can
* correlate userspace IPs to code. They have the following structure:
*
@@ -498,6 +537,7 @@ enum perf_event_type {
* struct perf_event_header header;
* u64 id;
* u64 lost;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_LOST = 2,
@@ -508,6 +548,7 @@ enum perf_event_type {
*
* u32 pid, tid;
* char comm[];
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_COMM = 3,
@@ -518,6 +559,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_EXIT = 4,
@@ -528,6 +570,7 @@ enum perf_event_type {
* u64 time;
* u64 id;
* u64 stream_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_THROTTLE = 5,
@@ -539,6 +582,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_FORK = 7,
@@ -549,6 +593,7 @@ enum perf_event_type {
* u32 pid, tid;
*
* struct read_format values;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_READ = 8,
@@ -557,6 +602,13 @@ enum perf_event_type {
* struct {
* struct perf_event_header header;
*
+ * #
+ * # Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.
+ * # The advantage of PERF_SAMPLE_IDENTIFIER is that its position
+ * # is fixed relative to header.
+ * #
+ *
+ * { u64 id; } && PERF_SAMPLE_IDENTIFIER
* { u64 ip; } && PERF_SAMPLE_IP
* { u32 pid, tid; } && PERF_SAMPLE_TID
* { u64 time; } && PERF_SAMPLE_TIME
@@ -596,11 +648,32 @@ enum perf_event_type {
* u64 dyn_size; } && PERF_SAMPLE_STACK_USER
*
* { u64 weight; } && PERF_SAMPLE_WEIGHT
- * { u64 data_src; } && PERF_SAMPLE_DATA_SRC
+ * { u64 data_src; } && PERF_SAMPLE_DATA_SRC
* };
*/
PERF_RECORD_SAMPLE = 9,
+ /*
+ * The MMAP2 records are an augmented version of MMAP, they add
+ * maj, min, ino numbers to be used to uniquely identify each mapping
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * u64 addr;
+ * u64 len;
+ * u64 pgoff;
+ * u32 maj;
+ * u32 min;
+ * u64 ino;
+ * u64 ino_generation;
+ * char filename[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_MMAP2 = 10,
+
PERF_RECORD_MAX, /* non-ABI */
};
@@ -685,4 +758,28 @@ union perf_mem_data_src {
#define PERF_MEM_S(a, s) \
(((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
+/*
+ * single taken branch record layout:
+ *
+ * from: source instruction (may not always be a branch insn)
+ * to: branch target
+ * mispred: branch target was mispredicted
+ * predicted: branch target was predicted
+ *
+ * support for mispred, predicted is optional. In case it
+ * is not supported mispred = predicted = 0.
+ *
+ * in_tx: running in a hardware transaction
+ * abort: aborting a hardware transaction
+ */
+struct perf_branch_entry {
+ __u64 from;
+ __u64 to;
+ __u64 mispred:1, /* target mispredicted */
+ predicted:1,/* target predicted */
+ in_tx:1, /* in transaction */
+ abort:1, /* transaction abort */
+ reserved:60;
+};
+
#endif /* _UAPI_LINUX_PERF_EVENT_H */