diff options
Diffstat (limited to 'tools/perf/util/session.c')
| -rw-r--r-- | tools/perf/util/session.c | 1804 | 
1 files changed, 1273 insertions, 531 deletions
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index fa9d652c2dc..64a186edc7b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1,419 +1,531 @@ -#define _FILE_OFFSET_BITS 64 -  #include <linux/kernel.h> +#include <traceevent/event-parse.h>  #include <byteswap.h>  #include <unistd.h>  #include <sys/types.h>  #include <sys/mman.h> +#include "evlist.h" +#include "evsel.h"  #include "session.h" +#include "tool.h"  #include "sort.h"  #include "util.h" +#include "cpumap.h" +#include "perf_regs.h" +#include "vdso.h" -static int perf_session__open(struct perf_session *self, bool force) +static int perf_session__open(struct perf_session *session)  { -	struct stat input_stat; - -	if (!strcmp(self->filename, "-")) { -		self->fd_pipe = true; -		self->fd = STDIN_FILENO; - -		if (perf_header__read(self, self->fd) < 0) -			pr_err("incompatible file format"); - -		return 0; -	} - -	self->fd = open(self->filename, O_RDONLY); -	if (self->fd < 0) { -		int err = errno; +	struct perf_data_file *file = session->file; -		pr_err("failed to open %s: %s", self->filename, strerror(err)); -		if (err == ENOENT && !strcmp(self->filename, "perf.data")) -			pr_err("  (try 'perf record' first)"); -		pr_err("\n"); -		return -errno; +	if (perf_session__read_header(session) < 0) { +		pr_err("incompatible file format (rerun with -v to learn more)"); +		return -1;  	} -	if (fstat(self->fd, &input_stat) < 0) -		goto out_close; +	if (perf_data_file__is_pipe(file)) +		return 0; -	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { -		pr_err("file %s not owned by current user or root\n", -		       self->filename); -		goto out_close; +	if (!perf_evlist__valid_sample_type(session->evlist)) { +		pr_err("non matching sample_type"); +		return -1;  	} -	if (!input_stat.st_size) { -		pr_info("zero-sized file (%s), nothing to do!\n", -			self->filename); -		goto out_close; +	if (!perf_evlist__valid_sample_id_all(session->evlist)) { +		pr_err("non matching sample_id_all"); +		return -1;  	} -	if (perf_header__read(self, self->fd) < 0) { -		pr_err("incompatible file format"); -		goto out_close; +	if (!perf_evlist__valid_read_format(session->evlist)) { +		pr_err("non matching read_format"); +		return -1;  	} -	self->size = input_stat.st_size;  	return 0; - -out_close: -	close(self->fd); -	self->fd = -1; -	return -1;  } -void perf_session__update_sample_type(struct perf_session *self) +void perf_session__set_id_hdr_size(struct perf_session *session)  { -	self->sample_type = perf_header__sample_type(&self->header); +	u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist); + +	machines__set_id_hdr_size(&session->machines, id_hdr_size);  } -int perf_session__create_kernel_maps(struct perf_session *self) +int perf_session__create_kernel_maps(struct perf_session *session)  { -	int ret = machine__create_kernel_maps(&self->host_machine); +	int ret = machine__create_kernel_maps(&session->machines.host);  	if (ret >= 0) -		ret = machines__create_guest_kernel_maps(&self->machines); +		ret = machines__create_guest_kernel_maps(&session->machines);  	return ret;  } -static void perf_session__destroy_kernel_maps(struct perf_session *self) +static void perf_session__destroy_kernel_maps(struct perf_session *session)  { -	machine__destroy_kernel_maps(&self->host_machine); -	machines__destroy_guest_kernel_maps(&self->machines); +	machines__destroy_kernel_maps(&session->machines);  } -struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe) +struct perf_session *perf_session__new(struct perf_data_file *file, +				       bool repipe, struct perf_tool *tool)  { -	size_t len = filename ? strlen(filename) + 1 : 0; -	struct perf_session *self = zalloc(sizeof(*self) + len); +	struct perf_session *session = zalloc(sizeof(*session)); -	if (self == NULL) +	if (!session)  		goto out; -	if (perf_header__init(&self->header) < 0) -		goto out_free; - -	memcpy(self->filename, filename, len); -	self->threads = RB_ROOT; -	INIT_LIST_HEAD(&self->dead_threads); -	self->hists_tree = RB_ROOT; -	self->last_match = NULL; -	self->mmap_window = 32; -	self->machines = RB_ROOT; -	self->repipe = repipe; -	INIT_LIST_HEAD(&self->ordered_samples.samples_head); -	machine__init(&self->host_machine, "", HOST_KERNEL_ID); - -	if (mode == O_RDONLY) { -		if (perf_session__open(self, force) < 0) +	session->repipe = repipe; +	INIT_LIST_HEAD(&session->ordered_samples.samples); +	INIT_LIST_HEAD(&session->ordered_samples.sample_cache); +	INIT_LIST_HEAD(&session->ordered_samples.to_free); +	machines__init(&session->machines); + +	if (file) { +		if (perf_data_file__open(file))  			goto out_delete; -	} else if (mode == O_WRONLY) { + +		session->file = file; + +		if (perf_data_file__is_read(file)) { +			if (perf_session__open(session) < 0) +				goto out_close; + +			perf_session__set_id_hdr_size(session); +		} +	} + +	if (!file || perf_data_file__is_write(file)) {  		/*  		 * In O_RDONLY mode this will be performed when reading the -		 * kernel MMAP event, in event__process_mmap(). +		 * kernel MMAP event, in perf_event__process_mmap().  		 */ -		if (perf_session__create_kernel_maps(self) < 0) +		if (perf_session__create_kernel_maps(session) < 0)  			goto out_delete;  	} -	perf_session__update_sample_type(self); -out: -	return self; -out_free: -	free(self); -	return NULL; -out_delete: -	perf_session__delete(self); +	if (tool && tool->ordering_requires_timestamps && +	    tool->ordered_samples && !perf_evlist__sample_id_all(session->evlist)) { +		dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n"); +		tool->ordered_samples = false; +	} + +	return session; + + out_close: +	perf_data_file__close(file); + out_delete: +	perf_session__delete(session); + out:  	return NULL;  } -static void perf_session__delete_dead_threads(struct perf_session *self) +static void perf_session__delete_dead_threads(struct perf_session *session)  { -	struct thread *n, *t; - -	list_for_each_entry_safe(t, n, &self->dead_threads, node) { -		list_del(&t->node); -		thread__delete(t); -	} +	machine__delete_dead_threads(&session->machines.host);  } -static void perf_session__delete_threads(struct perf_session *self) +static void perf_session__delete_threads(struct perf_session *session)  { -	struct rb_node *nd = rb_first(&self->threads); - -	while (nd) { -		struct thread *t = rb_entry(nd, struct thread, rb_node); - -		rb_erase(&t->rb_node, &self->threads); -		nd = rb_next(nd); -		thread__delete(t); -	} +	machine__delete_threads(&session->machines.host);  } -void perf_session__delete(struct perf_session *self) +static void perf_session_env__delete(struct perf_session_env *env)  { -	perf_header__exit(&self->header); -	perf_session__destroy_kernel_maps(self); -	perf_session__delete_dead_threads(self); -	perf_session__delete_threads(self); -	machine__exit(&self->host_machine); -	close(self->fd); -	free(self); +	zfree(&env->hostname); +	zfree(&env->os_release); +	zfree(&env->version); +	zfree(&env->arch); +	zfree(&env->cpu_desc); +	zfree(&env->cpuid); + +	zfree(&env->cmdline); +	zfree(&env->sibling_cores); +	zfree(&env->sibling_threads); +	zfree(&env->numa_nodes); +	zfree(&env->pmu_mappings);  } -void perf_session__remove_thread(struct perf_session *self, struct thread *th) +void perf_session__delete(struct perf_session *session)  { -	self->last_match = NULL; -	rb_erase(&th->rb_node, &self->threads); -	/* -	 * We may have references to this thread, for instance in some hist_entry -	 * instances, so just move them to a separate list. -	 */ -	list_add_tail(&th->node, &self->dead_threads); +	perf_session__destroy_kernel_maps(session); +	perf_session__delete_dead_threads(session); +	perf_session__delete_threads(session); +	perf_session_env__delete(&session->header.env); +	machines__exit(&session->machines); +	if (session->file) +		perf_data_file__close(session->file); +	free(session); +	vdso__exit();  } -static bool symbol__match_parent_regex(struct symbol *sym) +static int process_event_synth_tracing_data_stub(struct perf_tool *tool +						 __maybe_unused, +						 union perf_event *event +						 __maybe_unused, +						 struct perf_session *session +						__maybe_unused)  { -	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) -		return 1; - +	dump_printf(": unhandled!\n");  	return 0;  } -struct map_symbol *perf_session__resolve_callchain(struct perf_session *self, -						   struct thread *thread, -						   struct ip_callchain *chain, -						   struct symbol **parent) +static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused, +					 union perf_event *event __maybe_unused, +					 struct perf_evlist **pevlist +					 __maybe_unused)  { -	u8 cpumode = PERF_RECORD_MISC_USER; -	unsigned int i; -	struct map_symbol *syms = calloc(chain->nr, sizeof(*syms)); - -	if (!syms) -		return NULL; - -	for (i = 0; i < chain->nr; i++) { -		u64 ip = chain->ips[i]; -		struct addr_location al; - -		if (ip >= PERF_CONTEXT_MAX) { -			switch (ip) { -			case PERF_CONTEXT_HV: -				cpumode = PERF_RECORD_MISC_HYPERVISOR;	break; -			case PERF_CONTEXT_KERNEL: -				cpumode = PERF_RECORD_MISC_KERNEL;	break; -			case PERF_CONTEXT_USER: -				cpumode = PERF_RECORD_MISC_USER;	break; -			default: -				break; -			} -			continue; -		} - -		al.filtered = false; -		thread__find_addr_location(thread, self, cpumode, -				MAP__FUNCTION, thread->pid, ip, &al, NULL); -		if (al.sym != NULL) { -			if (sort__has_parent && !*parent && -			    symbol__match_parent_regex(al.sym)) -				*parent = al.sym; -			if (!symbol_conf.use_callchain) -				break; -			syms[i].map = al.map; -			syms[i].sym = al.sym; -		} -	} +	dump_printf(": unhandled!\n"); +	return 0; +} -	return syms; +static int process_event_sample_stub(struct perf_tool *tool __maybe_unused, +				     union perf_event *event __maybe_unused, +				     struct perf_sample *sample __maybe_unused, +				     struct perf_evsel *evsel __maybe_unused, +				     struct machine *machine __maybe_unused) +{ +	dump_printf(": unhandled!\n"); +	return 0;  } -static int process_event_stub(event_t *event __used, -			      struct perf_session *session __used) +static int process_event_stub(struct perf_tool *tool __maybe_unused, +			      union perf_event *event __maybe_unused, +			      struct perf_sample *sample __maybe_unused, +			      struct machine *machine __maybe_unused)  {  	dump_printf(": unhandled!\n");  	return 0;  } -static int process_finished_round_stub(event_t *event __used, -				       struct perf_session *session __used, -				       struct perf_event_ops *ops __used) +static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, +				       union perf_event *event __maybe_unused, +				       struct perf_session *perf_session +				       __maybe_unused)  {  	dump_printf(": unhandled!\n");  	return 0;  } -static int process_finished_round(event_t *event, -				  struct perf_session *session, -				  struct perf_event_ops *ops); - -static void perf_event_ops__fill_defaults(struct perf_event_ops *handler) -{ -	if (handler->sample == NULL) -		handler->sample = process_event_stub; -	if (handler->mmap == NULL) -		handler->mmap = process_event_stub; -	if (handler->comm == NULL) -		handler->comm = process_event_stub; -	if (handler->fork == NULL) -		handler->fork = process_event_stub; -	if (handler->exit == NULL) -		handler->exit = process_event_stub; -	if (handler->lost == NULL) -		handler->lost = process_event_stub; -	if (handler->read == NULL) -		handler->read = process_event_stub; -	if (handler->throttle == NULL) -		handler->throttle = process_event_stub; -	if (handler->unthrottle == NULL) -		handler->unthrottle = process_event_stub; -	if (handler->attr == NULL) -		handler->attr = process_event_stub; -	if (handler->event_type == NULL) -		handler->event_type = process_event_stub; -	if (handler->tracing_data == NULL) -		handler->tracing_data = process_event_stub; -	if (handler->build_id == NULL) -		handler->build_id = process_event_stub; -	if (handler->finished_round == NULL) { -		if (handler->ordered_samples) -			handler->finished_round = process_finished_round; +static int process_finished_round(struct perf_tool *tool, +				  union perf_event *event, +				  struct perf_session *session); + +void perf_tool__fill_defaults(struct perf_tool *tool) +{ +	if (tool->sample == NULL) +		tool->sample = process_event_sample_stub; +	if (tool->mmap == NULL) +		tool->mmap = process_event_stub; +	if (tool->mmap2 == NULL) +		tool->mmap2 = process_event_stub; +	if (tool->comm == NULL) +		tool->comm = process_event_stub; +	if (tool->fork == NULL) +		tool->fork = process_event_stub; +	if (tool->exit == NULL) +		tool->exit = process_event_stub; +	if (tool->lost == NULL) +		tool->lost = perf_event__process_lost; +	if (tool->read == NULL) +		tool->read = process_event_sample_stub; +	if (tool->throttle == NULL) +		tool->throttle = process_event_stub; +	if (tool->unthrottle == NULL) +		tool->unthrottle = process_event_stub; +	if (tool->attr == NULL) +		tool->attr = process_event_synth_attr_stub; +	if (tool->tracing_data == NULL) +		tool->tracing_data = process_event_synth_tracing_data_stub; +	if (tool->build_id == NULL) +		tool->build_id = process_finished_round_stub; +	if (tool->finished_round == NULL) { +		if (tool->ordered_samples) +			tool->finished_round = process_finished_round;  		else -			handler->finished_round = process_finished_round_stub; +			tool->finished_round = process_finished_round_stub;  	}  } +  +static void swap_sample_id_all(union perf_event *event, void *data) +{ +	void *end = (void *) event + event->header.size; +	int size = end - data; + +	BUG_ON(size % sizeof(u64)); +	mem_bswap_64(data, size); +} -void mem_bswap_64(void *src, int byte_size) +static void perf_event__all64_swap(union perf_event *event, +				   bool sample_id_all __maybe_unused)  { -	u64 *m = src; +	struct perf_event_header *hdr = &event->header; +	mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr)); +} + +static void perf_event__comm_swap(union perf_event *event, bool sample_id_all) +{ +	event->comm.pid = bswap_32(event->comm.pid); +	event->comm.tid = bswap_32(event->comm.tid); -	while (byte_size > 0) { -		*m = bswap_64(*m); -		byte_size -= sizeof(u64); -		++m; +	if (sample_id_all) { +		void *data = &event->comm.comm; + +		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64)); +		swap_sample_id_all(event, data);  	}  } -static void event__all64_swap(event_t *self) +static void perf_event__mmap_swap(union perf_event *event, +				  bool sample_id_all)  { -	struct perf_event_header *hdr = &self->header; -	mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr)); +	event->mmap.pid	  = bswap_32(event->mmap.pid); +	event->mmap.tid	  = bswap_32(event->mmap.tid); +	event->mmap.start = bswap_64(event->mmap.start); +	event->mmap.len	  = bswap_64(event->mmap.len); +	event->mmap.pgoff = bswap_64(event->mmap.pgoff); + +	if (sample_id_all) { +		void *data = &event->mmap.filename; + +		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64)); +		swap_sample_id_all(event, data); +	}  } -static void event__comm_swap(event_t *self) +static void perf_event__mmap2_swap(union perf_event *event, +				  bool sample_id_all) +{ +	event->mmap2.pid   = bswap_32(event->mmap2.pid); +	event->mmap2.tid   = bswap_32(event->mmap2.tid); +	event->mmap2.start = bswap_64(event->mmap2.start); +	event->mmap2.len   = bswap_64(event->mmap2.len); +	event->mmap2.pgoff = bswap_64(event->mmap2.pgoff); +	event->mmap2.maj   = bswap_32(event->mmap2.maj); +	event->mmap2.min   = bswap_32(event->mmap2.min); +	event->mmap2.ino   = bswap_64(event->mmap2.ino); + +	if (sample_id_all) { +		void *data = &event->mmap2.filename; + +		data += PERF_ALIGN(strlen(data) + 1, sizeof(u64)); +		swap_sample_id_all(event, data); +	} +} +static void perf_event__task_swap(union perf_event *event, bool sample_id_all)  { -	self->comm.pid = bswap_32(self->comm.pid); -	self->comm.tid = bswap_32(self->comm.tid); +	event->fork.pid	 = bswap_32(event->fork.pid); +	event->fork.tid	 = bswap_32(event->fork.tid); +	event->fork.ppid = bswap_32(event->fork.ppid); +	event->fork.ptid = bswap_32(event->fork.ptid); +	event->fork.time = bswap_64(event->fork.time); + +	if (sample_id_all) +		swap_sample_id_all(event, &event->fork + 1);  } -static void event__mmap_swap(event_t *self) +static void perf_event__read_swap(union perf_event *event, bool sample_id_all)  { -	self->mmap.pid	 = bswap_32(self->mmap.pid); -	self->mmap.tid	 = bswap_32(self->mmap.tid); -	self->mmap.start = bswap_64(self->mmap.start); -	self->mmap.len	 = bswap_64(self->mmap.len); -	self->mmap.pgoff = bswap_64(self->mmap.pgoff); +	event->read.pid		 = bswap_32(event->read.pid); +	event->read.tid		 = bswap_32(event->read.tid); +	event->read.value	 = bswap_64(event->read.value); +	event->read.time_enabled = bswap_64(event->read.time_enabled); +	event->read.time_running = bswap_64(event->read.time_running); +	event->read.id		 = bswap_64(event->read.id); + +	if (sample_id_all) +		swap_sample_id_all(event, &event->read + 1);  } -static void event__task_swap(event_t *self) +static void perf_event__throttle_swap(union perf_event *event, +				      bool sample_id_all)  { -	self->fork.pid	= bswap_32(self->fork.pid); -	self->fork.tid	= bswap_32(self->fork.tid); -	self->fork.ppid	= bswap_32(self->fork.ppid); -	self->fork.ptid	= bswap_32(self->fork.ptid); -	self->fork.time	= bswap_64(self->fork.time); +	event->throttle.time	  = bswap_64(event->throttle.time); +	event->throttle.id	  = bswap_64(event->throttle.id); +	event->throttle.stream_id = bswap_64(event->throttle.stream_id); + +	if (sample_id_all) +		swap_sample_id_all(event, &event->throttle + 1);  } -static void event__read_swap(event_t *self) +static u8 revbyte(u8 b)  { -	self->read.pid		= bswap_32(self->read.pid); -	self->read.tid		= bswap_32(self->read.tid); -	self->read.value	= bswap_64(self->read.value); -	self->read.time_enabled	= bswap_64(self->read.time_enabled); -	self->read.time_running	= bswap_64(self->read.time_running); -	self->read.id		= bswap_64(self->read.id); +	int rev = (b >> 4) | ((b & 0xf) << 4); +	rev = ((rev & 0xcc) >> 2) | ((rev & 0x33) << 2); +	rev = ((rev & 0xaa) >> 1) | ((rev & 0x55) << 1); +	return (u8) rev;  } -static void event__attr_swap(event_t *self) +/* + * XXX this is hack in attempt to carry flags bitfield + * throught endian village. ABI says: + * + * Bit-fields are allocated from right to left (least to most significant) + * on little-endian implementations and from left to right (most to least + * significant) on big-endian implementations. + * + * The above seems to be byte specific, so we need to reverse each + * byte of the bitfield. 'Internet' also says this might be implementation + * specific and we probably need proper fix and carry perf_event_attr + * bitfield flags in separate data file FEAT_ section. Thought this seems + * to work for now. + */ +static void swap_bitfield(u8 *p, unsigned len)  { -	size_t size; +	unsigned i; -	self->attr.attr.type		= bswap_32(self->attr.attr.type); -	self->attr.attr.size		= bswap_32(self->attr.attr.size); -	self->attr.attr.config		= bswap_64(self->attr.attr.config); -	self->attr.attr.sample_period	= bswap_64(self->attr.attr.sample_period); -	self->attr.attr.sample_type	= bswap_64(self->attr.attr.sample_type); -	self->attr.attr.read_format	= bswap_64(self->attr.attr.read_format); -	self->attr.attr.wakeup_events	= bswap_32(self->attr.attr.wakeup_events); -	self->attr.attr.bp_type		= bswap_32(self->attr.attr.bp_type); -	self->attr.attr.bp_addr		= bswap_64(self->attr.attr.bp_addr); -	self->attr.attr.bp_len		= bswap_64(self->attr.attr.bp_len); +	for (i = 0; i < len; i++) { +		*p = revbyte(*p); +		p++; +	} +} -	size = self->header.size; -	size -= (void *)&self->attr.id - (void *)self; -	mem_bswap_64(self->attr.id, size); +/* exported for swapping attributes in file header */ +void perf_event__attr_swap(struct perf_event_attr *attr) +{ +	attr->type		= bswap_32(attr->type); +	attr->size		= bswap_32(attr->size); +	attr->config		= bswap_64(attr->config); +	attr->sample_period	= bswap_64(attr->sample_period); +	attr->sample_type	= bswap_64(attr->sample_type); +	attr->read_format	= bswap_64(attr->read_format); +	attr->wakeup_events	= bswap_32(attr->wakeup_events); +	attr->bp_type		= bswap_32(attr->bp_type); +	attr->bp_addr		= bswap_64(attr->bp_addr); +	attr->bp_len		= bswap_64(attr->bp_len); +	attr->branch_sample_type = bswap_64(attr->branch_sample_type); +	attr->sample_regs_user	 = bswap_64(attr->sample_regs_user); +	attr->sample_stack_user  = bswap_32(attr->sample_stack_user); + +	swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64));  } -static void event__event_type_swap(event_t *self) +static void perf_event__hdr_attr_swap(union perf_event *event, +				      bool sample_id_all __maybe_unused)  { -	self->event_type.event_type.event_id = -		bswap_64(self->event_type.event_type.event_id); +	size_t size; + +	perf_event__attr_swap(&event->attr.attr); + +	size = event->header.size; +	size -= (void *)&event->attr.id - (void *)event; +	mem_bswap_64(event->attr.id, size);  } -static void event__tracing_data_swap(event_t *self) +static void perf_event__event_type_swap(union perf_event *event, +					bool sample_id_all __maybe_unused)  { -	self->tracing_data.size = bswap_32(self->tracing_data.size); +	event->event_type.event_type.event_id = +		bswap_64(event->event_type.event_type.event_id);  } -typedef void (*event__swap_op)(event_t *self); +static void perf_event__tracing_data_swap(union perf_event *event, +					  bool sample_id_all __maybe_unused) +{ +	event->tracing_data.size = bswap_32(event->tracing_data.size); +} -static event__swap_op event__swap_ops[] = { -	[PERF_RECORD_MMAP]   = event__mmap_swap, -	[PERF_RECORD_COMM]   = event__comm_swap, -	[PERF_RECORD_FORK]   = event__task_swap, -	[PERF_RECORD_EXIT]   = event__task_swap, -	[PERF_RECORD_LOST]   = event__all64_swap, -	[PERF_RECORD_READ]   = event__read_swap, -	[PERF_RECORD_SAMPLE] = event__all64_swap, -	[PERF_RECORD_HEADER_ATTR]   = event__attr_swap, -	[PERF_RECORD_HEADER_EVENT_TYPE]   = event__event_type_swap, -	[PERF_RECORD_HEADER_TRACING_DATA]   = event__tracing_data_swap, -	[PERF_RECORD_HEADER_BUILD_ID]   = NULL, -	[PERF_RECORD_HEADER_MAX]    = NULL, +typedef void (*perf_event__swap_op)(union perf_event *event, +				    bool sample_id_all); + +static perf_event__swap_op perf_event__swap_ops[] = { +	[PERF_RECORD_MMAP]		  = perf_event__mmap_swap, +	[PERF_RECORD_MMAP2]		  = perf_event__mmap2_swap, +	[PERF_RECORD_COMM]		  = perf_event__comm_swap, +	[PERF_RECORD_FORK]		  = perf_event__task_swap, +	[PERF_RECORD_EXIT]		  = perf_event__task_swap, +	[PERF_RECORD_LOST]		  = perf_event__all64_swap, +	[PERF_RECORD_READ]		  = perf_event__read_swap, +	[PERF_RECORD_THROTTLE]		  = perf_event__throttle_swap, +	[PERF_RECORD_UNTHROTTLE]	  = perf_event__throttle_swap, +	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap, +	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap, +	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap, +	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, +	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL, +	[PERF_RECORD_HEADER_MAX]	  = NULL,  };  struct sample_queue {  	u64			timestamp; -	struct sample_event	*event; +	u64			file_offset; +	union perf_event	*event;  	struct list_head	list;  }; -static void flush_sample_queue(struct perf_session *s, -			       struct perf_event_ops *ops) +static void perf_session_free_sample_buffers(struct perf_session *session) +{ +	struct ordered_samples *os = &session->ordered_samples; + +	while (!list_empty(&os->to_free)) { +		struct sample_queue *sq; + +		sq = list_entry(os->to_free.next, struct sample_queue, list); +		list_del(&sq->list); +		free(sq); +	} +} + +static int perf_session_deliver_event(struct perf_session *session, +				      union perf_event *event, +				      struct perf_sample *sample, +				      struct perf_tool *tool, +				      u64 file_offset); + +static int flush_sample_queue(struct perf_session *s, +		       struct perf_tool *tool)  { -	struct list_head *head = &s->ordered_samples.samples_head; -	u64 limit = s->ordered_samples.next_flush; +	struct ordered_samples *os = &s->ordered_samples; +	struct list_head *head = &os->samples;  	struct sample_queue *tmp, *iter; +	struct perf_sample sample; +	u64 limit = os->next_flush; +	u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL; +	bool show_progress = limit == ULLONG_MAX; +	struct ui_progress prog; +	int ret; + +	if (!tool->ordered_samples || !limit) +		return 0; -	if (!ops->ordered_samples || !limit) -		return; +	if (show_progress) +		ui_progress__init(&prog, os->nr_samples, "Processing time ordered events...");  	list_for_each_entry_safe(iter, tmp, head, list) { +		if (session_done()) +			return 0; +  		if (iter->timestamp > limit) -			return; +			break; + +		ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample); +		if (ret) +			pr_err("Can't parse sample, err = %d\n", ret); +		else { +			ret = perf_session_deliver_event(s, iter->event, &sample, tool, +							 iter->file_offset); +			if (ret) +				return ret; +		} -		if (iter == s->ordered_samples.last_inserted) -			s->ordered_samples.last_inserted = NULL; +		os->last_flush = iter->timestamp; +		list_del(&iter->list); +		list_add(&iter->list, &os->sample_cache); -		ops->sample((event_t *)iter->event, s); +		if (show_progress) +			ui_progress__update(&prog, 1); +	} -		s->ordered_samples.last_flush = iter->timestamp; -		list_del(&iter->list); -		free(iter->event); -		free(iter); +	if (list_empty(head)) { +		os->last_sample = NULL; +	} else if (last_ts <= limit) { +		os->last_sample = +			list_entry(head->prev, struct sample_queue, list);  	} + +	os->nr_samples = 0; + +	return 0;  }  /* @@ -455,200 +567,530 @@ static void flush_sample_queue(struct perf_session *s,   *      Flush every events below timestamp 7   *      etc...   */ -static int process_finished_round(event_t *event __used, -				  struct perf_session *session, -				  struct perf_event_ops *ops) +static int process_finished_round(struct perf_tool *tool, +				  union perf_event *event __maybe_unused, +				  struct perf_session *session)  { -	flush_sample_queue(session, ops); -	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp; +	int ret = flush_sample_queue(session, tool); +	if (!ret) +		session->ordered_samples.next_flush = session->ordered_samples.max_timestamp; -	return 0; +	return ret;  } -static void __queue_sample_end(struct sample_queue *new, struct list_head *head) +/* The queue is ordered by time */ +static void __queue_event(struct sample_queue *new, struct perf_session *s)  { -	struct sample_queue *iter; +	struct ordered_samples *os = &s->ordered_samples; +	struct sample_queue *sample = os->last_sample; +	u64 timestamp = new->timestamp; +	struct list_head *p; -	list_for_each_entry_reverse(iter, head, list) { -		if (iter->timestamp < new->timestamp) { -			list_add(&new->list, &iter->list); -			return; -		} +	++os->nr_samples; +	os->last_sample = new; + +	if (!sample) { +		list_add(&new->list, &os->samples); +		os->max_timestamp = timestamp; +		return;  	} -	list_add(&new->list, head); +	/* +	 * last_sample might point to some random place in the list as it's +	 * the last queued event. We expect that the new event is close to +	 * this. +	 */ +	if (sample->timestamp <= timestamp) { +		while (sample->timestamp <= timestamp) { +			p = sample->list.next; +			if (p == &os->samples) { +				list_add_tail(&new->list, &os->samples); +				os->max_timestamp = timestamp; +				return; +			} +			sample = list_entry(p, struct sample_queue, list); +		} +		list_add_tail(&new->list, &sample->list); +	} else { +		while (sample->timestamp > timestamp) { +			p = sample->list.prev; +			if (p == &os->samples) { +				list_add(&new->list, &os->samples); +				return; +			} +			sample = list_entry(p, struct sample_queue, list); +		} +		list_add(&new->list, &sample->list); +	}  } -static void __queue_sample_before(struct sample_queue *new, -				  struct sample_queue *iter, -				  struct list_head *head) +#define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct sample_queue)) + +int perf_session_queue_event(struct perf_session *s, union perf_event *event, +				    struct perf_sample *sample, u64 file_offset)  { -	list_for_each_entry_continue_reverse(iter, head, list) { -		if (iter->timestamp < new->timestamp) { -			list_add(&new->list, &iter->list); -			return; -		} +	struct ordered_samples *os = &s->ordered_samples; +	struct list_head *sc = &os->sample_cache; +	u64 timestamp = sample->time; +	struct sample_queue *new; + +	if (!timestamp || timestamp == ~0ULL) +		return -ETIME; + +	if (timestamp < s->ordered_samples.last_flush) { +		printf("Warning: Timestamp below last timeslice flush\n"); +		return -EINVAL; +	} + +	if (!list_empty(sc)) { +		new = list_entry(sc->next, struct sample_queue, list); +		list_del(&new->list); +	} else if (os->sample_buffer) { +		new = os->sample_buffer + os->sample_buffer_idx; +		if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER) +			os->sample_buffer = NULL; +	} else { +		os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new)); +		if (!os->sample_buffer) +			return -ENOMEM; +		list_add(&os->sample_buffer->list, &os->to_free); +		os->sample_buffer_idx = 2; +		new = os->sample_buffer + 1;  	} -	list_add(&new->list, head); +	new->timestamp = timestamp; +	new->file_offset = file_offset; +	new->event = event; + +	__queue_event(new, s); + +	return 0;  } -static void __queue_sample_after(struct sample_queue *new, -				 struct sample_queue *iter, -				 struct list_head *head) +static void callchain__printf(struct perf_sample *sample)  { -	list_for_each_entry_continue(iter, head, list) { -		if (iter->timestamp > new->timestamp) { -			list_add_tail(&new->list, &iter->list); -			return; -		} +	unsigned int i; + +	printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr); + +	for (i = 0; i < sample->callchain->nr; i++) +		printf("..... %2d: %016" PRIx64 "\n", +		       i, sample->callchain->ips[i]); +} + +static void branch_stack__printf(struct perf_sample *sample) +{ +	uint64_t i; + +	printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); + +	for (i = 0; i < sample->branch_stack->nr; i++) +		printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", +			i, sample->branch_stack->entries[i].from, +			sample->branch_stack->entries[i].to); +} + +static void regs_dump__printf(u64 mask, u64 *regs) +{ +	unsigned rid, i = 0; + +	for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) { +		u64 val = regs[i++]; + +		printf(".... %-5s 0x%" PRIx64 "\n", +		       perf_reg_name(rid), val);  	} -	list_add_tail(&new->list, head);  } -/* The queue is ordered by time */ -static void __queue_sample_event(struct sample_queue *new, -				 struct perf_session *s) +static void regs_user__printf(struct perf_sample *sample) +{ +	struct regs_dump *user_regs = &sample->user_regs; + +	if (user_regs->regs) { +		u64 mask = user_regs->mask; +		printf("... user regs: mask 0x%" PRIx64 "\n", mask); +		regs_dump__printf(mask, user_regs->regs); +	} +} + +static void stack_user__printf(struct stack_dump *dump)  { -	struct sample_queue *last_inserted = s->ordered_samples.last_inserted; -	struct list_head *head = &s->ordered_samples.samples_head; +	printf("... ustack: size %" PRIu64 ", offset 0x%x\n", +	       dump->size, dump->offset); +} +static void perf_session__print_tstamp(struct perf_session *session, +				       union perf_event *event, +				       struct perf_sample *sample) +{ +	u64 sample_type = __perf_evlist__combined_sample_type(session->evlist); -	if (!last_inserted) { -		__queue_sample_end(new, head); +	if (event->header.type != PERF_RECORD_SAMPLE && +	    !perf_evlist__sample_id_all(session->evlist)) { +		fputs("-1 -1 ", stdout);  		return;  	} -	/* -	 * Most of the time the current event has a timestamp -	 * very close to the last event inserted, unless we just switched -	 * to another event buffer. Having a sorting based on a list and -	 * on the last inserted event that is close to the current one is -	 * probably more efficient than an rbtree based sorting. -	 */ -	if (last_inserted->timestamp >= new->timestamp) -		__queue_sample_before(new, last_inserted, head); -	else -		__queue_sample_after(new, last_inserted, head); +	if ((sample_type & PERF_SAMPLE_CPU)) +		printf("%u ", sample->cpu); + +	if (sample_type & PERF_SAMPLE_TIME) +		printf("%" PRIu64 " ", sample->time);  } -static int queue_sample_event(event_t *event, struct sample_data *data, -			      struct perf_session *s) +static void sample_read__printf(struct perf_sample *sample, u64 read_format)  { -	u64 timestamp = data->time; -	struct sample_queue *new; +	printf("... sample_read:\n"); +	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) +		printf("...... time enabled %016" PRIx64 "\n", +		       sample->read.time_enabled); -	if (timestamp < s->ordered_samples.last_flush) { -		printf("Warning: Timestamp below last timeslice flush\n"); -		return -EINVAL; -	} +	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) +		printf("...... time running %016" PRIx64 "\n", +		       sample->read.time_running); -	new = malloc(sizeof(*new)); -	if (!new) -		return -ENOMEM; +	if (read_format & PERF_FORMAT_GROUP) { +		u64 i; -	new->timestamp = timestamp; +		printf(".... group nr %" PRIu64 "\n", sample->read.group.nr); -	new->event = malloc(event->header.size); -	if (!new->event) { -		free(new); -		return -ENOMEM; -	} +		for (i = 0; i < sample->read.group.nr; i++) { +			struct sample_read_value *value; -	memcpy(new->event, event, event->header.size); +			value = &sample->read.group.values[i]; +			printf("..... id %016" PRIx64 +			       ", value %016" PRIx64 "\n", +			       value->id, value->value); +		} +	} else +		printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n", +			sample->read.one.id, sample->read.one.value); +} -	__queue_sample_event(new, s); -	s->ordered_samples.last_inserted = new; +static void dump_event(struct perf_session *session, union perf_event *event, +		       u64 file_offset, struct perf_sample *sample) +{ +	if (!dump_trace) +		return; -	if (new->timestamp > s->ordered_samples.max_timestamp) -		s->ordered_samples.max_timestamp = new->timestamp; +	printf("\n%#" PRIx64 " [%#x]: event: %d\n", +	       file_offset, event->header.size, event->header.type); -	return 0; +	trace_event(event); + +	if (sample) +		perf_session__print_tstamp(session, event, sample); + +	printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset, +	       event->header.size, perf_event__name(event->header.type));  } -static int perf_session__process_sample(event_t *event, struct perf_session *s, -					struct perf_event_ops *ops) +static void dump_sample(struct perf_evsel *evsel, union perf_event *event, +			struct perf_sample *sample)  { -	struct sample_data data; +	u64 sample_type; -	if (!ops->ordered_samples) -		return ops->sample(event, s); +	if (!dump_trace) +		return; -	bzero(&data, sizeof(struct sample_data)); -	event__parse_sample(event, s->sample_type, &data); +	printf("(IP, 0x%x): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n", +	       event->header.misc, sample->pid, sample->tid, sample->ip, +	       sample->period, sample->addr); -	queue_sample_event(event, &data, s); +	sample_type = evsel->attr.sample_type; -	return 0; +	if (sample_type & PERF_SAMPLE_CALLCHAIN) +		callchain__printf(sample); + +	if (sample_type & PERF_SAMPLE_BRANCH_STACK) +		branch_stack__printf(sample); + +	if (sample_type & PERF_SAMPLE_REGS_USER) +		regs_user__printf(sample); + +	if (sample_type & PERF_SAMPLE_STACK_USER) +		stack_user__printf(&sample->user_stack); + +	if (sample_type & PERF_SAMPLE_WEIGHT) +		printf("... weight: %" PRIu64 "\n", sample->weight); + +	if (sample_type & PERF_SAMPLE_DATA_SRC) +		printf(" . data_src: 0x%"PRIx64"\n", sample->data_src); + +	if (sample_type & PERF_SAMPLE_TRANSACTION) +		printf("... transaction: %" PRIx64 "\n", sample->transaction); + +	if (sample_type & PERF_SAMPLE_READ) +		sample_read__printf(sample, evsel->attr.read_format);  } -static int perf_session__process_event(struct perf_session *self, -				       event_t *event, -				       struct perf_event_ops *ops, -				       u64 offset, u64 head) +static struct machine * +	perf_session__find_machine_for_cpumode(struct perf_session *session, +					       union perf_event *event, +					       struct perf_sample *sample)  { -	trace_event(event); +	const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; +	struct machine *machine; + +	if (perf_guest && +	    ((cpumode == PERF_RECORD_MISC_GUEST_KERNEL) || +	     (cpumode == PERF_RECORD_MISC_GUEST_USER))) { +		u32 pid; + +		if (event->header.type == PERF_RECORD_MMAP +		    || event->header.type == PERF_RECORD_MMAP2) +			pid = event->mmap.pid; +		else +			pid = sample->pid; + +		machine = perf_session__find_machine(session, pid); +		if (!machine) +			machine = perf_session__findnew_machine(session, +						DEFAULT_GUEST_KERNEL_ID); +		return machine; +	} + +	return &session->machines.host; +} + +static int deliver_sample_value(struct perf_session *session, +				struct perf_tool *tool, +				union perf_event *event, +				struct perf_sample *sample, +				struct sample_read_value *v, +				struct machine *machine) +{ +	struct perf_sample_id *sid; + +	sid = perf_evlist__id2sid(session->evlist, v->id); +	if (sid) { +		sample->id     = v->id; +		sample->period = v->value - sid->period; +		sid->period    = v->value; +	} + +	if (!sid || sid->evsel == NULL) { +		++session->stats.nr_unknown_id; +		return 0; +	} + +	return tool->sample(tool, event, sample, sid->evsel, machine); +} + +static int deliver_sample_group(struct perf_session *session, +				struct perf_tool *tool, +				union  perf_event *event, +				struct perf_sample *sample, +				struct machine *machine) +{ +	int ret = -EINVAL; +	u64 i; + +	for (i = 0; i < sample->read.group.nr; i++) { +		ret = deliver_sample_value(session, tool, event, sample, +					   &sample->read.group.values[i], +					   machine); +		if (ret) +			break; +	} + +	return ret; +} -	if (event->header.type < PERF_RECORD_HEADER_MAX) { -		dump_printf("%#Lx [%#x]: PERF_RECORD_%s", -			    offset + head, event->header.size, -			    event__name[event->header.type]); -		hists__inc_nr_events(&self->hists, event->header.type); +static int +perf_session__deliver_sample(struct perf_session *session, +			     struct perf_tool *tool, +			     union  perf_event *event, +			     struct perf_sample *sample, +			     struct perf_evsel *evsel, +			     struct machine *machine) +{ +	/* We know evsel != NULL. */ +	u64 sample_type = evsel->attr.sample_type; +	u64 read_format = evsel->attr.read_format; + +	/* Standard sample delievery. */ +	if (!(sample_type & PERF_SAMPLE_READ)) +		return tool->sample(tool, event, sample, evsel, machine); + +	/* For PERF_SAMPLE_READ we have either single or group mode. */ +	if (read_format & PERF_FORMAT_GROUP) +		return deliver_sample_group(session, tool, event, sample, +					    machine); +	else +		return deliver_sample_value(session, tool, event, sample, +					    &sample->read.one, machine); +} + +static int perf_session_deliver_event(struct perf_session *session, +				      union perf_event *event, +				      struct perf_sample *sample, +				      struct perf_tool *tool, +				      u64 file_offset) +{ +	struct perf_evsel *evsel; +	struct machine *machine; + +	dump_event(session, event, file_offset, sample); + +	evsel = perf_evlist__id2evsel(session->evlist, sample->id); +	if (evsel != NULL && event->header.type != PERF_RECORD_SAMPLE) { +		/* +		 * XXX We're leaving PERF_RECORD_SAMPLE unnacounted here +		 * because the tools right now may apply filters, discarding +		 * some of the samples. For consistency, in the future we +		 * should have something like nr_filtered_samples and remove +		 * the sample->period from total_sample_period, etc, KISS for +		 * now tho. +		 * +		 * Also testing against NULL allows us to handle files without +		 * attr.sample_id_all and/or without PERF_SAMPLE_ID. In the +		 * future probably it'll be a good idea to restrict event +		 * processing via perf_session to files with both set. +		 */ +		hists__inc_nr_events(&evsel->hists, event->header.type);  	} -	if (self->header.needs_swap && event__swap_ops[event->header.type]) -		event__swap_ops[event->header.type](event); +	machine = perf_session__find_machine_for_cpumode(session, event, +							 sample);  	switch (event->header.type) {  	case PERF_RECORD_SAMPLE: -		return perf_session__process_sample(event, self, ops); +		dump_sample(evsel, event, sample); +		if (evsel == NULL) { +			++session->stats.nr_unknown_id; +			return 0; +		} +		if (machine == NULL) { +			++session->stats.nr_unprocessable_samples; +			return 0; +		} +		return perf_session__deliver_sample(session, tool, event, +						    sample, evsel, machine);  	case PERF_RECORD_MMAP: -		return ops->mmap(event, self); +		return tool->mmap(tool, event, sample, machine); +	case PERF_RECORD_MMAP2: +		return tool->mmap2(tool, event, sample, machine);  	case PERF_RECORD_COMM: -		return ops->comm(event, self); +		return tool->comm(tool, event, sample, machine);  	case PERF_RECORD_FORK: -		return ops->fork(event, self); +		return tool->fork(tool, event, sample, machine);  	case PERF_RECORD_EXIT: -		return ops->exit(event, self); +		return tool->exit(tool, event, sample, machine);  	case PERF_RECORD_LOST: -		return ops->lost(event, self); +		if (tool->lost == perf_event__process_lost) +			session->stats.total_lost += event->lost.lost; +		return tool->lost(tool, event, sample, machine);  	case PERF_RECORD_READ: -		return ops->read(event, self); +		return tool->read(tool, event, sample, evsel, machine);  	case PERF_RECORD_THROTTLE: -		return ops->throttle(event, self); +		return tool->throttle(tool, event, sample, machine);  	case PERF_RECORD_UNTHROTTLE: -		return ops->unthrottle(event, self); +		return tool->unthrottle(tool, event, sample, machine); +	default: +		++session->stats.nr_unknown_events; +		return -1; +	} +} + +static int perf_session__process_user_event(struct perf_session *session, union perf_event *event, +					    struct perf_tool *tool, u64 file_offset) +{ +	int fd = perf_data_file__fd(session->file); +	int err; + +	dump_event(session, event, file_offset, NULL); + +	/* These events are processed right away */ +	switch (event->header.type) {  	case PERF_RECORD_HEADER_ATTR: -		return ops->attr(event, self); +		err = tool->attr(tool, event, &session->evlist); +		if (err == 0) +			perf_session__set_id_hdr_size(session); +		return err;  	case PERF_RECORD_HEADER_EVENT_TYPE: -		return ops->event_type(event, self); +		/* +		 * Depreceated, but we need to handle it for sake +		 * of old data files create in pipe mode. +		 */ +		return 0;  	case PERF_RECORD_HEADER_TRACING_DATA:  		/* setup for reading amidst mmap */ -		lseek(self->fd, offset + head, SEEK_SET); -		return ops->tracing_data(event, self); +		lseek(fd, file_offset, SEEK_SET); +		return tool->tracing_data(tool, event, session);  	case PERF_RECORD_HEADER_BUILD_ID: -		return ops->build_id(event, self); +		return tool->build_id(tool, event, session);  	case PERF_RECORD_FINISHED_ROUND: -		return ops->finished_round(event, self, ops); +		return tool->finished_round(tool, event, session);  	default: -		++self->hists.stats.nr_unknown_events; -		return -1; +		return -EINVAL; +	} +} + +static void event_swap(union perf_event *event, bool sample_id_all) +{ +	perf_event__swap_op swap; + +	swap = perf_event__swap_ops[event->header.type]; +	if (swap) +		swap(event, sample_id_all); +} + +static int perf_session__process_event(struct perf_session *session, +				       union perf_event *event, +				       struct perf_tool *tool, +				       u64 file_offset) +{ +	struct perf_sample sample; +	int ret; + +	if (session->header.needs_swap) +		event_swap(event, perf_evlist__sample_id_all(session->evlist)); + +	if (event->header.type >= PERF_RECORD_HEADER_MAX) +		return -EINVAL; + +	events_stats__inc(&session->stats, event->header.type); + +	if (event->header.type >= PERF_RECORD_USER_TYPE_START) +		return perf_session__process_user_event(session, event, tool, file_offset); + +	/* +	 * For all kernel events we get the sample data +	 */ +	ret = perf_evlist__parse_sample(session->evlist, event, &sample); +	if (ret) +		return ret; + +	if (tool->ordered_samples) { +		ret = perf_session_queue_event(session, event, &sample, +					       file_offset); +		if (ret != -ETIME) +			return ret;  	} + +	return perf_session_deliver_event(session, event, &sample, tool, +					  file_offset); +} + +void perf_event_header__bswap(struct perf_event_header *hdr) +{ +	hdr->type = bswap_32(hdr->type); +	hdr->misc = bswap_16(hdr->misc); +	hdr->size = bswap_16(hdr->size);  } -void perf_event_header__bswap(struct perf_event_header *self) +struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)  { -	self->type = bswap_32(self->type); -	self->misc = bswap_16(self->misc); -	self->size = bswap_16(self->size); +	return machine__findnew_thread(&session->machines.host, 0, pid);  } -static struct thread *perf_session__register_idle_thread(struct perf_session *self) +static struct thread *perf_session__register_idle_thread(struct perf_session *session)  { -	struct thread *thread = perf_session__findnew(self, 0); +	struct thread *thread = perf_session__findnew(session, 0); -	if (thread == NULL || thread__set_comm(thread, "swapper")) { +	if (thread == NULL || thread__set_comm(thread, "swapper", 0)) {  		pr_err("problem inserting idle task.\n");  		thread = NULL;  	} @@ -656,41 +1098,71 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se  	return thread;  } -int do_read(int fd, void *buf, size_t size) +static void perf_session__warn_about_errors(const struct perf_session *session, +					    const struct perf_tool *tool)  { -	void *buf_start = buf; - -	while (size) { -		int ret = read(fd, buf, size); +	if (tool->lost == perf_event__process_lost && +	    session->stats.nr_events[PERF_RECORD_LOST] != 0) { +		ui__warning("Processed %d events and lost %d chunks!\n\n" +			    "Check IO/CPU overload!\n\n", +			    session->stats.nr_events[0], +			    session->stats.nr_events[PERF_RECORD_LOST]); +	} -		if (ret <= 0) -			return ret; +	if (session->stats.nr_unknown_events != 0) { +		ui__warning("Found %u unknown events!\n\n" +			    "Is this an older tool processing a perf.data " +			    "file generated by a more recent tool?\n\n" +			    "If that is not the case, consider " +			    "reporting to linux-kernel@vger.kernel.org.\n\n", +			    session->stats.nr_unknown_events); +	} -		size -= ret; -		buf += ret; +	if (session->stats.nr_unknown_id != 0) { +		ui__warning("%u samples with id not present in the header\n", +			    session->stats.nr_unknown_id);  	} -	return buf - buf_start; + 	if (session->stats.nr_invalid_chains != 0) { + 		ui__warning("Found invalid callchains!\n\n" + 			    "%u out of %u events were discarded for this reason.\n\n" + 			    "Consider reporting to linux-kernel@vger.kernel.org.\n\n", + 			    session->stats.nr_invalid_chains, + 			    session->stats.nr_events[PERF_RECORD_SAMPLE]); + 	} + +	if (session->stats.nr_unprocessable_samples != 0) { +		ui__warning("%u unprocessable samples recorded.\n" +			    "Do you have a KVM guest running and not using 'perf kvm'?\n", +			    session->stats.nr_unprocessable_samples); +	}  } -#define session_done()	(*(volatile int *)(&session_done))  volatile int session_done; -static int __perf_session__process_pipe_events(struct perf_session *self, -					       struct perf_event_ops *ops) +static int __perf_session__process_pipe_events(struct perf_session *session, +					       struct perf_tool *tool)  { -	event_t event; -	uint32_t size; +	int fd = perf_data_file__fd(session->file); +	union perf_event *event; +	uint32_t size, cur_size = 0; +	void *buf = NULL;  	int skip = 0;  	u64 head; -	int err; +	ssize_t err;  	void *p; -	perf_event_ops__fill_defaults(ops); +	perf_tool__fill_defaults(tool);  	head = 0; +	cur_size = sizeof(union perf_event); + +	buf = malloc(cur_size); +	if (!buf) +		return -errno;  more: -	err = do_read(self->fd, &event, sizeof(struct perf_event_header)); +	event = buf; +	err = readn(fd, event, sizeof(struct perf_event_header));  	if (err <= 0) {  		if (err == 0)  			goto done; @@ -699,19 +1171,30 @@ more:  		goto out_err;  	} -	if (self->header.needs_swap) -		perf_event_header__bswap(&event.header); +	if (session->header.needs_swap) +		perf_event_header__bswap(&event->header); -	size = event.header.size; -	if (size == 0) -		size = 8; +	size = event->header.size; +	if (size < sizeof(struct perf_event_header)) { +		pr_err("bad event header size\n"); +		goto out_err; +	} -	p = &event; +	if (size > cur_size) { +		void *new = realloc(buf, size); +		if (!new) { +			pr_err("failed to allocate memory to read event\n"); +			goto out_err; +		} +		buf = new; +		cur_size = size; +		event = buf; +	} +	p = event;  	p += sizeof(struct perf_event_header);  	if (size - sizeof(struct perf_event_header)) { -		err = do_read(self->fd, p, -			      size - sizeof(struct perf_event_header)); +		err = readn(fd, p, size - sizeof(struct perf_event_header));  		if (err <= 0) {  			if (err == 0) {  				pr_err("unexpected end of event stream\n"); @@ -723,170 +1206,202 @@ more:  		}  	} -	if (size == 0 || -	    (skip = perf_session__process_event(self, &event, ops, -						0, head)) < 0) { -		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", -			    head, event.header.size, event.header.type); -		/* -		 * assume we lost track of the stream, check alignment, and -		 * increment a single u64 in the hope to catch on again 'soon'. -		 */ -		if (unlikely(head & 7)) -			head &= ~7ULL; - -		size = 8; +	if ((skip = perf_session__process_event(session, event, tool, head)) < 0) { +		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", +		       head, event->header.size, event->header.type); +		err = -EINVAL; +		goto out_err;  	}  	head += size; -	dump_printf("\n%#Lx [%#x]: event: %d\n", -		    head, event.header.size, event.header.type); -  	if (skip > 0)  		head += skip;  	if (!session_done())  		goto more;  done: -	err = 0; +	/* do the final flush for ordered samples */ +	session->ordered_samples.next_flush = ULLONG_MAX; +	err = flush_sample_queue(session, tool);  out_err: +	free(buf); +	perf_session__warn_about_errors(session, tool); +	perf_session_free_sample_buffers(session);  	return err;  } -int __perf_session__process_events(struct perf_session *self, +static union perf_event * +fetch_mmaped_event(struct perf_session *session, +		   u64 head, size_t mmap_size, char *buf) +{ +	union perf_event *event; + +	/* +	 * Ensure we have enough space remaining to read +	 * the size of the event in the headers. +	 */ +	if (head + sizeof(event->header) > mmap_size) +		return NULL; + +	event = (union perf_event *)(buf + head); + +	if (session->header.needs_swap) +		perf_event_header__bswap(&event->header); + +	if (head + event->header.size > mmap_size) { +		/* We're not fetching the event so swap back again */ +		if (session->header.needs_swap) +			perf_event_header__bswap(&event->header); +		return NULL; +	} + +	return event; +} + +/* + * On 64bit we can mmap the data file in one go. No need for tiny mmap + * slices. On 32bit we use 32MB. + */ +#if BITS_PER_LONG == 64 +#define MMAP_SIZE ULLONG_MAX +#define NUM_MMAPS 1 +#else +#define MMAP_SIZE (32 * 1024 * 1024ULL) +#define NUM_MMAPS 128 +#endif + +int __perf_session__process_events(struct perf_session *session,  				   u64 data_offset, u64 data_size, -				   u64 file_size, struct perf_event_ops *ops) +				   u64 file_size, struct perf_tool *tool)  { -	int err, mmap_prot, mmap_flags; -	u64 head, shift; -	u64 offset = 0; -	size_t	page_size; -	event_t *event; +	int fd = perf_data_file__fd(session->file); +	u64 head, page_offset, file_offset, file_pos; +	int err, mmap_prot, mmap_flags, map_idx = 0; +	size_t	mmap_size; +	char *buf, *mmaps[NUM_MMAPS]; +	union perf_event *event;  	uint32_t size; -	char *buf; -	struct ui_progress *progress = ui_progress__new("Processing events...", -							self->size); -	if (progress == NULL) -		return -1; +	struct ui_progress prog; + +	perf_tool__fill_defaults(tool); -	perf_event_ops__fill_defaults(ops); +	page_offset = page_size * (data_offset / page_size); +	file_offset = page_offset; +	head = data_offset - page_offset; -	page_size = sysconf(_SC_PAGESIZE); +	if (data_size && (data_offset + data_size < file_size)) +		file_size = data_offset + data_size; -	head = data_offset; -	shift = page_size * (head / page_size); -	offset += shift; -	head -= shift; +	ui_progress__init(&prog, file_size, "Processing events..."); + +	mmap_size = MMAP_SIZE; +	if (mmap_size > file_size) +		mmap_size = file_size; + +	memset(mmaps, 0, sizeof(mmaps));  	mmap_prot  = PROT_READ;  	mmap_flags = MAP_SHARED; -	if (self->header.needs_swap) { +	if (session->header.needs_swap) {  		mmap_prot  |= PROT_WRITE;  		mmap_flags = MAP_PRIVATE;  	}  remap: -	buf = mmap(NULL, page_size * self->mmap_window, mmap_prot, -		   mmap_flags, self->fd, offset); +	buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, fd, +		   file_offset);  	if (buf == MAP_FAILED) {  		pr_err("failed to mmap file\n");  		err = -errno;  		goto out_err;  	} +	mmaps[map_idx] = buf; +	map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1); +	file_pos = file_offset + head;  more: -	event = (event_t *)(buf + head); -	ui_progress__update(progress, offset); - -	if (self->header.needs_swap) -		perf_event_header__bswap(&event->header); -	size = event->header.size; -	if (size == 0) -		size = 8; - -	if (head + event->header.size >= page_size * self->mmap_window) { -		int munmap_ret; - -		shift = page_size * (head / page_size); - -		munmap_ret = munmap(buf, page_size * self->mmap_window); -		assert(munmap_ret == 0); +	event = fetch_mmaped_event(session, head, mmap_size, buf); +	if (!event) { +		if (mmaps[map_idx]) { +			munmap(mmaps[map_idx], mmap_size); +			mmaps[map_idx] = NULL; +		} -		offset += shift; -		head -= shift; +		page_offset = page_size * (head / page_size); +		file_offset += page_offset; +		head -= page_offset;  		goto remap;  	}  	size = event->header.size; -	dump_printf("\n%#Lx [%#x]: event: %d\n", -		    offset + head, event->header.size, event->header.type); - -	if (size == 0 || -	    perf_session__process_event(self, event, ops, offset, head) < 0) { -		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n", -			    offset + head, event->header.size, -			    event->header.type); -		/* -		 * assume we lost track of the stream, check alignment, and -		 * increment a single u64 in the hope to catch on again 'soon'. -		 */ -		if (unlikely(head & 7)) -			head &= ~7ULL; - -		size = 8; +	if (size < sizeof(struct perf_event_header) || +	    perf_session__process_event(session, event, tool, file_pos) < 0) { +		pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", +		       file_offset + head, event->header.size, +		       event->header.type); +		err = -EINVAL; +		goto out_err;  	}  	head += size; +	file_pos += size; -	if (offset + head >= data_offset + data_size) -		goto done; +	ui_progress__update(&prog, size); + +	if (session_done()) +		goto out; -	if (offset + head < file_size) +	if (file_pos < file_size)  		goto more; -done: -	err = 0; + +out:  	/* do the final flush for ordered samples */ -	self->ordered_samples.next_flush = ULLONG_MAX; -	flush_sample_queue(self, ops); +	session->ordered_samples.next_flush = ULLONG_MAX; +	err = flush_sample_queue(session, tool);  out_err: -	ui_progress__delete(progress); +	ui_progress__finish(); +	perf_session__warn_about_errors(session, tool); +	perf_session_free_sample_buffers(session);  	return err;  } -int perf_session__process_events(struct perf_session *self, -				 struct perf_event_ops *ops) +int perf_session__process_events(struct perf_session *session, +				 struct perf_tool *tool)  { +	u64 size = perf_data_file__size(session->file);  	int err; -	if (perf_session__register_idle_thread(self) == NULL) +	if (perf_session__register_idle_thread(session) == NULL)  		return -ENOMEM; -	if (!self->fd_pipe) -		err = __perf_session__process_events(self, -						     self->header.data_offset, -						     self->header.data_size, -						     self->size, ops); +	if (!perf_data_file__is_pipe(session->file)) +		err = __perf_session__process_events(session, +						     session->header.data_offset, +						     session->header.data_size, +						     size, tool);  	else -		err = __perf_session__process_pipe_events(self, ops); +		err = __perf_session__process_pipe_events(session, tool);  	return err;  } -bool perf_session__has_traces(struct perf_session *self, const char *msg) +bool perf_session__has_traces(struct perf_session *session, const char *msg)  { -	if (!(self->sample_type & PERF_SAMPLE_RAW)) { -		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg); -		return false; +	struct perf_evsel *evsel; + +	evlist__for_each(session->evlist, evsel) { +		if (evsel->attr.type == PERF_TYPE_TRACEPOINT) +			return true;  	} -	return true; +	pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg); +	return false;  } -int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps, -					     const char *symbol_name, -					     u64 addr) +int maps__set_kallsyms_ref_reloc_sym(struct map **maps, +				     const char *symbol_name, u64 addr)  {  	char *bracket;  	enum map_type i; @@ -916,16 +1431,243 @@ int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,  	return 0;  } -size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp) +size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp) +{ +	return machines__fprintf_dsos(&session->machines, fp); +} + +size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp, +					  bool (skip)(struct dso *dso, int parm), int parm) +{ +	return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm); +} + +size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) +{ +	struct perf_evsel *pos; +	size_t ret = fprintf(fp, "Aggregated stats:\n"); + +	ret += events_stats__fprintf(&session->stats, fp); + +	evlist__for_each(session->evlist, pos) { +		ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos)); +		ret += events_stats__fprintf(&pos->hists.stats, fp); +	} + +	return ret; +} + +size_t perf_session__fprintf(struct perf_session *session, FILE *fp) +{ +	/* +	 * FIXME: Here we have to actually print all the machines in this +	 * session, not just the host... +	 */ +	return machine__fprintf(&session->machines.host, fp); +} + +struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, +					      unsigned int type) +{ +	struct perf_evsel *pos; + +	evlist__for_each(session->evlist, pos) { +		if (pos->attr.type == type) +			return pos; +	} +	return NULL; +} + +void perf_evsel__print_ip(struct perf_evsel *evsel, struct perf_sample *sample, +			  struct addr_location *al, +			  unsigned int print_opts, unsigned int stack_depth) +{ +	struct callchain_cursor_node *node; +	int print_ip = print_opts & PRINT_IP_OPT_IP; +	int print_sym = print_opts & PRINT_IP_OPT_SYM; +	int print_dso = print_opts & PRINT_IP_OPT_DSO; +	int print_symoffset = print_opts & PRINT_IP_OPT_SYMOFFSET; +	int print_oneline = print_opts & PRINT_IP_OPT_ONELINE; +	int print_srcline = print_opts & PRINT_IP_OPT_SRCLINE; +	char s = print_oneline ? ' ' : '\t'; + +	if (symbol_conf.use_callchain && sample->callchain) { +		struct addr_location node_al; + +		if (machine__resolve_callchain(al->machine, evsel, al->thread, +					       sample, NULL, NULL, +					       PERF_MAX_STACK_DEPTH) != 0) { +			if (verbose) +				error("Failed to resolve callchain. Skipping\n"); +			return; +		} +		callchain_cursor_commit(&callchain_cursor); + +		if (print_symoffset) +			node_al = *al; + +		while (stack_depth) { +			u64 addr = 0; + +			node = callchain_cursor_current(&callchain_cursor); +			if (!node) +				break; + +			if (node->sym && node->sym->ignore) +				goto next; + +			if (print_ip) +				printf("%c%16" PRIx64, s, node->ip); + +			if (node->map) +				addr = node->map->map_ip(node->map, node->ip); + +			if (print_sym) { +				printf(" "); +				if (print_symoffset) { +					node_al.addr = addr; +					node_al.map  = node->map; +					symbol__fprintf_symname_offs(node->sym, &node_al, stdout); +				} else +					symbol__fprintf_symname(node->sym, stdout); +			} + +			if (print_dso) { +				printf(" ("); +				map__fprintf_dsoname(node->map, stdout); +				printf(")"); +			} + +			if (print_srcline) +				map__fprintf_srcline(node->map, addr, "\n  ", +						     stdout); + +			if (!print_oneline) +				printf("\n"); + +			stack_depth--; +next: +			callchain_cursor_advance(&callchain_cursor); +		} + +	} else { +		if (al->sym && al->sym->ignore) +			return; + +		if (print_ip) +			printf("%16" PRIx64, sample->ip); + +		if (print_sym) { +			printf(" "); +			if (print_symoffset) +				symbol__fprintf_symname_offs(al->sym, al, +							     stdout); +			else +				symbol__fprintf_symname(al->sym, stdout); +		} + +		if (print_dso) { +			printf(" ("); +			map__fprintf_dsoname(al->map, stdout); +			printf(")"); +		} + +		if (print_srcline) +			map__fprintf_srcline(al->map, al->addr, "\n  ", stdout); +	} +} + +int perf_session__cpu_bitmap(struct perf_session *session, +			     const char *cpu_list, unsigned long *cpu_bitmap)  { -	return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) + -	       __dsos__fprintf(&self->host_machine.user_dsos, fp) + -	       machines__fprintf_dsos(&self->machines, fp); +	int i, err = -1; +	struct cpu_map *map; + +	for (i = 0; i < PERF_TYPE_MAX; ++i) { +		struct perf_evsel *evsel; + +		evsel = perf_session__find_first_evtype(session, i); +		if (!evsel) +			continue; + +		if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) { +			pr_err("File does not contain CPU events. " +			       "Remove -c option to proceed.\n"); +			return -1; +		} +	} + +	map = cpu_map__new(cpu_list); +	if (map == NULL) { +		pr_err("Invalid cpu_list\n"); +		return -1; +	} + +	for (i = 0; i < map->nr; i++) { +		int cpu = map->map[i]; + +		if (cpu >= MAX_NR_CPUS) { +			pr_err("Requested CPU %d too large. " +			       "Consider raising MAX_NR_CPUS\n", cpu); +			goto out_delete_map; +		} + +		set_bit(cpu, cpu_bitmap); +	} + +	err = 0; + +out_delete_map: +	cpu_map__delete(map); +	return err;  } -size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp, -					  bool with_hits) +void perf_session__fprintf_info(struct perf_session *session, FILE *fp, +				bool full)  { -	size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits); -	return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits); +	struct stat st; +	int fd, ret; + +	if (session == NULL || fp == NULL) +		return; + +	fd = perf_data_file__fd(session->file); + +	ret = fstat(fd, &st); +	if (ret == -1) +		return; + +	fprintf(fp, "# ========\n"); +	fprintf(fp, "# captured on: %s", ctime(&st.st_ctime)); +	perf_header__fprintf_info(session, fp, full); +	fprintf(fp, "# ========\n#\n"); +} + + +int __perf_session__set_tracepoints_handlers(struct perf_session *session, +					     const struct perf_evsel_str_handler *assocs, +					     size_t nr_assocs) +{ +	struct perf_evsel *evsel; +	size_t i; +	int err; + +	for (i = 0; i < nr_assocs; i++) { +		/* +		 * Adding a handler for an event not in the session, +		 * just ignore it. +		 */ +		evsel = perf_evlist__find_tracepoint_by_name(session->evlist, assocs[i].name); +		if (evsel == NULL) +			continue; + +		err = -EEXIST; +		if (evsel->handler != NULL) +			goto out; +		evsel->handler = assocs[i].handler; +	} + +	err = 0; +out: +	return err;  }  | 
