diff options
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r-- | kernel/trace/ring_buffer.c | 179 |
1 files changed, 137 insertions, 42 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b979426d16c..6989df2ba19 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3,8 +3,10 @@ * * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> */ +#include <linux/ftrace_event.h> #include <linux/ring_buffer.h> #include <linux/trace_clock.h> +#include <linux/trace_seq.h> #include <linux/spinlock.h> #include <linux/debugfs.h> #include <linux/uaccess.h> @@ -21,7 +23,6 @@ #include <linux/fs.h> #include <asm/local.h> -#include "trace.h" static void update_pages_handler(struct work_struct *work); @@ -177,7 +178,7 @@ void tracing_off_permanent(void) #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) #define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ -#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) +#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS # define RB_FORCE_8BYTE_ALIGNMENT 0 # define RB_ARCH_ALIGNMENT RB_ALIGNMENT #else @@ -185,6 +186,8 @@ void tracing_off_permanent(void) # define RB_ARCH_ALIGNMENT 8U #endif +#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT) + /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX @@ -333,7 +336,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data); struct buffer_data_page { u64 time_stamp; /* page time stamp */ local_t commit; /* write committed index */ - unsigned char data[]; /* data of buffer page */ + unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */ }; /* @@ -460,9 +463,10 @@ struct ring_buffer_per_cpu { unsigned long lost_events; unsigned long last_overrun; local_t entries_bytes; - local_t commit_overrun; - local_t overrun; local_t entries; + local_t overrun; + local_t commit_overrun; + local_t dropped_events; local_t committing; local_t commits; unsigned long read; @@ -1396,6 +1400,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) struct list_head *head_page_with_bit; head_page = &rb_set_head_page(cpu_buffer)->list; + if (!head_page) + break; prev_page = head_page->prev; first_page = pages->next; @@ -1820,7 +1826,7 @@ rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) } /** - * ring_buffer_update_event - update event type and data + * rb_update_event - update event type and data * @event: the even to update * @type: the type of event * @length: the size of the event field in the ring buffer @@ -2155,8 +2161,10 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, * If we are not in overwrite mode, * this is easy, just stop here. */ - if (!(buffer->flags & RB_FL_OVERWRITE)) + if (!(buffer->flags & RB_FL_OVERWRITE)) { + local_inc(&cpu_buffer->dropped_events); goto out_reset; + } ret = rb_handle_head_page(cpu_buffer, tail_page, @@ -2427,41 +2435,76 @@ rb_reserve_next_event(struct ring_buffer *buffer, #ifdef CONFIG_TRACING -#define TRACE_RECURSIVE_DEPTH 16 +/* + * The lock and unlock are done within a preempt disable section. + * The current_context per_cpu variable can only be modified + * by the current task between lock and unlock. But it can + * be modified more than once via an interrupt. To pass this + * information from the lock to the unlock without having to + * access the 'in_interrupt()' functions again (which do show + * a bit of overhead in something as critical as function tracing, + * we use a bitmask trick. + * + * bit 0 = NMI context + * bit 1 = IRQ context + * bit 2 = SoftIRQ context + * bit 3 = normal context. + * + * This works because this is the order of contexts that can + * preempt other contexts. A SoftIRQ never preempts an IRQ + * context. + * + * When the context is determined, the corresponding bit is + * checked and set (if it was set, then a recursion of that context + * happened). + * + * On unlock, we need to clear this bit. To do so, just subtract + * 1 from the current_context and AND it to itself. + * + * (binary) + * 101 - 1 = 100 + * 101 & 100 = 100 (clearing bit zero) + * + * 1010 - 1 = 1001 + * 1010 & 1001 = 1000 (clearing bit 1) + * + * The least significant bit can be cleared this way, and it + * just so happens that it is the same bit corresponding to + * the current context. + */ +static DEFINE_PER_CPU(unsigned int, current_context); -/* Keep this code out of the fast path cache */ -static noinline void trace_recursive_fail(void) +static __always_inline int trace_recursive_lock(void) { - /* Disable all tracing before we do anything else */ - tracing_off_permanent(); + unsigned int val = this_cpu_read(current_context); + int bit; - printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" - "HC[%lu]:SC[%lu]:NMI[%lu]\n", - trace_recursion_buffer(), - hardirq_count() >> HARDIRQ_SHIFT, - softirq_count() >> SOFTIRQ_SHIFT, - in_nmi()); - - WARN_ON_ONCE(1); -} - -static inline int trace_recursive_lock(void) -{ - trace_recursion_inc(); + if (in_interrupt()) { + if (in_nmi()) + bit = 0; + else if (in_irq()) + bit = 1; + else + bit = 2; + } else + bit = 3; - if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) - return 0; + if (unlikely(val & (1 << bit))) + return 1; - trace_recursive_fail(); + val |= (1 << bit); + this_cpu_write(current_context, val); - return -1; + return 0; } -static inline void trace_recursive_unlock(void) +static __always_inline void trace_recursive_unlock(void) { - WARN_ON_ONCE(!trace_recursion_buffer()); + unsigned int val = this_cpu_read(current_context); - trace_recursion_dec(); + val--; + val &= this_cpu_read(current_context); + this_cpu_write(current_context, val); } #else @@ -2720,8 +2763,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); * and not the length of the event which would hold the header. */ int ring_buffer_write(struct ring_buffer *buffer, - unsigned long length, - void *data) + unsigned long length, + void *data) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; @@ -2929,12 +2972,12 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ -unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) +u64 ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) { unsigned long flags; struct ring_buffer_per_cpu *cpu_buffer; struct buffer_page *bpage; - unsigned long ret; + u64 ret = 0; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; @@ -2949,7 +2992,8 @@ unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) bpage = cpu_buffer->reader_page; else bpage = rb_set_head_page(cpu_buffer); - ret = bpage->page->time_stamp; + if (bpage) + ret = bpage->page->time_stamp; raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); return ret; @@ -2995,7 +3039,8 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); /** - * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer + * ring_buffer_overrun_cpu - get the number of overruns caused by the ring + * buffer wrapping around (only if RB_FL_OVERWRITE is on). * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of overruns from */ @@ -3015,7 +3060,9 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); /** - * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits + * ring_buffer_commit_overrun_cpu - get the number of overruns caused by + * commits failing due to the buffer wrapping around while there are uncommitted + * events, such as during an interrupt storm. * @buffer: The ring buffer * @cpu: The per CPU buffer to get the number of overruns from */ @@ -3036,6 +3083,46 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu); /** + * ring_buffer_dropped_events_cpu - get the number of dropped events caused by + * the ring buffer filling up (only if RB_FL_OVERWRITE is off). + * @buffer: The ring buffer + * @cpu: The per CPU buffer to get the number of overruns from + */ +unsigned long +ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + unsigned long ret; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + cpu_buffer = buffer->buffers[cpu]; + ret = local_read(&cpu_buffer->dropped_events); + + return ret; +} +EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu); + +/** + * ring_buffer_read_events_cpu - get the number of events successfully read + * @buffer: The ring buffer + * @cpu: The per CPU buffer to get the number of events read + */ +unsigned long +ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + cpu_buffer = buffer->buffers[cpu]; + return cpu_buffer->read; +} +EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu); + +/** * ring_buffer_entries - get the number of entries in a buffer * @buffer: The ring buffer * @@ -3260,6 +3347,8 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * Splice the empty reader page into the list around the head. */ reader = rb_set_head_page(cpu_buffer); + if (!reader) + goto out; cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next); cpu_buffer->reader_page->list.prev = reader->list.prev; @@ -3392,7 +3481,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) /* check for end of page padding */ if ((iter->head >= rb_page_size(iter->head_page)) && (iter->head_page != cpu_buffer->commit_page)) - rb_advance_iter(iter); + rb_inc_iter(iter); } static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) @@ -3778,12 +3867,17 @@ void ring_buffer_read_finish(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; + unsigned long flags; /* * Ring buffer is disabled from recording, here's a good place - * to check the integrity of the ring buffer. + * to check the integrity of the ring buffer. + * Must prevent readers from trying to read, as the check + * clears the HEAD page and readers require it. */ + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); rb_check_pages(cpu_buffer); + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); atomic_dec(&cpu_buffer->record_disabled); atomic_dec(&cpu_buffer->buffer->resize_disabled); @@ -3864,9 +3958,10 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) local_set(&cpu_buffer->reader_page->page->commit, 0); cpu_buffer->reader_page->read = 0; - local_set(&cpu_buffer->commit_overrun, 0); local_set(&cpu_buffer->entries_bytes, 0); local_set(&cpu_buffer->overrun, 0); + local_set(&cpu_buffer->commit_overrun, 0); + local_set(&cpu_buffer->dropped_events, 0); local_set(&cpu_buffer->entries, 0); local_set(&cpu_buffer->committing, 0); local_set(&cpu_buffer->commits, 0); |