@@ -29,42 +29,44 @@ struct ring_buffer_event {
* If time_delta is non zero:
* array[0] holds the actual length
* size = 4 + length (bytes)
*
* @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta
* array[0] = time delta (28 .. 59)
* size = 8 bytes
*
- * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock
- * array[0] = tv_nsec
- * array[1..2] = tv_sec
- * size = 16 bytes
+ * @RINGBUF_TYPE_TIME_STAMP: Absolute timestamp
+ * Same format as TIME_EXTEND except that the
+ * value is an absolute timestamp, not a delta
+ * event.time_delta contains bottom 27 bits
+ * array[0] = top (28 .. 59) bits
+ * size = 8 bytes
*
* <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
* Data record
* If type_len is zero:
* array[0] holds the actual length
* array[1..(length+3)/4] holds data
* size = 4 + length (bytes)
* else
* length = type_len << 2
* array[0..(length+3)/4-1] holds data
* size = 4 + length (bytes)
*/
enum ring_buffer_type {
RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
RINGBUF_TYPE_PADDING,
RINGBUF_TYPE_TIME_EXTEND,
- /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
RINGBUF_TYPE_TIME_STAMP,
};
unsigned ring_buffer_event_length(struct ring_buffer_event *event);
void *ring_buffer_event_data(struct ring_buffer_event *event);
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event);
/*
* ring_buffer_discard_commit will remove an event that has not
* ben committed yet. If this is used, then ring_buffer_unlock_commit
* must not be called on the discarded event. This function
* will try to remove the event from the ring buffer completely
* if another event has not been written after it.
*
@@ -36,16 +36,18 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
trace_seq_puts(s, "\ttype_len : 5 bits\n");
trace_seq_puts(s, "\ttime_delta : 27 bits\n");
trace_seq_puts(s, "\tarray : 32 bits\n");
trace_seq_putc(s, '\n');
trace_seq_printf(s, "\tpadding : type == %d\n",
RINGBUF_TYPE_PADDING);
trace_seq_printf(s, "\ttime_extend : type == %d\n",
RINGBUF_TYPE_TIME_EXTEND);
+ trace_seq_printf(s, "\ttime_stamp : type == %d\n",
+ RINGBUF_TYPE_TIME_STAMP);
trace_seq_printf(s, "\tdata max type_len == %d\n",
RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
return !trace_seq_has_overflowed(s);
}
/*
* The ring buffer is made up of a list of pages. A separate list of pages is
@@ -135,22 +137,25 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
enum {
RB_LEN_TIME_EXTEND = 8,
- RB_LEN_TIME_STAMP = 16,
+ RB_LEN_TIME_STAMP = 8,
};
#define skip_time_extend(event) \
((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
+#define extended_time(event) \
+ (event->type_len >= RINGBUF_TYPE_TIME_EXTEND)
+
static inline int rb_null_event(struct ring_buffer_event *event)
{
return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
}
static void rb_event_set_padding(struct ring_buffer_event *event)
{
/* padding has a NULL time_delta */
@@ -204,17 +209,17 @@ rb_event_length(struct ring_buffer_event *event)
* Return total length of time extend and data,
* or just the event length for all other events.
*/
static inline unsigned
rb_event_ts_length(struct ring_buffer_event *event)
{
unsigned len = 0;
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+ if (extended_time(event)) {
/* time extends include the data event after it */
len = RB_LEN_TIME_EXTEND;
event = skip_time_extend(event);
}
return len + rb_event_length(event);
}
/**
@@ -226,34 +231,34 @@ rb_event_ts_length(struct ring_buffer_event *event)
* returns the size of the event itself. With the exception
* of a TIME EXTEND, where it still returns the size of the
* data load of the data event after it.
*/
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
unsigned length;
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ if (extended_time(event))
event = skip_time_extend(event);
length = rb_event_length(event);
if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
return length;
length -= RB_EVNT_HDR_SIZE;
if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
length -= sizeof(event->array[0]);
return length;
}
EXPORT_SYMBOL_GPL(ring_buffer_event_length);
/* inline for ring buffer fast paths */
static __always_inline void *
rb_event_data(struct ring_buffer_event *event)
{
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ if (extended_time(event))
event = skip_time_extend(event);
BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
/* If length is in len field, then array[0] has the data */
if (event->type_len)
return (void *)&event->array[0];
/* Otherwise length is in array[0] and array[1] has the data */
return (void *)&event->array[1];
}
@@ -270,16 +275,37 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
#define for_each_buffer_cpu(buffer, cpu) \
for_each_cpu(cpu, buffer->cpumask)
#define TS_SHIFT 27
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST (~TS_MASK)
+/**
+ * ring_buffer_event_time_stamp - return the event's extended timestamp
+ * @event: the event to get the timestamp of
+ *
+ * Returns the extended timestamp associated with a data event.
+ * An extended time_stamp is a 64-bit timestamp represented
+ * internally in a special way that makes the best use of space
+ * contained within a ring buffer event. This function decodes
+ * it and maps it to a straight u64 value.
+ */
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event)
+{
+ u64 ts;
+
+ ts = event->array[0];
+ ts <<= TS_SHIFT;
+ ts += event->time_delta;
+
+ return ts;
+}
+
/* Flag when events were overwritten */
#define RB_MISSED_EVENTS (1 << 31)
/* Missed count stored at end */
#define RB_MISSED_STORED (1 << 30)
#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED)
struct buffer_data_page {
@@ -2223,22 +2249,25 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
/* reset write */
rb_reset_tail(cpu_buffer, tail, info);
return NULL;
}
/* Slow path, do not inline */
static noinline struct ring_buffer_event *
-rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
{
- event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+ if (abs)
+ event->type_len = RINGBUF_TYPE_TIME_STAMP;
+ else
+ event->type_len = RINGBUF_TYPE_TIME_EXTEND;
- /* Not the first event on the page? */
- if (rb_event_index(event)) {
+ /* Not the first event on the page, or not delta? */
+ if (abs || rb_event_index(event)) {
event->time_delta = delta & TS_MASK;
event->array[0] = delta >> TS_SHIFT;
} else {
/* nope, just zero it */
event->time_delta = 0;
event->array[0] = 0;
}
@@ -2271,17 +2300,19 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
delta = 0;
/*
* If we need to add a timestamp, then we
* add it to the start of the resevered space.
*/
if (unlikely(info->add_timestamp)) {
- event = rb_add_time_stamp(event, delta);
+ bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
+
+ event = rb_add_time_stamp(event, info->delta, abs);
length -= RB_LEN_TIME_EXTEND;
delta = 0;
}
event->time_delta = delta;
length -= RB_EVNT_HDR_SIZE;
if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
event->type_len = 0;
@@ -2459,17 +2490,17 @@ static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer
!local_read(&cpu_buffer->committing)) {
local_inc(&cpu_buffer->committing);
goto again;
}
}
static inline void rb_event_discard(struct ring_buffer_event *event)
{
- if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+ if (extended_time(event))
event = skip_time_extend(event);
/* array[0] holds the actual length for the discarded event */
event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
event->type_len = RINGBUF_TYPE_PADDING;
/* time delta must be non zero */
if (!event->time_delta)
event->time_delta = 1;
@@ -2503,20 +2534,21 @@ rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
/*
* A commit event that is first on a page
* updates the write timestamp with the page stamp
*/
if (!rb_event_index(event))
cpu_buffer->write_stamp =
cpu_buffer->commit_page->page->time_stamp;
else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
+ delta = ring_buffer_event_time_stamp(event);
cpu_buffer->write_stamp += delta;
+ } else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
+ delta = ring_buffer_event_time_stamp(event);
+ cpu_buffer->write_stamp = delta;
} else
cpu_buffer->write_stamp += event->time_delta;
}
}
static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
@@ -2686,17 +2718,17 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
/* set write to only the index of the write */
write &= RB_WRITE_MASK;
tail = write - info->length;
/*
* If this is the first commit on the page, then it has the same
* timestamp as the page itself.
*/
- if (!tail)
+ if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer))
info->delta = 0;
/* See if we shot pass the end of this buffer page */
if (unlikely(write > BUF_PAGE_SIZE))
return rb_move_tail(cpu_buffer, tail, info);
/* We reserved something on the buffer */
@@ -2763,18 +2795,21 @@ rb_reserve_next_event(struct ring_buffer *buffer,
goto out_fail;
info.ts = rb_time_stamp(cpu_buffer->buffer);
diff = info.ts - cpu_buffer->write_stamp;
/* make sure this diff is calculated here */
barrier();
- /* Did the write stamp get updated already? */
- if (likely(info.ts >= cpu_buffer->write_stamp)) {
+ if (ring_buffer_time_stamp_abs(buffer)) {
+ info.delta = info.ts;
+ rb_handle_timestamp(cpu_buffer, &info);
+ } else /* Did the write stamp get updated already? */
+ if (likely(info.ts >= cpu_buffer->write_stamp)) {
info.delta = diff;
if (unlikely(test_time_stamp(info.delta)))
rb_handle_timestamp(cpu_buffer, &info);
}
event = __rb_reserve_next(cpu_buffer, &info);
if (unlikely(PTR_ERR(event) == -EAGAIN)) {
@@ -3462,24 +3497,23 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
{
u64 delta;
switch (event->type_len) {
case RINGBUF_TYPE_PADDING:
return;
case RINGBUF_TYPE_TIME_EXTEND:
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
+ delta = ring_buffer_event_time_stamp(event);
cpu_buffer->read_stamp += delta;
return;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ delta = ring_buffer_event_time_stamp(event);
+ cpu_buffer->read_stamp = delta;
return;
case RINGBUF_TYPE_DATA:
cpu_buffer->read_stamp += event->time_delta;
return;
default:
BUG();
@@ -3493,24 +3527,23 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
{
u64 delta;
switch (event->type_len) {
case RINGBUF_TYPE_PADDING:
return;
case RINGBUF_TYPE_TIME_EXTEND:
- delta = event->array[0];
- delta <<= TS_SHIFT;
- delta += event->time_delta;
+ delta = ring_buffer_event_time_stamp(event);
iter->read_stamp += delta;
return;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ delta = ring_buffer_event_time_stamp(event);
+ iter->read_stamp = delta;
return;
case RINGBUF_TYPE_DATA:
iter->read_stamp += event->time_delta;
return;
default:
BUG();
@@ -3724,16 +3757,18 @@ static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
static struct ring_buffer_event *
rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
unsigned long *lost_events)
{
struct ring_buffer_event *event;
struct buffer_page *reader;
int nr_loops = 0;
+ if (ts)
+ *ts = 0;
again:
/*
* We repeat when a time extend is encountered.
* Since the time extend is always attached to a data event,
* we should never loop more than once.
* (We never hit the following condition more than twice).
*/
if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
@@ -3760,22 +3795,27 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
return event;
case RINGBUF_TYPE_TIME_EXTEND:
/* Internal data, OK to advance */
rb_advance_reader(cpu_buffer);
goto again;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ if (ts) {
+ *ts = ring_buffer_event_time_stamp(event);
+ ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+ cpu_buffer->cpu, ts);
+ }
+ /* Internal data, OK to advance */
rb_advance_reader(cpu_buffer);
goto again;
case RINGBUF_TYPE_DATA:
- if (ts) {
+ if (ts && !(*ts)) {
*ts = cpu_buffer->read_stamp + event->time_delta;
ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
cpu_buffer->cpu, ts);
}
if (lost_events)
*lost_events = rb_lost_events(cpu_buffer);
return event;
@@ -3790,16 +3830,19 @@ EXPORT_SYMBOL_GPL(ring_buffer_peek);
static struct ring_buffer_event *
rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
{
struct ring_buffer *buffer;
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
int nr_loops = 0;
+ if (ts)
+ *ts = 0;
+
cpu_buffer = iter->cpu_buffer;
buffer = cpu_buffer->buffer;
/*
* Check if someone performed a consuming read to
* the buffer. A consuming read invalidates the iterator
* and we need to reset the iterator in this case.
*/
@@ -3842,22 +3885,27 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
return event;
case RINGBUF_TYPE_TIME_EXTEND:
/* Internal data, OK to advance */
rb_advance_iter(iter);
goto again;
case RINGBUF_TYPE_TIME_STAMP:
- /* FIXME: not implemented */
+ if (ts) {
+ *ts = ring_buffer_event_time_stamp(event);
+ ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+ cpu_buffer->cpu, ts);
+ }
+ /* Internal data, OK to advance */
rb_advance_iter(iter);
goto again;
case RINGBUF_TYPE_DATA:
- if (ts) {
+ if (ts && !(*ts)) {
*ts = iter->read_stamp + event->time_delta;
ring_buffer_normalize_time_stamp(buffer,
cpu_buffer->cpu, ts);
}
return event;
default:
BUG();