From: Steven Rostedt A new file is created: /sys/kernel/debug/tracing/trace_marker_raw This allows for appications to create data structures and write the binary data directly into it, and then read the trace data out from trace_pipe_raw into the same type of data structure. This saves on converting numbers into ASCII that would be required by trace_marker. Suggested-by: Olof Johansson Signed-off-by: Steven Rostedt --- Documentation/trace/ftrace.txt | 6 ++ kernel/trace/trace.c | 165 +++++++++++++++++++++++++++++++++-------- kernel/trace/trace.h | 2 + kernel/trace/trace_entries.h | 15 ++++ kernel/trace/trace_output.c | 30 ++++++++ 5 files changed, 187 insertions(+), 31 deletions(-) diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 1bc66c1db0cb..6c374c5fe400 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -396,6 +396,12 @@ of ftrace. Here is a list of some of the key files: trace_fd = open("trace_marker", WR_ONLY); + trace_marker_raw: + + This is similar to trace_marker above, but is meant for for binary data + to be written to it, where a tool can be used to parse the data + from trace_pipe_raw. + uprobe_events: Add dynamic tracepoints in programs. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d904516dfdab..57069e7f369c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4054,6 +4054,7 @@ static const char readme_msg[] = " x86-tsc: TSC cycle counter\n" #endif "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" + "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" " tracing_cpumask\t- Limit which CPUs to trace\n" " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" "\t\t\t Remove sub-buffer with rmdir\n" @@ -5514,35 +5515,15 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) return 0; } -static ssize_t -tracing_mark_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *fpos) +static inline int lock_user_pages(const char __user *ubuf, size_t cnt, + struct page **pages, void **map_page, + int *offset) { unsigned long addr = (unsigned long)ubuf; - struct trace_array *tr = filp->private_data; - struct ring_buffer_event *event; - struct ring_buffer *buffer; - struct print_entry *entry; - unsigned long irq_flags; - struct page *pages[2]; - void *map_page[2]; int nr_pages = 1; - ssize_t written; - int offset; - int size; - int len; int ret; int i; - if (tracing_disabled) - return -EINVAL; - - if (!(tr->trace_flags & TRACE_ITER_MARKERS)) - return -EINVAL; - - if (cnt > TRACE_BUF_SIZE) - cnt = TRACE_BUF_SIZE; - /* * Userspace is injecting traces into the kernel trace buffer. * We want to be as non intrusive as possible. @@ -5557,26 +5538,70 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, * pages directly. We then write the data directly into the * ring buffer. */ - BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); /* check if we cross pages */ if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK)) nr_pages = 2; - offset = addr & (PAGE_SIZE - 1); + *offset = addr & (PAGE_SIZE - 1); addr &= PAGE_MASK; ret = get_user_pages_fast(addr, nr_pages, 0, pages); if (ret < nr_pages) { while (--ret >= 0) put_page(pages[ret]); - written = -EFAULT; - goto out; + return -EFAULT; } for (i = 0; i < nr_pages; i++) map_page[i] = kmap_atomic(pages[i]); + return nr_pages; +} + +static inline void unlock_user_pages(struct page **pages, + void **map_page, int nr_pages) +{ + int i; + + for (i = nr_pages - 1; i >= 0; i--) { + kunmap_atomic(map_page[i]); + put_page(pages[i]); + } +} + +static ssize_t +tracing_mark_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *fpos) +{ + struct trace_array *tr = filp->private_data; + struct ring_buffer_event *event; + struct ring_buffer *buffer; + struct print_entry *entry; + unsigned long irq_flags; + struct page *pages[2]; + void *map_page[2]; + int nr_pages = 1; + ssize_t written; + int offset; + int size; + int len; + + if (tracing_disabled) + return -EINVAL; + + if (!(tr->trace_flags & TRACE_ITER_MARKERS)) + return -EINVAL; + + if (cnt > TRACE_BUF_SIZE) + cnt = TRACE_BUF_SIZE; + + BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); + + nr_pages = lock_user_pages(ubuf, cnt, pages, map_page, &offset); + if (nr_pages < 0) + return nr_pages; + local_save_flags(irq_flags); size = sizeof(*entry) + cnt + 2; /* possible \n added */ buffer = tr->trace_buffer.buffer; @@ -5611,11 +5636,79 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, *fpos += written; out_unlock: - for (i = nr_pages - 1; i >= 0; i--) { - kunmap_atomic(map_page[i]); - put_page(pages[i]); + unlock_user_pages(pages, map_page, nr_pages); + + return written; +} + +/* Limit it for now to 3K (including tag) */ +#define RAW_DATA_MAX_SIZE (1024*3) + +static ssize_t +tracing_mark_raw_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *fpos) +{ + struct trace_array *tr = filp->private_data; + struct ring_buffer_event *event; + struct ring_buffer *buffer; + struct raw_data_entry *entry; + unsigned long irq_flags; + struct page *pages[2]; + void *map_page[2]; + int nr_pages = 1; + ssize_t written; + int offset; + int size; + int len; + + if (tracing_disabled) + return -EINVAL; + + if (!(tr->trace_flags & TRACE_ITER_MARKERS)) + return -EINVAL; + + /* The marker must at least have a tag id */ + if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE) + return -EINVAL; + + if (cnt > TRACE_BUF_SIZE) + cnt = TRACE_BUF_SIZE; + + BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); + + nr_pages = lock_user_pages(ubuf, cnt, pages, map_page, &offset); + if (nr_pages < 0) + return nr_pages; + + local_save_flags(irq_flags); + size = sizeof(*entry) + cnt; + buffer = tr->trace_buffer.buffer; + event = trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, + irq_flags, preempt_count()); + if (!event) { + /* Ring buffer disabled, return as if not open for write */ + written = -EBADF; + goto out_unlock; } - out: + + entry = ring_buffer_event_data(event); + + if (nr_pages == 2) { + len = PAGE_SIZE - offset; + memcpy(&entry->id, map_page[0] + offset, len); + memcpy(((char *)&entry->id) + len, map_page[1], cnt - len); + } else + memcpy(&entry->id, map_page[0] + offset, cnt); + + __buffer_unlock_commit(buffer, event); + + written = cnt; + + *fpos += written; + + out_unlock: + unlock_user_pages(pages, map_page, nr_pages); + return written; } @@ -5945,6 +6038,13 @@ static const struct file_operations tracing_mark_fops = { .release = tracing_release_generic_tr, }; +static const struct file_operations tracing_mark_raw_fops = { + .open = tracing_open_generic_tr, + .write = tracing_mark_raw_write, + .llseek = generic_file_llseek, + .release = tracing_release_generic_tr, +}; + static const struct file_operations trace_clock_fops = { .open = tracing_clock_open, .read = seq_read, @@ -7214,6 +7314,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) trace_create_file("trace_marker", 0220, d_tracer, tr, &tracing_mark_fops); + trace_create_file("trace_marker_raw", 0220, d_tracer, + tr, &tracing_mark_raw_fops); + trace_create_file("trace_clock", 0644, d_tracer, tr, &trace_clock_fops); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4b7918902ab8..9294f8606ade 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -40,6 +40,7 @@ enum trace_type { TRACE_BLK, TRACE_BPUTS, TRACE_HWLAT, + TRACE_RAW_DATA, __TRACE_LAST_TYPE, }; @@ -331,6 +332,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \ IF_ASSIGN(var, ent, struct hwlat_entry, TRACE_HWLAT); \ + IF_ASSIGN(var, ent, struct raw_data_entry, TRACE_RAW_DATA);\ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ TRACE_MMIO_RW); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index d1cc37e78f99..eb7396b7e7c3 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -244,6 +244,21 @@ FTRACE_ENTRY(print, print_entry, FILTER_OTHER ); +FTRACE_ENTRY(raw_data, raw_data_entry, + + TRACE_RAW_DATA, + + F_STRUCT( + __field( unsigned int, id ) + __dynamic_array( char, buf ) + ), + + F_printk("id:%04x %08x", + __entry->id, (int)__entry->buf[0]), + + FILTER_OTHER +); + FTRACE_ENTRY(bputs, bputs_entry, TRACE_BPUTS, diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 3fc20422c166..5d33a7352919 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1288,6 +1288,35 @@ static struct trace_event trace_print_event = { .funcs = &trace_print_funcs, }; +static enum print_line_t trace_raw_data(struct trace_iterator *iter, int flags, + struct trace_event *event) +{ + struct raw_data_entry *field; + int i; + + trace_assign_type(field, iter->ent); + + trace_seq_printf(&iter->seq, "# %x buf:", field->id); + + for (i = 0; i < iter->ent_size - offsetof(struct raw_data_entry, buf); i++) + trace_seq_printf(&iter->seq, " %02x", + (unsigned char)field->buf[i]); + + trace_seq_putc(&iter->seq, '\n'); + + return trace_handle_return(&iter->seq); +} + +static struct trace_event_functions trace_raw_data_funcs = { + .trace = trace_raw_data, + .raw = trace_raw_data, +}; + +static struct trace_event trace_raw_data_event = { + .type = TRACE_RAW_DATA, + .funcs = &trace_raw_data_funcs, +}; + static struct trace_event *events[] __initdata = { &trace_fn_event, @@ -1299,6 +1328,7 @@ static struct trace_event *events[] __initdata = { &trace_bprint_event, &trace_print_event, &trace_hwlat_event, + &trace_raw_data_event, NULL }; -- 2.10.2