lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20201207215520.3da609f4@oasis.local.home>
Date:   Mon, 7 Dec 2020 21:55:20 -0500
From:   Steven Rostedt <rostedt@...dmis.org>
To:     Masami Hiramatsu <mhiramat@...nel.org>
Cc:     LKML <linux-kernel@...r.kernel.org>,
        Shuah Khan <skhan@...uxfoundation.org>,
        Linux Trace Devel <linux-trace-devel@...r.kernel.org>,
        Ingo Molnar <mingo@...nel.org>
Subject: Re: [RFC][PATCH] ftrace/selftests: Add binary test to verify ring
 buffer timestamps


Masami?

-- Steve


On Tue, 1 Dec 2020 16:06:56 -0500
Steven Rostedt <rostedt@...dmis.org> wrote:

> From: Steven Rostedt (VMware) <rostedt@...dmis.org>
> 
> A bug was reported about the ftrace ring buffer going backwards:
> 
> Link: https://lore.kernel.org/r/20201124223917.795844-1-elavila@google.com
> 
> In debugging this code, I wrote a C program that uses libtracefs to enable
> all events and function tracing (if it exits), and then read the raw ring
> buffer binary data and make sure that all the events never go backwards. If
> they do, then it does a dump of the ring buffer sub buffer page and shows
> the layout of the events and their deltas.
> 
> This was a very useful tool, and can be used to make sure that the ring
> buffer's timestamps are consistently monotonic.
> 
> Adding this to the ftrace selftests seems to be a way that this can be
> tested more often. But this would introduce the first binary code to the
> ftracetests.
> 
> To make sure that the tests still work on embedded devices (where a
> compiler may not even exist), and also since this binary incorporates the
> yet-to-be-released libtracefs library, if the make fails, the test exits
> with UNTESTED. The UNTESTED is documented as being a place holder which
> this would be if the make does not work.
> 
> Thoughts?
> 
> Soon-to-be-signed-off-by: Steven Rostedt (VMware) <rostedt@...dmis.org>
> ---
> diff --git a/tools/testing/selftests/ftrace/test-ring-buffer/Makefile b/tools/testing/selftests/ftrace/test-ring-buffer/Makefile
> new file mode 100644
> index 000000000000..86f96b6fed9f
> --- /dev/null
> +++ b/tools/testing/selftests/ftrace/test-ring-buffer/Makefile
> @@ -0,0 +1,32 @@
> +
> +LD_PATH := -L/usr/local/lib/tracefs/ -L/usr/local/lib/traceevent
> +
> +TARGETS = test-ring-buffer
> +
> +PKG_CONFIG = pkg-config
> +TRACEFS_INCLUDES = $(shell $(PKG_CONFIG) --cflags libtracefs 2>/dev/null)
> +TRACEFS_LIBS = $(shell $(PKG_CONFIG) --libs libtracefs 2>/dev/null)
> +
> +TRACEEVENT_INCLUDES = $(shell $(PKG_CONFIG) --cflags libtraceevent 2>/dev/null)
> +TRACEEVENT_LIBS = $(shell $(PKG_CONFIG) --libs libtraceevent 2>/dev/null)
> +
> +ifeq ("$(TRACEFS_INCLUDES)","")
> +$(error no libtracefs found)
> +endif
> +
> +ifeq ("$(TRACEEVENT_INCLUDES)","")
> +$(error no libtraceevent found)
> +endif
> +
> +LIBS = $(TRACEFS_LIBS) $(TRACEEVENT_LIBS) -ldl
> +
> +CFLAGS := $(TRACEFS_INCLUDES) $(TRACEEVENT_INCLUDES)
> +
> +all: $(TARGETS)
> +
> +test-ring-buffer: test-ring-buffer.c
> +	gcc -g -Wall -o $@ $(CFLAGS) $^ $(LIBS)
> +
> +clean:
> +	$(RM) $(TARGETS) *.o
> +
> diff --git a/tools/testing/selftests/ftrace/test-ring-buffer/test-ring-buffer.c b/tools/testing/selftests/ftrace/test-ring-buffer/test-ring-buffer.c
> new file mode 100644
> index 000000000000..af823e2102ff
> --- /dev/null
> +++ b/tools/testing/selftests/ftrace/test-ring-buffer/test-ring-buffer.c
> @@ -0,0 +1,399 @@
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdarg.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <unistd.h>
> +#include <signal.h>
> +#include <stdbool.h>
> +#include <sys/types.h>
> +#include <fcntl.h>
> +
> +#include <tracefs.h>
> +#include <kbuffer.h>
> +
> +typedef long long u64;
> +
> +static char *THIS_NAME;
> +static struct tep_handle *tep;
> +
> +static void __vdie(const char *fmt, va_list ap, int err)
> +{
> +	int ret = errno;
> +
> +	if (err && errno)
> +		perror(THIS_NAME);
> +	else
> +		ret = -1;
> +
> +	fprintf(stderr, "  ");
> +	vfprintf(stderr, fmt, ap);
> +
> +	fprintf(stderr, "\n");
> +	exit(ret);
> +}
> +
> +void die(const char *fmt, ...)
> +{
> +	va_list ap;
> +
> +	va_start(ap, fmt);
> +	__vdie(fmt, ap, 0);
> +	va_end(ap);
> +}
> +
> +void pdie(const char *fmt, ...)
> +{
> +	va_list ap;
> +
> +	va_start(ap, fmt);
> +	__vdie(fmt, ap, 1);
> +	va_end(ap);
> +}
> +
> +static void usage(char **argv)
> +{
> +	char *arg = argv[0];
> +	char *p = arg+strlen(arg);
> +
> +	while (p >= arg && *p != '/')
> +		p--;
> +	p++;
> +
> +	printf("\nusage: %s timeout\n"
> +	       "\n",p);
> +	exit(-1);
> +}
> +
> +static int tfd;
> +static int result;
> +static int done = 1;
> +
> +static void stop(int sig)
> +{
> +	write(tfd, "0", 1);
> +	done = 0;
> +}
> +
> +static struct last_info {
> +	u64			timestamp;
> +	void 			*content;
> +	void 			*content_next;
> +	unsigned long		page;
> +	int			index;
> +	int			irq;
> +	int			sirq;
> +} *info;
> +
> +static int page_size;
> +
> +static void *get_meta_data(struct tep_record *record)
> +{
> +	void *p = record->data;
> +	int type_len;
> +
> +	if (record->size > 120) {
> +		int size;
> +
> +		p -= 4;
> +		size = *(unsigned int *)p;
> +		p -= 4;
> +		type_len = *(unsigned int *)p;
> +		if (size != record->size + 4)
> +			die("Expected size %d but found %d (%d)\n",
> +			    record->size + 4,
> +			    size, type_len);
> +	} else
> +		p -= 4;
> +
> +	return p;
> +}
> +
> +static void init_content(struct last_info *info, int cpu,
> +			 struct tep_record *record)
> +{
> +	int type_len;
> +	void *p;
> +
> +	p = get_meta_data(record);
> +	p -= 8;
> +
> +	type_len = *(unsigned int *)p;
> +	/* FIXME for big endian */
> +	type_len = type_len & ((1 << 5) - 1);
> +	switch (type_len) {
> +	case KBUFFER_TYPE_TIME_EXTEND:
> +	case KBUFFER_TYPE_TIME_STAMP:
> +		p -= 8;
> +		break;
> +	}
> +	p -= sizeof(long);
> +	memcpy(info[cpu].content, p, (record->data - p) + record->size);
> +	info[cpu].page = (unsigned long)p;
> +	info[cpu].content_next = record->data + record->size;
> +}
> +
> +static void update_content(struct last_info *info, int cpu,
> +			   struct tep_record *record)
> +{
> +	int size = (record->data + record->size) - info[cpu].content_next;
> +	int start = info[cpu].content_next - (void *)info[cpu].page;
> +
> +	memcpy(info[cpu].content + start, (void *)info[cpu].page + start, size);
> +	info[cpu].content_next = record->data + record->size;
> +}
> +
> +static int read_kbuf_record(struct kbuffer *kbuf, struct tep_record *record)
> +{
> +	unsigned long long ts;
> +	void *ptr;
> +
> +	ptr = kbuffer_read_event(kbuf, &ts);
> +	if (!ptr || !record)
> +		return -1;
> +
> +	memset(record, 0, sizeof(*record));
> +	record->ts = ts;
> +	record->size = kbuffer_event_size(kbuf);
> +	record->record_size = kbuffer_curr_size(kbuf);
> +	record->cpu = 0;
> +	record->data = ptr;
> +	record->ref_count = 1;
> +
> +	kbuffer_next_event(kbuf, NULL);
> +
> +	return 0;
> +}
> +
> +static unsigned get_delta(void *p)
> +{
> +	return (*(unsigned *)p & ((1<<27)-1) << 5) >> 5;
> +}
> +
> +static unsigned long long get_extend_delta(void *p)
> +{
> +	unsigned long long delta;
> +	unsigned long long extend;
> +
> +	delta = get_delta(p);
> +	p += 4;
> +	extend = *(unsigned *)p;
> +
> +	return (extend << 27) + delta;
> +}
> +
> +static void check_meta(void *p, bool show)
> +{
> +	int type_len;
> +
> +	type_len = *(unsigned *)p & ((1<<5)-1);
> +	if (show)
> +		printf(" type_len=%d\n", type_len);
> +	switch (type_len) {
> +	case KBUFFER_TYPE_TIME_EXTEND:
> +		printf("   TIME_EXTEND: delta: %lld\n", get_extend_delta(p));
> +		break;
> +	case KBUFFER_TYPE_TIME_STAMP:
> +		printf("   TIME_STAMP: stamp: %lld\n", get_extend_delta(p));
> +		break;
> +	}
> +}
> +
> +static void dump_content(struct last_info *info, int cpu,
> +			 struct tep_record *this_record)
> +{
> +	struct tep_record record;
> +	struct kbuffer *kbuf;
> +	enum kbuffer_long_size kls;
> +	int last = info[cpu].content_next - (void *)info[cpu].page;
> +	int ret;
> +	void *p;
> +
> +	switch (sizeof(long)) {
> +	case 4:
> +		kls = KBUFFER_LSIZE_4;
> +		break;
> +	default:
> +		kls = KBUFFER_LSIZE_8;
> +		break;
> +	}
> +
> +
> +	kbuf = kbuffer_alloc(kls, KBUFFER_ENDIAN_LITTLE);
> +	if (!kbuf)
> +		die("Allocating kbuffer");
> +	kbuffer_load_subbuffer(kbuf, info[cpu].content);
> +
> +	p = info[cpu].content;
> +
> +	printf(" [%lld] PAGE TIME STAMP\n", *(unsigned long long *)p);
> +
> +	p += 8 + sizeof(long);
> +	check_meta(p, true);
> +
> +	while (!(ret = read_kbuf_record(kbuf, &record))) {
> +		struct tep_event *event;
> +		int id;
> +
> +		id = tep_data_type(tep, &record);
> +		event = tep_find_event(tep, id);
> +		if (!event) {
> +			printf("Failed to find event!\n");
> +			continue;
> +		}
> +
> +		p = get_meta_data(&record);
> +
> +		printf(" [%lld] delta:%d (%d)\n", record.ts, get_delta(p), record.size);
> +
> +		if (info[cpu].content + last == record.data + record.size)
> +			break;
> +
> +		check_meta(record.data + record.size, false);
> +	}
> +	kbuffer_free(kbuf);
> +
> +	if (!this_record)
> +		return;
> +
> +	printf("\nDumping record buffer:\n");
> +
> +	init_content(info, cpu, this_record);
> +	dump_content(info, cpu, NULL);
> +}
> +
> +static int callback(struct tep_event *event,
> +		    struct tep_record *record,
> +		    int cpu, void *context)
> +{
> +	static int done;
> +	int index;
> +	int irq, sirq;
> +	int flags;
> +
> +	if (done)
> +		return 1;
> +
> +	if (!info[cpu].content) {
> +		info[cpu].content = calloc(1, page_size);
> +		if (!info[cpu].content)
> +			die("Allocating content");
> +	}
> +	if (!info[cpu].page)
> +		init_content(info, cpu, record);
> +
> +	index = (unsigned long)record->data - info[cpu].page;
> +
> +	if (index < 0) {
> +		printf("HERE!\n");
> +		info[cpu].page = (unsigned long)record->data;
> +		info[cpu].index += index;
> +		index = 0;
> +	}
> +
> +	flags = tep_data_flags(event->tep, record);
> +	irq = !!(flags & TRACE_FLAG_HARDIRQ);
> +	sirq = !!(flags & TRACE_FLAG_SOFTIRQ);
> +
> +	if (info[cpu].timestamp > record->ts) {
> +		stop(0);
> +		printf("Record on cpu %d went backwards: %lld to %lld\n"
> +		       "  last index = %d : this index = %d\n"
> +		       "  last softirq = %d : last hardirq = %d\n",
> +		       cpu, info[cpu].timestamp, record->ts,
> +		       info[cpu].index, index, info[cpu].sirq, info[cpu].irq);
> +		dump_content(info, cpu, record);
> +		done = 1;
> +		result = -1;
> +		return 1;
> +	}
> +
> +	if (info[cpu].content_next > record->data)
> +		init_content(info, cpu, record);
> +	else
> +		update_content(info, cpu, record);
> +
> +	info[cpu].timestamp = record->ts;
> +	if (info[cpu].index > index) {
> +		info[cpu].sirq = sirq;
> +		info[cpu].irq = irq;
> +	} else {
> +		info[cpu].sirq |= sirq;
> +		info[cpu].irq |= irq;
> +	}
> +	info[cpu].index = index;
> +	return 0;
> +}
> +
> +static void test_buffer(struct tep_handle *tep, int timeout)
> +{
> +	signal(SIGALRM, stop);
> +	alarm(timeout);
> +
> +	tracefs_iterate_raw_events(tep, NULL, callback, NULL);
> +}
> +
> +int main (int argc, char **argv)
> +{
> +	char *current_tracer;
> +	char *events_enable;
> +	char *tracing_on;
> +	int time;
> +	int cpus;
> +	int efd;
> +	int cfd;
> +
> +	THIS_NAME = argv[0];
> +
> +	if (argc < 2)
> +		usage(argv);
> +
> +	page_size = getpagesize();
> +
> +	time = atoi(argv[1]);
> +	if (time <= 0)
> +		usage(argv);
> +
> +	tep = tracefs_local_events(NULL);
> +	if (!tep)
> +		pdie("Error loading events");
> +
> +	events_enable = tracefs_instance_get_file(NULL, "events/enable");
> +	current_tracer = tracefs_instance_get_file(NULL, "current_tracer");
> +	tracing_on = tracefs_instance_get_file(NULL, "tracing_on");
> +
> +	if (!events_enable || !current_tracer || !tracing_on)
> +		pdie("Allocating strings");
> +
> +	cpus = sysconf(_SC_NPROCESSORS_CONF);
> +
> +	info = calloc(cpus, sizeof(*info));
> +	if (!info)
> +		pdie("Allocating per cpu info");
> +
> +	efd = open(events_enable, O_WRONLY);
> +	if (efd < 0)
> +		pdie(events_enable);
> +
> +	cfd = open(current_tracer, O_WRONLY);
> +	if (cfd < 0)
> +		pdie(current_tracer);
> +
> +	tfd = open(tracing_on, O_WRONLY);
> +	if (tfd < 0)
> +		pdie(current_tracer);
> +
> +	write(efd, "1", 1);
> +	write(cfd, "function", 8);
> +
> +	test_buffer(tep, time);
> +
> +	write(efd, "0", 1);
> +	write(cfd, "nop", 3);
> +	write(tfd, "1", 1);
> +
> +	close(efd);
> +	close(cfd);
> +	close(tfd);
> +	exit(result);
> +}
> diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/ring-buffer.tc b/tools/testing/selftests/ftrace/test.d/ftrace/ring-buffer.tc
> new file mode 100644
> index 000000000000..05f628124774
> --- /dev/null
> +++ b/tools/testing/selftests/ftrace/test.d/ftrace/ring-buffer.tc
> @@ -0,0 +1,16 @@
> +#!/bin/sh
> +# SPDX-License-Identifier: GPL-2.0
> +# description: ftrace - test ring buffer timestamps
> +
> +cd $TOP_DIR/test-ring-buffer
> +if ! make ; then
> +	cd $TRACING_DIR
> +	exit_untested
> +fi
> +
> +./test-ring-buffer 10
> +
> +make clean
> +
> +cd $TRACING_DIR
> +

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ