lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20191112113102.0ffc883c@gandalf.local.home>
Date:   Tue, 12 Nov 2019 11:31:02 -0500
From:   Steven Rostedt <rostedt@...dmis.org>
To:     Cong Wang <xiyou.wangcong@...il.com>
Cc:     linux-kernel@...r.kernel.org, Ingo Molnar <mingo@...hat.com>
Subject: Re: [PATCH v3] tracing: Introduce trace event injection


Again, sorry for the late review. Things have just been crazy :-/

On Wed,  4 Sep 2019 14:14:56 -0700
Cong Wang <xiyou.wangcong@...il.com> wrote:

> We have been trying to use rasdaemon to monitor hardware errors like
> correctable memory errors. rasdaemon uses trace events to monitor
> various hardware errors. In order to test it, we have to inject some
> hardware errors, unfortunately not all of them provide error
> injections. MCE does provide a way to inject MCE errors, but errors
> like PCI error and devlink error don't, it is not easy to add error
> injection to each of them. Instead, it is relatively easier to just
> allow users to inject trace events in a generic way so that all trace
> events can be injected.
> 
> This patch introduces trace event injection, where a new 'inject' is
> added to each tracepoint directory. Users could write into this file
> with key=value pairs to specify the value of each fields of the trace
> event, all unspecified fields are set to zero values by default.
> 
> For example, for the net/net_dev_queue tracepoint, we can inject:
> 
>   INJECT=/sys/kernel/debug/tracing/events/net/net_dev_queue/inject
>   echo "" > $INJECT
>   echo "name='test'" > $INJECT
>   echo "name='test' len=1024" > $INJECT
>   cat /sys/kernel/debug/tracing/trace
>   ...
>    <...>-614   [000] ....    36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
>    <...>-614   [001] ....   136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
>    <...>-614   [001] .N..   208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
> 
> Triggers could be triggered as usual too:
> 
>   echo "stacktrace if len == 1025" > /sys/kernel/debug/tracing/events/net/net_dev_queue/trigger
>   echo "len=1025" > $INJECT
>   cat /sys/kernel/debug/tracing/trace
>   ...
>       bash-614   [000] ....    36.571483: net_dev_queue: dev= skbaddr=00000000fbf338c2 len=0
>       bash-614   [001] ....   136.588252: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=0
>       bash-614   [001] .N..   208.431878: net_dev_queue: dev=test skbaddr=00000000fbf338c2 len=1024
>       bash-614   [001] .N.1   284.236349: <stack trace>
>  => event_inject_write
>  => vfs_write
>  => ksys_write
>  => do_syscall_64
>  => entry_SYSCALL_64_after_hwframe  
> 
> The only thing that can't be injected is string pointers as they
> require constant string pointers, this can't be done at run time.
> 
> Cc: Steven Rostedt <rostedt@...dmis.org>
> Cc: Ingo Molnar <mingo@...hat.com>
> Signed-off-by: Cong Wang <xiyou.wangcong@...il.com>
> ---
> v3: introduce a Kconfig
> v2: address Steven's review comments
> 
>  kernel/trace/Kconfig               |  10 +
>  kernel/trace/Makefile              |   1 +
>  kernel/trace/trace.h               |   1 +
>  kernel/trace/trace_events.c        |   6 +
>  kernel/trace/trace_events_inject.c | 328 +++++++++++++++++++++++++++++
>  5 files changed, 346 insertions(+)
>  create mode 100644 kernel/trace/trace_events_inject.c
> 
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 98da8998c25c..1c86fbf7e6d0 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -663,6 +663,16 @@ config HIST_TRIGGERS
>  	  See Documentation/trace/histogram.rst.
>  	  If in doubt, say N.
>  
> +config TRACE_EVENT_INJECT
> +	bool "Trace event injection"
> +	depends on TRACING
> +	default n

No need for the above "default n", that's the default.

> +	help
> +	  Allow user-space to inject a specific trace event into the ring
> +	  buffer. This is mainly used for testing purpose.
> +
> +	  If unsure, say N.
> +
>  config MMIOTRACE_TEST
>  	tristate "Test module for mmiotrace"
>  	depends on MMIOTRACE && m
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index c2b2148bb1d2..0e63db62225f 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -69,6 +69,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
>  endif
>  obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
>  obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
> +obj-$(CONFIG_TRACE_EVENT_INJECT) += trace_events_inject.o
>  obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
>  obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
>  obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 005f08629b8b..23741d9b59f1 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -1582,6 +1582,7 @@ extern struct list_head ftrace_events;
>  
>  extern const struct file_operations event_trigger_fops;
>  extern const struct file_operations event_hist_fops;
> +extern const struct file_operations event_inject_fops;
>  
>  #ifdef CONFIG_HIST_TRIGGERS
>  extern int register_trigger_hist_cmd(void);
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index 648930823b57..30a0c42fb575 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -2018,6 +2018,12 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
>  	trace_create_file("format", 0444, file->dir, call,
>  			  &ftrace_event_format_fops);
>  
> +#ifdef CONFIG_TRACE_EVENT_INJECT
> +	if (call->event.type && call->class->reg)
> +		trace_create_file("inject", 0200, file->dir, file,
> +				  &event_inject_fops);
> +#endif
> +
>  	return 0;
>  }
>  
> diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c
> new file mode 100644
> index 000000000000..6347029a1fa1
> --- /dev/null
> +++ b/kernel/trace/trace_events_inject.c
> @@ -0,0 +1,328 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * trace_events_inject - trace event injection
> + *
> + * Copyright (C) 2019 Cong Wang <cwang@...tter.com>
> + */
> +
> +#include <linux/module.h>
> +#include <linux/ctype.h>
> +#include <linux/mutex.h>
> +#include <linux/slab.h>
> +#include <linux/rculist.h>
> +
> +#include "trace.h"
> +
> +static int
> +trace_inject_entry(struct trace_event_file *file, void *rec, int len)
> +{
> +	struct trace_event_buffer fbuffer;
> +	struct ring_buffer *buffer;
> +	int written = 0;
> +	void *entry;
> +
> +	buffer = file->tr->trace_buffer.buffer;
> +	entry = trace_event_buffer_reserve(&fbuffer, file, len);
> +	if (entry) {
> +		memcpy(entry, rec, len);
> +		written = len;
> +		trace_event_buffer_commit(&fbuffer);
> +	}
> +
> +	return written;
> +}
> +
> +static int
> +parse_field(char *str, struct trace_event_call *call,
> +	    struct ftrace_event_field **pf, u64 *pv)
> +{
> +	struct ftrace_event_field *field;
> +	char *field_name;
> +	int s, i = 0;
> +	char *p;
> +	int len;
> +	u64 val;
> +
> +	if (!str[i])
> +		return 0;
> +	/* First find the field to associate to */
> +	while (isspace(str[i]))
> +		i++;
> +	s = i;
> +	while (isalnum(str[i]) || str[i] == '_')
> +		i++;
> +	len = i - s;
> +	if (!len)
> +		return -EINVAL;
> +
> +	field_name = kmemdup_nul(str + s, len, GFP_KERNEL);
> +	if (!field_name)
> +		return -ENOMEM;
> +	field = trace_find_event_field(call, field_name);
> +	kfree(field_name);
> +	if (!field)
> +		return -ENOENT;
> +
> +	*pf = field;
> +	p = strchr(str + i, '=');

Hmm, the above here will parse out:

	"field some garbarge = one"

It will skip the some garbage, and jump straight to 1.

> +	if (!p)
> +		return -EINVAL;
> +	i = p + 1 - str;
> +	while (isspace(str[i]))
> +		i++;
> +	s = i;
> +	if (isdigit(str[i]) || str[i] == '-') {
> +		char *num, c;
> +		int ret;
> +
> +		/* Make sure the field is not a string */
> +		if (is_string_field(field))
> +			return -EINVAL;
> +
> +		if (str[i] == '-')
> +			i++;
> +
> +		/* We allow 0xDEADBEEF */
> +		while (isalnum(str[i]))
> +			i++;
> +		num = str + s;
> +		c = str[i];

Probably should verify that c is whitespace or '\0'.

> +		str[i] = '\0';
> +		/* Make sure it is a value */
> +		if (field->is_signed)
> +			ret = kstrtoll(num, 0, &val);
> +		else
> +			ret = kstrtoull(num, 0, &val);
> +		str[i] = c;
> +		if (ret)
> +			return ret;
> +
> +		*pv = val;
> +		return i;
> +	} else if (str[i] == '\'' || str[i] == '"') {
> +		char q = str[i];
> +
> +		/* Make sure the field is OK for strings */
> +		if (!is_string_field(field))
> +			return -EINVAL;
> +
> +		for (i++; str[i]; i++) {
> +			if (str[i] == '\\' && str[i + 1]) {
> +				i++;
> +				continue;
> +			}
> +			if (str[i] == q)
> +				break;
> +		}
> +		if (!str[i])
> +			return -EINVAL;
> +
> +		/* Skip quotes */
> +		s++;
> +		len = i - s;
> +		if (len >= MAX_FILTER_STR_VAL)
> +			return -EINVAL;
> +
> +		*pv = (unsigned long)(str + s);
> +		str[i] = 0;
> +		/* go past the last quote */
> +		i++;
> +		return i;
> +	}
> +
> +	return -EINVAL;
> +}
> +
> +static int trace_get_entry_size(struct trace_event_call *call)
> +{
> +	struct ftrace_event_field *field;
> +	struct list_head *head;
> +	int size = 0;
> +
> +	head = trace_get_fields(call);
> +	list_for_each_entry(field, head, link) {
> +		if (field->size + field->offset > size)
> +			size = field->size + field->offset;
> +	}
> +
> +	return size;
> +}
> +
> +static void *trace_alloc_entry(struct trace_event_call *call, int *size)
> +{
> +	int entry_size = trace_get_entry_size(call);
> +	struct ftrace_event_field *field;
> +	struct list_head *head;
> +	void *entry = NULL;
> +
> +	/* We need an extra '\0' at the end. */
> +	entry = kzalloc(entry_size + 1, GFP_KERNEL);
> +	if (!entry)
> +		return NULL;
> +
> +	head = trace_get_fields(call);
> +	list_for_each_entry(field, head, link) {
> +		if (!is_string_field(field))
> +			continue;
> +		if (field->filter_type == FILTER_STATIC_STRING)
> +			continue;
> +		if (field->filter_type == FILTER_DYN_STRING) {
> +			u32 *str_item;
> +			int str_len = 0;
> +			int str_loc = entry_size & 0xffff;
> +
> +			str_item = (u32 *)(entry + field->offset);
> +			*str_item = (str_len << 16) | str_loc;

str_len is always zero here, right? Might as well just make this

 *str_item = str_loc;


> +		} else {
> +			char **paddr;
> +
> +			paddr = (char **)(entry + field->offset);
> +			*paddr = "";

Hmm, I know this is a default pointer to a sting, but perhaps we should
make it point to NULL, and not a string defined by this function?

> +		}
> +	}
> +
> +	*size = entry_size + 1;
> +	return entry;
> +}
> +
> +#define INJECT_STRING "STATIC STRING CAN NOT BE INJECTED"
> +
> +/* Caller is responsible to free the *pentry. */
> +static int parse_entry(char *str, struct trace_event_call *call, void **pentry)
> +{
> +	struct ftrace_event_field *field;
> +	unsigned long irq_flags;
> +	void *entry = NULL;
> +	int entry_size;
> +	u64 val;
> +	int len;
> +
> +	entry = trace_alloc_entry(call, &entry_size);
> +	*pentry = entry;
> +	if (!entry)
> +		return -ENOMEM;
> +
> +	local_save_flags(irq_flags);
> +	tracing_generic_entry_update(entry, call->event.type, irq_flags,
> +				     preempt_count());
> +
> +	while ((len = parse_field(str, call, &field, &val)) > 0) {
> +		if (is_function_field(field))

Why not allow injecting function fields?

> +			return -EINVAL;
> +
> +		if (is_string_field(field)) {
> +			char *addr = (char *)(unsigned long) val;
> +
> +			if (field->filter_type == FILTER_STATIC_STRING) {
> +				strlcpy(entry + field->offset, addr, field->size);
> +			} else if (field->filter_type == FILTER_DYN_STRING) {
> +				int str_len = strlen(addr) + 1;
> +				int str_loc = entry_size & 0xffff;
> +				u32 *str_item;
> +
> +				entry_size += str_len;
> +				*pentry = krealloc(entry, entry_size, GFP_KERNEL);
> +				entry = *pentry;
> +				if (!entry)
> +					return -ENOMEM;

The above leaks memory, as if *pentry is NULL then the old entry is not
freed. The above needs to be:

				*pentry = krealloc(entry, entry_size, GFP_KERNEL);
				if (!*pentry) {
					kfree(entry);
					return -ENOMEM;
				}
				entry = *pentry;

> +
> +				strlcpy(entry + (entry_size - str_len), addr, str_len);
> +				str_item = (u32 *)(entry + field->offset);
> +				*str_item = (str_len << 16) | str_loc;
> +			} else {
> +				char **paddr;
> +
> +				/* TODO: can we find the constant string? */

				probably not ;-)


-- Steve

> +				paddr = (char **)(entry + field->offset);
> +				*paddr = INJECT_STRING;
> +			}
> +		} else {
> +			switch (field->size) {
> +			case 1: {
> +				u8 tmp = (u8) val;
> +
> +				memcpy(entry + field->offset, &tmp, 1);
> +				break;
> +			}
> +			case 2: {
> +				u16 tmp = (u16) val;
> +
> +				memcpy(entry + field->offset, &tmp, 2);
> +				break;
> +			}
> +			case 4: {
> +				u32 tmp = (u32) val;
> +
> +				memcpy(entry + field->offset, &tmp, 4);
> +				break;
> +			}
> +			case 8:
> +				memcpy(entry + field->offset, &val, 8);
> +				break;
> +			default:
> +				return -EINVAL;
> +			}
> +		}
> +
> +		str += len;
> +	}
> +
> +	if (len < 0)
> +		return len;
> +
> +	return entry_size;
> +}
> +
> +static ssize_t
> +event_inject_write(struct file *filp, const char __user *ubuf, size_t cnt,
> +		   loff_t *ppos)
> +{
> +	struct trace_event_call *call;
> +	struct trace_event_file *file;
> +	int err = -ENODEV, size;
> +	void *entry = NULL;
> +	char *buf;
> +
> +	if (cnt >= PAGE_SIZE)
> +		return -EINVAL;
> +
> +	buf = memdup_user_nul(ubuf, cnt);
> +	if (IS_ERR(buf))
> +		return PTR_ERR(buf);
> +	strim(buf);
> +
> +	mutex_lock(&event_mutex);
> +	file = event_file_data(filp);
> +	if (file) {
> +		call = file->event_call;
> +		size = parse_entry(buf, call, &entry);
> +		if (size < 0)
> +			err = size;
> +		else
> +			err = trace_inject_entry(file, entry, size);
> +	}
> +	mutex_unlock(&event_mutex);
> +
> +	kfree(entry);
> +	kfree(buf);
> +
> +	if (err < 0)
> +		return err;
> +
> +	*ppos += err;
> +	return cnt;
> +}
> +
> +static ssize_t
> +event_inject_read(struct file *file, char __user *buf, size_t size,
> +		  loff_t *ppos)
> +{
> +	return -EPERM;
> +}
> +
> +const struct file_operations event_inject_fops = {
> +	.open = tracing_open_generic,
> +	.read = event_inject_read,
> +	.write = event_inject_write,
> +};
> +

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ