linux-kernel - Re: [Patch 11/12] ftrace plugin for kernel symbol tracing using HWBreakpoint interfaces

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090512141944.GB6033@in.ibm.com>
Date:	Tue, 12 May 2009 19:49:44 +0530
From:	"K.Prasad" <prasad@...ux.vnet.ibm.com>
To:	Frederic Weisbecker <fweisbec@...il.com>
Cc:	Alan Stern <stern@...land.harvard.edu>,
	Steven Rostedt <rostedt@...dmis.org>,
	Ingo Molnar <mingo@...e.hu>,
	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Benjamin Herrenschmidt <benh@....ibm.com>,
	maneesh@...ux.vnet.ibm.com, Roland McGrath <roland@...hat.com>,
	Masami Hiramatsu <mhiramat@...hat.com>
Subject: Re: [Patch 11/12] ftrace plugin for kernel symbol tracing using
	HWBreakpoint interfaces - v4

On Tue, May 12, 2009 at 12:14:29AM +0200, Frederic Weisbecker wrote:
> On Mon, May 11, 2009 at 05:25:02PM +0530, K.Prasad wrote:
> > This patch adds an ftrace plugin to detect and profile memory access over kernel
> > variables. It uses HW Breakpoint interfaces to 'watch' memory addresses.
> > 
> > +void ksym_collect_stats(unsigned long hbp_hit_addr)
> > +{
> > +	struct hlist_node *node;
> > +	struct trace_ksym *entry;
> > +
> > +	rcu_read_lock();
> > +	hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
> > +		if ((entry->ksym_addr == hbp_hit_addr) &&
> > +		    (entry->counter <= MAX_UL_INT)) {
> > +			entry->counter++;
> > +			break;
> > +		}
> > +	}
> > +	rcu_read_unlock();
> 
> 
> 
> rcu looks a good idea to maintain your list.
>

True, and there weren't many choices either. The earlier implementations
with mutex/spin_lock turned out to be incorrect in their own way (mutexes
cannot be used inside exception handler context, while spinlocks led to a
potential circular dependency).
 
> > +static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
> > +{
> > +	struct trace_ksym *entry;
> > +	int ret;
> > +
> > +	if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
> > +		printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
> > +		" new requests for tracing can be accepted now.\n",
> > +			KSYM_TRACER_MAX);
> > +		return -ENOSPC;
> > +	}
> > +
> > +	entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
> > +	if (!entry)
> > +		return -ENOMEM;
> > +
> > +	entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
> > +	if (!entry->ksym_hbp) {
> > +		kfree(entry);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	entry->ksym_hbp->info.name = ksymname;
> > +	entry->ksym_hbp->info.type = op;
> > +	entry->ksym_addr = entry->ksym_hbp->info.address = addr;
> > +#ifdef CONFIG_X86
> > +	entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
> > +#endif
> > +	entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
> > +
> > +	ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
> > +	if (ret < 0) {
> > +		printk(KERN_INFO "ksym_tracer request failed. Try again"
> > +					" later!!\n");
> > +		kfree(entry->ksym_hbp);
> > +		kfree(entry);
> > +		return -EAGAIN;
> > +	}
> > +	hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
> 
> 
> And then ksym_tracer_mutex protect concurrent writers.
> 
> 

Yes, it synchronises read/write operations over the list pointed to by
ksym_filter_head.

> > +	ksym_filter_entry_count++;
> > +
> > +	return 0;
> > +}
> > +
> > +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
> > +						size_t count, loff_t *ppos)
> > +{
> > +	struct trace_ksym *entry;
> > +	struct hlist_node *node;
> > +	char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
> > +	ssize_t ret, cnt = 0;
> > +
> > +	mutex_lock(&ksym_tracer_mutex);
> > +
> > +	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> 
> 
> 
> And here you don't use the rcu version.
> I guess it's fine since you're protected by the writer lock...
> 

I couldn't use RCU here because simple_read_from_buffer() is
non-atomic, hence the mutex.

> > +		cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
> > +				entry->ksym_hbp->info.name);
> > +		if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
> > +			cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > +								"-w-\n");
> > +		else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
> > +			cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > +								"rw-\n");
> > +	}
> > +	ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
> > +	mutex_unlock(&ksym_tracer_mutex);
> > +
> > +	return ret;
> > +}
> > +
> > +static ssize_t ksym_trace_filter_write(struct file *file,
> > +					const char __user *buffer,
> > +						size_t count, loff_t *ppos)
> > +{
> > +	struct trace_ksym *entry;
> > +	struct hlist_node *node;
> > +	char *input_string, *ksymname = NULL;
> > +	unsigned long ksym_addr = 0;
> > +	int ret, op, changed = 0;
> > +
> > +	/* Ignore echo "" > ksym_trace_filter */
> > +	if (count == 0)
> > +		return 0;
> > +
> > +	input_string = kzalloc(count, GFP_KERNEL);
> > +	if (!input_string)
> > +		return -ENOMEM;
> > +
> > +	if (copy_from_user(input_string, buffer, count)) {
> > +		kfree(input_string);
> > +		return -EFAULT;
> > +	}
> > +
> > +	ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
> > +	if (ret < 0) {
> > +		kfree(input_string);
> > +		return ret;
> > +	}
> > +
> > +	mutex_lock(&ksym_tracer_mutex);
> > +
> > +	ret = -EINVAL;
> > +	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> 
> 
> 
> Same here, ok...
> 
> 
> > +static int ksym_trace_init(struct trace_array *tr)
> > +{
> > +	int cpu, ret = 0;
> > +
> > +	for_each_online_cpu(cpu)
> > +		tracing_reset(tr, cpu);
> > +
> > +	ksym_tracing_enabled = 1;
> > +	ksym_trace_array = tr;
> > +
> > +#ifdef CONFIG_FTRACE_SELFTEST
> > +	/* Check if we are re-entering self-test code during initialisation */
> > +	if (ksym_selftest_dummy)
> > +		goto ret_path;
> > +
> > +	ksym_selftest_dummy = 0;
> > +
> > +	/* Register the read-write tracing request */
> > +	ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
> > +					(unsigned long)(&ksym_selftest_dummy));
> > +
> > +	if (ret < 0) {
> > +		printk(KERN_CONT "ksym_trace read-write startup test failed\n");
> > +		goto ret_path;
> > +	}
> > +	/* Perform a read and a write operation over the dummy variable to
> > +	 * trigger the tracer
> > +	 */
> > +	if (ksym_selftest_dummy == 0)
> > +		ksym_selftest_dummy++;
> > +ret_path:
> > +#endif /* CONFIG_FTRACE_SELFTEST */
> 
> 
> It means that each time your tracer is selected, it will perform a selftest.
> I think we only need this selftest once during the boot.
> I would rather see that in the real selftest callback (trace_selftest_startup_kysm).
>

> > +   if (ksym_selftest_dummy)
> > +           goto ret_path;

The above check will help prevent a re-run of the test every time init is
executed.

A part of the selftest was kept in trace_ksym.c (and hence in
ksym_trace_init()) in order to use functions local to this file, such as
process_new_ksym_entry().
 
> > +__init static int init_ksym_trace(void)
> > +{
> > +	struct dentry *d_tracer;
> > +	struct dentry *entry;
> > +
> > +	d_tracer = tracing_init_dentry();
> > +	ksym_filter_entry_count = 0;
> > +
> > +	entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
> 
> 
> 
> Still writeable for everyone?
> 
> Thanks,
> Frederic.
>

Looks like I missed the change! The next patchset will contain
a '644' permission mode.

Thanks for reviewing the code.

-- K.Prasad

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/