Message-ID: <20160422074555.GB3448@twins.programming.kicks-ass.net>
Date:	Fri, 22 Apr 2016 09:45:55 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Cc:	Ingo Molnar <mingo@...hat.com>, linux-kernel@...r.kernel.org,
	vince@...ter.net, eranian@...gle.com,
	Arnaldo Carvalho de Melo <acme@...radead.org>,
	Mathieu Poirier <mathieu.poirier@...aro.org>
Subject: Re: [PATCH v1 4/5] perf: Introduce address range filtering

On Thu, Apr 21, 2016 at 06:17:02PM +0300, Alexander Shishkin wrote:
> +struct addr_filter_setup_data {
> +	struct perf_event	*event;
> +	unsigned long		*offs;
> +	unsigned long		gen;
> +};
> +
> +static int __perf_event_addr_filters_setup(void *info)
> +{
> +	struct addr_filter_setup_data *id = info;
> +	struct perf_event *event = id->event;
> +	struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
> +	unsigned long flags;
> +
> +	if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
> +		return -EAGAIN;
> +
> +	/* matches smp_wmb() in event_sched_in() */
> +	smp_rmb();
> +
> +	/*
> +	 * There is a window with interrupts enabled before we get here,
> +	 * so we need to check again lest we try to stop another cpu's event.
> +	 */
> +	if (READ_ONCE(event->oncpu) != smp_processor_id())
> +		return -EAGAIN;
> +
> +	raw_spin_lock_irqsave(&ifh->lock, flags);

Since we only ever use this from cpu_function_call(), IRQs are
guaranteed to be off already; you even rely on that in the above
->oncpu test. So the _irqsave() thing is entirely redundant, no?
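
That is, something like this should suffice here (untested sketch,
just to illustrate the simplification):

	/* we run from cpu_function_call(), IRQs are already disabled */
	raw_spin_lock(&ifh->lock);
	if (id->gen == event->addr_filters_gen || !id->offs)
		event->pmu->addr_filters_setup(event, id->offs,
					       PERF_EF_RELOAD);
	raw_spin_unlock(&ifh->lock);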

> +	/*
> +	 * In case of generations' mismatch, we don't have to do anything for
> +	 * this instance anymore; there will be another one with the *right* gen.
> +	 * If called to clear filters, always let it through.
> +	 */
> +	if (id->gen == event->addr_filters_gen || !id->offs)
> +		event->pmu->addr_filters_setup(event, id->offs, PERF_EF_RELOAD);
> +	raw_spin_unlock_irqrestore(&ifh->lock, flags);
> +
> +	return 0;
> +}
> +
> +static int perf_event_addr_filters_setup(struct perf_event *event,
> +					   unsigned long *offs,
> +					   unsigned long gen)
> +{
> +	struct addr_filter_setup_data id = {
> +		.event	= event,
> +		.offs	= offs,
> +		.gen	= gen,
> +	};
> +	struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
> +	unsigned long flags;
> +	int ret = 0;
> +
> +	/*
> +	 * We can't use event_function_call() here, because that would
> +	 * require ctx::mutex, but one of our callers is called with
> +	 * mm::mmap_sem down, which would cause an inversion, see bullet
> +	 * (2) in put_event().
> +	 */
> +	do {
> +		if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE) {
> +			raw_spin_lock_irqsave(&ifh->lock, flags);

And here, like in all the other sites, IRQs must be enabled, no?

> +			/* see __perf_event_addr_filters_setup */

How is this not racy? The event could have gotten enabled between that
test and getting here, right?

> +			if (gen == event->addr_filters_gen || !offs)
> +				event->pmu->addr_filters_setup(event, offs, 0);
> +			raw_spin_unlock_irqrestore(&ifh->lock, flags);
> +
> +			if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
> +				break;

I'm confused: calling ->addr_filters_setup() on an active event, from
the wrong cpu, is badness, no?

Is this something the callback has to handle?
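
Spelling out the window I mean (assuming I'm reading the loop right):

	ioctl path (this CPU)			target CPU
	---------------------			----------
	READ_ONCE(event->state) != ACTIVE
						event_sched_in(event)
						 -> event is now ACTIVE
	raw_spin_lock_irqsave(&ifh->lock, ..);
	event->pmu->addr_filters_setup(..);	/* ACTIVE event poked from
						   the wrong CPU */
	raw_spin_unlock_irqrestore(..);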

> +			/* otherwise, fall through to the cross-call */
> +		}
> +
> +		/* matches smp_wmb() in event_sched_in() */
> +		smp_rmb();
> +
> +		ret = cpu_function_call(READ_ONCE(event->oncpu),
> +					__perf_event_addr_filters_setup, &id);
> +	} while (ret == -EAGAIN);
> +
> +	return ret;
> +}

This whole thing seems rather full of tricky :/


> @@ -6398,6 +6629,7 @@ void perf_event_mmap(struct vm_area_struct *vma)
>  		/* .flags (attr_mmap2 only) */
>  	};
>  
> +	perf_addr_filters_adjust(vma);
>  	perf_event_mmap_event(&mmap_event);
>  }

And this is the 'offending' site that requires all the tricky..

> +/*
> + * Calculate event's address filters' ranges based on the
> + * task's existing mappings; if any of the existing mappings
> + * match the filters, update event's hw configuration and
> + * restart it if it's running.
> + */
> +static void perf_event_addr_filters_apply(struct perf_event *event)
> +{
> +	struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
> +	struct perf_addr_filter *filter;
> +	struct task_struct *task = READ_ONCE(event->ctx->task);
> +	struct mm_struct *mm = NULL;
> +	unsigned int restart = 0, count = 0;
> +	unsigned long *offs, flags, gen;
> +
> +	offs = event->hw.addr_filters_offs;
> +
> +	/*
> +	 * We may observe TASK_TOMBSTONE, which means that the event tear-down
> +	 * will stop on the parent's child_mutex that our caller is also holding
> +	 */
> +	if (task == TASK_TOMBSTONE)
> +		return;
> +
> +	mm = get_task_mm(event->ctx->task);
> +	if (!mm)
> +		return;

kernel threads may not have a filter?

> +
> +	/* establish the initial hw configuration for this set of filters */
> +	perf_event_addr_filters_setup(event, NULL, 0);
> +
> +	down_read(&mm->mmap_sem);
> +
> +	raw_spin_lock_irqsave(&ifh->lock, flags);
> +	list_for_each_entry(filter, &ifh->list, entry) {
> +		offs[count] = 0;
> +
> +		if (perf_addr_filter_needs_mmap(filter)) {
> +			offs[count] = perf_addr_filter_apply(filter, mm);
> +
> +			if (offs[count])
> +				restart++;
> +		}
> +
> +		count++;
> +	}
> +
> +	gen = ++event->addr_filters_gen;
> +	raw_spin_unlock_irqrestore(&ifh->lock, flags);
> +
> +	up_read(&mm->mmap_sem);
> +
> +	if (restart)
> +		perf_event_addr_filters_setup(event, offs, gen);
> +
> +	mmput(mm);
> +}

> +/*
> + * Address range filtering: limiting the data to certain
> + * instruction address ranges. Filters are ioctl()ed to us from
> + * userspace as ascii strings.
> + *
> + * Filter string format:
> + *
> + * ACTION SOURCE:RANGE_SPEC
> + * where ACTION is one of the
> + *  * "filter": limit the trace to this region
> + *  * "start": start tracing from this address
> + *  * "stop": stop tracing at this address/region;
> + * SOURCE is either "file" or "kernel"
> + * RANGE_SPEC is
> + *  * for "kernel": <start address>[/<size>]
> + *  * for "file":   <start address>[/<size>]@</path/to/object/file>
> + *
> + * if <size> is not specified, the range is treated as a single address.
> + */

Scary bit of string manipulation there.. have you tried fuzzing it? ;-)
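
FWIW, for anyone playing along at home: per the grammar above, and
assuming these strings come in through the existing
PERF_EVENT_IOC_SET_FILTER path, setting such a filter from userspace
would look roughly like this (fd, addresses and path all made up):

	#include <sys/ioctl.h>
	#include <linux/perf_event.h>

	/* fd: perf_event_open() fd of an AUX-tracing event, e.g. Intel PT */
	static int set_addr_filter(int fd)
	{
		/* limit the trace to 0x1000 bytes of this object's text */
		return ioctl(fd, PERF_EVENT_IOC_SET_FILTER,
			     "filter file:0x5000/0x1000@/usr/bin/example");
	}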
