Message-Id: <1461251823-12416-5-git-send-email-alexander.shishkin@linux.intel.com>
Date: Thu, 21 Apr 2016 18:17:02 +0300
From: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
To: Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc: Ingo Molnar <mingo@...hat.com>, linux-kernel@...r.kernel.org,
vince@...ter.net, eranian@...gle.com,
Arnaldo Carvalho de Melo <acme@...radead.org>,
Mathieu Poirier <mathieu.poirier@...aro.org>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Subject: [PATCH v1 4/5] perf: Introduce address range filtering
Many instruction tracing PMUs support address range-based filtering:
generating trace data only for a given range of instruction
addresses, which is useful for tracing individual functions, modules
or libraries. Other PMUs may also use this functionality to include
or exclude code at certain address ranges.
This patch introduces the interface for userspace to specify these
filters and for PMU drivers to apply them to the hardware
configuration.
The user interface is an ASCII string that is passed via an ioctl()
and specifies (in a way similar to uprobe) address ranges within
certain object files or within the kernel. There is no special
treatment for kernel modules yet, but it might be a worthy pursuit.
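For illustration, here is a minimal sketch of the userspace side
(event_fd is assumed to be the file descriptor of an event on a PMU
that implements these callbacks; the path and numbers are made up):

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/perf_event.h>

	static int set_addr_filter(int event_fd)
	{
		/* trace 512 bytes at offset 4096 into libfoo's code */
		const char *filter = "filter file:4096/512@/lib/libfoo.so";

		if (ioctl(event_fd, PERF_EVENT_IOC_SET_FILTER, filter) < 0) {
			perror("PERF_EVENT_IOC_SET_FILTER");
			return -1;
		}

		return 0;
	}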
The PMU driver interface adds two callbacks to the pmu structure:
one validates the filter configuration proposed by the user against
what the hardware is actually capable of doing, and the other
translates the filters into something that pmu::start() can program
into the hardware.
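To illustrate the driver side, here is a minimal sketch of the two
callbacks for an imaginary "foo" PMU; the foo_hw_filter layout and
the four-comparator limit are made-up assumptions:

	struct foo_hw_filter {
		unsigned long base, limit;
	};

	static int foo_addr_filters_validate(struct list_head *filters)
	{
		struct perf_addr_filter *filter;
		int nr = 0;

		list_for_each_entry(filter, filters, entry) {
			/* this hardware only does ranges, not single addresses */
			if (!filter->range)
				return -EOPNOTSUPP;

			/* ...and only has 4 address range comparator pairs */
			if (++nr > 4)
				return -EOPNOTSUPP;
		}

		return 0;
	}

	static void foo_addr_filters_setup(struct perf_event *event,
					   unsigned long *offs, int flags)
	{
		struct foo_hw_filter *hwf = event->hw.addr_filters;
		struct perf_addr_filters_head *fh = perf_event_addr_filters(event);
		struct perf_addr_filter *filter;
		unsigned int i = 0;

		list_for_each_entry(filter, &fh->list, entry) {
			unsigned long base = filter->offset;

			if (!filter->kernel) {
				if (!offs)		/* clear file-based filters */
					base = 0;
				else if (offs[i])	/* new object load address */
					base = offs[i] + filter->offset;
				else			/* 0: keep previous config */
					base = hwf[i].base;
			}

			hwf[i].base = base;
			hwf[i].limit = base ? base + filter->size : 0;
			i++;
		}

		/*
		 * Program hwf[] into the comparator registers here;
		 * PERF_EF_RELOAD in @flags means the event must be
		 * restarted for the new configuration to take effect.
		 */
	}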
Signed-off-by: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
---
include/linux/perf_event.h | 95 +++++++
kernel/events/core.c | 622 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 705 insertions(+), 12 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b717902c99..f37c56e3fd 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -151,6 +151,18 @@ struct hw_perf_event {
*/
struct task_struct *target;
+ /*
+ * The PMU stores its hardware address filter
+ * configuration here.
+ */
+ void *addr_filters;
+
+ /*
+ * Array of vma offsets for file-based filters,
+ * allocated by the pmu.
+ */
+ unsigned long *addr_filters_offs;
+
/*
* hw_perf_event::state flags; used to track the PERF_EF_* state.
*/
@@ -393,12 +405,72 @@ struct pmu {
void (*free_aux) (void *aux); /* optional */
/*
+ * Validate address range filters: make sure hw supports the
+ * requested configuration and number of filters; return 0 if the
+ * supplied filters are valid, -errno otherwise.
+ */
+ int (*addr_filters_validate) (struct list_head *filters);
+
+ /*
+ * Configure address range filters:
+ * translate hw-agnostic filter into hardware configuration in
+ * event::hw::addr_filters.
+ * @offs: array of vma load addresses for file-based filters or
+ * NULL: clear previously known file-based filters;
+ * ignore for kernel-based filters;
+ * @flags: PERF_EF_RELOAD means the event must be restarted with
+ * the new configuration; it will be set if the event is ACTIVE.
+ * The @offs array should be one element per filter on the list or
+ * NULL. Special case @offs[x]==0 means "keep previously known
+ * configuration for this filter". See __perf_addr_filters_adjust().
+ */
+ void (*addr_filters_setup) (struct perf_event *event,
+ unsigned long *offs, int flags); /* optional */
+
+ /*
* Filter events for PMU-specific reasons.
*/
int (*filter_match) (struct perf_event *event); /* optional */
};
/**
+ * struct perf_addr_filter - address range filter definition
+ * @entry: event's filter list linkage
+ * @inode: object file's inode for file-based filters
+ * @offset: filter range offset
+ * @size: filter range size
+ * @range: 1: range, 0: address
+ * @filter: 1: filter/start, 0: stop
+ * @kernel: 1: kernel, 0: file-based
+ *
+ * This is a hardware-agnostic filter configuration as specified by the user.
+ */
+struct perf_addr_filter {
+ struct list_head entry;
+ struct inode *inode;
+ unsigned long offset;
+ unsigned long size;
+ unsigned int range : 1,
+ filter : 1,
+ kernel : 1;
+};
+
+/**
+ * struct perf_addr_filters_head - container for address range filters
+ * @list: list of filters for this event
+ * @lock: spinlock that serializes accesses to the @list and event's
+ * (and its children's) filter generations.
+ *
+ * A child event will use parent's @list (and therefore @lock), so they are
+ * bundled together; see perf_event_addr_filters().
+ */
+struct perf_addr_filters_head {
+ struct list_head list;
+ raw_spinlock_t lock;
+};
+
+/**
* enum perf_event_active_state - the states of an event
*/
enum perf_event_active_state {
@@ -571,6 +643,10 @@ struct perf_event {
atomic_t event_limit;
+ /* address range filters */
+ struct perf_addr_filters_head addr_filters;
+ unsigned long addr_filters_gen;
+
void (*destroy)(struct perf_event *);
struct rcu_head rcu_head;
@@ -685,6 +761,20 @@ struct perf_output_handle {
int page;
};
+/*
+ * An inherited event uses parent's filters
+ */
+static inline struct perf_addr_filters_head *
+perf_event_addr_filters(struct perf_event *event)
+{
+ struct perf_addr_filters_head *ifh = &event->addr_filters;
+
+ if (event->parent)
+ ifh = &event->parent->addr_filters;
+
+ return ifh;
+}
+
#ifdef CONFIG_CGROUP_PERF
/*
@@ -1063,6 +1153,11 @@ static inline bool is_write_backward(struct perf_event *event)
return !!event->attr.write_backward;
}
+static inline bool has_addr_filter(struct perf_event *event)
+{
+ return event->pmu->addr_filters_setup;
+}
+
extern int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size);
extern int perf_output_begin_forward(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9260a8a073..495c801e9b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -44,6 +44,8 @@
#include <linux/compat.h>
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/namei.h>
+#include <linux/parser.h>
#include "internal.h"
@@ -2387,6 +2389,102 @@ static int __perf_event_stop(void *info)
return 0;
}
+/*
+ * (Re-)programming address filter configuration into the hardware is a racy
+ * process: there is always a window between computing offset addresses and
+ * running the pmu callback on the target cpu.
+ *
+ * perf_event_addr_filters_setup() is called to update hw event configuration
+ * with the new mmap addresses. This is done in the following cases:
+ * (1) perf_addr_filters_apply(): adjusting filters' offsets based on
+ * pre-existing mappings, called once when new filters arrive via SET_FILTER
+ * ioctl;
+ * (2) perf_addr_filters_adjust(): adjusting filters' offsets based on newly
+ * registered mapping, called for every new mmap(), with mm::mmap_sem down
+ * for reading;
+ * (3) perf_itrace_exec(): clearing filters' offsets in the process of exec.
+ */
+struct addr_filter_setup_data {
+ struct perf_event *event;
+ unsigned long *offs;
+ unsigned long gen;
+};
+
+static int __perf_event_addr_filters_setup(void *info)
+{
+ struct addr_filter_setup_data *id = info;
+ struct perf_event *event = id->event;
+ struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+ unsigned long flags;
+
+ if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
+ return -EAGAIN;
+
+ /* matches smp_wmb() in event_sched_in() */
+ smp_rmb();
+
+ /*
+ * There is a window with interrupts enabled before we get here,
+ * so we need to check again lest we try to stop another cpu's event.
+ */
+ if (READ_ONCE(event->oncpu) != smp_processor_id())
+ return -EAGAIN;
+
+ raw_spin_lock_irqsave(&ifh->lock, flags);
+ /*
+ * In case of a generation mismatch, we don't have to do anything for
+ * this instance anymore; there will be another one with the *right*
+ * gen. If called to clear filters, always let it through.
+ */
+ if (id->gen == event->addr_filters_gen || !id->offs)
+ event->pmu->addr_filters_setup(event, id->offs, PERF_EF_RELOAD);
+ raw_spin_unlock_irqrestore(&ifh->lock, flags);
+
+ return 0;
+}
+
+static int perf_event_addr_filters_setup(struct perf_event *event,
+ unsigned long *offs,
+ unsigned long gen)
+{
+ struct addr_filter_setup_data id = {
+ .event = event,
+ .offs = offs,
+ .gen = gen,
+ };
+ struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+ unsigned long flags;
+ int ret = 0;
+
+ /*
+ * We can't use event_function_call() here, because that would
+ * require ctx::mutex, but one of our callers is called with
+ * mm::mmap_sem down, which would cause an inversion, see bullet
+ * (2) in put_event().
+ */
+ do {
+ if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE) {
+ raw_spin_lock_irqsave(&ifh->lock, flags);
+ /* see __perf_event_addr_filters_setup */
+ if (gen == event->addr_filters_gen || !offs)
+ event->pmu->addr_filters_setup(event, offs, 0);
+ raw_spin_unlock_irqrestore(&ifh->lock, flags);
+
+ if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE)
+ break;
+ /* otherwise, fall through to the cross-call */
+ }
+
+ /* matches smp_wmb() in event_sched_in() */
+ smp_rmb();
+
+ ret = cpu_function_call(READ_ONCE(event->oncpu),
+ __perf_event_addr_filters_setup, &id);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
static int _perf_event_refresh(struct perf_event *event, int refresh)
{
/*
@@ -3236,16 +3334,6 @@ out:
put_ctx(clone_ctx);
}
-void perf_event_exec(void)
-{
- int ctxn;
-
- rcu_read_lock();
- for_each_task_context_nr(ctxn)
- perf_event_enable_on_exec(ctxn);
- rcu_read_unlock();
-}
-
struct perf_read_data {
struct perf_event *event;
bool group;
@@ -3779,6 +3867,9 @@ static bool exclusive_event_installable(struct perf_event *event,
return true;
}
+static void perf_addr_filters_splice(struct perf_event *event,
+ struct list_head *head);
+
static void _free_event(struct perf_event *event)
{
irq_work_sync(&event->pending);
@@ -3806,6 +3897,7 @@ static void _free_event(struct perf_event *event)
}
perf_event_free_bpf_prog(event);
+ perf_addr_filters_splice(event, NULL);
if (event->destroy)
event->destroy(event);
@@ -5884,6 +5976,52 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
rcu_read_unlock();
}
+/*
+ * Clear all file-based filters at exec; they'll have to be
+ * reinstated when/if these objects are mmapped again.
+ */
+static void perf_itrace_exec(struct perf_event *event, void *data)
+{
+ struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+ struct perf_addr_filter *filter;
+ unsigned int restart = 0;
+ unsigned long flags;
+
+ if (!has_addr_filter(event))
+ return;
+
+ raw_spin_lock_irqsave(&ifh->lock, flags);
+ list_for_each_entry(filter, &ifh->list, entry) {
+ if (filter->kernel)
+ continue;
+
+ restart++;
+ }
+
+ raw_spin_unlock_irqrestore(&ifh->lock, flags);
+
+ if (restart)
+ perf_event_addr_filters_setup(event, NULL, 0);
+}
+
+void perf_event_exec(void)
+{
+ struct perf_event_context *ctx;
+ int ctxn;
+
+ rcu_read_lock();
+ for_each_task_context_nr(ctxn) {
+ ctx = current->perf_event_ctxp[ctxn];
+ if (!ctx)
+ continue;
+
+ perf_event_enable_on_exec(ctxn);
+
+ perf_event_aux_ctx(ctx, perf_itrace_exec, NULL, true);
+ }
+ rcu_read_unlock();
+}
+
struct remote_output {
struct ring_buffer *rb;
int err;
@@ -6367,6 +6505,99 @@ got_name:
kfree(buf);
}
+/*
+ * Whether this @filter depends on a dynamic object which is not yet
+ * loaded, or whose load addresses are not yet known.
+ */
+static bool perf_addr_filter_needs_mmap(struct perf_addr_filter *filter)
+{
+ return filter->filter && !filter->kernel;
+}
+
+/*
+ * Check whether inode and address range match filter criteria.
+ */
+static bool perf_addr_filter_match(struct perf_addr_filter *filter,
+ struct file *file, unsigned long offset,
+ unsigned long size)
+{
+ if (filter->kernel)
+ return false;
+
+ if (filter->inode != file->f_inode)
+ return false;
+
+ if (filter->offset > offset + size)
+ return false;
+
+ if (filter->offset + filter->size < offset)
+ return false;
+
+ return true;
+}
+
+static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
+{
+ struct vm_area_struct *vma = data;
+ unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
+ struct file *file = vma->vm_file;
+ struct perf_addr_filter *filter;
+ struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+ unsigned long *offs, flags, gen;
+ unsigned int restart = 0, count = 0;
+
+ if (!has_addr_filter(event))
+ return;
+
+ if (!file)
+ return;
+
+ offs = event->hw.addr_filters_offs;
+
+ raw_spin_lock_irqsave(&ifh->lock, flags);
+ list_for_each_entry(filter, &ifh->list, entry) {
+ /*
+ * By default, keep this filter in the previously known state:
+ * otherwise, for the filters that aren't impacted by this vma,
+ * we'll have to re-scan the whole address space.
+ */
+ offs[count] = 0;
+
+ if (perf_addr_filter_match(filter, file, off,
+ vma->vm_end - vma->vm_start)) {
+ offs[count] = vma->vm_start;
+ restart++;
+ }
+
+ count++;
+ }
+
+ gen = ++event->addr_filters_gen;
+ raw_spin_unlock_irqrestore(&ifh->lock, flags);
+
+ if (restart)
+ perf_event_addr_filters_setup(event, offs, gen);
+}
+
+/*
+ * Adjust all of the task's events' filters to the new vma
+ */
+static void perf_addr_filters_adjust(struct vm_area_struct *vma)
+{
+ struct perf_event_context *ctx;
+ int ctxn;
+
+ rcu_read_lock();
+ for_each_task_context_nr(ctxn) {
+ ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
+ if (!ctx)
+ continue;
+
+ perf_event_aux_ctx(ctx, __perf_addr_filters_adjust, vma, true);
+ }
+ rcu_read_unlock();
+}
+
void perf_event_mmap(struct vm_area_struct *vma)
{
struct perf_mmap_event mmap_event;
@@ -6398,6 +6629,7 @@ void perf_event_mmap(struct vm_area_struct *vma)
/* .flags (attr_mmap2 only) */
};
+ perf_addr_filters_adjust(vma);
perf_event_mmap_event(&mmap_event);
}
@@ -7357,13 +7589,375 @@ void perf_bp_event(struct perf_event *bp, void *data)
}
#endif
+/*
+ * Allocate a new address filter
+ */
+static struct perf_addr_filter *
+perf_addr_filter_new(struct perf_event *event, struct list_head *filters)
+{
+ int node = cpu_to_node(event->cpu == -1 ? 0 : event->cpu);
+ struct perf_addr_filter *filter;
+
+ filter = kzalloc_node(sizeof(*filter), GFP_KERNEL, node);
+ if (!filter)
+ return NULL;
+
+ INIT_LIST_HEAD(&filter->entry);
+ list_add_tail(&filter->entry, filters);
+
+ return filter;
+}
+
+static void free_filters_list(struct list_head *filters)
+{
+ struct perf_addr_filter *filter, *iter;
+
+ list_for_each_entry_safe(filter, iter, filters, entry) {
+ if (filter->inode)
+ iput(filter->inode);
+ list_del(&filter->entry);
+ kfree(filter);
+ }
+}
+
+/*
+ * Free existing address filters and optionally install new ones
+ */
+static void perf_addr_filters_splice(struct perf_event *event,
+ struct list_head *head)
+{
+ unsigned long flags;
+ LIST_HEAD(list);
+
+ if (!has_addr_filter(event))
+ return;
+
+ /* don't bother with children, they don't have their own filters */
+ if (event->parent)
+ return;
+
+ raw_spin_lock_irqsave(&event->addr_filters.lock, flags);
+
+ list_splice_init(&event->addr_filters.list, &list);
+ if (head)
+ list_splice(head, &event->addr_filters.list);
+
+ raw_spin_unlock_irqrestore(&event->addr_filters.lock, flags);
+
+ free_filters_list(&list);
+}
+
+/*
+ * Scan through mm's vmas and see if one of them matches the
+ * @filter; if so, return the matching vma's load address so that
+ * the filter's address range can be adjusted.
+ * Called with mm::mmap_sem down for reading.
+ */
+static unsigned long perf_addr_filter_apply(struct perf_addr_filter *filter,
+ struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ struct file *file = vma->vm_file;
+ unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long vma_size = vma->vm_end - vma->vm_start;
+
+ if (!file)
+ continue;
+
+ if (!perf_addr_filter_match(filter, file, off,
+ vma_size))
+ continue;
+
+ return vma->vm_start;
+ }
+
+ return 0;
+}
+
+/*
+ * Calculate event's address filters' ranges based on the
+ * task's existing mappings; if any of the existing mappings
+ * match the filters, update event's hw configuration and
+ * restart it if it's running.
+ */
+static void perf_event_addr_filters_apply(struct perf_event *event)
+{
+ struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
+ struct perf_addr_filter *filter;
+ struct task_struct *task = READ_ONCE(event->ctx->task);
+ struct mm_struct *mm = NULL;
+ unsigned int restart = 0, count = 0;
+ unsigned long *offs, flags, gen;
+
+ offs = event->hw.addr_filters_offs;
+
+ /*
+ * We may observe TASK_TOMBSTONE, which means that the event tear-down
+ * will stop on the parent's child_mutex that our caller is also holding.
+ */
+ if (task == TASK_TOMBSTONE)
+ return;
+
+ mm = get_task_mm(event->ctx->task);
+ if (!mm)
+ return;
+
+ /* establish the initial hw configuration for this set of filters */
+ perf_event_addr_filters_setup(event, NULL, 0);
+
+ down_read(&mm->mmap_sem);
+
+ raw_spin_lock_irqsave(&ifh->lock, flags);
+ list_for_each_entry(filter, &ifh->list, entry) {
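+ /* by default, keep this filter's previously known configuration */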
+ offs[count] = 0;
+
+ if (perf_addr_filter_needs_mmap(filter)) {
+ offs[count] = perf_addr_filter_apply(filter, mm);
+
+ if (offs[count])
+ restart++;
+ }
+
+ count++;
+ }
+
+ gen = ++event->addr_filters_gen;
+ raw_spin_unlock_irqrestore(&ifh->lock, flags);
+
+ up_read(&mm->mmap_sem);
+
+ if (restart)
+ perf_event_addr_filters_setup(event, offs, gen);
+
+ mmput(mm);
+}
+
+/*
+ * Address range filtering: limiting the data to certain
+ * instruction address ranges. Filters are ioctl()ed to us from
+ * userspace as ASCII strings.
+ *
+ * Filter string format:
+ *
+ * ACTION SOURCE:RANGE_SPEC
+ * where ACTION is one of the
+ * * "filter": limit the trace to this region
+ * * "start": start tracing from this address
+ * * "stop": stop tracing at this address/region;
+ * SOURCE is either "file" or "kernel"
+ * RANGE_SPEC is
+ * * for "kernel": <start address>[/<size>]
+ * * for "file": <start address>[/<size>]@</path/to/object/file>
+ *
+ * if <size> is not specified, the range is treated as a single address.
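+ *
+ * Examples (values and paths below are purely illustrative):
+ *
+ * filter file:4096/512@/lib/libfoo.so
+ * stop kernel:1048576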
+ */
+enum {
+ IF_ACT_FILTER,
+ IF_ACT_START,
+ IF_ACT_STOP,
+ IF_SRC_FILE,
+ IF_SRC_KERNEL,
+ IF_SRC_FILEADDR,
+ IF_SRC_KERNELADDR,
+};
+
+enum {
+ IF_STATE_ACTION = 0,
+ IF_STATE_SOURCE,
+ IF_STATE_END,
+};
+
+static const match_table_t if_tokens = {
+ { IF_ACT_FILTER, "filter" },
+ { IF_ACT_START, "start" },
+ { IF_ACT_STOP, "stop" },
+ { IF_SRC_FILE, "file:%u/%u@%s" },
+ { IF_SRC_KERNEL, "kernel:%u/%u" },
+ { IF_SRC_FILEADDR, "file:%u@%s" },
+ { IF_SRC_KERNELADDR, "kernel:%u" },
+};
+
+/*
+ * Address filter string parser
+ */
+static int
+perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
+ struct list_head *filters)
+{
+ struct perf_addr_filter *filter = NULL;
+ char *start, *orig, *filename = NULL;
+ struct path path;
+ substring_t args[MAX_OPT_ARGS];
+ int state = IF_STATE_ACTION, token;
+ int ret = -EINVAL;
+
+ orig = fstr = kstrdup(fstr, GFP_KERNEL);
+ if (!fstr)
+ return -ENOMEM;
+
+ while ((start = strsep(&fstr, " ,\n")) != NULL) {
+ ret = -EINVAL;
+
+ if (!*start)
+ continue;
+
+ /* filter definition begins */
+ if (state == IF_STATE_ACTION) {
+ filter = perf_addr_filter_new(event, filters);
+ if (!filter)
+ goto fail;
+ }
+
+ token = match_token(start, if_tokens, args);
+ switch (token) {
+ case IF_ACT_FILTER:
+ case IF_ACT_START:
+ filter->filter = 1;
+
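+ /* fall through */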
+ case IF_ACT_STOP:
+ if (state != IF_STATE_ACTION)
+ goto fail;
+
+ state = IF_STATE_SOURCE;
+ break;
+
+ case IF_SRC_KERNELADDR:
+ case IF_SRC_KERNEL:
+ filter->kernel = 1;
+
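+ /* fall through */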
+ case IF_SRC_FILEADDR:
+ case IF_SRC_FILE:
+ if (state != IF_STATE_SOURCE)
+ goto fail;
+
+ if (token == IF_SRC_FILE || token == IF_SRC_KERNEL)
+ filter->range = 1;
+
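+ /* null-terminate the matched number so kstrtoul() stops there */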
+ *args[0].to = 0;
+ ret = kstrtoul(args[0].from, 0, &filter->offset);
+ if (ret)
+ goto fail;
+
+ if (filter->range) {
+ *args[1].to = 0;
+ ret = kstrtoul(args[1].from, 0, &filter->size);
+ if (ret)
+ goto fail;
+ }
+
+ if (token == IF_SRC_FILE) {
+ filename = match_strdup(&args[2]);
+ if (!filename) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ }
+
+ state = IF_STATE_END;
+ break;
+
+ default:
+ goto fail;
+ }
+
+ /*
+ * Filter definition is fully parsed, validate and install it.
+ * Make sure that it doesn't contradict itself or the event's
+ * attribute.
+ */
+ if (state == IF_STATE_END) {
+ if (filter->kernel && event->attr.exclude_kernel)
+ goto fail;
+
+ if (!filter->kernel) {
+ if (!filename)
+ goto fail;
+
+ /* look up the path and grab its inode */
+ ret = kern_path(filename, LOOKUP_FOLLOW, &path);
+ if (ret)
+ goto fail_free_name;
+
+ filter->inode = igrab(d_inode(path.dentry));
+ path_put(&path);
+ kfree(filename);
+ filename = NULL;
+ }
+
+ /* ready to consume more filters */
+ state = IF_STATE_ACTION;
+ filter = NULL;
+ }
+ }
+
+ if (state != IF_STATE_ACTION)
+ goto fail;
+
+ kfree(orig);
+
+ return 0;
+
+fail_free_name:
+ kfree(filename);
+fail:
+ free_filters_list(filters);
+ kfree(orig);
+
+ return ret;
+}
+
+static int
+perf_event_set_addr_filter(struct perf_event *event, char *filter_str)
+{
+ LIST_HEAD(filters);
+ int ret;
+
+ /*
+ * Since this is called in perf_ioctl() path, we're already holding
+ * ctx::mutex.
+ */
+ lockdep_assert_held(&event->ctx->mutex);
+
+ if (WARN_ON_ONCE(event->parent))
+ return -EINVAL;
+
+ /*
+ * For now, we only support filtering in per-task events; doing so
+ * for cpu-wide events requires additional context switching trickery,
+ * since same object code will be mapped at different virtual
+ * addresses in different processes.
+ */
+ if (!event->ctx->task)
+ return -EOPNOTSUPP;
+
+ ret = perf_event_parse_addr_filter(event, filter_str, &filters);
+ if (ret)
+ return ret;
+
+ ret = event->pmu->addr_filters_validate(&filters);
+ if (ret) {
+ free_filters_list(&filters);
+ return ret;
+ }
+
+ /* remove existing filters, if any */
+ perf_addr_filters_splice(event, &filters);
+
+ /* install new filters */
+ perf_event_for_each_child(event, perf_event_addr_filters_apply);
+
+ return ret;
+}
+
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
{
char *filter_str;
int ret = -EINVAL;
- if (event->attr.type != PERF_TYPE_TRACEPOINT ||
- !IS_ENABLED(CONFIG_EVENT_TRACING))
+ if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
+ !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
+ !has_addr_filter(event))
return -EINVAL;
filter_str = strndup_user(arg, PAGE_SIZE);
@@ -7374,6 +7968,8 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)
event->attr.type == PERF_TYPE_TRACEPOINT)
ret = ftrace_profile_set_filter(event, event->attr.config,
filter_str);
+ else if (has_addr_filter(event))
+ ret = perf_event_set_addr_filter(event, filter_str);
kfree(filter_str);
return ret;
@@ -8196,6 +8792,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
INIT_LIST_HEAD(&event->sibling_list);
INIT_LIST_HEAD(&event->rb_entry);
INIT_LIST_HEAD(&event->active_entry);
+ INIT_LIST_HEAD(&event->addr_filters.list);
INIT_HLIST_NODE(&event->hlist_entry);
INIT_LIST_HEAD(&event->sb_list);
@@ -8203,6 +8800,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
init_irq_work(&event->pending, perf_pending_event);
mutex_init(&event->mmap_mutex);
+ raw_spin_lock_init(&event->addr_filters.lock);
atomic_long_set(&event->refcount, 1);
event->cpu = cpu;
--
2.8.0.rc3