lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20191028091229.GJ4131@hirez.programming.kicks-ass.net>
Date:   Mon, 28 Oct 2019 10:12:29 +0100
From:   Peter Zijlstra <peterz@...radead.org>
To:     Jiri Olsa <jolsa@...hat.com>
Cc:     Daniel Xu <dxu@...uu.xyz>, bpf@...r.kernel.org,
        songliubraving@...com, yhs@...com, andriin@...com,
        mingo@...hat.com, acme@...nel.org, ast@...com,
        alexander.shishkin@...ux.intel.com, namhyung@...nel.org,
        linux-kernel@...r.kernel.org, netdev@...r.kernel.org,
        kernel-team@...com
Subject: Re: [PATCH bpf-next 1/5] perf/core: Add PERF_FORMAT_LOST read_format

On Tue, Sep 24, 2019 at 10:33:42AM +0200, Jiri Olsa wrote:
> On Tue, Sep 17, 2019 at 06:30:52AM -0700, Daniel Xu wrote:
> 
> SNIP
> 
> > +	PERF_FORMAT_MAX = 1U << 5,		/* non-ABI */
> >  };
> >  
> >  #define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
> > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > index 0463c1151bae..ee08d3ed6299 100644
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -1715,6 +1715,9 @@ static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
> >  	if (event->attr.read_format & PERF_FORMAT_ID)
> >  		entry += sizeof(u64);
> >  
> > +	if (event->attr.read_format & PERF_FORMAT_LOST)
> > +		entry += sizeof(u64);
> > +
> >  	if (event->attr.read_format & PERF_FORMAT_GROUP) {
> >  		nr += nr_siblings;
> >  		size += sizeof(u64);
> > @@ -4734,6 +4737,24 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
> >  }
> >  EXPORT_SYMBOL_GPL(perf_event_read_value);
> >  
> > +static struct pmu perf_kprobe;
> > +static u64 perf_event_lost(struct perf_event *event)
> > +{
> > +	struct ring_buffer *rb;
> > +	u64 lost = 0;
> > +
> > +	rcu_read_lock();
> > +	rb = rcu_dereference(event->rb);
> > +	if (likely(!!rb))
> > +		lost += local_read(&rb->lost);
> > +	rcu_read_unlock();
> > +
> > +	if (event->attr.type == perf_kprobe.type)
> > +		lost += perf_kprobe_missed(event);
> 
> not sure what was the peterz's suggestion, but here you are mixing
> ring buffer's lost count with kprobes missed count, seems wrong

Jiri is right, this isn't quite what I meant.

The below is what I was thinking of (I also renamed everything to
missing, to avoid confusion).

But now that I wrote it, I'm a little scared of what I had to do for
__perf_sw_event(). Let me ponder that a little bit more.

---
 include/linux/perf_event.h      |  1 +
 include/linux/trace_events.h    |  1 +
 include/uapi/linux/perf_event.h |  5 ++++-
 kernel/events/core.c            | 42 +++++++++++++++++++++++++++++++++--------
 kernel/trace/trace_event_perf.c |  4 +++-
 kernel/trace/trace_kprobe.c     |  8 ++++++++
 6 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a9ef8be8c83a..ec6c867203c3 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -625,6 +625,7 @@ struct perf_event {
 	unsigned int			attach_state;
 	local64_t			count;
 	atomic64_t			child_count;
+	local64_t			missed;
 
 	/*
 	 * These are the total time in nanoseconds that the event
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index a379255c14a9..18d315a0f0f9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -603,6 +603,7 @@ extern int bpf_get_kprobe_info(const struct perf_event *event,
 			       u32 *fd_type, const char **symbol,
 			       u64 *probe_offset, u64 *probe_addr,
 			       bool perf_type_tracepoint);
+extern u64 perf_kprobe_missed(const struct perf_event *event);
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
 extern int  perf_uprobe_init(struct perf_event *event,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index bb7b271397a6..2dd3c3f21087 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -273,6 +273,7 @@ enum {
  *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
  *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
  *	  { u64		id;           } && PERF_FORMAT_ID
+ *	  { u64		missed;       } && PERF_FORMAT_MISSED
  *	} && !PERF_FORMAT_GROUP
  *
  *	{ u64		nr;
@@ -280,6 +281,7 @@ enum {
  *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
  *	  { u64		value;
  *	    { u64	id;           } && PERF_FORMAT_ID
+ *	    { u64	missed;       } && PERF_FORMAT_MISSED
  *	  }		cntr[nr];
  *	} && PERF_FORMAT_GROUP
  * };
@@ -289,8 +291,9 @@ enum perf_event_read_format {
 	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
 	PERF_FORMAT_ID				= 1U << 2,
 	PERF_FORMAT_GROUP			= 1U << 3,
+	PERF_FORMAT_MISSED			= 1U << 4,
 
-	PERF_FORMAT_MAX = 1U << 4,		/* non-ABI */
+	PERF_FORMAT_MAX = 1U << 5,		/* non-ABI */
 };
 
 #define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d8b9034857d7..7e72f919d2e7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1817,6 +1817,9 @@ static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
 	if (event->attr.read_format & PERF_FORMAT_ID)
 		entry += sizeof(u64);
 
+	if (event->attr.read_format & PERF_FORMAT_MISSED)
+		entry += sizeof(u64);
+
 	if (event->attr.read_format & PERF_FORMAT_GROUP) {
 		nr += nr_siblings;
 		size += sizeof(u64);
@@ -4994,6 +4997,15 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 }
 EXPORT_SYMBOL_GPL(perf_event_read_value);
 
+static struct pmu perf_kprobe;
+static u64 perf_event_missed(struct perf_event *event)
+{
+	if (event->attr.type == perf_kprobe.type)
+		return perf_kprobe_missed(event);
+
+	return local64_read(&event->missed);
+}
+
 static int __perf_read_group_add(struct perf_event *leader,
 					u64 read_format, u64 *values)
 {
@@ -5030,11 +5042,15 @@ static int __perf_read_group_add(struct perf_event *leader,
 	values[n++] += perf_event_count(leader);
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(leader);
+	if (read_format & PERF_FORMAT_MISSED)
+		values[n++] = perf_event_missed(leader);
 
 	for_each_sibling_event(sub, leader) {
 		values[n++] += perf_event_count(sub);
 		if (read_format & PERF_FORMAT_ID)
 			values[n++] = primary_event_id(sub);
+		if (read_format & PERF_FORMAT_MISSED)
+			values[n++] = perf_event_missed(sub);
 	}
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -5091,7 +5107,7 @@ static int perf_read_one(struct perf_event *event,
 				 u64 read_format, char __user *buf)
 {
 	u64 enabled, running;
-	u64 values[4];
+	u64 values[5];
 	int n = 0;
 
 	values[n++] = __perf_event_read_value(event, &enabled, &running);
@@ -5101,6 +5117,8 @@ static int perf_read_one(struct perf_event *event,
 		values[n++] = running;
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(event);
+	if (read_format & PERF_FORMAT_MISSED)
+		values[n++] = perf_event_lost(event);
 
 	if (copy_to_user(buf, values, n * sizeof(u64)))
 		return -EFAULT;
@@ -6427,7 +6445,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 				 u64 enabled, u64 running)
 {
 	u64 read_format = event->attr.read_format;
-	u64 values[4];
+	u64 values[5];
 	int n = 0;
 
 	values[n++] = perf_event_count(event);
@@ -6441,6 +6459,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 	}
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(event);
+	if (read_format & PERF_FORMAT_MISSED)
+		values[n++] = perf_event_lost(event);
 
 	__output_copy(handle, values, n * sizeof(u64));
 }
@@ -6451,7 +6471,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 {
 	struct perf_event *leader = event->group_leader, *sub;
 	u64 read_format = event->attr.read_format;
-	u64 values[5];
+	u64 values[6];
 	int n = 0;
 
 	values[n++] = 1 + leader->nr_siblings;
@@ -6469,6 +6489,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	values[n++] = perf_event_count(leader);
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(leader);
+	if (read_format & PERF_FORMAT_MISSED)
+		values[n++] = perf_event_lost(leader);
 
 	__output_copy(handle, values, n * sizeof(u64));
 
@@ -6482,6 +6504,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 		values[n++] = perf_event_count(sub);
 		if (read_format & PERF_FORMAT_ID)
 			values[n++] = primary_event_id(sub);
+		if (read_format & PERF_FORMAT_MISSED)
+			values[n++] = perf_event_lost(sub);
 
 		__output_copy(handle, values, n * sizeof(u64));
 	}
@@ -8500,7 +8524,6 @@ static int perf_exclude_event(struct perf_event *event,
 static int perf_swevent_match(struct perf_event *event,
 				enum perf_type_id type,
 				u32 event_id,
-				struct perf_sample_data *data,
 				struct pt_regs *regs)
 {
 	if (event->attr.type != type)
@@ -8579,8 +8602,12 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
 		goto end;
 
 	hlist_for_each_entry_rcu(event, head, hlist_entry) {
-		if (perf_swevent_match(event, type, event_id, data, regs))
-			perf_swevent_event(event, nr, data, regs);
+		if (perf_swevent_match(event, type, event_id, regs)) {
+			if (nr == ~0ULL)
+				local64_inc(&event->missed);
+			else
+				perf_swevent_event(event, nr, data, regs);
+		}
 	}
 end:
 	rcu_read_unlock();
@@ -8621,12 +8648,11 @@ void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
 	preempt_disable_notrace();
 	rctx = perf_swevent_get_recursion_context();
 	if (unlikely(rctx < 0))
-		goto fail;
+		nr = ~0ULL;
 
 	___perf_sw_event(event_id, nr, regs, addr);
 
 	perf_swevent_put_recursion_context(rctx);
-fail:
 	preempt_enable_notrace();
 }
 
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 0917fee6ee7c..73a0de204d7a 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -458,8 +458,10 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
 	perf_fetch_caller_regs(&regs);
 
 	entry = perf_trace_buf_alloc(ENTRY_SIZE, NULL, &rctx);
-	if (!entry)
+	if (!entry) {
+		local64_inc(&event->missed);
 		return;
+	}
 
 	entry->ip = ip;
 	entry->parent_ip = parent_ip;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 66e0a8ff1c01..5e1889c161e3 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -233,6 +233,14 @@ bool trace_kprobe_error_injectable(struct trace_event_call *call)
 	       false;
 }
 
+u64 perf_kprobe_missed(const struct perf_event *event)
+{
+	struct trace_event_call *call = event->tp_event;
+	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+
+	return tk->rp.kp.nmissed;
+}
+
 static int register_kprobe_event(struct trace_kprobe *tk);
 static int unregister_kprobe_event(struct trace_kprobe *tk);
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ