Message-Id: <1484745662-15928-3-git-send-email-kan.liang@intel.com>
Date:   Wed, 18 Jan 2017 08:21:02 -0500
From:   kan.liang@...el.com
To:     linux-kernel@...r.kernel.org, peterz@...radead.org,
        mingo@...hat.com
Cc:     alexander.shishkin@...ux.intel.com, eranian@...gle.com,
        ak@...ux.intel.com, Kan Liang <kan.liang@...el.com>
Subject: [PATCH 2/2] perf,core: use parent avg sample period as child initial period

From: Kan Liang <kan.liang@...el.com>

perf brings additional overhead when monitoring a task which frequently
spawns child tasks.

When inheriting an event from a parent task to a child task, the
sample_period of the original parent event (parent_event->parent) is
assigned to the child event as its initial period, which is usually the
default sample_period of 1. Too many very short periods like 1 increase
the overhead and may cause various problems.

avg_sample_period is introduced to keep a running average of the sample
period. Each child event can use its original parent event's average
period as its initial sample period, which reduces the overhead.

The average is updated at most once per tick, to avoid contention.
For each new child event, the parent event's refcount is incremented, so
the parent will not go away until all of its children do. It is
therefore safe to access the parent.
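
For illustration only, here is a minimal user-space sketch of the
averaging scheme the patch applies in perf_adjust_period(). This is not
kernel code: the tick throttling is modelled with a plain timestamp,
the TICK_NSEC value is arbitrary, and update_avg(), main() and the
sample data are made up.

  /* avg_sketch.c - standalone model of the running average used by the patch */
  #include <stdio.h>
  #include <stdint.h>

  #define TICK_NSEC 1000000ULL            /* pretend tick length for the model */

  static uint64_t avg_sample_period = 1;  /* models event->avg_sample_period   */
  static uint64_t avg_time_stamp;         /* models event->avg_time_stamp      */

  /*
   * Called whenever the frequency algorithm picks a new sample_period.
   * At most once per "tick", fold it into the running average:
   *     avg = (avg + sample_period) / 2
   */
  static void update_avg(uint64_t now, uint64_t sample_period)
  {
          if (now - avg_time_stamp > TICK_NSEC) {
                  avg_time_stamp = now;
                  avg_sample_period = (avg_sample_period + sample_period) / 2;
          }
  }

  int main(void)
  {
          /* made-up sequence of periods produced by the frequency algorithm */
          uint64_t periods[] = { 1, 1000, 40000, 80000, 120000, 120000 };
          uint64_t now = 0;
          unsigned int i;

          for (i = 0; i < sizeof(periods) / sizeof(periods[0]); i++) {
                  now += 2 * TICK_NSEC;    /* pretend two ticks have elapsed */
                  update_avg(now, periods[i]);
                  printf("sample_period=%llu avg=%llu\n",
                         (unsigned long long)periods[i],
                         (unsigned long long)avg_sample_period);
          }
          return 0;
  }

A child event that inherits this converged average starts near the
steady-state period instead of ramping up from 1 again.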

Here is some data from the overhead test on a Broadwell server:
  perf record -e $TEST_EVENTS -- ./loop.sh 50000

loop.sh
  start=$(date +%s%N)
  i=0
  while [ "$i" -le "$1" ]
  do
          date > /dev/null
          i=`expr $i + 1`
  done
  end=$(date +%s%N)
  elapsed=`expr $end - $start`
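
(Note: as posted, loop.sh computes the elapsed wall-clock time in
nanoseconds but does not print it; when reproducing the test, a final
line such as the following, which is not part of the original script,
exposes the value used in the table below.)

  echo "$elapsed"    # elapsed wall-clock time in nanoseconds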

Event#	Original elapsed time (ns)	Elapsed time with patch (ns)	Delta
1	196,573,192,397			188,480,366,278			-4.12%
2	257,567,753,013			242,256,126,043			-5.94%
3	398,730,726,971			373,882,492,502			-6.23%
4	824,983,761,120			750,906,525,917			-8.98%
5	1,883,411,923,498		1,648,192,098,897		-12.49%
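
(The Delta column is the relative change of the patched elapsed time
versus the original; for example, the first row can be reproduced with
an awk one-liner like the one below -- illustration only, not part of
the original measurement.)

  $ awk 'BEGIN { printf "%.2f%%\n", (188480366278 - 196573192397) * 100 / 196573192397 }'
  -4.12%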

Signed-off-by: Kan Liang <kan.liang@...el.com>
---
 include/linux/perf_event.h |  3 +++
 kernel/events/core.c       | 20 ++++++++++++++++++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 78ed810..84b0f47 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -648,6 +648,9 @@ struct perf_event {
 	struct list_head		child_list;
 	struct perf_event		*parent;
 
+	atomic64_t			avg_sample_period;
+	u64				avg_time_stamp;
+
 	int				oncpu;
 	int				cpu;
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 924268c..82a2c0e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3237,9 +3237,11 @@ static DEFINE_PER_CPU(u64, perf_throttled_seq);
 
 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bool disable)
 {
+	struct perf_event *head_event = (event->parent != NULL) ? event->parent : event;
 	struct hw_perf_event *hwc = &event->hw;
 	s64 period, sample_period;
 	s64 delta;
+	u64 now;
 
 	period = perf_calculate_period(event, nsec, count);
 
@@ -3253,6 +3255,15 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bo
 
 	hwc->sample_period = sample_period;
 
+	now = perf_clock();
+	if ((now - head_event->avg_time_stamp) > TICK_NSEC) {
+		s64 avg_period;
+
+		head_event->avg_time_stamp = now;
+		avg_period = (atomic64_read(&head_event->avg_sample_period) + sample_period) / 2;
+		atomic64_set(&head_event->avg_sample_period, avg_period);
+	}
+
 	if (local64_read(&hwc->period_left) > 8*sample_period) {
 		if (disable)
 			event->pmu->stop(event, PERF_EF_UPDATE);
@@ -9231,8 +9242,13 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
 	hwc = &event->hw;
 	hwc->sample_period = attr->sample_period;
-	if (attr->freq && attr->sample_freq)
+	if (attr->freq && attr->sample_freq) {
 		hwc->sample_period = 1;
+		if (parent_event)
+			hwc->sample_period = atomic64_read(&parent_event->avg_sample_period);
+		else
+			atomic64_set(&event->avg_sample_period, hwc->sample_period);
+	}
 	hwc->last_period = hwc->sample_period;
 
 	local64_set(&hwc->period_left, hwc->sample_period);
@@ -10464,8 +10480,8 @@ inherit_event(struct perf_event *parent_event,
 		child_event->state = PERF_EVENT_STATE_OFF;
 
 	if (parent_event->attr.freq) {
-		u64 sample_period = parent_event->hw.sample_period;
 		struct hw_perf_event *hwc = &child_event->hw;
+		u64 sample_period = atomic64_read(&parent_event->avg_sample_period);
 
 		hwc->sample_period = sample_period;
 		hwc->last_period   = sample_period;
-- 
2.4.3
