lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue,  5 Sep 2017 16:30:21 +0300
From:   Alexander Shishkin <alexander.shishkin@...ux.intel.com>
To:     Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc:     Ingo Molnar <mingo@...hat.com>, linux-kernel@...r.kernel.org,
        acme@...hat.com, kirill.shutemov@...ux.intel.com,
        Borislav Petkov <bp@...en8.de>, rric@...nel.org,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Subject: [RFC PATCH 12/17] perf: Track pinned events per user

Maintain a per-user, CPU-indexed array of shmemfs-backed events, the
same way as is done for mlock accounting.

Signed-off-by: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
---
 include/linux/sched/user.h  |  6 ++++
 kernel/events/core.c        | 14 ++++-----
 kernel/events/ring_buffer.c | 69 +++++++++++++++++++++++++++++++++++++--------
 kernel/user.c               |  1 +
 4 files changed, 71 insertions(+), 19 deletions(-)

diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 5d5415e129..bf10f95250 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -5,6 +5,7 @@
 #include <linux/atomic.h>
 
 struct key;
+struct perf_event;
 
 /*
  * Some day this will be a full-fledged user tracking system..
@@ -39,6 +40,11 @@ struct user_struct {
 #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
 	atomic_long_t locked_vm;
 #endif
+#ifdef CONFIG_PERF_EVENTS
+	atomic_long_t nr_pinnable_events;
+	struct mutex pinned_mutex;
+	struct perf_event ** __percpu pinned_events;
+#endif
 };
 
 extern int uids_sysfs_init(void);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1fed69d4ba..e00f1f6aaf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -384,7 +384,6 @@ static atomic_t perf_sched_count;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
-static DEFINE_PER_CPU(struct perf_event *, shmem_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -2086,7 +2085,8 @@ enum pin_event_t {
 
 static enum pin_event_t pin_event_pages(struct perf_event *event)
 {
-	struct perf_event **pinned_event = this_cpu_ptr(&shmem_events);
+	struct user_struct *user = event->rb->mmap_user;
+	struct perf_event **pinned_event = this_cpu_ptr(user->pinned_events);
 	struct perf_event *old_event = *pinned_event;
 
 	if (old_event == event)
@@ -4281,13 +4281,14 @@ static void _free_event(struct perf_event *event)
 	unaccount_event(event);
 
 	if (event->attach_state & PERF_ATTACH_SHMEM) {
+		struct user_struct *user = event->rb->mmap_user;
 		struct perf_event_context *ctx = event->ctx;
 		int cpu;
 
 		atomic_set(&event->xpinned, 0);
 		for_each_possible_cpu(cpu) {
 			struct perf_event **pinned_event =
-				per_cpu_ptr(&shmem_events, cpu);
+				per_cpu_ptr(user->pinned_events, cpu);
 
 			cmpxchg(pinned_event, event, NULL);
 		}
@@ -9530,7 +9531,7 @@ perf_event_detach(struct perf_event *event, struct perf_event *parent_event,
 {
 	struct ring_buffer *parent_rb = parent_event ? parent_event->rb : NULL;
 	char *filename;
-	int err;
+	int err = -ENOMEM;
 
 	filename = kasprintf(GFP_KERNEL, "%s:%x.event",
 			     task ? "task" : "cpu",
@@ -9550,10 +9551,9 @@ perf_event_detach(struct perf_event *event, struct perf_event *parent_event,
 	if (err) {
 		tracefs_remove(event->dent);
 		event->dent = NULL;
-		return err;
 	}
 
-	return 0;
+	return err;
 }
 /*
  * Allocate and initialize a event structure
@@ -10290,7 +10290,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	}
 
 	if (detached) {
-		err = perf_event_detach(event, task, NULL);
+		err = perf_event_detach(event, NULL, task, NULL);
 		if (err)
 			goto err_context;
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 896d441642..8d37e4e591 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -563,6 +563,44 @@ void *perf_get_aux(struct perf_output_handle *handle)
 	return handle->rb->aux_priv;
 }
 
+static struct user_struct *get_users_pinned_events(void)
+{
+	struct user_struct *user = current_user(), *ret = NULL;
+
+	if (atomic_long_inc_not_zero(&user->nr_pinnable_events))
+		return user;
+
+	mutex_lock(&user->pinned_mutex);
+	if (!atomic_long_read(&user->nr_pinnable_events)) {
+		if (WARN_ON_ONCE(!!user->pinned_events))
+			goto unlock;
+
+		user->pinned_events = alloc_percpu(struct perf_event *);
+		if (!user->pinned_events) {
+			goto unlock;
+		} else {
+			atomic_long_inc(&user->nr_pinnable_events);
+			ret = get_current_user();
+		}
+	}
+
+unlock:
+	mutex_unlock(&user->pinned_mutex);
+
+	return ret;
+}
+
+static void put_users_pinned_events(struct user_struct *user)
+{
+	if (!atomic_long_dec_and_test(&user->nr_pinnable_events))
+		return;
+
+	mutex_lock(&user->pinned_mutex);
+	free_percpu(user->pinned_events);
+	user->pinned_events = NULL;
+	mutex_unlock(&user->pinned_mutex);
+}
+
 /*
  * Check if the current user can afford @nr_pages, considering the
  * perf_event_mlock sysctl and their mlock limit. If the former is exceeded,
@@ -574,11 +612,14 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
                                  unsigned long nr_pages, unsigned long *locked)
 {
 	unsigned long total, limit, pinned;
+	struct user_struct *user;
 
 	if (!mm)
 		mm = rb->mmap_mapping;
 
-	rb->mmap_user = current_user();
+	user = get_users_pinned_events();
+	if (!user)
+		return -ENOMEM;
 
 	limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
 
@@ -587,10 +628,7 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
 	 */
 	limit *= num_online_cpus();
 
-	total = atomic_long_read(&rb->mmap_user->locked_vm) + nr_pages;
-
-	free_uid(rb->mmap_user);
-	rb->mmap_user = NULL;
+	total = atomic_long_read(&user->locked_vm) + nr_pages;
 
 	pinned = 0;
 	if (total > limit) {
@@ -599,7 +637,7 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
 		 * limit needs to be accounted to the consumer's mm.
 		 */
 		if (!mm)
-			return -EPERM;
+			goto err_put_user;
 
 		pinned = total - limit;
 
@@ -608,9 +646,8 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
 		total = mm->pinned_vm + pinned;
 
 		if ((total > limit) && perf_paranoid_tracepoint_raw() &&
-		    !capable(CAP_IPC_LOCK)) {
-			return -EPERM;
-		}
+		    !capable(CAP_IPC_LOCK))
+			goto err_put_user;
 
 		*locked = pinned;
 		mm->pinned_vm += pinned;
@@ -619,10 +656,15 @@ static int __ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
 	if (!rb->mmap_mapping)
 		rb->mmap_mapping = mm;
 
-	rb->mmap_user = get_current_user();
-	atomic_long_add(nr_pages, &rb->mmap_user->locked_vm);
+	rb->mmap_user = user;
+	atomic_long_add(nr_pages, &user->locked_vm);
 
 	return 0;
+
+err_put_user:
+	put_users_pinned_events(user);
+
+	return -EPERM;
 }
 
 static int ring_buffer_account(struct ring_buffer *rb, struct mm_struct *mm,
@@ -657,7 +699,7 @@ void ring_buffer_unaccount(struct ring_buffer *rb, bool aux)
 	if (rb->mmap_mapping)
 		rb->mmap_mapping->pinned_vm -= pinned;
 
-	free_uid(rb->mmap_user);
+	put_users_pinned_events(rb->mmap_user);
 }
 
 #define PERF_AUX_GFP	(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
@@ -1124,6 +1166,7 @@ rb_shmem_account(struct ring_buffer *rb, struct ring_buffer *parent_rb)
 
 		rb->acct_refcount = parent_rb->acct_refcount;
 		atomic_inc(rb->acct_refcount);
+		rb->mmap_user = get_uid(parent_rb->mmap_user);
 
 		return 0;
 	}
@@ -1146,6 +1189,8 @@ rb_shmem_account(struct ring_buffer *rb, struct ring_buffer *parent_rb)
 
 static void rb_shmem_unaccount(struct ring_buffer *rb)
 {
+	free_uid(rb->mmap_user);
+
 	if (!atomic_dec_and_test(rb->acct_refcount)) {
 		rb->acct_refcount = NULL;
 		return;
diff --git a/kernel/user.c b/kernel/user.c
index 00281add65..e95a82d31d 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -185,6 +185,7 @@ struct user_struct *alloc_uid(kuid_t uid)
 
 		new->uid = uid;
 		atomic_set(&new->__count, 1);
+		mutex_init(&new->pinned_mutex);
 
 		/*
 		 * Before adding this, check whether we raced
-- 
2.14.1

Powered by blists - more mailing lists