lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <147999864851.15705.3363371034116608966.stgit@hbathini.in.ibm.com>
Date:   Thu, 24 Nov 2016 20:14:29 +0530
From:   Hari Bathini <hbathini@...ux.vnet.ibm.com>
To:     ast@...com, peterz@...radead.org,
        lkml <linux-kernel@...r.kernel.org>, acme@...nel.org,
        alexander.shishkin@...ux.intel.com, mingo@...hat.com
Cc:     daniel@...earbox.net, rostedt@...dmis.org,
        Ananth N Mavinakayanahalli <ananth@...ux.vnet.ibm.com>,
        ebiederm@...ssion.com, sargun@...gun.me,
        Aravinda Prasad <aravinda@...ux.vnet.ibm.com>,
        brendan.d.gregg@...il.com
Subject: [PATCH v2 1/3] perf: add PERF_RECORD_NAMESPACES to include
 namespaces related info

With the advert of container technologies like docker, that depend
on namespaces for isolation, there is a need for tracing support for
namespaces. This patch introduces new PERF_RECORD_NAMESPACES event
for tracing based on namespaces related info.

Signed-off-by: Hari Bathini <hbathini@...ux.vnet.ibm.com>
---

Changes from v1:
* Introducing indices for all the namespaces.
* Allowing namespaces events to be recorded for users with
  CAP_SYS_ADMIN capability only.
* Added device number information.


 include/linux/perf_event.h      |    1 
 include/uapi/linux/perf_event.h |   28 ++++++++
 kernel/events/core.c            |  138 +++++++++++++++++++++++++++++++++++++++
 kernel/fork.c                   |    3 +
 kernel/nsproxy.c                |    5 +
 5 files changed, 174 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4741ecd..243b988 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1110,6 +1110,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
 
 extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk, bool exec);
+extern void perf_event_namespaces(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
 
 /* Callchains */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index c66a485..6749e9e 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -344,7 +344,8 @@ struct perf_event_attr {
 				use_clockid    :  1, /* use @clockid for time fields */
 				context_switch :  1, /* context switch data */
 				write_backward :  1, /* Write ring buffer from end to beginning */
-				__reserved_1   : 36;
+				namespaces     :  1, /* include namespaces data */
+				__reserved_1   : 35;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -610,6 +611,18 @@ struct perf_event_header {
 	__u16	size;
 };
 
+enum {
+	NET_NS_INDEX		= 0,
+	UTS_NS_INDEX		= 1,
+	IPC_NS_INDEX		= 2,
+	PID_NS_INDEX		= 3,
+	USER_NS_INDEX		= 4,
+	MNT_NS_INDEX		= 5,
+	CGROUP_NS_INDEX		= 6,
+
+	NAMESPACES_MAX,		/* maximum available namespaces */
+};
+
 enum perf_event_type {
 
 	/*
@@ -862,6 +875,19 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_SWITCH_CPU_WIDE		= 15,
 
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u32				pid, tid;
+	 *	u64				time;
+	 *	u64				dev_num;
+	 *	u64				inode_num[NAMESPACES_MAX];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_NAMESPACES			= 16,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0e29213..11d54df 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -46,6 +46,10 @@
 #include <linux/filter.h>
 #include <linux/namei.h>
 #include <linux/parser.h>
+#include <linux/proc_ns.h>
+#include <linux/mount.h>
+#include <linux/ipc_namespace.h>
+#include <linux/utsname.h>
 
 #include "internal.h"
 
@@ -375,6 +379,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
+static atomic_t nr_namespaces_events __read_mostly;
 static atomic_t nr_task_events __read_mostly;
 static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
@@ -3874,6 +3879,8 @@ static void unaccount_event(struct perf_event *event)
 		atomic_dec(&nr_mmap_events);
 	if (event->attr.comm)
 		atomic_dec(&nr_comm_events);
+	if (event->attr.namespaces)
+		atomic_dec(&nr_namespaces_events);
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
@@ -6374,6 +6381,7 @@ static void perf_event_task(struct task_struct *task,
 void perf_event_fork(struct task_struct *task)
 {
 	perf_event_task(task, NULL, 1);
+	perf_event_namespaces(task);
 }
 
 /*
@@ -6476,6 +6484,129 @@ void perf_event_comm(struct task_struct *task, bool exec)
 }
 
 /*
+ * namespaces tracking
+ */
+
+struct namespaces_event_id {
+	struct perf_event_header	header;
+	u32				pid;
+	u32				tid;
+	u64				time;
+	u64				dev_num;
+	u64				inode_num[NAMESPACES_MAX];
+};
+
+struct perf_namespaces_event {
+	struct task_struct		*task;
+
+	struct namespaces_event_id	event_id;
+};
+
+static int perf_event_namespaces_match(struct perf_event *event)
+{
+	return event->attr.namespaces;
+}
+
+static void perf_event_namespaces_output(struct perf_event *event,
+					 void *data)
+{
+	struct perf_namespaces_event *namespaces_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	struct path ns_path;
+	struct task_struct *task = namespaces_event->task;
+	struct namespaces_event_id *ei = &namespaces_event->event_id;
+	struct ns_common *ns_common;
+	struct nsproxy *nsproxy;
+	void *error;
+	int ret;
+
+	if (!perf_event_namespaces_match(event))
+		return;
+
+	perf_event_header__init_id(&ei->header, &sample, event);
+	ret = perf_output_begin(&handle, event, ei->header.size);
+	if (ret)
+		return;
+
+	ei->pid = perf_event_pid(event, task);
+	ei->tid = perf_event_tid(event, task);
+
+	error = ns_get_path(&ns_path, task, &mntns_operations);
+	if (!error)
+		ei->dev_num = ns_path.mnt->mnt_sb->s_dev;
+
+	ns_common = mntns_operations.get(task);
+	ei->inode_num[MNT_NS_INDEX] = ns_common->inum;
+	mntns_operations.put(ns_common);
+
+#ifdef CONFIG_USER_NS
+	ei->inode_num[USER_NS_INDEX] = __task_cred(task)->user_ns->ns.inum;
+#endif
+
+	if (task != current)
+		task_lock(task);
+
+	nsproxy = task->nsproxy;
+	if (nsproxy != NULL) {
+#ifdef CONFIG_NET_NS
+		ei->inode_num[NET_NS_INDEX] = nsproxy->net_ns->ns.inum;
+#endif
+#ifdef CONFIG_UTS_NS
+		ei->inode_num[UTS_NS_INDEX] = nsproxy->uts_ns->ns.inum;
+#endif
+#ifdef CONFIG_IPC_NS
+		ei->inode_num[IPC_NS_INDEX] = nsproxy->ipc_ns->ns.inum;
+#endif
+#ifdef CONFIG_PID_NS
+		ei->inode_num[PID_NS_INDEX] = nsproxy->pid_ns_for_children->ns.inum;
+#endif
+#ifdef CONFIG_CGROUPS
+		ei->inode_num[CGROUP_NS_INDEX] = nsproxy->cgroup_ns->ns.inum;
+#endif
+	}
+
+	if (task != current)
+		task_unlock(task);
+
+	ei->time = perf_event_clock(event);
+
+	perf_output_put(&handle, namespaces_event->event_id);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
+void perf_event_namespaces(struct task_struct *task)
+{
+	struct perf_namespaces_event namespaces_event;
+
+	if (!atomic_read(&nr_namespaces_events))
+		return;
+
+	namespaces_event = (struct perf_namespaces_event){
+		.task	= task,
+		.event_id  = {
+			.header = {
+				.type = PERF_RECORD_NAMESPACES,
+				.misc = 0,
+				.size = sizeof(namespaces_event.event_id),
+			},
+			/* .pid */
+			/* .tid */
+			/* .time */
+			/* .dev_num */
+			/* .inode_num[NAMESPACES_MAX] */
+		},
+	};
+
+	perf_iterate_sb(perf_event_namespaces_output,
+			&namespaces_event,
+			NULL);
+}
+
+/*
  * mmap tracking
  */
 
@@ -9018,6 +9149,8 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_mmap_events);
 	if (event->attr.comm)
 		atomic_inc(&nr_comm_events);
+	if (event->attr.namespaces)
+		atomic_inc(&nr_namespaces_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
 	if (event->attr.freq)
@@ -9532,6 +9665,11 @@ SYSCALL_DEFINE5(perf_event_open,
 			return -EACCES;
 	}
 
+	if (attr.namespaces) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+	}
+
 	if (attr.freq) {
 		if (attr.sample_freq > sysctl_perf_event_sample_rate)
 			return -EINVAL;
diff --git a/kernel/fork.c b/kernel/fork.c
index 997ac1d..399a44b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2280,6 +2280,9 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 		free_fs_struct(new_fs);
 
 bad_unshare_out:
+	if (!err)
+		perf_event_namespaces(current);
+
 	return err;
 }
 
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 782102e..4c25e6e 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -26,6 +26,7 @@
 #include <linux/file.h>
 #include <linux/syscalls.h>
 #include <linux/cgroup.h>
+#include <linux/perf_event.h>
 
 static struct kmem_cache *nsproxy_cachep;
 
@@ -264,6 +265,10 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
 	switch_task_namespaces(tsk, new_nsproxy);
 out:
 	fput(file);
+
+	if (!err)
+		perf_event_namespaces(tsk);
+
 	return err;
 }
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ