lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <148654268436.27983.5340774362600828412.stgit@hbathini.in.ibm.com>
Date:   Wed, 08 Feb 2017 14:01:24 +0530
From:   Hari Bathini <hbathini@...ux.vnet.ibm.com>
To:     ast@...com, peterz@...radead.org,
        lkml <linux-kernel@...r.kernel.org>, acme@...nel.org,
        alexander.shishkin@...ux.intel.com, mingo@...hat.com
Cc:     daniel@...earbox.net, rostedt@...dmis.org,
        Ananth N Mavinakayanahalli <ananth@...ux.vnet.ibm.com>,
        ebiederm@...ssion.com, sargun@...gun.me,
        Aravinda Prasad <aravinda@...ux.vnet.ibm.com>,
        brendan.d.gregg@...il.com, jolsa@...hat.com
Subject: [PATCH v6 1/3] perf: add PERF_RECORD_NAMESPACES to include
 namespaces related info

With the advent of container technologies like Docker, which depend
on namespaces for isolation, there is a need for tracing support for
namespaces. This patch introduces a new PERF_RECORD_NAMESPACES event
for tracing based on namespace-related info. This event records
the device and inode numbers for every namespace of all processes.

While the device number is currently the same for all namespaces, that
may change in the future to avoid the need for a namespace of namespaces.
Recording the device number along with the inode number takes care of
such a scenario. Also, recording device and inode numbers for every
namespace lets userspace decide on the definition of a container and
update the perf tool accordingly.

Signed-off-by: Hari Bathini <hbathini@...ux.vnet.ibm.com>
---
 include/linux/perf_event.h      |    2 +
 include/uapi/linux/perf_event.h |   38 ++++++++++
 kernel/events/core.c            |  142 +++++++++++++++++++++++++++++++++++++++
 kernel/fork.c                   |    3 +
 kernel/nsproxy.c                |    5 +
 5 files changed, 189 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5c58e93..4547fb6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1110,6 +1110,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks
 
 extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk, bool exec);
+extern void perf_event_namespaces(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
 
 /* Callchains */
@@ -1313,6 +1314,7 @@ static inline int perf_unregister_guest_info_callbacks
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_exec(void)				{ }
 static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
+static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
 static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index c66a485..ee60f54 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -344,7 +344,8 @@ struct perf_event_attr {
 				use_clockid    :  1, /* use @clockid for time fields */
 				context_switch :  1, /* context switch data */
 				write_backward :  1, /* Write ring buffer from end to beginning */
-				__reserved_1   : 36;
+				namespaces     :  1, /* include namespaces data */
+				__reserved_1   : 35;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -610,6 +611,29 @@ struct perf_event_header {
 	__u16	size;
 };
 
+/*
+ * The maximum size of the name of each namespace
+ */
+#define NS_NAME_SIZE				8
+
+struct perf_ns_link_info {
+	char	name[NS_NAME_SIZE];
+	__u64	dev;
+	__u64	ino;
+};
+
+enum {
+	NET_NS_INDEX		= 0,
+	UTS_NS_INDEX		= 1,
+	IPC_NS_INDEX		= 2,
+	PID_NS_INDEX		= 3,
+	USER_NS_INDEX		= 4,
+	MNT_NS_INDEX		= 5,
+	CGROUP_NS_INDEX		= 6,
+
+	NAMESPACES_MAX,		/* maximum available namespaces */
+};
+
 enum perf_event_type {
 
 	/*
@@ -862,6 +886,18 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_SWITCH_CPU_WIDE		= 15,
 
+	/*
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid;
+	 *	u32				tid;
+	 *	u32				nr_namespaces;
+	 *	struct perf_ns_link_info	link_info[NAMESPACES_MAX];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_NAMESPACES			= 16,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 88676ff..4427102 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -46,6 +46,8 @@
 #include <linux/filter.h>
 #include <linux/namei.h>
 #include <linux/parser.h>
+#include <linux/proc_ns.h>
+#include <linux/mount.h>
 
 #include "internal.h"
 
@@ -377,6 +379,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
+static atomic_t nr_namespaces_events __read_mostly;
 static atomic_t nr_task_events __read_mostly;
 static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
@@ -3982,6 +3985,8 @@ static void unaccount_event(struct perf_event *event)
 		atomic_dec(&nr_mmap_events);
 	if (event->attr.comm)
 		atomic_dec(&nr_comm_events);
+	if (event->attr.namespaces)
+		atomic_dec(&nr_namespaces_events);
 	if (event->attr.task)
 		atomic_dec(&nr_task_events);
 	if (event->attr.freq)
@@ -6482,6 +6487,7 @@ static void perf_event_task(struct task_struct *task,
 void perf_event_fork(struct task_struct *task)
 {
 	perf_event_task(task, NULL, 1);
+	perf_event_namespaces(task);
 }
 
 /*
@@ -6584,6 +6590,135 @@ void perf_event_comm(struct task_struct *task, bool exec)
 }
 
 /*
+ * namespaces tracking
+ */
+
+struct namespaces_event_id {
+	struct perf_event_header	header;
+
+	u32				pid;
+	u32				tid;
+	u32				nr_namespaces;
+	struct perf_ns_link_info	link_info[NAMESPACES_MAX];
+};
+
+struct perf_namespaces_event {
+	struct task_struct		*task;
+
+	struct namespaces_event_id	event_id;
+};
+
+static int perf_event_namespaces_match(struct perf_event *event)
+{
+	return event->attr.namespaces;
+}
+
+static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
+				   struct task_struct *task,
+				   const struct proc_ns_operations *ns_ops)
+{
+	struct path ns_path;
+	struct inode *ns_inode;
+	void *error;
+
+	error = ns_get_path(&ns_path, task, ns_ops);
+	if (!error) {
+		snprintf(ns_link_info->name, NS_NAME_SIZE,
+			 "%s", ns_path.dentry->d_iname);
+
+		ns_inode = ns_path.dentry->d_inode;
+		ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
+		ns_link_info->ino = ns_inode->i_ino;
+	}
+}
+
+static void perf_event_namespaces_output(struct perf_event *event,
+					 void *data)
+{
+	struct perf_namespaces_event *namespaces_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	struct namespaces_event_id *ei;
+	struct task_struct *task = namespaces_event->task;
+	int ret;
+
+	if (!perf_event_namespaces_match(event))
+		return;
+
+	ei = &namespaces_event->event_id;
+	perf_event_header__init_id(&ei->header, &sample, event);
+	ret = perf_output_begin(&handle, event,	ei->header.size);
+	if (ret)
+		return;
+
+	ei->pid = perf_event_pid(event, task);
+	ei->tid = perf_event_tid(event, task);
+
+	ei->nr_namespaces = NAMESPACES_MAX;
+
+	perf_fill_ns_link_info(&ei->link_info[MNT_NS_INDEX],
+			       task, &mntns_operations);
+
+#ifdef CONFIG_USER_NS
+	perf_fill_ns_link_info(&ei->link_info[USER_NS_INDEX],
+			       task, &userns_operations);
+#endif
+#ifdef CONFIG_NET_NS
+	perf_fill_ns_link_info(&ei->link_info[NET_NS_INDEX],
+			       task, &netns_operations);
+#endif
+#ifdef CONFIG_UTS_NS
+	perf_fill_ns_link_info(&ei->link_info[UTS_NS_INDEX],
+			       task, &utsns_operations);
+#endif
+#ifdef CONFIG_IPC_NS
+	perf_fill_ns_link_info(&ei->link_info[IPC_NS_INDEX],
+			       task, &ipcns_operations);
+#endif
+#ifdef CONFIG_PID_NS
+	perf_fill_ns_link_info(&ei->link_info[PID_NS_INDEX],
+			       task, &pidns_operations);
+#endif
+#ifdef CONFIG_CGROUPS
+	perf_fill_ns_link_info(&ei->link_info[CGROUP_NS_INDEX],
+			       task, &cgroupns_operations);
+#endif
+
+	perf_output_put(&handle, namespaces_event->event_id);
+
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
+void perf_event_namespaces(struct task_struct *task)
+{
+	struct perf_namespaces_event namespaces_event;
+
+	if (!atomic_read(&nr_namespaces_events))
+		return;
+
+	namespaces_event = (struct perf_namespaces_event){
+		.task	= task,
+		.event_id  = {
+			.header = {
+				.type = PERF_RECORD_NAMESPACES,
+				.misc = 0,
+				.size = sizeof(namespaces_event.event_id),
+			},
+			/* .pid */
+			/* .tid */
+			/* .nr_namespaces */
+			/* .link_info[NAMESPACES_MAX] */
+		},
+	};
+
+	perf_iterate_sb(perf_event_namespaces_output,
+			&namespaces_event,
+			NULL);
+}
+
+/*
  * mmap tracking
  */
 
@@ -9122,6 +9257,8 @@ static void account_event(struct perf_event *event)
 		atomic_inc(&nr_mmap_events);
 	if (event->attr.comm)
 		atomic_inc(&nr_comm_events);
+	if (event->attr.namespaces)
+		atomic_inc(&nr_namespaces_events);
 	if (event->attr.task)
 		atomic_inc(&nr_task_events);
 	if (event->attr.freq)
@@ -9667,6 +9804,11 @@ SYSCALL_DEFINE5(perf_event_open,
 			return -EACCES;
 	}
 
+	if (attr.namespaces) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EACCES;
+	}
+
 	if (attr.freq) {
 		if (attr.sample_freq > sysctl_perf_event_sample_rate)
 			return -EINVAL;
diff --git a/kernel/fork.c b/kernel/fork.c
index 11c5c8a..fd77e67 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2289,6 +2289,9 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 		free_fs_struct(new_fs);
 
 bad_unshare_out:
+	if (!err)
+		perf_event_namespaces(current);
+
 	return err;
 }
 
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 782102e..4c25e6e 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -26,6 +26,7 @@
 #include <linux/file.h>
 #include <linux/syscalls.h>
 #include <linux/cgroup.h>
+#include <linux/perf_event.h>
 
 static struct kmem_cache *nsproxy_cachep;
 
@@ -264,6 +265,10 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
 	switch_task_namespaces(tsk, new_nsproxy);
 out:
 	fput(file);
+
+	if (!err)
+		perf_event_namespaces(tsk);
+
 	return err;
 }
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ