lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1460417755-18201-7-git-send-email-avagin@openvz.org>
Date:	Mon, 11 Apr 2016 16:35:46 -0700
From:	Andrey Vagin <avagin@...nvz.org>
To:	linux-kernel@...r.kernel.org
Cc:	Andrey Vagin <avagin@...nvz.org>, Oleg Nesterov <oleg@...hat.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Cyrill Gorcunov <gorcunov@...nvz.org>,
	Pavel Emelyanov <xemul@...allels.com>,
	Roger Luethi <rl@...lgate.ch>, Arnd Bergmann <arnd@...db.de>,
	Arnaldo Carvalho de Melo <acme@...nel.org>,
	David Ahern <dsahern@...il.com>,
	Andy Lutomirski <luto@...capital.net>,
	Pavel Odintsov <pavel.odintsov@...il.com>
Subject: [PATCH 06/15] task_diag: add a new group to get tasks memory mappings (v2)

v2: Fixes from David Ahern
* Fix 8-byte alignment
* Change implementation of DIAG_VMA attribute:

This patch puts the filename into the task_diag_vma struct and
converts TASK_DIAG_VMA attribute into a series of task_diag_vma.
Now there is a single TASK_DIAG_VMA attribute that is parsed
as:

| struct task_diag_vma | filename | ...

Cc: David Ahern <dsahern@...il.com>
Signed-off-by: Andrey Vagin <avagin@...nvz.org>
---
 fs/proc/internal.h             |  21 ++++
 fs/proc/task_diag.c            | 279 ++++++++++++++++++++++++++++++++++++++++-
 fs/proc/task_mmu.c             |  18 +--
 include/uapi/linux/task_diag.h |  85 +++++++++++++
 4 files changed, 385 insertions(+), 18 deletions(-)

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 2a2b1e6..75b57a3 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -316,3 +316,24 @@ task_next_child(struct task_struct *parent, struct task_struct *prev, unsigned i
 struct task_struct *task_first_tid(struct pid *pid, int tid, loff_t f_pos,
 					struct pid_namespace *ns);
 struct task_struct *task_next_tid(struct task_struct *start);
+
+/*
+ * Accumulators handed to smaps_pte_range() through mm_walk->private
+ * while the page tables of one VMA are walked.  Declared here so that
+ * both task_mmu.c and task_diag.c can use the same walker.
+ */
+struct mem_size_stats {
+	unsigned long resident;
+	unsigned long shared_clean;
+	unsigned long shared_dirty;
+	unsigned long private_clean;
+	unsigned long private_dirty;
+	unsigned long referenced;
+	unsigned long anonymous;
+	unsigned long anonymous_thp;
+	unsigned long swap;
+	unsigned long shared_hugetlb;
+	unsigned long private_hugetlb;
+	u64 pss;
+	u64 swap_pss;
+	bool check_shmem_swap;
+};
+
+struct mm_walk;
+int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+			   struct mm_walk *walk);
diff --git a/fs/proc/task_diag.c b/fs/proc/task_diag.c
index fc31771..9c1ed45 100644
--- a/fs/proc/task_diag.c
+++ b/fs/proc/task_diag.c
@@ -7,6 +7,8 @@
 #include <linux/taskstats.h>
 #include <net/sock.h>
 
+#include "internal.h"
+
 struct task_diag_cb {
 	struct sk_buff		*req;
 	struct sk_buff		*resp;
@@ -14,6 +16,11 @@ struct task_diag_cb {
 	pid_t			pid;
 	int			pos;
 	int			attr;
+	union { /* per-attribute */
+		struct {
+			unsigned long mark;
+		} vma;
+	};
 };
 
 /*
@@ -122,6 +129,267 @@ static int fill_creds(struct task_struct *p, struct sk_buff *skb,
 	return 0;
 }
 
+/*
+ * Translate the kernel-internal vma->vm_flags bitmask into the
+ * TASK_DIAG_VMA_F_* bitmask that is exported in task_diag_vma.vm_flags.
+ */
+static u64 get_vma_flags(struct vm_area_struct *vma)
+{
+	/* Indexed by VM_* bit number, yields the matching exported flag. */
+	static const u64 mnemonics[BITS_PER_LONG] = {
+		/*
+		 * Default for bits we don't know about: they map to
+		 * nothing and are dropped from the result.
+		 */
+		[0 ... (BITS_PER_LONG-1)] = 0,
+
+		[ilog2(VM_READ)]	= TASK_DIAG_VMA_F_READ,
+		[ilog2(VM_WRITE)]	= TASK_DIAG_VMA_F_WRITE,
+		[ilog2(VM_EXEC)]	= TASK_DIAG_VMA_F_EXEC,
+		[ilog2(VM_SHARED)]	= TASK_DIAG_VMA_F_SHARED,
+		[ilog2(VM_MAYREAD)]	= TASK_DIAG_VMA_F_MAYREAD,
+		[ilog2(VM_MAYWRITE)]	= TASK_DIAG_VMA_F_MAYWRITE,
+		[ilog2(VM_MAYEXEC)]	= TASK_DIAG_VMA_F_MAYEXEC,
+		[ilog2(VM_MAYSHARE)]	= TASK_DIAG_VMA_F_MAYSHARE,
+		[ilog2(VM_GROWSDOWN)]	= TASK_DIAG_VMA_F_GROWSDOWN,
+		[ilog2(VM_PFNMAP)]	= TASK_DIAG_VMA_F_PFNMAP,
+		[ilog2(VM_DENYWRITE)]	= TASK_DIAG_VMA_F_DENYWRITE,
+#ifdef CONFIG_X86_INTEL_MPX
+		[ilog2(VM_MPX)]		= TASK_DIAG_VMA_F_MPX,
+#endif
+		[ilog2(VM_LOCKED)]	= TASK_DIAG_VMA_F_LOCKED,
+		[ilog2(VM_IO)]		= TASK_DIAG_VMA_F_IO,
+		[ilog2(VM_SEQ_READ)]	= TASK_DIAG_VMA_F_SEQ_READ,
+		[ilog2(VM_RAND_READ)]	= TASK_DIAG_VMA_F_RAND_READ,
+		[ilog2(VM_DONTCOPY)]	= TASK_DIAG_VMA_F_DONTCOPY,
+		[ilog2(VM_DONTEXPAND)]	= TASK_DIAG_VMA_F_DONTEXPAND,
+		[ilog2(VM_ACCOUNT)]	= TASK_DIAG_VMA_F_ACCOUNT,
+		[ilog2(VM_NORESERVE)]	= TASK_DIAG_VMA_F_NORESERVE,
+		[ilog2(VM_HUGETLB)]	= TASK_DIAG_VMA_F_HUGETLB,
+		[ilog2(VM_ARCH_1)]	= TASK_DIAG_VMA_F_ARCH_1,
+		[ilog2(VM_DONTDUMP)]	= TASK_DIAG_VMA_F_DONTDUMP,
+#ifdef CONFIG_MEM_SOFT_DIRTY
+		[ilog2(VM_SOFTDIRTY)]	= TASK_DIAG_VMA_F_SOFTDIRTY,
+#endif
+		[ilog2(VM_MIXEDMAP)]	= TASK_DIAG_VMA_F_MIXEDMAP,
+		[ilog2(VM_HUGEPAGE)]	= TASK_DIAG_VMA_F_HUGEPAGE,
+		[ilog2(VM_NOHUGEPAGE)]	= TASK_DIAG_VMA_F_NOHUGEPAGE,
+		[ilog2(VM_MERGEABLE)]	= TASK_DIAG_VMA_F_MERGEABLE,
+	};
+	unsigned long pending = vma->vm_flags;
+	unsigned int bit;
+	u64 exported = 0;
+
+	/* Consume the mask from bit 0 upwards until no set bit is left. */
+	for (bit = 0; pending; bit++, pending >>= 1) {
+		if (pending & 1UL)
+			exported |= mnemonics[bit];
+	}
+
+	return exported;
+}
+
+/*
+ * Describe @vma in the exported task_diag_vma layout and store the
+ * result at @diag_vma.
+ *
+ * Use a tmp variable and copy to the output arg to deal with
+ * alignment issues. diag_vma contains u64 elements which
+ * means extended load operations can be used and those can
+ * require 8-byte alignment (e.g., sparc).
+ *
+ * The vma_len, name_* and stat_* members are left zero here; fill_vma()
+ * sets them once the data trailing the struct has been laid out.
+ */
+static void fill_diag_vma(struct vm_area_struct *vma,
+			  struct task_diag_vma *diag_vma)
+{
+	struct task_diag_vma tmp;
+
+	/*
+	 * Clear the whole temporary, padding included: every byte of it is
+	 * copied into the netlink message below, so anything left
+	 * uninitialised would carry stale kernel stack contents with it.
+	 */
+	memset(&tmp, 0, sizeof(tmp));
+
+	/* We don't show the stack guard page in /proc/maps */
+	tmp.start = vma->vm_start;
+	if (stack_guard_page_start(vma, tmp.start))
+		tmp.start += PAGE_SIZE;
+
+	tmp.end = vma->vm_end;
+	if (stack_guard_page_end(vma, tmp.end))
+		tmp.end -= PAGE_SIZE;
+	tmp.vm_flags = get_vma_flags(vma);
+
+	/*
+	 * Only file-backed mappings have a device, inode and offset; for
+	 * everything else these members keep the zero set above.
+	 */
+	if (vma->vm_file) {
+		struct inode *inode = file_inode(vma->vm_file);
+		dev_t dev;
+
+		dev = inode->i_sb->s_dev;
+		tmp.major = MAJOR(dev);
+		tmp.minor = MINOR(dev);
+		tmp.inode = inode->i_ino;
+		tmp.generation = inode->i_generation;
+		/* exported as a byte offset, not a page index */
+		tmp.pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
+	}
+
+	memcpy(diag_vma, &tmp, sizeof(*diag_vma));
+}
+
+/*
+ * Pick the name to report for @vma.
+ *
+ * A file-backed mapping gets the d_path() of its file, rendered into the
+ * caller-supplied @page buffer (PAGE_SIZE bytes); whatever d_path()
+ * returns is handed back unchanged.  Otherwise the result is the
+ * non-NULL name from vma->vm_ops->name(), if there is one, or else
+ * whatever arch_vma_name() returns.
+ */
+static const char *get_vma_name(struct vm_area_struct *vma, char *page)
+{
+	if (vma->vm_file)
+		return d_path(&vma->vm_file->f_path, page, PAGE_SIZE);
+
+	if (vma->vm_ops && vma->vm_ops->name) {
+		const char *ops_name = vma->vm_ops->name(vma);
+
+		if (ops_name)
+			return ops_name;
+	}
+
+	return arch_vma_name(vma);
+}
+
+/*
+ * Collect memory statistics for @vma by walking it with
+ * smaps_pte_range() and store them at @stat in the exported
+ * task_diag_vma_stat layout.
+ *
+ * The result is built in a zeroed stack temporary and memcpy()'d to
+ * @stat, the same way fill_diag_vma() writes its output.
+ */
+static void fill_diag_vma_stat(struct vm_area_struct *vma,
+				struct task_diag_vma_stat *stat)
+{
+	struct task_diag_vma_stat tmp;
+	struct mem_size_stats mss;
+	struct mm_walk smaps_walk = {
+		.pmd_entry = smaps_pte_range,
+		.mm = vma->vm_mm,
+		.private = &mss,
+	};
+
+	memset(&mss, 0, sizeof(mss));
+	memset(&tmp, 0, sizeof(tmp));
+
+	/* mmap_sem is held for reading by the caller, fill_vma() */
+	walk_page_vma(vma, &smaps_walk);
+
+	tmp.resident		= mss.resident;
+	tmp.pss			= mss.pss;
+	tmp.shared_clean	= mss.shared_clean;
+	/* was missing, so shared_dirty was always reported as 0 */
+	tmp.shared_dirty	= mss.shared_dirty;
+	tmp.private_clean	= mss.private_clean;
+	tmp.private_dirty	= mss.private_dirty;
+	tmp.referenced		= mss.referenced;
+	tmp.anonymous		= mss.anonymous;
+	tmp.anonymous_thp	= mss.anonymous_thp;
+	tmp.swap		= mss.swap;
+
+	memcpy(stat, &tmp, sizeof(*stat));
+}
+
+/*
+ * Append a TASK_DIAG_VMA attribute describing the mappings of @p to @skb.
+ *
+ * The attribute payload is a sequence of records, each laid out as
+ *
+ *   | struct task_diag_vma | [struct task_diag_vma_stat] | [name] |
+ *
+ * where the stat part is present only when TASK_DIAG_SHOW_VMA_STAT is
+ * set in @show_flags and the name only when get_vma_name() finds one.
+ *
+ * @cb, when non-NULL, carries the resume point between calls:
+ * cb->vma.mark is the vm_start of the last VMA that was emitted, and
+ * VMAs starting at or below it are skipped.  It is reset to 0 once the
+ * whole list has been emitted.
+ *
+ * *@progress is set to true as soon as one complete record is in @skb.
+ *
+ * Returns 0 when all VMAs were emitted or @p has no usable mm, -ENOMEM
+ * if the scratch page cannot be allocated, -EMSGSIZE when @skb ran out
+ * of room, or the error encoded in the pointer from get_vma_name().
+ */
+static int fill_vma(struct task_struct *p, struct sk_buff *skb,
+		    struct task_diag_cb *cb, bool *progress, u64 show_flags)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	struct nlattr *attr = NULL;
+	struct task_diag_vma *diag_vma;
+	unsigned long mark = 0;
+	char *page;
+	int rc = -EMSGSIZE, size;
+
+	if (cb)
+		mark = cb->vma.mark;
+
+	/*
+	 * NOTE(review): p->mm is sampled without task_lock(); confirm the
+	 * callers keep the mm from going away before mm_users is bumped.
+	 */
+	mm = p->mm;
+	if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+		return 0;
+
+	/* scratch buffer for d_path() in get_vma_name() */
+	page = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!page) {
+		mmput(mm);
+		return -ENOMEM;
+	}
+
+	/* fixed-size part of every record; the name is reserved separately */
+	size = NLA_ALIGN(sizeof(struct task_diag_vma));
+	if (show_flags & TASK_DIAG_SHOW_VMA_STAT)
+		size += NLA_ALIGN(sizeof(struct task_diag_vma_stat));
+
+	down_read(&mm->mmap_sem);
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		/* tail before this record, to roll back a partial one */
+		unsigned char *b = skb_tail_pointer(skb);
+		const char *name;
+		void *pfile;
+
+		/*
+		 * Already emitted by a previous call.
+		 * NOTE(review): with mark == 0 this also skips a VMA
+		 * that starts at address 0 - confirm that is acceptable.
+		 */
+		if (mark >= vma->vm_start)
+			continue;
+
+		/* setup pointer for next map */
+		if (attr == NULL) {
+			/* first record: open the attribute itself */
+			attr = nla_reserve(skb, TASK_DIAG_VMA, size);
+			if (!attr)
+				goto err;
+
+			diag_vma = nla_data(attr);
+		} else {
+			/* later records extend the same attribute */
+			diag_vma = nla_reserve_nohdr(skb, size);
+
+			if (diag_vma == NULL) {
+				nlmsg_trim(skb, b);
+				goto out;
+			}
+		}
+
+		fill_diag_vma(vma, diag_vma);
+
+		if (show_flags & TASK_DIAG_SHOW_VMA_STAT) {
+			struct task_diag_vma_stat *stat;
+
+			stat = (void *) diag_vma + NLA_ALIGN(sizeof(*diag_vma));
+
+			fill_diag_vma_stat(vma, stat);
+			diag_vma->stat_len = sizeof(struct task_diag_vma_stat);
+			diag_vma->stat_off = (void *) stat - (void *)diag_vma;
+		} else {
+			diag_vma->stat_len = 0;
+			diag_vma->stat_off = 0;
+		}
+
+		name = get_vma_name(vma, page);
+		if (IS_ERR(name)) {
+			nlmsg_trim(skb, b);
+			rc = PTR_ERR(name);
+			goto out;
+		}
+
+		if (name) {
+			/* the terminating NUL is part of the record */
+			diag_vma->name_len = strlen(name) + 1;
+
+			/* reserves NLA_ALIGN(len) */
+			pfile = nla_reserve_nohdr(skb, diag_vma->name_len);
+			if (pfile == NULL) {
+				nlmsg_trim(skb, b);
+				goto out;
+			}
+			diag_vma->name_off = pfile - (void *) diag_vma;
+			memcpy(pfile, name, diag_vma->name_len);
+		} else {
+			diag_vma->name_len = 0;
+			diag_vma->name_off = 0;
+		}
+
+		/* the record is complete: remember it as the resume point */
+		mark = vma->vm_start;
+
+		diag_vma->vma_len = skb_tail_pointer(skb) - (unsigned char *) diag_vma;
+
+		*progress = true;
+	}
+
+	/* every VMA was emitted: the next dump starts from scratch */
+	rc = 0;
+	mark = 0;
+out:
+	/* make the attribute length cover every record appended to it */
+	if (*progress)
+		attr->nla_len = skb_tail_pointer(skb) - (unsigned char *) attr;
+
+err:
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+	free_page((unsigned long) page);
+	if (cb)
+		cb->vma.mark = mark;
+
+	return rc;
+}
+
 static int task_diag_fill(struct task_struct *tsk, struct sk_buff *skb,
 			  struct task_diag_pid *req,
 			  struct task_diag_cb *cb, struct pid_namespace *pidns,
@@ -131,6 +399,7 @@ static int task_diag_fill(struct task_struct *tsk, struct sk_buff *skb,
 	struct nlmsghdr *nlh;
 	struct task_diag_msg *msg;
 	int err = 0, i = 0, n = 0;
+	bool progress = false;
 	int flags = 0;
 
 	if (cb) {
@@ -163,13 +432,21 @@ static int task_diag_fill(struct task_struct *tsk, struct sk_buff *skb,
 		i++;
 	}
 
+	if (show_flags & TASK_DIAG_SHOW_VMA) {
+		if (i >= n)
+			err = fill_vma(tsk, skb, cb, &progress, show_flags);
+		if (err)
+			goto err;
+		i++;
+	}
+
 	nlmsg_end(skb, nlh);
 	if (cb)
 		cb->attr = 0;
 
 	return 0;
 err:
-	if (err == -EMSGSIZE && (i > n)) {
+	if (err == -EMSGSIZE && (i > n || progress)) {
 		if (cb)
 			cb->attr = i;
 		nlmsg_end(skb, nlh);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 229cb54..211147e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -439,22 +439,6 @@ const struct file_operations proc_tid_maps_operations = {
 #define PSS_SHIFT 12
 
 #ifdef CONFIG_PROC_PAGE_MONITOR
-struct mem_size_stats {
-	unsigned long resident;
-	unsigned long shared_clean;
-	unsigned long shared_dirty;
-	unsigned long private_clean;
-	unsigned long private_dirty;
-	unsigned long referenced;
-	unsigned long anonymous;
-	unsigned long anonymous_thp;
-	unsigned long swap;
-	unsigned long shared_hugetlb;
-	unsigned long private_hugetlb;
-	u64 pss;
-	u64 swap_pss;
-	bool check_shmem_swap;
-};
 
 static void smaps_account(struct mem_size_stats *mss, struct page *page,
 		bool compound, bool young, bool dirty)
@@ -586,7 +570,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 }
 #endif
 
-static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			   struct mm_walk *walk)
 {
 	struct vm_area_struct *vma = walk->vma;
diff --git a/include/uapi/linux/task_diag.h b/include/uapi/linux/task_diag.h
index ea500c6..3486f2f 100644
--- a/include/uapi/linux/task_diag.h
+++ b/include/uapi/linux/task_diag.h
@@ -16,6 +16,8 @@ struct task_diag_msg {
 enum {
 	TASK_DIAG_BASE	= 0,
 	TASK_DIAG_CRED,
+	TASK_DIAG_VMA,
+	TASK_DIAG_VMA_STAT,
 
 	__TASK_DIAG_ATTR_MAX
 #define TASK_DIAG_ATTR_MAX (__TASK_DIAG_ATTR_MAX - 1)
@@ -23,6 +25,8 @@ enum {
 
 #define TASK_DIAG_SHOW_BASE	(1ULL << TASK_DIAG_BASE)
 #define TASK_DIAG_SHOW_CRED	(1ULL << TASK_DIAG_CRED)
+#define TASK_DIAG_SHOW_VMA	(1ULL << TASK_DIAG_VMA)
+#define TASK_DIAG_SHOW_VMA_STAT	(1ULL << TASK_DIAG_VMA_STAT)
 
 enum {
 	TASK_DIAG_RUNNING,
@@ -66,6 +70,87 @@ struct task_diag_creds {
 	__u32 sgid;
 	__u32 fsgid;
 };
+
+/*
+ * Bits of task_diag_vma.vm_flags.  The kernel translates its internal
+ * VM_* flags into these values (get_vma_flags() in fs/proc/task_diag.c).
+ */
+#define TASK_DIAG_VMA_F_READ		(1ULL <<  0)
+#define TASK_DIAG_VMA_F_WRITE		(1ULL <<  1)
+#define TASK_DIAG_VMA_F_EXEC		(1ULL <<  2)
+#define TASK_DIAG_VMA_F_SHARED		(1ULL <<  3)
+#define TASK_DIAG_VMA_F_MAYREAD		(1ULL <<  4)
+#define TASK_DIAG_VMA_F_MAYWRITE	(1ULL <<  5)
+#define TASK_DIAG_VMA_F_MAYEXEC		(1ULL <<  6)
+#define TASK_DIAG_VMA_F_MAYSHARE	(1ULL <<  7)
+#define TASK_DIAG_VMA_F_GROWSDOWN	(1ULL <<  8)
+#define TASK_DIAG_VMA_F_PFNMAP		(1ULL <<  9)
+#define TASK_DIAG_VMA_F_DENYWRITE	(1ULL << 10)
+#define TASK_DIAG_VMA_F_MPX		(1ULL << 11)
+#define TASK_DIAG_VMA_F_LOCKED		(1ULL << 12)
+#define TASK_DIAG_VMA_F_IO		(1ULL << 13)
+#define TASK_DIAG_VMA_F_SEQ_READ	(1ULL << 14)
+#define TASK_DIAG_VMA_F_RAND_READ	(1ULL << 15)
+#define TASK_DIAG_VMA_F_DONTCOPY	(1ULL << 16)
+#define TASK_DIAG_VMA_F_DONTEXPAND	(1ULL << 17)
+#define TASK_DIAG_VMA_F_ACCOUNT		(1ULL << 18)
+#define TASK_DIAG_VMA_F_NORESERVE	(1ULL << 19)
+#define TASK_DIAG_VMA_F_HUGETLB		(1ULL << 20)
+#define TASK_DIAG_VMA_F_ARCH_1		(1ULL << 21)
+#define TASK_DIAG_VMA_F_DONTDUMP	(1ULL << 22)
+#define TASK_DIAG_VMA_F_SOFTDIRTY	(1ULL << 23)
+#define TASK_DIAG_VMA_F_MIXEDMAP	(1ULL << 24)
+#define TASK_DIAG_VMA_F_HUGEPAGE	(1ULL << 25)
+#define TASK_DIAG_VMA_F_NOHUGEPAGE	(1ULL << 26)
+#define TASK_DIAG_VMA_F_MERGEABLE	(1ULL << 27)
+
+/*
+ * Optional per-VMA memory statistics, placed after struct task_diag_vma
+ * when TASK_DIAG_SHOW_VMA_STAT is requested.  Members correspond to the
+ * same-named counters of the kernel's struct mem_size_stats.
+ * NOTE(review): the units are not stated in this header - confirm
+ * against the smaps accounting code before relying on them.
+ */
+struct task_diag_vma_stat {
+	__u64 resident;
+	__u64 shared_clean;
+	__u64 shared_dirty;
+	__u64 private_clean;
+	__u64 private_dirty;
+	__u64 referenced;
+	__u64 anonymous;
+	__u64 anonymous_thp;
+	__u64 swap;
+	__u64 pss;
+} __attribute__((__aligned__(NLA_ALIGNTO)));
+
+/*
+ * One memory mapping inside a TASK_DIAG_VMA attribute.  The struct is
+ * followed, within the same record, by the optional statistics and the
+ * optional name; all *_off members are byte offsets from the start of
+ * this struct.
+ *
+ * task_diag_vma must be NLA_ALIGN'ed
+ */
+struct task_diag_vma {
+	__u64 start, end;	/* address range, stack guard page excluded */
+	__u64 vm_flags;		/* TASK_DIAG_VMA_F_* bits */
+	__u64 pgoff;		/* byte offset into the file, 0 if no file */
+	__u32 major;		/* device of the backing file, 0 if no file */
+	__u32 minor;
+	__u64 inode;		/* inode number of the file, 0 if no file */
+	__u32 generation;	/* inode generation, 0 if no file */
+	__u16 vma_len;		/* size of the whole record = step to the next one */
+	__u16 name_off;		/* where the name starts, 0 if there is none */
+	__u16 name_len;		/* name length including the NUL, 0 if none */
+	__u16 stat_off;		/* where task_diag_vma_stat starts, 0 if none */
+	__u16 stat_len;		/* sizeof(struct task_diag_vma_stat), 0 if none */
+} __attribute__((__aligned__(NLA_ALIGNTO)));
+
+/*
+ * Return the name stored in the record of @vma (name_off bytes past the
+ * start of the struct), or NULL when the record has no name.
+ */
+static inline char *task_diag_vma_name(struct task_diag_vma *vma)
+{
+	char *record = (char *)vma;
+
+	return vma->name_len ? record + vma->name_off : NULL;
+}
+
+/*
+ * Return the statistics stored in the record of @vma (stat_off bytes
+ * past the start of the struct), or NULL when the record has none.
+ */
+static inline
+struct task_diag_vma_stat *task_diag_vma_stat(struct task_diag_vma *vma)
+{
+	if (!vma->stat_len)
+		return NULL;
+
+	/*
+	 * Step in bytes through a char pointer and cast explicitly:
+	 * arithmetic on void * is a GNU extension, and this header is
+	 * meant to be included by user-space code.
+	 */
+	return (struct task_diag_vma_stat *)((char *)vma + vma->stat_off);
+}
+
+/*
+ * Iterate @vma over every task_diag_vma record packed into the
+ * TASK_DIAG_VMA attribute @attr, advancing by each record's vma_len.
+ * Arguments are parenthesised so expressions can be passed safely, and
+ * the stepping is done on char pointers rather than void pointers.
+ */
+#define task_diag_for_each_vma(vma, attr)				\
+	for ((vma) = nla_data(attr);					\
+	     (char *)(vma) < (char *)nla_data(attr) + nla_len(attr);	\
+	     (vma) = (void *)((char *)(vma) + (vma)->vma_len))
+
 #define TASK_DIAG_DUMP_ALL	0
 #define TASK_DIAG_DUMP_ONE	1
 
-- 
2.5.5

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ