lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 5 Mar 2008 17:00:20 +0100
From:	Hans Rosenfeld <hans.rosenfeld@....com>
To:	Matt Mackall <mpm@...enic.com>
Cc:	Dave Hansen <haveblue@...ibm.com>, linux-kernel@...r.kernel.org,
	Adam Litke <aglitke@...il.com>, nacc <nacc@...ux.vnet.ibm.com>,
	Jon Tollefson <kniht@...ux.vnet.ibm.com>,
	Adam Litke <agl@...ux.vnet.ibm.com>
Subject: Re: [RFC][PATCH] make /proc/pid/pagemap work with huge pages and return page size

On Fri, Feb 29, 2008 at 04:46:12PM -0600, Matt Mackall wrote:
> Well in any case, step 1 should be to finalize the interface. Let's
> start with a single patch that does nothing but move the exported bits
> around and makes room for the page shift bits. Are we agreed on what
> that should look like now?

Attached is a patch that changes the current pagemap code just to use
the pseudo-pte, which can now also include swap information. The
pseudo-pte is made available through the linux/pagemap_ppte.h header.
The x86-specific huge page support has been left out for now, so the
interface will return wrong information for huge pages.

There are a few reserved bits in the pte part of the union that could be
used to extend the pfn or add more status bits in future (a ronly bit,
indicating that a page is mapped read-only, has been added, but is
currently not set anywhere). The identical parts of the two bitfields in
the union are the present and swap bits, and the page shift.



Signed-off-by: Hans Rosenfeld <hans.rosenfeld@....com>

---
 fs/proc/task_mmu.c           |   41 ++++++++++++++++--------------------
 include/linux/pagemap_ppte.h |   47 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 23 deletions(-)
 create mode 100644 include/linux/pagemap_ppte.h

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 49958cf..958a37a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -5,6 +5,7 @@
 #include <linux/highmem.h>
 #include <linux/ptrace.h>
 #include <linux/pagemap.h>
+#include <linux/pagemap_ppte.h>
 #include <linux/ptrace.h>
 #include <linux/mempolicy.h>
 #include <linux/swap.h>
@@ -527,16 +528,10 @@ struct pagemapread {
 	char __user *out, *end;
 };
 
-#define PM_ENTRY_BYTES sizeof(u64)
-#define PM_RESERVED_BITS    3
-#define PM_RESERVED_OFFSET  (64 - PM_RESERVED_BITS)
-#define PM_RESERVED_MASK    (((1LL<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET)
-#define PM_SPECIAL(nr)      (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK)
-#define PM_NOT_PRESENT      PM_SPECIAL(1LL)
-#define PM_SWAP             PM_SPECIAL(2LL)
-#define PM_END_OF_BUFFER    1
+#define PM_ENTRY_BYTES   sizeof(pagemap_ppte_t)
+#define PM_END_OF_BUFFER 1
 
-static int add_to_pagemap(unsigned long addr, u64 pfn,
+static int add_to_pagemap(unsigned long addr, pagemap_ppte_t ppte,
 			  struct pagemapread *pm)
 {
 	/*
@@ -545,13 +540,13 @@ static int add_to_pagemap(unsigned long addr, u64 pfn,
 	 * the pfn.
 	 */
 	if (pm->out + PM_ENTRY_BYTES >= pm->end) {
-		if (copy_to_user(pm->out, &pfn, pm->end - pm->out))
+		if (copy_to_user(pm->out, &ppte, pm->end - pm->out))
 			return -EFAULT;
 		pm->out = pm->end;
 		return PM_END_OF_BUFFER;
 	}
 
-	if (put_user(pfn, pm->out))
+	if (copy_to_user(pm->out, &ppte, sizeof(ppte)))
 		return -EFAULT;
 	pm->out += PM_ENTRY_BYTES;
 	return 0;
@@ -571,12 +566,6 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
 	return err;
 }
 
-u64 swap_pte_to_pagemap_entry(pte_t pte)
-{
-	swp_entry_t e = pte_to_swp_entry(pte);
-	return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
-}
-
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			     void *private)
 {
@@ -585,15 +574,21 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	int err = 0;
 
 	for (; addr != end; addr += PAGE_SIZE) {
-		u64 pfn = PM_NOT_PRESENT;
+                pagemap_ppte_t ppte = PM_NOT_PRESENT;
+
 		pte = pte_offset_map(pmd, addr);
-		if (is_swap_pte(*pte))
-			pfn = swap_pte_to_pagemap_entry(*pte);
-		else if (pte_present(*pte))
-			pfn = pte_pfn(*pte);
+		if (is_swap_pte(*pte)) {
+			swp_entry_t e = pte_to_swp_entry(*pte);
+			ppte.pm_swap   = 1;
+			ppte.pm_type   = swp_type(e);
+			ppte.pm_offset = swp_offset(e);
+		} else if (pte_present(*pte)) {
+			ppte.pm_present = 1;
+			ppte.pm_frame   = pte_pfn(*pte);
+		}
 		/* unmap so we're not in atomic when we copy to userspace */
 		pte_unmap(pte);
-		err = add_to_pagemap(addr, pfn, pm);
+		err = add_to_pagemap(addr, ppte, pm);
 		if (err)
 			return err;
 	}
diff --git a/include/linux/pagemap_ppte.h b/include/linux/pagemap_ppte.h
new file mode 100644
index 0000000..a983500
--- /dev/null
+++ b/include/linux/pagemap_ppte.h
@@ -0,0 +1,47 @@
+#ifndef _LINUX_PAGEMAP_PPTE_H
+#define _LINUX_PAGEMAP_PPTE_H
+
+#include <linux/types.h>
+
+/*
+ * structure used by /proc/pid/pagemap to describe mappings
+ */	
+
+union pagemap_ppte {
+	struct pagemap_ppte_pte {
+		__u64 _shift:6;
+		__u64 _frame:40;
+		__u64 _resvd:15;
+		__u64 _ronly:1;
+		__u64 _swap:1;
+		__u64 _present:1;
+	} pte;
+
+	struct pagemap_ppte_swp {
+		__u64 _shift:6;
+		__u64 _offset:51;
+		__u64 _type:5;
+		__u64 _swap:1;
+		__u64 _present:1;
+	} swp;
+
+	__u64 val;
+};
+
+#define pm_shift   pte._shift
+#define pm_frame   pte._frame
+#define pm_offset  swp._offset
+#define pm_type    swp._type
+#define pm_ronly   pte._ronly
+#define pm_swap    pte._swap
+#define pm_present pte._present
+
+typedef union pagemap_ppte pagemap_ppte_t;
+
+#define PM_NOT_PRESENT   ((pagemap_ppte_t) { .pm_present = 0,	 \
+					     .pm_swap    = 0, \
+					     .pm_offset  = 0, \
+					     .pm_type    = 0, \
+					     .pm_shift   = PAGE_SHIFT})
+
+#endif /* _LINUX_PAGEMAP_PPTE_H */
-- 
1.5.3.7


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ