lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1588812129-8596-11-git-send-email-anthony.yznaga@oracle.com>
Date:   Wed,  6 May 2020 17:41:36 -0700
From:   Anthony Yznaga <anthony.yznaga@...cle.com>
To:     linux-mm@...ck.org, linux-kernel@...r.kernel.org
Cc:     willy@...radead.org, corbet@....net, tglx@...utronix.de,
        mingo@...hat.com, bp@...en8.de, x86@...nel.org, hpa@...or.com,
        dave.hansen@...ux.intel.com, luto@...nel.org, peterz@...radead.org,
        rppt@...ux.ibm.com, akpm@...ux-foundation.org, hughd@...gle.com,
        ebiederm@...ssion.com, masahiroy@...nel.org, ardb@...nel.org,
        ndesaulniers@...gle.com, dima@...ovin.in, daniel.kiper@...cle.com,
        nivedita@...m.mit.edu, rafael.j.wysocki@...el.com,
        dan.j.williams@...el.com, zhenzhong.duan@...cle.com,
        jroedel@...e.de, bhe@...hat.com, guro@...com,
        Thomas.Lendacky@....com, andriy.shevchenko@...ux.intel.com,
        keescook@...omium.org, hannes@...xchg.org, minchan@...nel.org,
        mhocko@...nel.org, ying.huang@...el.com,
        yang.shi@...ux.alibaba.com, gustavo@...eddedor.com,
        ziqian.lzq@...fin.com, vdavydov.dev@...il.com,
        jason.zeng@...el.com, kevin.tian@...el.com, zhiyuan.lv@...el.com,
        lei.l.li@...el.com, paul.c.lai@...el.com, ashok.raj@...el.com,
        linux-fsdevel@...r.kernel.org, linux-doc@...r.kernel.org,
        kexec@...ts.infradead.org
Subject: [RFC 10/43] PKRAM: add code for walking the preserved pages pagetable

Add the ability to walk the pkram pagetable from high to low addresses
and execute a callback for each contiguous range of preserved or not
preserved memory found.  The reason for walking high to low is to align
with high to low memblock allocation when finding holes that memblocks
can safely be allocated from as will be seen in a later patch.

Signed-off-by: Anthony Yznaga <anthony.yznaga@...cle.com>
---
 include/linux/pkram.h |  15 +++++
 mm/Makefile           |   2 +-
 mm/pkram_pagetable.c  | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 185 insertions(+), 1 deletion(-)
 create mode 100644 mm/pkram_pagetable.c

diff --git a/include/linux/pkram.h b/include/linux/pkram.h
index a58dd2ea835a..b6fa973d37cc 100644
--- a/include/linux/pkram.h
+++ b/include/linux/pkram.h
@@ -25,6 +25,21 @@ struct pkram_stream {
 
 #define PKRAM_NAME_MAX		256	/* including nul */
 
+/*
+ * State for a high-to-low walk of the pkram pagetable.
+ *
+ * @range_cb:   called with the base and size of each completed range; a
+ *              nonzero return value stops the walk
+ * @curr_addr:  base address of the entry currently being visited; written
+ *              by the walker before each entry is examined
+ * @end_addr:   upper end of the range currently being tracked, clamped to
+ *              @max_addr
+ * @min_addr:   lower bound; reported ranges never start below it
+ * @max_addr:   upper bound; entries whose base is at or above it are skipped
+ * @min_size:   not referenced by the walker in this patch
+ *              NOTE(review): presumably consumed by @range_cb - confirm
+ * @tracking:   true while a range is open
+ * @find_holes: when true, not-present entries are tracked instead of
+ *              present ones
+ * @retval:     not referenced by the walker in this patch
+ *              NOTE(review): presumably filled in by @range_cb - confirm
+ */
+struct pkram_pg_state {
+	int (*range_cb)(struct pkram_pg_state *state, unsigned long base,
+			unsigned long size);
+	unsigned long curr_addr;
+	unsigned long end_addr;
+	unsigned long min_addr;
+	unsigned long max_addr;
+	unsigned long min_size;
+	bool tracking;
+	bool find_holes;
+	unsigned long retval;
+};
+
+/*
+ * Walk the pkram pagetable rooted at @pgd from high to low addresses and
+ * call st->range_cb for each contiguous range found.
+ */
+void pkram_walk_pgt_rev(struct pkram_pg_state *st, pgd_t *pgd);
+
 int pkram_prepare_save(struct pkram_stream *ps, const char *name,
 		       gfp_t gfp_mask);
 int pkram_prepare_save_obj(struct pkram_stream *ps);
diff --git a/mm/Makefile b/mm/Makefile
index 59cd381194af..c4ad1c56e237 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -112,4 +112,4 @@ obj-$(CONFIG_MEMFD_CREATE) += memfd.o
 obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
 obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
 obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
-obj-$(CONFIG_PKRAM) += pkram.o
+obj-$(CONFIG_PKRAM) += pkram.o pkram_pagetable.o
diff --git a/mm/pkram_pagetable.c b/mm/pkram_pagetable.c
new file mode 100644
index 000000000000..d31aa36207ba
--- /dev/null
+++ b/mm/pkram_pagetable.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bitops.h>
+#include <asm/pgtable.h>
+#include <linux/pkram.h>
+
+/*
+ * Local wrapper around pgd_none(): when 5-level paging is not enabled the
+ * pgd entry is tested as a p4d entry instead.  The inner pgd_none() is not
+ * expanded again by the preprocessor, so it refers to the underlying
+ * definition rather than to this macro.
+ * NOTE(review): presumably needed because the arch pgd_none() does not
+ * report empty entries when the p4d level is folded - confirm.
+ */
+#define pgd_none(a)  (pgtable_l5_enabled() ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
+
+/*
+ * Report one pagetable entry to the range tracker during a high-to-low walk.
+ *
+ * @st:        walk state; st->curr_addr holds the base address of the entry
+ * @curr_size: size of the address range covered by the entry
+ * @present:   true if the entry is populated, false if it is empty
+ *
+ * An entry is "tracked" when @present differs from st->find_holes: present
+ * entries are tracked normally, not-present entries when looking for holes.
+ * Consecutive tracked entries are merged into one range and st->range_cb()
+ * is called when the range is complete.
+ *
+ * Returns 0 to continue the walk, 1 when a new range would lie entirely at
+ * or below st->min_addr, or the value returned by st->range_cb().
+ */
+static int note_page_rev(struct pkram_pg_state *st, unsigned long curr_size, bool present)
+{
+	unsigned long curr_addr = st->curr_addr;
+	bool track_page = present ^ st->find_holes;
+
+	if (!st->tracking && track_page) {
+		/* First tracked entry: it forms the top of a new range */
+		unsigned long end_addr = curr_addr + curr_size;
+
+		/* Nothing at or above min_addr is left; tell callers to stop */
+		if (end_addr <= st->min_addr)
+			return 1;
+
+		/* Clamp the top of the range to the upper bound */
+		st->end_addr = min(end_addr, st->max_addr);
+		st->tracking = true;
+	} else if (st->tracking) {
+		unsigned long base, size;
+
+		/*
+		 * Continue tracking if lower bound has not been reached.
+		 * Address 0 is the lowest entry the walk visits, so a range
+		 * that reaches it is closed below instead of being extended.
+		 */
+		if (track_page && curr_addr && curr_addr >= st->min_addr)
+			return 0;
+
+		if (!track_page)
+			/* Range ends just above this untracked entry */
+			base = max(curr_addr + curr_size, st->min_addr);
+		else
+			/* Tracked entry at address 0 or below min_addr: cut at min_addr */
+			base = st->min_addr;
+
+		size = st->end_addr - base;
+		st->tracking = false;
+
+		return st->range_cb(st, base, size);
+	}
+
+	return 0;
+}
+
+/*
+ * Walk one bitmap page of the pkram pagetable from the highest bit to the
+ * lowest, reporting each PAGE_SIZE entry to note_page_rev().
+ *
+ * @st:   walk state; st->curr_addr is updated for every entry visited
+ * @addr: pmd entry whose value is converted with __va() to locate the bitmap
+ * @P:    address covered by bit 0 of the bitmap
+ *
+ * Entries at or above st->max_addr are skipped.
+ *
+ * Returns 0 if the whole bitmap was walked, or the first nonzero value
+ * returned by note_page_rev().
+ */
+static int walk_pte_level_rev(struct pkram_pg_state *st, pmd_t addr, unsigned long P)
+{
+	unsigned long *bitmap;
+	int present;
+	/* ret must be defined even if every entry is skipped below */
+	int i, ret = 0;
+
+	bitmap = __va(pmd_val(addr));
+	for (i = PTRS_PER_PTE - 1; i >= 0; i--) {
+		unsigned long curr_addr = P + i * PAGE_SIZE;
+
+		if (curr_addr >= st->max_addr)
+			continue;
+		st->curr_addr = curr_addr;
+
+		/* A set bit marks the page as present */
+		present = test_bit(i, bitmap);
+		ret = note_page_rev(st, PAGE_SIZE, present);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Walk one pmd-level table of the pkram pagetable from the last entry to
+ * the first.
+ *
+ * @st:   walk state; st->curr_addr is updated for every entry visited
+ * @addr: pud entry referencing the table (located via pud_page_vaddr())
+ * @P:    address covered by the first entry of the table
+ *
+ * Empty and large entries are reported to note_page_rev() as PMD_SIZE
+ * ranges; other entries are descended into with walk_pte_level_rev().
+ * Entries at or above st->max_addr are skipped.
+ *
+ * Returns 0 if the whole table was walked, or the first nonzero value
+ * returned by the lower level or by note_page_rev().
+ */
+static int walk_pmd_level_rev(struct pkram_pg_state *st, pud_t addr, unsigned long P)
+{
+	pmd_t *start;
+	/* ret must be defined even if every entry is skipped below */
+	int i, ret = 0;
+
+	start = (pmd_t *)pud_page_vaddr(addr) + PTRS_PER_PMD - 1;
+	for (i = PTRS_PER_PMD - 1; i >= 0; i--, start--) {
+		unsigned long curr_addr = P + i * PMD_SIZE;
+
+		if (curr_addr >= st->max_addr)
+			continue;
+		st->curr_addr = curr_addr;
+
+		if (!pmd_none(*start)) {
+			if (pmd_large(*start))
+				ret = note_page_rev(st, PMD_SIZE, true);
+			else
+				ret = walk_pte_level_rev(st, *start, curr_addr);
+		} else
+			ret = note_page_rev(st, PMD_SIZE, false);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Walk one pud-level table of the pkram pagetable from the last entry to
+ * the first.
+ *
+ * @st:   walk state; st->curr_addr is updated for every entry visited
+ * @addr: p4d entry referencing the table (located via p4d_page_vaddr())
+ * @P:    address covered by the first entry of the table
+ *
+ * Empty and large entries are reported to note_page_rev() as PUD_SIZE
+ * ranges; other entries are descended into with walk_pmd_level_rev().
+ * Entries at or above st->max_addr are skipped.
+ *
+ * Returns 0 if the whole table was walked, or the first nonzero value
+ * returned by the lower level or by note_page_rev().
+ */
+static int walk_pud_level_rev(struct pkram_pg_state *st, p4d_t addr, unsigned long P)
+{
+	pud_t *start;
+	/* ret must be defined even if every entry is skipped below */
+	int i, ret = 0;
+
+	start = (pud_t *)p4d_page_vaddr(addr) + PTRS_PER_PUD - 1;
+	for (i = PTRS_PER_PUD - 1; i >= 0 ; i--, start--) {
+		unsigned long curr_addr = P + i * PUD_SIZE;
+
+		if (curr_addr >= st->max_addr)
+			continue;
+		st->curr_addr = curr_addr;
+
+		if (!pud_none(*start)) {
+			if (pud_large(*start))
+				ret = note_page_rev(st, PUD_SIZE, true);
+			else
+				ret = walk_pmd_level_rev(st, *start, curr_addr);
+		} else
+			ret = note_page_rev(st, PUD_SIZE, false);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Walk one p4d-level table of the pkram pagetable from the last entry to
+ * the first.
+ *
+ * @st:   walk state; st->curr_addr is updated for every entry visited
+ * @addr: pgd entry referencing the table (located via pgd_page_vaddr())
+ * @P:    address covered by the first entry of the table
+ *
+ * When PTRS_PER_P4D is 1 the pgd entry is reinterpreted as a p4d entry and
+ * the walk continues directly at the pud level.  Otherwise empty and large
+ * entries are reported to note_page_rev() as P4D_SIZE ranges and other
+ * entries are descended into with walk_pud_level_rev().  Entries at or
+ * above st->max_addr are skipped.
+ *
+ * Returns 0 if the whole table was walked, or the first nonzero value
+ * returned by the lower level or by note_page_rev().
+ */
+static int walk_p4d_level_rev(struct pkram_pg_state *st, pgd_t addr, unsigned long P)
+{
+	p4d_t *start;
+	/* ret must be defined even if every entry is skipped below */
+	int i, ret = 0;
+
+	if (PTRS_PER_P4D == 1)
+		return walk_pud_level_rev(st, __p4d(pgd_val(addr)), P);
+
+	start = (p4d_t *)pgd_page_vaddr(addr) + PTRS_PER_P4D - 1;
+	for (i = PTRS_PER_P4D - 1; i >= 0; i--, start--) {
+		unsigned long curr_addr = P + i * P4D_SIZE;
+
+		if (curr_addr >= st->max_addr)
+			continue;
+		st->curr_addr = curr_addr;
+
+		if (!p4d_none(*start)) {
+			if (p4d_large(*start))
+				ret = note_page_rev(st, P4D_SIZE, true);
+			else
+				ret = walk_pud_level_rev(st, *start, curr_addr);
+		} else
+			ret = note_page_rev(st, P4D_SIZE, false);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Walk the pkram pagetable rooted at @pgd from the highest pgd entry to the
+ * lowest.  Populated entries are descended into with walk_p4d_level_rev();
+ * empty ones are reported to note_page_rev() as PGDIR_SIZE ranges.  Entries
+ * whose base address is at or above st->max_addr are skipped, and the walk
+ * ends as soon as a lower level or note_page_rev() returns nonzero.
+ */
+void pkram_walk_pgt_rev(struct pkram_pg_state *st, pgd_t *pgd)
+{
+	int idx;
+
+	for (idx = PTRS_PER_PGD - 1; idx >= 0; idx--) {
+		pgd_t *entry = pgd + idx;
+		unsigned long base = idx * PGDIR_SIZE;
+		int status;
+
+		/* Still above the upper bound; nothing to look at yet */
+		if (base >= st->max_addr)
+			continue;
+
+		st->curr_addr = base;
+		if (pgd_none(*entry))
+			status = note_page_rev(st, PGDIR_SIZE, false);
+		else
+			status = walk_p4d_level_rev(st, *entry, base);
+
+		if (status)
+			return;
+	}
+}
-- 
2.13.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ