lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID:
 <MW4PR12MB687563EFB56373E8D55DDEABB92B2@MW4PR12MB6875.namprd12.prod.outlook.com>
Date: Tue, 12 Mar 2024 01:16:17 +0000
From: Shivansh Vij <shivanshvij@...look.com>
To:
Cc: shivanshvij@...pholelabs.io,
	Shivansh Vij <shivanshvij@...look.com>,
	Catalin Marinas <catalin.marinas@....com>,
	Will Deacon <will@...nel.org>,
	Joey Gouly <joey.gouly@....com>,
	Ryan Roberts <ryan.roberts@....com>,
	Mark Rutland <mark.rutland@....com>,
	Marc Zyngier <maz@...nel.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Kemeng Shi <shikemeng@...weicloud.com>,
	David Hildenbrand <david@...hat.com>,
	"Mike Rapoport (IBM)" <rppt@...nel.org>,
	"Matthew Wilcox (Oracle)" <willy@...radead.org>,
	Anshuman Khandual <anshuman.khandual@....com>,
	James Houghton <jthoughton@...gle.com>,
	Rick Edgecombe <rick.p.edgecombe@...el.com>,
	Peter Collingbourne <pcc@...gle.com>,
	linux-arm-kernel@...ts.infradead.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH] arm64/mm: adds soft dirty page tracking

Checkpoint-Restore in Userspace (CRIU) needs to be able
to track a memory page's changes if we want to enable
pre-dumping, which is important for live migrations.

The PTE_DIRTY bit (defined in pgtable-prot.h) is already
used to track software dirty pages, and the PTE_WRITE and
PTE_RDONLY bits are used to track hardware dirty pages.

This patch enables full soft dirty page tracking
(including swap PTE support) for arm64 systems, and is
based very closely on the x86 implementation.

It is based on an unfinished patch by
Bin Lu (bin.lu@....com) from 2017
(https://patchwork.kernel.org/project/linux-arm-kernel/patch/1512029649-61312-1-git-send-email-bin.lu@arm.com/),
but has been updated for newer 6.x kernels as well as
tested on various 5.x kernels.

The main difference is that this patch attempts to fix the
bug identified in the original patch, where calling
pte_mkclean() on a page would result in
pte_soft_dirty() == false. This is invalid behaviour because
pte_soft_dirty() should only return false if the PTE_DIRTY
bit is not set and the pte_mksoft_dirty() function has not
been called. The x86 implementation expects this behaviour
as well.

To achieve this, an additional software dirty bit called
PTE_SOFT_DIRTY is defined (in pgtable-prot.h), which is used
exclusively to track soft dirty pages.

This patch also reuses the _PAGE_SWP_SOFT_DIRTY
bit (defined in pgtable.h) from the original patch to add
support for swapped pages. Because the pmd_* functions have
also been implemented, THP pages (including MADV_FREE on
THP pages) are supported as well.

This patch has been tested with CRIU's ZDTM test suite on
5.x and 6.x kernels using the following command:
test/zdtm.py run --page-server --remote-lazy-pages --keep-going --pre 3 -a

Signed-off-by: Shivansh Vij <shivanshvij@...look.com>
---
 arch/arm64/Kconfig                    |  1 +
 arch/arm64/include/asm/pgtable-prot.h |  6 +++
 arch/arm64/include/asm/pgtable.h      | 54 ++++++++++++++++++++++++++-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index aa7c1d435139..fe73d4809c7e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -178,6 +178,7 @@ config ARM64
 	select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_STACKLEAK
+	select HAVE_ARCH_SOFT_DIRTY
 	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 483dbfa39c4c..1b4119bbdf01 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -27,6 +27,12 @@
  */
 #define PMD_PRESENT_INVALID	(_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */
 
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define PTE_SOFT_DIRTY          (_AT(pteval_t, 1) << 60) /* for soft dirty tracking */
+#else
+#define PTE_SOFT_DIRTY          0UL
+#endif /* CONFIG_MEM_SOFT_DIRTY */
+
 #define _PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define _PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 79ce70fbb751..0e699e7d96da 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -198,7 +198,7 @@ static inline pte_t pte_mkclean(pte_t pte)
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+	pte = set_pte_bit(pte, __pgprot(PTE_DIRTY | PTE_SOFT_DIRTY));
 
 	if (pte_write(pte))
 		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
@@ -443,6 +443,29 @@ static inline pgprot_t pte_pgprot(pte_t pte)
 	return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
 }
 
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline bool pte_soft_dirty(pte_t pte)
+{
+	return pte_sw_dirty(pte) || (!!(pte_val(pte) & PTE_SOFT_DIRTY));
+}
+
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+	pte = set_pte_bit(pte, __pgprot(PTE_SOFT_DIRTY));
+	return pte;
+}
+
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+	pte = clear_pte_bit(pte, __pgprot(PTE_SOFT_DIRTY));
+	return pte;
+}
+
+#define pmd_soft_dirty(pmd)    pte_soft_dirty(pmd_pte(pmd))
+#define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
+#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * See the comment in include/linux/pgtable.h
@@ -1013,10 +1036,12 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
  *	bits 3-7:	swap type
  *	bits 8-57:	swap offset
  *	bit  58:	PTE_PROT_NONE (must be zero)
+ *	bit  59:        swap software dirty tracking
  */
 #define __SWP_TYPE_SHIFT	3
 #define __SWP_TYPE_BITS		5
 #define __SWP_OFFSET_BITS	50
+#define __SWP_PROT_NONE_BITS    1
 #define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
 #define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
 #define __SWP_OFFSET_MASK	((1UL << __SWP_OFFSET_BITS) - 1)
@@ -1033,6 +1058,33 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 #define __swp_entry_to_pmd(swp)		__pmd((swp).val)
 #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
 
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SWP_SOFT_DIRTY   (1UL << (__SWP_OFFSET_SHIFT + __SWP_OFFSET_BITS + __SWP_PROT_NONE_BITS))
+#else
+#define _PAGE_SWP_SOFT_DIRTY    0UL
+#endif /* CONFIG_MEM_SOFT_DIRTY */
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline bool pte_swp_soft_dirty(pte_t pte)
+{
+	return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
+}
+
+#define pmd_swp_soft_dirty(pmd)        pte_swp_soft_dirty(pmd_pte(pmd))
+#define pmd_swp_mksoft_dirty(pmd)      pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)))
+#define pmd_swp_clear_soft_dirty(pmd)  pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)))
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
 /*
  * Ensure that there are not more swap files than can be encoded in the kernel
  * PTEs.
-- 
2.34.3


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ