lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon,  6 Jun 2022 20:37:14 +0000
From:   Ankur Arora <ankur.a.arora@...cle.com>
To:     linux-kernel@...r.kernel.org, linux-mm@...ck.org, x86@...nel.org
Cc:     torvalds@...ux-foundation.org, akpm@...ux-foundation.org,
        mike.kravetz@...cle.com, mingo@...nel.org, luto@...nel.org,
        tglx@...utronix.de, bp@...en8.de, peterz@...radead.org,
        ak@...ux.intel.com, arnd@...db.de, jgg@...dia.com,
        jon.grimm@....com, boris.ostrovsky@...cle.com,
        konrad.wilk@...cle.com, joao.m.martins@...cle.com,
        ankur.a.arora@...cle.com
Subject: [PATCH v3 10/21] x86/asm: add clear_pages_clzero()

Add clear_pages_clzero(), which uses CLZERO as the clearing primitive.
CLZERO skips the memory hierarchy, so this provides a non-polluting
implementation of clear_page(). Available if X86_FEATURE_CLZERO is set.

CLZERO, from the AMD architecture guide (Vol 3, Rev 3.30):
 "Clears the cache line specified by the logical address in rAX by
  writing a zero to every byte in the line. The instruction uses an
  implied non temporal memory type, similar to a streaming store, and
  uses the write combining protocol to minimize cache pollution.

  CLZERO is weakly-ordered with respect to other instructions that
  operate on memory. Software should use an SFENCE or stronger to
  enforce memory ordering of CLZERO with respect to other store
  instructions.

  The CLZERO instruction executes at any privilege level. CLZERO
  performs all the segmentation and paging checks that a store of
  the specified cache line would perform."

The use-case is similar to clear_pages_movnt(), except that
clear_pages_clzero() is expected to be more performant.

Cc: jon.grimm@....com
Signed-off-by: Ankur Arora <ankur.a.arora@...cle.com>
---
 arch/x86/include/asm/page_64.h |  1 +
 arch/x86/lib/clear_page_64.S   | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 3affc4ecb8da..e8d4698fda65 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -56,6 +56,7 @@ void clear_pages_orig(void *page, unsigned long npages);
 void clear_pages_rep(void *page, unsigned long npages);
 void clear_pages_erms(void *page, unsigned long npages);
 void clear_pages_movnt(void *page, unsigned long npages);
+void clear_pages_clzero(void *page, unsigned long npages);
 
 #define __HAVE_ARCH_CLEAR_USER_PAGES
 static inline void clear_pages(void *page, unsigned int npages)
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index 83d14f1c9f57..00203103cf77 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -79,3 +79,22 @@ SYM_FUNC_START(clear_pages_movnt)
 	ja      .Lstart
 	RET
 SYM_FUNC_END(clear_pages_movnt)
+
+/*
+ * Zero npages pages using clzero (on AMD, with X86_FEATURE_CLZERO.)
+ * %rdi	page (start address), %rsi	npages
+ * clzero is weakly ordered: caller needs to issue a sfence at the end.
+ */
+SYM_FUNC_START(clear_pages_clzero)
+	movq	%rdi,%rax		/* clzero takes its address in rAX */
+	movq	%rsi,%rcx
+	shlq    $PAGE_SHIFT, %rcx	/* rcx = number of bytes left to clear */
+
+	.p2align 4
+.Liter:
+	clzero
+	addq    $0x40, %rax		/* step to the next 0x40-byte chunk */
+	subq    $0x40, %rcx		/* 64-bit: byte count may exceed 32 bits */
+	ja      .Liter			/* loop while bytes remain (CF=0, ZF=0) */
+	RET
+SYM_FUNC_END(clear_pages_clzero)
-- 
2.31.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ