[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20230119230659.pda5jigd5qxpnpq4@box.shutemov.name>
Date: Fri, 20 Jan 2023 02:06:59 +0300
From: "Kirill A. Shutemov" <kirill@...temov.name>
To: Peter Zijlstra <peterz@...radead.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
Dave Hansen <dave.hansen@...ux.intel.com>,
Andy Lutomirski <luto@...nel.org>, x86@...nel.org,
Kostya Serebryany <kcc@...gle.com>,
Andrey Ryabinin <ryabinin.a.a@...il.com>,
Andrey Konovalov <andreyknvl@...il.com>,
Alexander Potapenko <glider@...gle.com>,
Taras Madan <tarasmadan@...gle.com>,
Dmitry Vyukov <dvyukov@...gle.com>,
"H . J . Lu" <hjl.tools@...il.com>,
Andi Kleen <ak@...ux.intel.com>,
Rick Edgecombe <rick.p.edgecombe@...el.com>,
Bharata B Rao <bharata@....com>,
Jacob Pan <jacob.jun.pan@...ux.intel.com>,
Ashok Raj <ashok.raj@...el.com>,
Linus Torvalds <torvalds@...ux-foundation.org>,
linux-mm@...ck.org, linux-kernel@...r.kernel.org,
Sami Tolvanen <samitolvanen@...gle.com>,
ndesaulniers@...gle.com, joao@...rdrivepizza.com,
Andrew Cooper <andrew.cooper3@...rix.com>
Subject: Re: [PATCHv14 08/17] x86/mm: Reduce untagged_addr() overhead until
the first LAM user
On Tue, Jan 17, 2023 at 04:02:06PM +0100, Peter Zijlstra wrote:
> On Tue, Jan 17, 2023 at 04:57:03PM +0300, Kirill A. Shutemov wrote:
> > On Tue, Jan 17, 2023 at 02:05:22PM +0100, Peter Zijlstra wrote:
> > > On Wed, Jan 11, 2023 at 03:37:27PM +0300, Kirill A. Shutemov wrote:
> > >
> > > > #define __untagged_addr(untag_mask, addr)
> > > > u64 __addr = (__force u64)(addr); \
> > > > - s64 sign = (s64)__addr >> 63; \
> > > > - __addr &= untag_mask | sign; \
> > > > + if (static_branch_likely(&tagged_addr_key)) { \
> > > > + s64 sign = (s64)__addr >> 63; \
> > > > + __addr &= untag_mask | sign; \
> > > > + } \
> > > > (__force __typeof__(addr))__addr; \
> > > > })
> > > >
> > > > #define untagged_addr(addr) __untagged_addr(current_untag_mask(), addr)
> > >
> > > Is the compiler clever enough to put the memop inside the branch?
> >
> > Hm. You mean current_untag_mask() inside static_branch_likely()?
> >
> > But it is preprocessor who does this, not compiler. So, yes, the memop is
> > inside the branch.
> >
> > Or I didn't understand your question.
>
> Nah, call it a pre-lunch dip, I overlooked the whole CPP angle -- d'0h.
>
> That said, I did just put it through a compiler to see wth it did and it
> is pretty gross:
I tried to replace static branch with alternative. It kinda works, but
required few hack. Thanks to Andrew Cooper for helping to untangle them.
I am not sure if it worth the effort. I don't have any evidence that it
helps. untagged_addr() overhead is rather small and hides in noise of
syscall cost.
I only made alternative for untagged_addr(), but not for
untagged_addr_remote(). _remote() case has very few users.
BTW, it would be nice to be able to apply alternative later, delaying it
until the first user of LAM, like I did with static_branch.
We don't have a way to do this right?
Any opinions? I am okay dropping the patch altogether.
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index c44b56f7ffba..3f0c31044f02 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -75,6 +75,12 @@
# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
#endif
+#ifdef CONFIG_ADDRESS_MASKING
+# define DISABLE_LAM 0
+#else
+# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31))
+#endif
+
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
@@ -115,7 +121,7 @@
#define DISABLED_MASK10 0
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
DISABLE_CALL_DEPTH_TRACKING)
-#define DISABLED_MASK12 0
+#define DISABLED_MASK12 (DISABLE_LAM)
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index f9f85d596581..57ccb91fcccf 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -9,6 +9,7 @@
#include <linux/kasan-checks.h>
#include <linux/mm_types.h>
#include <linux/string.h>
+#include <linux/mmap_lock.h>
#include <asm/asm.h>
#include <asm/page.h>
#include <asm/smap.h>
@@ -24,28 +25,48 @@ static inline bool pagefault_disabled(void);
#endif
#ifdef CONFIG_ADDRESS_MASKING
-DECLARE_STATIC_KEY_FALSE(tagged_addr_key);
+static inline unsigned long __untagged_addr(unsigned long addr)
+{
+ /*
+ * Magic with the 'sign' allows to untag userspace pointer without
+ * any branches while leaving kernel addresses intact.
+ */
+ long sign;
+
+ /*
+ * Refer tlbstate_untag_mask directly to avoid RIP-relative relocation
+ * in alternative instructions. The relocation gets wrong when gets
+ * copied to the target place.
+ */
+ asm (ALTERNATIVE("",
+ "sar $63, %[sign]\n\t" /* user_ptr ? 0 : -1UL */
+ "or %%gs:tlbstate_untag_mask, %[sign]\n\t"
+ "and %[sign], %[addr]\n\t", X86_FEATURE_LAM)
+ : [addr] "+r" (addr), [sign] "=r" (sign)
+ : "m" (tlbstate_untag_mask), "[sign]" (addr));
+
+ return addr;
+}
-/*
- * Mask out tag bits from the address.
- *
- * Magic with the 'sign' allows to untag userspace pointer without any branches
- * while leaving kernel addresses intact.
- */
-#define __untagged_addr(untag_mask, addr) ({ \
- u64 __addr = (__force u64)(addr); \
- if (static_branch_likely(&tagged_addr_key)) { \
- s64 sign = (s64)__addr >> 63; \
- __addr &= untag_mask | sign; \
- } \
- (__force __typeof__(addr))__addr; \
+#define untagged_addr(addr) ({ \
+ unsigned long __addr = (__force unsigned long)(addr); \
+ (__force __typeof__(addr))__untagged_addr(__addr); \
})
-#define untagged_addr(addr) __untagged_addr(current_untag_mask(), addr)
+static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
+ unsigned long addr)
+{
+ long sign = addr >> 63;
+
+ mmap_assert_locked(mm);
+ addr &= (mm)->context.untag_mask | sign;
+
+ return addr;
+}
#define untagged_addr_remote(mm, addr) ({ \
- mmap_assert_locked(mm); \
- __untagged_addr((mm)->context.untag_mask, addr); \
+ unsigned long __addr = (__force unsigned long)(addr); \
+ (__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \
})
#else
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0831d2be190f..e006725afdf1 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -745,9 +745,6 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
#ifdef CONFIG_ADDRESS_MASKING
-DEFINE_STATIC_KEY_FALSE(tagged_addr_key);
-EXPORT_SYMBOL_GPL(tagged_addr_key);
-
#define LAM_U57_BITS 6
static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
@@ -787,8 +784,6 @@ static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags);
mmap_write_unlock(mm);
-
- static_branch_enable(&tagged_addr_key);
return 0;
}
#endif
--
Kiryl Shutsemau / Kirill A. Shutemov
Powered by blists - more mailing lists