Message-ID: <20180918121416.GO24142@hirez.programming.kicks-ass.net>
Date: Tue, 18 Sep 2018 14:14:16 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Thomas Gleixner <tglx@...utronix.de>
Cc: LKML <linux-kernel@...r.kernel.org>, x86@...nel.org,
Bin Yang <bin.yang@...el.com>,
Dave Hansen <dave.hansen@...el.com>,
Mark Gross <mark.gross@...el.com>
Subject: Re: [patch V3 02/11] x86/mm/cpa: Split, rename and clean up
try_preserve_large_page()

On Tue, Sep 18, 2018 at 10:19:09AM +0200, Peter Zijlstra wrote:
> On Mon, Sep 17, 2018 at 04:29:08PM +0200, Thomas Gleixner wrote:
> > @@ -1288,23 +1287,23 @@ static int __change_page_attr(struct cpa
> > err = split_large_page(cpa, kpte, address);
> > if (!err) {
> > /*
> > + * Do a global flush tlb after splitting the large page
> > + * and before we do the actual change page attribute in the PTE.
> > + *
> > + * With out this, we violate the TLB application note, that says
> > + * "The TLBs may contain both ordinary and large-page
> > * translations for a 4-KByte range of linear addresses. This
> > * may occur if software modifies the paging structures so that
> > * the page size used for the address range changes. If the two
> > * translations differ with respect to page frame or attributes
> > * (e.g., permissions), processor behavior is undefined and may
> > * be implementation-specific."
> > + *
> > + * We do this global tlb flush inside the cpa_lock, so that we
> > * don't allow any other cpu, with stale tlb entries change the
> > * page attribute in parallel, that also falls into the
> > * just split large page entry.
> > + */
> > flush_tlb_all();
> > goto repeat;
> > }
>
> this made me look at the tlb invalidation of that thing again; do we
> want something like the below?
>
Further cleanups are possible...
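
For readability, this is roughly what cpa_flush_range() ends up looking
like with the hunks below applied. It is a sketch re-assembled from the
diff, not an extra change; every helper in it (cpa_flush_all(),
flush_tlb_kernel_range(), lookup_address(), clflush_cache_range())
already exists in pageattr.c:

static void cpa_flush_range(unsigned long start, int numpages, int cache)
{
	unsigned long addr, end = start + PAGE_SIZE * numpages;
	unsigned int level;

	BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
	WARN_ON(PAGE_ALIGN(start) != start);

	/* No CLFLUSH: fall back to the full flush (possibly WBINVD). */
	if (!static_cpu_has(X86_FEATURE_CLFLUSH)) {
		cpa_flush_all(cache);
		return;
	}

	/* Ranged TLB invalidation instead of a global __flush_tlb_all(). */
	flush_tlb_kernel_range(start, end);

	if (!cache)
		return;

	/* Only write back cache lines for present mappings in the range. */
	for (addr = start; addr < end; addr += PAGE_SIZE) {
		pte_t *pte = lookup_address(addr, &level);

		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
			clflush_cache_range((void *)addr, PAGE_SIZE);
	}
}

The upshot is that flush_tlb_kernel_range() can do per-page
invalidations for small ranges instead of the unconditional full flush
the old __cpa_flush_range() IPI did, and the 4M wbinvd() threshold in
cpa_flush_array() disappears along with it.
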
---
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -285,25 +285,20 @@ static void cpa_flush_all(unsigned long
on_each_cpu(__cpa_flush_all, (void *) cache, 1);
}
-static void __cpa_flush_range(void *arg)
-{
- /*
- * We could optimize that further and do individual per page
- * tlb invalidates for a low number of pages. Caveat: we must
- * flush the high aliases on 64bit as well.
- */
- __flush_tlb_all();
-}
-
static void cpa_flush_range(unsigned long start, int numpages, int cache)
{
- unsigned int i, level;
- unsigned long addr;
+ unsigned long addr, end = start + PAGE_SIZE * numpages;
+ unsigned int level;
BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
WARN_ON(PAGE_ALIGN(start) != start);
- on_each_cpu(__cpa_flush_range, NULL, 1);
+ if (!static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ cpa_flush_all(cache);
+ return;
+ }
+
+ flush_tlb_kernel_range(start, end);
if (!cache)
return;
@@ -314,7 +309,7 @@ static void cpa_flush_range(unsigned lon
* will cause all other CPUs to flush the same
* cachelines:
*/
- for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
pte_t *pte = lookup_address(addr, &level);
/*
@@ -325,30 +320,22 @@ static void cpa_flush_range(unsigned lon
}
}
-static void cpa_flush_array(unsigned long *start, int numpages, int cache,
+static void cpa_flush_array(unsigned long baddr, unsigned long *start,
+ int numpages, int cache,
int in_flags, struct page **pages)
{
unsigned int i, level;
-#ifdef CONFIG_PREEMPT
- /*
- * Avoid wbinvd() because it causes latencies on all CPUs,
- * regardless of any CPU isolation that may be in effect.
- *
- * This should be extended for CAT enabled systems independent of
- * PREEMPT because wbinvd() does not respect the CAT partitions and
- * this is exposed to unpriviledged users through the graphics
- * subsystem.
- */
- unsigned long do_wbinvd = 0;
-#else
- unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
-#endif
BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
- on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
+ if (!static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ cpa_flush_all(cache);
+ return;
+ }
+
+ flush_tlb_kernel_range(baddr, baddr + PAGE_SIZE * numpages);
- if (!cache || do_wbinvd)
+ if (!cache)
return;
/*
@@ -1006,14 +993,24 @@ __split_large_page(struct cpa_data *cpa,
__set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
/*
- * Intel Atom errata AAH41 workaround.
+ * Do a global flush tlb after splitting the large page
+ * and before we do the actual change page attribute in the PTE.
*
- * The real fix should be in hw or in a microcode update, but
- * we also probabilistically try to reduce the window of having
- * a large TLB mixed with 4K TLBs while instruction fetches are
- * going on.
+ * Without this, we violate the TLB application note, that says
+ * "The TLBs may contain both ordinary and large-page
+ * translations for a 4-KByte range of linear addresses. This
+ * may occur if software modifies the paging structures so that
+ * the page size used for the address range changes. If the two
+ * translations differ with respect to page frame or attributes
+ * (e.g., permissions), processor behavior is undefined and may
+ * be implementation-specific."
+ *
+ * We do this global tlb flush inside the cpa_lock, so that we
+ * don't allow any other cpu, with stale tlb entries change the
+ * page attribute in parallel, that also falls into the
+ * just split large page entry.
*/
- __flush_tlb_all();
+ flush_tlb_all();
spin_unlock(&pgd_lock);
return 0;
@@ -1538,28 +1535,8 @@ static int __change_page_attr(struct cpa
* We have to split the large page:
*/
err = split_large_page(cpa, kpte, address);
- if (!err) {
- /*
- * Do a global flush tlb after splitting the large page
- * and before we do the actual change page attribute in the PTE.
- *
- * With out this, we violate the TLB application note, that says
- * "The TLBs may contain both ordinary and large-page
- * translations for a 4-KByte range of linear addresses. This
- * may occur if software modifies the paging structures so that
- * the page size used for the address range changes. If the two
- * translations differ with respect to page frame or attributes
- * (e.g., permissions), processor behavior is undefined and may
- * be implementation-specific."
- *
- * We do this global tlb flush inside the cpa_lock, so that we
- * don't allow any other cpu, with stale tlb entries change the
- * page attribute in parallel, that also falls into the
- * just split large page entry.
- */
- flush_tlb_all();
+ if (!err)
goto repeat;
- }
return err;
}
@@ -1786,9 +1763,9 @@ static int change_page_attr_set_clr(unsi
* error case we fall back to cpa_flush_all (which uses
* WBINVD):
*/
- if (!ret && boot_cpu_has(X86_FEATURE_CLFLUSH)) {
+ if (!ret) {
if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
- cpa_flush_array(addr, numpages, cache,
+ cpa_flush_array(baddr, addr, numpages, cache,
cpa.flags, pages);
} else
cpa_flush_range(baddr, numpages, cache);
@@ -2108,10 +2085,7 @@ static int __set_memory_enc_dec(unsigned
/*
* Before changing the encryption attribute, we need to flush caches.
*/
- if (static_cpu_has(X86_FEATURE_CLFLUSH))
- cpa_flush_range(start, numpages, 1);
- else
- cpa_flush_all(1);
+ cpa_flush_range(start, numpages, 1);
ret = __change_page_attr_set_clr(&cpa, 1);
@@ -2122,10 +2096,7 @@ static int __set_memory_enc_dec(unsigned
* in case TLB flushing gets optimized in the cpa_flush_range()
* path use the same logic as above.
*/
- if (static_cpu_has(X86_FEATURE_CLFLUSH))
- cpa_flush_range(start, numpages, 0);
- else
- cpa_flush_all(0);
+ cpa_flush_range(start, numpages, 0);
return ret;
}