lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20141203032542.15388.17340.stgit@linux-yegoshin>
Date:	Tue, 2 Dec 2014 19:25:42 -0800
From:	Leonid Yegoshin <Leonid.Yegoshin@...tec.com>
To:	<linux-mips@...ux-mips.org>, <james.hogan@...tec.com>,
	<keescook@...omium.org>, <paul.burton@...tec.com>,
	<linux-kernel@...r.kernel.org>, <ralf@...ux-mips.org>,
	<lars.persson@...s.com>, <manuel.lauss@...il.com>,
	<pbonzini@...hat.com>, <akpm@...ux-foundation.org>,
	<blogic@...nwrt.org>, <markos.chandras@...tec.com>
Subject: [PATCH] Revert "MIPS: Remove race window in page fault handling"

This reverts commit 64f23ab30b1fec86b91a48bf1f088840e5299971
(commit 2a4a8b1e5d9d343e13ff22e19af7b353f7b52d6f in Linux 'master')

Unfortunately the original commit effectively kills Imagination MIPS CPUs
performance because it enforces page cache flush each time then PTE is changed
for our CPUs. Basically - each CoW, each process start, each library attachment
or detachment. And it happens even in "fully-coherent" systems (in MIPS sense -
we have non coherent I-cache). Last tendency for increasing page size to 16K-64K
even exaggerate the performance loss.

Original commit discussion says:

    Kernel call stacks showed one thread handling an illegal instruction
    exception while another thread was somewhere around the
    set_pte_at/update_mmu_cache calls for the same page.

If this is taken correct then it points to a major problem unrelated to MIPS -
if by chance a first thread hits the page just before set_pte_at then it should
get a non-present PTE: set_pte_at with _PAGE_PRESENT/_PAGE_VALID flags can be
used only on "disactivated" PTE, after pte_clear* functions, effectively -
non-present, non-valid. And application can get SIGSEGV. It is a major race
condition and kernel should not offer it to applications. And in my
understanding set_pte_at is used in cases then page is available after kernel
finishes page handling, at least in theory.

Test note: I ran MIPS 1004K with 8 VPEs on 4 cores around 1.5 month on SOAK test
and never seen that problem on 3.10 kernel.

Signed-off-by: Leonid Yegoshin <Leonid.Yegoshin@...tec.com>
---
 arch/mips/include/asm/pgtable.h |    8 +++++---
 arch/mips/mm/cache.c            |   27 ++++++++-------------------
 2 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index d6d1928539b1..cd5fbf42b864 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -122,9 +122,6 @@ do {									\
 	}								\
 } while(0)
 
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-	pte_t pteval);
-
 #if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32)
 
 #define pte_none(pte)		(!(((pte).pte_low | (pte).pte_high) & ~_PAGE_GLOBAL))
@@ -148,6 +145,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
 		}
 	}
 }
+#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
@@ -185,6 +183,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 	}
 #endif
 }
+#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
@@ -401,12 +400,15 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 
 extern void __update_tlb(struct vm_area_struct *vma, unsigned long address,
 	pte_t pte);
+extern void __update_cache(struct vm_area_struct *vma, unsigned long address,
+	pte_t pte);
 
 static inline void update_mmu_cache(struct vm_area_struct *vma,
 	unsigned long address, pte_t *ptep)
 {
 	pte_t pte = *ptep;
 	__update_tlb(vma, address, pte);
+	__update_cache(vma, address, pte);
 }
 
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 7e3ea7766822..f7b91d3a371d 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -119,36 +119,25 @@ void __flush_anon_page(struct page *page, unsigned long vmaddr)
 
 EXPORT_SYMBOL(__flush_anon_page);
 
-static void mips_flush_dcache_from_pte(pte_t pteval, unsigned long address)
+void __update_cache(struct vm_area_struct *vma, unsigned long address,
+	pte_t pte)
 {
 	struct page *page;
-	unsigned long pfn = pte_pfn(pteval);
+	unsigned long pfn, addr;
+	int exec = (vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc;
 
+	pfn = pte_pfn(pte);
 	if (unlikely(!pfn_valid(pfn)))
 		return;
-
 	page = pfn_to_page(pfn);
 	if (page_mapping(page) && Page_dcache_dirty(page)) {
-		unsigned long page_addr = (unsigned long) page_address(page);
-
-		if (!cpu_has_ic_fills_f_dc ||
-		    pages_do_alias(page_addr, address & PAGE_MASK))
-			flush_data_cache_page(page_addr);
+		addr = (unsigned long) page_address(page);
+		if (exec || pages_do_alias(addr, address & PAGE_MASK))
+			flush_data_cache_page(addr);
 		ClearPageDcacheDirty(page);
 	}
 }
 
-void set_pte_at(struct mm_struct *mm, unsigned long addr,
-        pte_t *ptep, pte_t pteval)
-{
-        if (cpu_has_dc_aliases || !cpu_has_ic_fills_f_dc) {
-                if (pte_present(pteval))
-                        mips_flush_dcache_from_pte(pteval, addr);
-        }
-
-        set_pte(ptep, pteval);
-}
-
 unsigned long _page_cachable_default;
 EXPORT_SYMBOL(_page_cachable_default);
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ