Message-Id: <20070921223211.BB50413DCD@wotan.suse.de>
Date:	Sat, 22 Sep 2007 00:32:11 +0200 (CEST)
From:	Andi Kleen <ak@...e.de>
To:	patches@...-64.org, linux-kernel@...r.kernel.org
Subject: [PATCH] [13/50] x86: Fix and reenable CLFLUSH support in change_page_attr()


Reenable CLFLUSH support in change_page_attr()

Mark pages that need to be cache flushed with a special bit
(PG_owner_priv_1) before putting them on the deferred list. Then
cache flush only those pages, and don't free them.
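
The per-page flush itself is the usual CLFLUSH loop. The body of
cache_flush_page() is mostly elided by the diff context below; this
sketch shows the idiom, not the verbatim patch code:

	static void cache_flush_page(void *adr)
	{
		int i;
		/* Flush one 4K page from all cache levels, one cache
		   line at a time, instead of dumping every cache line
		   on every CPU with WBINVD. */
		for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
			asm volatile("clflush (%0)" :: "r" (adr + i));
	}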

Take special care to handle cases where the page's LRU or
owner_priv_1 bit is already in use by someone else, and fall back
to a full cache flush in that case.

These cases probably do not happen right now, but handling them
makes the code more future proof.
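
The decision is made in __change_page_attr() when the caching
attributes change (excerpted from the x86-64 hunk below):

	if ((pte_val(*kpte) & _PAGE_CACHE) != (pgprot_val(prot) & _PAGE_CACHE)) {
		struct page *p = flush_page(address);
		if (!p)
			full_flush = 1;		/* flag bits taken: full WBINVD */
		else
			save_page(p, 1);	/* tag page for targeted CLFLUSH */
	}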

TBD port to i386
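
For context, the caller-visible contract is unchanged: callers change
attributes first and then trigger the deferred flush. An illustrative
caller (not part of this patch) mapping pages uncached:

	/* change_page_attr() only queues the affected pages;
	   global_flush_tlb() performs the TLB flush plus the cache
	   flush that this patch turns from WBINVD into CLFLUSH. */
	change_page_attr(page, numpages, PAGE_KERNEL_NOCACHE);
	global_flush_tlb();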

Signed-off-by: Andi Kleen <ak@...e.de>

---
 arch/i386/mm/pageattr.c      |   68 +++++++++++++++++++++++++++++++++--------
 arch/x86_64/mm/pageattr.c    |   72 +++++++++++++++++++++++++++++++++----------
 include/asm-i386/pgtable.h   |    2 +
 include/asm-x86_64/pgtable.h |    1 +
 4 files changed, 114 insertions(+), 29 deletions(-)

Index: linux/arch/x86_64/mm/pageattr.c
===================================================================
--- linux.orig/arch/x86_64/mm/pageattr.c
+++ linux/arch/x86_64/mm/pageattr.c
@@ -13,6 +13,10 @@
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 
+#define PageFlush(p) test_bit(PG_owner_priv_1, &(p)->flags)
+#define SetPageFlush(p) set_bit(PG_owner_priv_1, &(p)->flags)
+#define TestClearPageFlush(p) test_and_clear_bit(PG_owner_priv_1, &(p)->flags)
+
 pte_t *lookup_address(unsigned long address)
 { 
 	pgd_t *pgd = pgd_offset_k(address);
@@ -61,6 +65,11 @@ static struct page *split_large_page(uns
 	return base;
 } 
 
+struct flush_arg {
+	int full_flush;
+	struct list_head l;
+};
+
 static void cache_flush_page(void *adr)
 {
 	int i;
@@ -70,17 +79,16 @@ static void cache_flush_page(void *adr)
 
 static void flush_kernel_map(void *arg)
 {
-	struct list_head *l = (struct list_head *)arg;
+	struct flush_arg *a = (struct flush_arg *)arg;
 	struct page *pg;
 
-	/* When clflush is available always use it because it is
+	/* When clflush is available use it because it is
 	   much cheaper than WBINVD. */
-	/* clflush is still broken. Disable for now. */
-	if (1 || !cpu_has_clflush)
+	if (a->full_flush || !cpu_has_clflush)
 		asm volatile("wbinvd" ::: "memory");
-	else list_for_each_entry(pg, l, lru) {
-		void *adr = page_address(pg);
-		cache_flush_page(adr);
+	else list_for_each_entry(pg, &a->l, lru) {
+		if (PageFlush(pg))
+			cache_flush_page(page_address(pg));
 	}
 	__flush_tlb_all();
 }
@@ -90,11 +98,17 @@ static inline void flush_map(struct list
 	on_each_cpu(flush_kernel_map, l, 1, 1);
 }
 
-static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
+/* both protected by init_mm.mmap_sem */
+static int full_flush;
+static LIST_HEAD(deferred_pages);
 
-static inline void save_page(struct page *fpage)
+static inline void save_page(struct page *fpage, int data)
 {
-	if (!test_and_set_bit(PG_arch_1, &fpage->flags))
+	if (data && cpu_has_clflush)
+		SetPageFlush(fpage);
+	if (test_and_set_bit(PG_arch_1, &fpage->flags))
+		return;
+	if (cpu_has_clflush || !data)
 		list_add(&fpage->lru, &deferred_pages);
 }
 
@@ -122,6 +136,17 @@ static void revert_page(unsigned long ad
 	set_pte((pte_t *)pmd, large_pte);
 }      
 
+static struct page *flush_page(unsigned long address)
+{
+	struct page *p;
+	if (!(pfn_valid(__pa(address) >> PAGE_SHIFT)))
+		return NULL;
+	p = virt_to_page(address);
+	if ((PageFlush(p) || PageLRU(p)) && !test_bit(PG_arch_1, &p->flags))
+		return NULL;
+	return p;
+}
+
 static int
 __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 				   pgprot_t ref_prot)
@@ -133,8 +158,19 @@ __change_page_attr(unsigned long address
 	kpte = lookup_address(address);
 	if (!kpte) return 0;
 	kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
-	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
+	BUG_ON(PageLRU(kpte_page));
+
+	/* Do caching attributes change?
+	   Note: this will need changes if the PAT bit is used (it isn't
+   	   currently) because that one varies between 2MB and 4K pages. */
+	if ((pte_val(*kpte)&_PAGE_CACHE) != (pgprot_val(prot)&_PAGE_CACHE)) {
+		struct page *p = flush_page(address);
+		if (!p)
+			full_flush = 1;
+		else
+			save_page(p, 1);
+	}
 	if (pgprot_val(prot) != pgprot_val(ref_prot)) { 
 		if (!pte_huge(*kpte)) {
 			set_pte(kpte, pfn_pte(pfn, prot));
@@ -162,7 +198,7 @@ __change_page_attr(unsigned long address
 	/* on x86-64 the direct mapping set at boot is not using 4k pages */
  	BUG_ON(PageReserved(kpte_page));
 
-	save_page(kpte_page);
+	save_page(kpte_page, 0);
 	if (page_private(kpte_page) == 0)
 		revert_page(address, ref_prot);
 	return 0;
@@ -227,17 +263,21 @@ int change_page_attr(struct page *page, 
 void global_flush_tlb(void)
 { 
 	struct page *pg, *next;
-	struct list_head l;
+	struct flush_arg arg;
 
 	down_read(&init_mm.mmap_sem);
-	list_replace_init(&deferred_pages, &l);
+	arg.full_flush = full_flush;
+	full_flush = 0;
+	list_replace_init(&deferred_pages, &arg.l);
 	up_read(&init_mm.mmap_sem);
 
-	flush_map(&l);
+	flush_map(&arg);
 
-	list_for_each_entry_safe(pg, next, &l, lru) {
+	list_for_each_entry_safe(pg, next, &arg.l, lru) {
 		list_del(&pg->lru);
 		clear_bit(PG_arch_1, &pg->flags);
+		if (TestClearPageFlush(pg))
+			continue;
 		if (page_private(pg) != 0)
 			continue;
 		ClearPagePrivate(pg);
Index: linux/include/asm-x86_64/pgtable.h
===================================================================
--- linux.orig/include/asm-x86_64/pgtable.h
+++ linux/include/asm-x86_64/pgtable.h
@@ -165,6 +165,7 @@ static inline pte_t ptep_get_and_clear_f
 
 #define _PAGE_PROTNONE	0x080	/* If not present */
 #define _PAGE_NX        (_AC(1,UL)<<_PAGE_BIT_NX)
+#define _PAGE_CACHE	(_PAGE_PCD|_PAGE_PWT)
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
Index: linux/arch/i386/mm/pageattr.c
===================================================================
--- linux.orig/arch/i386/mm/pageattr.c
+++ linux/arch/i386/mm/pageattr.c
@@ -14,7 +14,13 @@
 #include <asm/pgalloc.h>
 #include <asm/sections.h>
 
+#define PageFlush(p) test_bit(PG_owner_priv_1, &(p)->flags)
+#define SetPageFlush(p) set_bit(PG_owner_priv_1, &(p)->flags)
+#define TestClearPageFlush(p) test_and_clear_bit(PG_owner_priv_1, &(p)->flags)
+
 static DEFINE_SPINLOCK(cpa_lock);
+/* Both protected by cpa_lock */
+static int full_flush;
 static struct list_head df_list = LIST_HEAD_INIT(df_list);
 
 
@@ -68,6 +74,11 @@ static struct page *split_large_page(uns
 	return base;
 } 
 
+struct flush_arg {
+	int full_flush;
+	struct list_head l;
+};
+
 static void cache_flush_page(struct page *p)
 { 
 	void *adr = page_address(p);
@@ -78,13 +89,14 @@ static void cache_flush_page(struct page
 
 static void flush_kernel_map(void *arg)
 {
-	struct list_head *lh = (struct list_head *)arg;
+	struct flush_arg *a = (struct flush_arg *)arg;
 	struct page *p;
 
-	/* High level code is not ready for clflush yet */
-	if (0 && cpu_has_clflush) {
-		list_for_each_entry (p, lh, lru)
-			cache_flush_page(p);
+	if (!a->full_flush && cpu_has_clflush) {
+		list_for_each_entry (p, &a->l, lru) {
+			if (PageFlush(p))
+				cache_flush_page(p);
+		}
 	} else if (boot_cpu_data.x86_model >= 4)
 		wbinvd();
 
@@ -136,10 +148,25 @@ static inline void revert_page(struct pa
 			    ref_prot));
 }
 
-static inline void save_page(struct page *kpte_page)
+static inline void save_page(struct page *fpage, int data)
+{
+	if (data && cpu_has_clflush)
+		SetPageFlush(fpage);
+	if (test_and_set_bit(PG_arch_1, &fpage->flags))
+		return;
+	if (!data || cpu_has_clflush)
+		list_add(&fpage->lru, &df_list);
+}
+
+static struct page *flush_page(unsigned long address)
 {
-	if (!test_and_set_bit(PG_arch_1, &kpte_page->flags))
-		list_add(&kpte_page->lru, &df_list);
+	struct page *p;
+	if (!(pfn_valid(__pa(address) >> PAGE_SHIFT)))
+		return NULL;
+	p = virt_to_page(address);
+	if ((PageFlush(p) || PageLRU(p)) && !test_bit(PG_arch_1, &p->flags))
+		return NULL;
+	return p;
 }
 
 static int
@@ -158,6 +185,17 @@ __change_page_attr(struct page *page, pg
 	kpte_page = virt_to_page(kpte);
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
+
+	/* Do caching attributes change?
+	   Note: this will need changes if the PAT bit is used (it isn't
+   	   currently) because that one varies between 2MB and 4K pages. */
+	if ((pte_val(*kpte)&_PAGE_CACHE) != (pgprot_val(prot)&_PAGE_CACHE)) {
+		struct page *p = flush_page(address);
+		if (!p)
+			full_flush = 1;
+		else
+			save_page(p, 1);
+	}
 
 	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { 
 		if (!pte_huge(*kpte)) {
@@ -189,7 +228,7 @@ __change_page_attr(struct page *page, pg
 	 * replace it with a largepage.
 	 */
 
-	save_page(kpte_page);
+	save_page(kpte_page, 0);
 	if (!PageReserved(kpte_page)) {
 		if (cpu_has_pse && (page_private(kpte_page) == 0)) {
 			paravirt_release_pt(page_to_pfn(kpte_page));
@@ -235,18 +274,22 @@ int change_page_attr(struct page *page, 
 
 void global_flush_tlb(void)
 {
-	struct list_head l;
+	struct flush_arg arg;
 	struct page *pg, *next;
 
 	BUG_ON(irqs_disabled());
 
 	spin_lock_irq(&cpa_lock);
-	list_replace_init(&df_list, &l);
+	arg.full_flush = full_flush;
+	full_flush = 0;
+	list_replace_init(&df_list, &arg.l);
 	spin_unlock_irq(&cpa_lock);
-	flush_map(&l);
-	list_for_each_entry_safe(pg, next, &l, lru) {
+	flush_map(&arg);
+	list_for_each_entry_safe(pg, next, &arg.l, lru) {
 		list_del(&pg->lru);
 		clear_bit(PG_arch_1, &pg->flags);
+		if (TestClearPageFlush(pg))
+			continue;
 		if (PageReserved(pg) || !cpu_has_pse || page_private(pg) != 0)
 			continue;
 		ClearPagePrivate(pg);
Index: linux/include/asm-i386/pgtable.h
===================================================================
--- linux.orig/include/asm-i386/pgtable.h
+++ linux/include/asm-i386/pgtable.h
@@ -128,6 +128,8 @@ void paging_init(void);
 #else
 #define _PAGE_NX	0
 #endif
+#define _PAGE_CACHE	(_PAGE_PCD|_PAGE_PWT)
+
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
