lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-Id: <466E691D.76E4.0078.0@novell.com>
Date:	Tue, 12 Jun 2007 09:36:29 +0200
From:	"Jan Beulich" <jbeulich@...ell.com>
To:	"Andi Kleen" <ak@...e.de>
Cc:	<linux-kernel@...r.kernel.org>, <patches@...-64.org>
Subject: [PATCH] x86: fix change_page_attr() TLB and cache flushing

For quite a while, change_page_attr() has failed to flush the caches for
the affected pages, and on x86-64 it only recently got a brute-force fix
that flushes the entire TLB. Add the missing cache flushing, and reduce
the x86-64 TLB flush to just the affected pages when possible.

Signed-off-by: Jan Beulich <jbeulich@...ell.com>

 arch/i386/mm/ioremap.c       |    4 
 arch/i386/mm/pageattr.c      |  181 +++++++++++++++++++++++++++-----------
 arch/x86_64/mm/ioremap.c     |    6 -
 arch/x86_64/mm/pageattr.c    |  202 ++++++++++++++++++++++++++++++-------------
 include/asm-i386/page.h      |   10 ++
 include/asm-i386/pgtable.h   |    3 
 include/asm-x86_64/page.h    |    2 
 include/asm-x86_64/pgtable.h |    4 
 include/asm-x86_64/system.h  |    2 
 9 files changed, 292 insertions(+), 122 deletions(-)

--- linux-2.6.22-rc4/arch/i386/mm/ioremap.c	2007-04-26 05:08:32.000000000 +0200
+++ 2.6.22-rc4-x86-change_page_attr/arch/i386/mm/ioremap.c	2007-06-11 09:13:57.000000000 +0200
@@ -193,10 +193,10 @@ void iounmap(volatile void __iomem *addr
 		return;
 	}
 
-	/* Reset the direct mapping. Can block */
+	/* Reset the direct mapping. Can block. Heed the trailing guard page. */
 	if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
 		change_page_attr(virt_to_page(__va(p->phys_addr)),
-				 p->size >> PAGE_SHIFT,
+				 (p->size >> PAGE_SHIFT) - 1,
 				 PAGE_KERNEL);
 		global_flush_tlb();
 	} 
--- linux-2.6.22-rc4/arch/i386/mm/pageattr.c	2007-06-11 18:09:52.000000000 +0200
+++ 2.6.22-rc4-x86-change_page_attr/arch/i386/mm/pageattr.c	2007-06-11 09:13:57.000000000 +0200
@@ -16,7 +16,29 @@
 
 static DEFINE_SPINLOCK(cpa_lock);
 static struct list_head df_list = LIST_HEAD_INIT(df_list);
+static struct df_info {
+	unsigned long address;
+	int count;
+} deferred[2];
+#define TLB 0
+#define CACHE 1
 
+static inline void update_deferred(struct df_info *df,
+				   unsigned long address, int count)
+{
+	if (df->count == 0) {
+		df->address = address;
+		df->count = count;
+	} else if (df->count > 0) {
+		if (df->address + (df->count << PAGE_SHIFT) == address)
+			df->count += count;
+		else if (address + (count << PAGE_SHIFT) == df->address) {
+			df->address = address;
+			df->count += count;
+		} else
+			df->count = -1;
+	}
+}
 
 pte_t *lookup_address(unsigned long address) 
 { 
@@ -70,19 +92,29 @@ static struct page *split_large_page(uns
 
 static void flush_kernel_map(void *arg)
 { 
-	unsigned long adr = (unsigned long)arg;
-
-	if (adr && cpu_has_clflush) {
-		int i;
-		for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
-			asm volatile("clflush (%0)" :: "r" (adr + i));
-	} else if (boot_cpu_data.x86_model >= 4)
-		wbinvd();
+	const struct df_info *info = arg;
 
 	/* Flush all to work around Errata in early athlons regarding 
 	 * large page flushing. 
 	 */
 	__flush_tlb_all(); 	
+
+	if (info[CACHE].count != 0 && boot_cpu_data.x86_model >= 4) {
+		if (info[CACHE].count < 0)
+			wbinvd();
+		else {
+			unsigned long addr = info[CACHE].address;
+			int count = info[CACHE].count;
+
+			while (count-- > 0) {
+				int i;
+
+				for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
+					asm volatile("clflush (%0)" :: "r" (addr + i));
+				addr += PAGE_SIZE;
+			}
+		}
+	}
 }
 
 static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) 
@@ -107,32 +139,39 @@ static void set_pmd_pte(pte_t *kpte, uns
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
+static pgprot_t _ref_prot[KERNEL_PGD_PTRS * PTRS_PER_PMD];
+#define ref_prot(addr) _ref_prot[__pa(addr) >> PMD_SHIFT]
+
 /* 
  * No more special protections in this 2/4MB area - revert to a
  * large page again. 
  */
 static inline void revert_page(struct page *kpte_page, unsigned long address)
 {
-	pgprot_t ref_prot;
 	pte_t *linear;
 
-	ref_prot =
-	((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
-		? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;
-
 	linear = (pte_t *)
 		pmd_offset(pud_offset(pgd_offset_k(address), address), address);
 	set_pmd_pte(linear,  address,
-		    pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
-			    ref_prot));
+		    pte_mkhuge(pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
+				       ref_prot(address))));
 }
 
-static int
-__change_page_attr(struct page *page, pgprot_t prot)
+static inline int pgprot_match(pgprot_t prot1, pgprot_t prot2)
+{
+	return !((pgprot_val(prot1) ^ pgprot_val(prot2))
+#ifdef CONFIG_X86_PAE
+		 & __supported_pte_mask
+#endif
+		 & ~(_PAGE_ACCESSED|_PAGE_DIRTY));
+}
+
+static int __change_page_attr(struct page *page, pgprot_t prot)
 { 
 	pte_t *kpte; 
 	unsigned long address;
 	struct page *kpte_page;
+	pgprot_t old_prot, ref_prot;
 
 	BUG_ON(PageHighMem(page));
 	address = (unsigned long)page_address(page);
@@ -141,37 +180,55 @@ __change_page_attr(struct page *page, pg
 	if (!kpte)
 		return -EINVAL;
 	kpte_page = virt_to_page(kpte);
-	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { 
+	old_prot = pte_pgprot(pte_clrhuge(*kpte));
+	ref_prot = ref_prot(address);
+	if (!pgprot_match(prot, ref_prot)) {
 		if (!pte_huge(*kpte)) {
 			set_pte_atomic(kpte, mk_pte(page, prot)); 
 		} else {
-			pgprot_t ref_prot;
-			struct page *split;
-
-			ref_prot =
-			((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
-				? PAGE_KERNEL_EXEC : PAGE_KERNEL;
-			split = split_large_page(address, prot, ref_prot);
-			if (!split)
+			BUG_ON(!pgprot_match(old_prot, ref_prot));
+			kpte_page = split_large_page(address, prot, ref_prot);
+			if (!kpte_page)
 				return -ENOMEM;
-			set_pmd_pte(kpte,address,mk_pte(split, ref_prot));
-			kpte_page = split;
+			set_pmd_pte(kpte, address,
+				    mk_pte(kpte_page, PAGE_KERNEL_EXEC));
+		}
+		if (!PageReserved(kpte_page)
+		    && pgprot_match(old_prot, ref_prot))
+			page_private(kpte_page)++;
+	} else if (!pgprot_match(ref_prot, old_prot)) {
+		BUG_ON(pte_huge(*kpte));
+		set_pte_atomic(kpte, mk_pte(page, ref_prot));
+		if (!PageReserved(kpte_page)) {
+			BUG_ON(page_private(kpte_page) == 0);
+			page_private(kpte_page)--;
 		}
-		page_private(kpte_page)++;
-	} else if (!pte_huge(*kpte)) {
-		set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
-		BUG_ON(page_private(kpte_page) == 0);
-		page_private(kpte_page)--;
 	} else
-		BUG();
+		return 0;
+
+	if ((pgprot_val(prot) ^ pgprot_val(old_prot)) & (_PAGE_PCD|_PAGE_PWT)) {
+		if (!cpu_has_clflush || !pte_present(pfn_pte(0, prot)))
+			deferred[CACHE].count = -1;
+		else
+			update_deferred(deferred + CACHE, address, 1);
+	}
 
 	/*
 	 * If the pte was reserved, it means it was created at boot
 	 * time (not via split_large_page) and in turn we must not
 	 * replace it with a largepage.
 	 */
-	if (!PageReserved(kpte_page)) {
-		if (cpu_has_pse && (page_private(kpte_page) == 0)) {
+	if (!PageReserved(kpte_page) && cpu_has_pse) {
+		if (page_private(kpte_page) == PTRS_PER_PTE) {
+			unsigned i;
+
+			kpte = page_address(kpte_page);
+			for (i = 0; i < PTRS_PER_PTE; ++i, ++kpte)
+				if (pgprot_match(pte_pgprot(*kpte), prot))
+					page_private(kpte_page)--;
+			ref_prot(address) = prot;
+		}
+		if (page_private(kpte_page) == 0) {
 			ClearPagePrivate(kpte_page);
 			paravirt_release_pt(page_to_pfn(kpte_page));
 			list_add(&kpte_page->lru, &df_list);
@@ -181,13 +238,8 @@ __change_page_attr(struct page *page, pg
 	return 0;
 } 
 
-static inline void flush_map(void *adr)
-{
-	on_each_cpu(flush_kernel_map, adr, 1, 1);
-}
-
 /*
- * Change the page attributes of an page in the linear mapping.
+ * Change the page attributes of a page in the linear mapping.
  *
  * This should be used when a page is mapped with a different caching policy
  * than write-back somewhere - some CPUs do not like it when mappings with
@@ -204,34 +256,59 @@ int change_page_attr(struct page *page, 
 	int err = 0; 
 	int i; 
 	unsigned long flags;
+	struct list_head l;
+	struct page *next;
+	static char first = 1;
 
 	spin_lock_irqsave(&cpa_lock, flags);
+
+	if (unlikely(first)) {
+		unsigned long addr = PAGE_OFFSET & PMD_MASK;
+
+		/* This must match is_kernel_text(). */
+		for (; addr <= (unsigned long)__init_end; addr += PMD_SIZE)
+			ref_prot(addr) = PAGE_KERNEL_EXEC;
+		for (; addr > PAGE_OFFSET; addr += PMD_SIZE)
+			ref_prot(addr) = PAGE_KERNEL;
+		first = 0;
+	}
+
+	/* No need to call update_deferred() as long as flush_kernel_map()
+	 * uses __flush_tlb_all(). */
+	deferred[TLB].count = -1;
+
 	for (i = 0; i < numpages; i++, page++) { 
 		err = __change_page_attr(page, prot);
 		if (err) 
 			break; 
 	} 	
+
+	list_replace_init(&df_list, &l);
+
 	spin_unlock_irqrestore(&cpa_lock, flags);
+
+	list_for_each_entry_safe(page, next, &l, lru)
+		__free_page(page);
+
 	return err;
 }
 
 void global_flush_tlb(void)
 {
-	struct list_head l;
-	struct page *pg, *next;
+	unsigned i;
+	struct df_info info[ARRAY_SIZE(deferred)];
 
 	BUG_ON(irqs_disabled());
 
 	spin_lock_irq(&cpa_lock);
-	list_replace_init(&df_list, &l);
-	spin_unlock_irq(&cpa_lock);
-	if (!cpu_has_clflush)
-		flush_map(NULL);
-	list_for_each_entry_safe(pg, next, &l, lru) {
-		if (cpu_has_clflush)
-			flush_map(page_address(pg));
-		__free_page(pg);
+	for (i = 0; i < ARRAY_SIZE(info); ++i) {
+		info[i] = deferred[i];
+		deferred[i].count = 0;
 	}
+	spin_unlock_irq(&cpa_lock);
+
+	if (info[TLB].count)
+		on_each_cpu(flush_kernel_map, info, 1, 1);
 }
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
--- linux-2.6.22-rc4/arch/x86_64/mm/ioremap.c	2007-06-11 18:10:04.000000000 +0200
+++ 2.6.22-rc4-x86-change_page_attr/arch/x86_64/mm/ioremap.c	2007-06-11 09:13:57.000000000 +0200
@@ -48,7 +48,7 @@ ioremap_change_attr(unsigned long phys_a
  		 * Must use a address here and not struct page because the phys addr
 		 * can be a in hole between nodes and not have an memmap entry.
 		 */
-		err = change_page_attr_addr(vaddr,npages,__pgprot(__PAGE_KERNEL|flags));
+		err = change_page_attr_addr(vaddr,npages,MAKE_GLOBAL(__PAGE_KERNEL|flags));
 		if (!err)
 			global_flush_tlb();
 	}
@@ -197,9 +197,9 @@ void iounmap(volatile void __iomem *addr
 		return;
 	}
 
-	/* Reset the direct mapping. Can block */
+	/* Reset the direct mapping. Can block. Heed the trailing guard page. */
 	if (p->flags >> 20)
-		ioremap_change_attr(p->phys_addr, p->size, 0);
+		ioremap_change_attr(p->phys_addr, p->size - PAGE_SIZE, 0);
 
 	/* Finally remove it */
 	o = remove_vm_area((void *)addr);
--- linux-2.6.22-rc4/arch/x86_64/mm/pageattr.c	2007-06-11 18:10:04.000000000 +0200
+++ 2.6.22-rc4-x86-change_page_attr/arch/x86_64/mm/pageattr.c	2007-06-11 09:13:57.000000000 +0200
@@ -61,7 +61,31 @@ static struct page *split_large_page(uns
 	return base;
 } 
 
-static void cache_flush_page(void *adr)
+struct deferred_info {
+	unsigned long address;
+	int count;
+};
+#define TLB 0
+#define CACHE 1
+
+static inline void update_deferred(struct deferred_info *df,
+				   unsigned long address, int count)
+{
+	if (df->count == 0) {
+		df->address = address;
+		df->count = count;
+	} else if (df->count > 0) {
+		if (df->address + (df->count << PAGE_SHIFT) == address)
+			df->count += count;
+		else if (address + (count << PAGE_SHIFT) == df->address) {
+			df->address = address;
+			df->count += count;
+		} else
+			df->count = -1;
+	}
+}
+
+static inline void cache_flush_page(unsigned long adr)
 {
 	int i;
 	for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
@@ -70,38 +94,60 @@ static void cache_flush_page(void *adr)
 
 static void flush_kernel_map(void *arg)
 {
-	struct list_head *l = (struct list_head *)arg;
-	struct page *pg;
+	const struct deferred_info *info = arg;
+
+	if (info[TLB].count < 0)
+		__flush_tlb_all();
+	else {
+		unsigned long addr = info[TLB].address;
+		int count = info[TLB].count;
+
+		while (count-- > 0) {
+			__flush_tlb_one(addr);
+			if (__pa(addr) < KERNEL_TEXT_SIZE)
+				__flush_tlb_one(__START_KERNEL_map + __pa(addr));
+			addr += PAGE_SIZE;
+		}
+	}
 
+	if (info[CACHE].count == 0)
+		;
 	/* When clflush is available always use it because it is
 	   much cheaper than WBINVD */
-	if (!cpu_has_clflush)
-		asm volatile("wbinvd" ::: "memory");
-	list_for_each_entry(pg, l, lru) {
-		void *adr = page_address(pg);
-		if (cpu_has_clflush)
-			cache_flush_page(adr);
+	else if (info[CACHE].count < 0)
+		wbinvd();
+	else {
+		unsigned long addr = info[CACHE].address;
+		int count = info[CACHE].count;
+
+		BUG_ON(!cpu_has_clflush);
+		while (count-- > 0) {
+			cache_flush_page(addr);
+			if (__pa(addr) < KERNEL_TEXT_SIZE) {
+				unsigned long kaddr = __START_KERNEL_map + __pa(addr);
+
+				if (lookup_address(kaddr))
+					cache_flush_page(kaddr);
+			}
+			addr += PAGE_SIZE;
+		}
 	}
-	__flush_tlb_all();
 }
 
-static inline void flush_map(struct list_head *l)
-{	
-	on_each_cpu(flush_kernel_map, l, 1, 1);
-}
-
-static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
-
-static inline void save_page(struct page *fpage)
-{
-	list_add(&fpage->lru, &deferred_pages);
-}
+/* protected by init_mm.mmap_sem */
+static LIST_HEAD(deferred_pages);
+static struct deferred_info deferred[2];
+static pgprot_t kref_prot[] =	{
+	[0 ... (KERNEL_TEXT_SIZE - 1) >> PMD_SHIFT] = PAGE_KERNEL_EXEC
+};
+#define kref_prot(kaddr) kref_prot[((kaddr) - __START_KERNEL_map) >> PMD_SHIFT]
 
 /* 
- * No more special protections in this 2/4MB area - revert to a
+ * No more special protections in this 2MB area - revert to a
  * large page again. 
  */
-static void revert_page(unsigned long address, pgprot_t ref_prot)
+static void revert_page(struct page *kpte_page, unsigned long address,
+			pgprot_t ref_prot)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -109,6 +155,8 @@ static void revert_page(unsigned long ad
 	pte_t large_pte;
 	unsigned long pfn;
 
+	list_add(&kpte_page->lru, &deferred_pages);
+
 	pgd = pgd_offset_k(address);
 	BUG_ON(pgd_none(*pgd));
 	pud = pud_offset(pgd,address);
@@ -121,52 +169,71 @@ static void revert_page(unsigned long ad
 	set_pte((pte_t *)pmd, large_pte);
 }      
 
-static int
-__change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
-				   pgprot_t ref_prot)
+static inline int pgprot_match(pgprot_t prot1, pgprot_t prot2)
+{
+	return !((pgprot_val(prot1) ^ pgprot_val(prot2))
+		 & __supported_pte_mask & ~(_PAGE_ACCESSED|_PAGE_DIRTY));
+}
+
+static int __change_page_attr(unsigned long address, unsigned long pfn,
+			      pgprot_t prot, pgprot_t ref_prot)
 { 
-	pte_t *kpte; 
+	pte_t *kpte = lookup_address(address);
 	struct page *kpte_page;
-	pgprot_t ref_prot2;
-	kpte = lookup_address(address);
+	pgprot_t old_prot;
+
 	if (!kpte) return 0;
-	kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
-	if (pgprot_val(prot) != pgprot_val(ref_prot)) { 
+	kpte_page = virt_to_page(kpte);
+	old_prot = pte_pgprot(pte_clrhuge(*kpte));
+	if (!pgprot_match(prot, ref_prot)) {
 		if (!pte_huge(*kpte)) {
 			set_pte(kpte, pfn_pte(pfn, prot));
 		} else {
- 			/*
-			 * split_large_page will take the reference for this
-			 * change_page_attr on the split page.
- 			 */
-			struct page *split;
-			ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
-			split = split_large_page(address, prot, ref_prot2);
-			if (!split)
+			BUG_ON(!pgprot_match(old_prot, ref_prot));
+			kpte_page = split_large_page(address, prot, ref_prot);
+			if (!kpte_page)
 				return -ENOMEM;
-			set_pte(kpte, mk_pte(split, ref_prot2));
-			kpte_page = split;
+			set_pte(kpte, mk_pte(kpte_page, PAGE_KERNEL_EXEC));
 		}
-		page_private(kpte_page)++;
-	} else if (!pte_huge(*kpte)) {
+		if (pgprot_match(old_prot, ref_prot))
+			page_private(kpte_page)++;
+	} else if (!pgprot_match(ref_prot, old_prot)) {
+		BUG_ON(pte_huge(*kpte));
 		set_pte(kpte, pfn_pte(pfn, ref_prot));
 		BUG_ON(page_private(kpte_page) == 0);
 		page_private(kpte_page)--;
 	} else
-		BUG();
+		return 0;
+
+	if ((pgprot_val(prot) ^ pgprot_val(old_prot)) & (_PAGE_PCD|_PAGE_PWT)) {
+		if (!cpu_has_clflush || !pte_present(pfn_pte(0, prot)))
+			deferred[CACHE].count = -1;
+		else
+			update_deferred(deferred + CACHE, address, 1);
+	}
 
 	/* on x86-64 the direct mapping set at boot is not using 4k pages */
  	BUG_ON(PageReserved(kpte_page));
 
-	if (page_private(kpte_page) == 0) {
-		save_page(kpte_page);
-		revert_page(address, ref_prot);
- 	}
+	if (page_private(kpte_page) == PTRS_PER_PTE
+	    && address >= __START_KERNEL_map
+	    && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
+		unsigned i;
+
+		kpte = page_address(kpte_page);
+		for (i = 0; i < PTRS_PER_PTE; ++i, ++kpte)
+			if (pgprot_match(pte_pgprot(*kpte), prot))
+				page_private(kpte_page)--;
+		kref_prot(address) = ref_prot = prot;
+	}
+	if (page_private(kpte_page) == 0)
+		revert_page(kpte_page, address, ref_prot);
+
 	return 0;
 } 
 
 /*
- * Change the page attributes of an page in the linear mapping.
+ * Change the page attributes of a page in the linear mapping.
  *
  * This should be used when a page is mapped with a different caching policy
  * than write-back somewhere - some CPUs do not like it when mappings with
@@ -182,6 +249,8 @@ int change_page_attr_addr(unsigned long 
 {
 	int err = 0, kernel_map = 0;
 	int i; 
+	struct page *pg, *next;
+	struct list_head l;
 
 	if (address >= __START_KERNEL_map
 	    && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
@@ -190,6 +259,9 @@ int change_page_attr_addr(unsigned long 
 	}
 
 	down_write(&init_mm.mmap_sem);
+
+	update_deferred(deferred + TLB, address, numpages);
+
 	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
 		unsigned long pfn = __pa(address) >> PAGE_SHIFT;
 
@@ -207,10 +279,19 @@ int change_page_attr_addr(unsigned long 
 			/* Make sure the kernel mappings stay executable */
 			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
 			err = __change_page_attr(addr2, pfn, prot2,
-						 PAGE_KERNEL_EXEC);
+						 kref_prot(addr2));
 		} 
 	} 	
+
+	list_replace_init(&deferred_pages, &l);
+
 	up_write(&init_mm.mmap_sem); 
+
+	list_for_each_entry_safe(pg, next, &l, lru) {
+		ClearPagePrivate(pg);
+		__free_page(pg);
+	}
+
 	return err;
 }
 
@@ -223,19 +304,18 @@ int change_page_attr(struct page *page, 
 
 void global_flush_tlb(void)
 { 
-	struct page *pg, *next;
-	struct list_head l;
-
-	down_read(&init_mm.mmap_sem);
-	list_replace_init(&deferred_pages, &l);
-	up_read(&init_mm.mmap_sem);
+	unsigned i;
+	struct deferred_info info[ARRAY_SIZE(deferred)];
 
-	flush_map(&l);
+	down_write(&init_mm.mmap_sem);
+	for (i = 0; i < ARRAY_SIZE(info); ++i) {
+		info[i] = deferred[i];
+		deferred[i].count = 0;
+	}
+	up_write(&init_mm.mmap_sem);
 
-	list_for_each_entry_safe(pg, next, &l, lru) {
-		ClearPagePrivate(pg);
-		__free_page(pg);
-	} 
+	if (info[TLB].count)
+		on_each_cpu(flush_kernel_map, info, 1, 1);
 } 
 
 EXPORT_SYMBOL(change_page_attr);
--- linux-2.6.22-rc4/include/asm-i386/page.h	2007-06-11 18:10:43.000000000 +0200
+++ 2.6.22-rc4-x86-change_page_attr/include/asm-i386/page.h	2007-06-11 09:13:57.000000000 +0200
@@ -6,6 +6,16 @@
 #define PAGE_SIZE	(1UL << PAGE_SHIFT)
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
+#ifdef CONFIG_X86_PAE
+#define __PHYSICAL_MASK_SHIFT	52
+#define __PHYSICAL_MASK		((1ULL << __PHYSICAL_MASK_SHIFT) - 1)
+#define PHYSICAL_PAGE_MASK	(~(PAGE_SIZE - 1ULL) & __PHYSICAL_MASK)
+#else
+#define __PHYSICAL_MASK_SHIFT	32
+#define __PHYSICAL_MASK		(~0UL)
+#define PHYSICAL_PAGE_MASK	(PAGE_MASK & __PHYSICAL_MASK)
+#endif
+
 #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
 #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
 
--- linux-2.6.22-rc4/include/asm-i386/pgtable.h	2007-06-11 18:10:43.000000000 +0200
+++ 2.6.22-rc4-x86-change_page_attr/include/asm-i386/pgtable.h	2007-06-11 09:13:57.000000000 +0200
@@ -235,6 +235,7 @@ static inline pte_t pte_exprotect(pte_t 
 static inline pte_t pte_mkclean(pte_t pte)	{ (pte).pte_low &= ~_PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkold(pte_t pte)	{ (pte).pte_low &= ~_PAGE_ACCESSED; return pte; }
 static inline pte_t pte_wrprotect(pte_t pte)	{ (pte).pte_low &= ~_PAGE_RW; return pte; }
+static inline pte_t pte_clrhuge(pte_t pte)	{ (pte).pte_low &= ~_PAGE_PSE; return pte; }
 static inline pte_t pte_mkread(pte_t pte)	{ (pte).pte_low |= _PAGE_USER; return pte; }
 static inline pte_t pte_mkexec(pte_t pte)	{ (pte).pte_low |= _PAGE_USER; return pte; }
 static inline pte_t pte_mkdirty(pte_t pte)	{ (pte).pte_low |= _PAGE_DIRTY; return pte; }
@@ -242,6 +243,8 @@ static inline pte_t pte_mkyoung(pte_t pt
 static inline pte_t pte_mkwrite(pte_t pte)	{ (pte).pte_low |= _PAGE_RW; return pte; }
 static inline pte_t pte_mkhuge(pte_t pte)	{ (pte).pte_low |= _PAGE_PSE; return pte; }
 
+#define pte_pgprot(pte) (__pgprot(pte_val(pte) & ~PHYSICAL_PAGE_MASK))
+
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
 #else
--- linux-2.6.22-rc4/include/asm-x86_64/page.h	2007-06-11 18:10:48.000000000 +0200
+++ 2.6.22-rc4-x86-change_page_attr/include/asm-x86_64/page.h	2007-06-11 09:13:57.000000000 +0200
@@ -97,7 +97,7 @@ extern unsigned long phys_base;
 #define PAGE_ALIGN(addr)	(((addr)+PAGE_SIZE-1)&PAGE_MASK)
 
 /* See Documentation/x86_64/mm.txt for a description of the memory map. */
-#define __PHYSICAL_MASK_SHIFT	46
+#define __PHYSICAL_MASK_SHIFT	52
 #define __PHYSICAL_MASK		((_AC(1,UL) << __PHYSICAL_MASK_SHIFT) - 1)
 #define __VIRTUAL_MASK_SHIFT	48
 #define __VIRTUAL_MASK		((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - 1)
--- linux-2.6.22-rc4/include/asm-x86_64/pgtable.h	2007-06-11 18:10:48.000000000 +0200
+++ 2.6.22-rc4-x86-change_page_attr/include/asm-x86_64/pgtable.h	2007-06-11 09:13:57.000000000 +0200
@@ -356,9 +356,9 @@ static inline int pmd_large(pmd_t pte) {
 #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
 #define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
 
-#define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
+#define pte_to_pgoff(pte) (pte_val(pte) >> PAGE_SHIFT)
 #define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE })
-#define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
+#define PTE_FILE_MAX_BITS (64 - PAGE_SHIFT)
 
 /* PTE - Level 1 access. */
 
--- linux-2.6.22-rc4/include/asm-x86_64/system.h	2007-06-11 18:10:48.000000000 +0200
+++ 2.6.22-rc4-x86-change_page_attr/include/asm-x86_64/system.h	2007-06-11 09:13:57.000000000 +0200
@@ -109,7 +109,7 @@ static inline void write_cr4(unsigned lo
 #define stts() write_cr0(8 | read_cr0())
 
 #define wbinvd() \
-	__asm__ __volatile__ ("wbinvd": : :"memory");
+	__asm__ __volatile__ ("wbinvd": : :"memory")
 
 /*
  * On SMP systems, when the scheduler does migration-cost autodetection,


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ