[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20080103172704.1DCB514DDD@wotan.suse.de>
Date: Thu, 3 Jan 2008 18:27:04 +0100 (CET)
From: Andi Kleen <ak@...e.de>
To: linux-kernel@...r.kernel.org
Subject: [PATCH] [7/8] GBPAGES: Implement GBpages support in change_page_attr()
Teach change_page_attr() (c_p_a()) to split and unsplit GB pages.
Signed-off-by: Andi Kleen <ak@...e.de>
---
arch/x86/mm/pageattr_64.c | 149 ++++++++++++++++++++++++++++++++++++----------
1 file changed, 118 insertions(+), 31 deletions(-)
Index: linux/arch/x86/mm/pageattr_64.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr_64.c
+++ linux/arch/x86/mm/pageattr_64.c
@@ -14,6 +14,8 @@
#include <asm/io.h>
#include <asm/kdebug.h>
+#define Cprintk(x...) /* debug trace hook used by the cpa code below; expands to nothing as defined here */
+
enum flush_mode { FLUSH_NONE, FLUSH_CACHE, FLUSH_TLB };
struct flush {
@@ -40,6 +42,9 @@ pte_t *lookup_address(unsigned long addr
pud = pud_offset(pgd, address);
if (!pud_present(*pud))
return NULL;
+ *level = 2;
+ if (pud_large(*pud))
+ return (pte_t *)pud;
pmd = pmd_offset(pud, address);
if (!pmd_present(*pmd))
return NULL;
@@ -53,30 +58,88 @@ pte_t *lookup_address(unsigned long addr
return pte;
}
-static struct page *split_large_page(unsigned long address, pgprot_t prot,
- pgprot_t ref_prot)
-{
- int i;
+static pte_t *alloc_split_page(struct page **base) /* allocate one page for a split page table; its struct page is returned through *base */
+{
+ struct page *p = alloc_page(GFP_KERNEL);
+ if (!p)
+ return NULL; /* allocation failed; *base is left unwritten */
+ SetPagePrivate(p);
+ page_private(p) = 0; /* counter starts at zero; split_gb() and __change_page_attr() increment it later */
+ *base = p;
+ return page_address(p); /* address of the new page, used by callers as an array of entries */
+}
+
+static struct page *free_split_page(struct page *base) /* release a page obtained via alloc_split_page(); always returns NULL so callers can write "return free_split_page(...)" */
+{
+ BUG_ON(!PagePrivate(base)); /* the Private flag is set by alloc_split_page(); anything else is a caller bug */
+ BUG_ON(page_private(base) != 0); /* the counter must still be zero when the page is freed */
+ ClearPagePrivate(base);
+ __free_page(base);
+ return NULL;
+}
+
+static struct page *
+split_pmd(unsigned long paddr, pgprot_t prot, pgprot_t ref_prot) /* build a table of PTRS_PER_PTE small-page entries covering the large page around physical address paddr; returns its struct page or NULL */
+{
+ int i;
unsigned long addr;
- struct page *base = alloc_pages(GFP_KERNEL, 0);
- pte_t *pbase;
- if (!base)
+ struct page *base;
+ pte_t *pbase = alloc_split_page(&base); /* also sets PagePrivate and zeroes page_private, replacing the open-coded setup removed below */
+ if (!pbase)
return NULL;
- /*
- * page_private is used to track the number of entries in
- * the page table page have non standard attributes.
- */
- SetPagePrivate(base);
- page_private(base) = 0;
- address = __pa(address);
- addr = address & PMD_PAGE_MASK;
- pbase = (pte_t *)page_address(base);
- for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
- pbase[i] = pfn_pte(addr >> PAGE_SHIFT,
- addr == address ? prot : ref_prot);
+ Cprintk("cpa split l3 paddr %lx\n", paddr);
+ addr = paddr & PMD_PAGE_MASK; /* mask paddr down to the start of its large-page region; the caller now passes a physical address, so the __pa() conversion is gone */
+ for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) /* one entry per PAGE_SIZE step */
+ pbase[i] = pfn_pte(addr >> PAGE_SHIFT,
+ addr == paddr ? prot : ref_prot); /* only the entry for paddr gets prot, the rest keep ref_prot; NOTE(review): the equality only matches when paddr is page-aligned */
+
+ return base;
+}
+
+static struct page *
+split_gb(unsigned long paddr, pgprot_t prot, pgprot_t ref_prot) /* build a table of PTRS_PER_PMD entries covering the GB page around physical address paddr; returns its struct page or NULL */
+{
+ unsigned long addr;
+ int i;
+ struct page *base;
+ pte_t *pbase = alloc_split_page(&base);
+
+ if (!pbase)
+ return NULL;
+ Cprintk("cpa split gb paddr %lx\n", paddr);
+ addr = paddr & PUD_PAGE_MASK; /* mask paddr down to the start of its GB-page region */
+ for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_PAGE_SIZE) { /* one entry per PMD_PAGE_SIZE step */
+ if (paddr >= addr && paddr < addr + PMD_PAGE_SIZE) { /* the slot containing paddr is split one level further */
+ struct page *l3;
+ l3 = split_pmd(paddr, prot, ref_prot);
+ if (!l3)
+ return free_split_page(base); /* drop the half-built table; free_split_page() returns NULL */
+ page_private(l3)++; /* bump the counter on the newly built lower-level table */
+ pbase[i] = mk_pte(l3, ref_prot); /* entry points at the lower-level table page */
+ } else {
+ pbase[i] = pfn_pte(addr>>PAGE_SHIFT, ref_prot);
+ pbase[i] = pte_mkhuge(pbase[i]); /* every other slot stays a large-page mapping with ref_prot */
+ }
}
return base;
+}
+
+static struct page *split_large_page(unsigned long address, pgprot_t prot, /* dispatch the split of the mapping at virtual address 'address' by page-table level */
+ pgprot_t ref_prot, int level)
+{
+ unsigned long paddr = __pa(address); /* the split helpers take a physical address */
+ Cprintk("cpa splitting %lx level %d\n", address, level);
+ if (level == 2) /* level 2: lookup_address() reports this when the pud entry is large */
+ return split_gb(paddr, prot, ref_prot);
+ else if (level == 3)
+ return split_pmd(paddr, prot, ref_prot);
+ else { /* any other level is treated as a fatal caller error */
+ printk("address %lx\n", address); /* NOTE(review): no KERN_ log level is given here */
+ dump_pagetable(address);
+ BUG();
+ }
+ return NULL;
}
struct flush_arg {
@@ -132,17 +195,42 @@ static inline void save_page(struct page
list_add(&fpage->lru, &deferred_pages);
}
+static void reset_large_pte(pte_t *pte, unsigned long addr, pgprot_t prot) /* rewrite *pte as a huge-page entry mapping virtual address addr with protection prot */
+{
+ unsigned long pfn = __pa(addr) >> PAGE_SHIFT; /* addr is virtual; callers pass it already masked to the large-page boundary */
+ set_pte(pte, pte_mkhuge(pfn_pte(pfn, prot)));
+}
+
+static void
+revert_gb(unsigned long address, pud_t *pud, pmd_t *pmd, pgprot_t ref_prot) /* drop one count on the pmd table page; at zero, turn the pud entry back into a GB mapping */
+{
+ struct page *p = virt_to_page(pmd); /* struct page of the page that holds the pmd entry */
+
+ /* A reserved page means the table was already set up at boot. Don't touch those. */
+ if (PageReserved(p))
+ return;
+
+ Cprintk("cpa revert gb %lx count %ld\n", address, page_private(p));
+ --page_private(p);
+ BUG_ON(page_private(p) < 0); /* counter underflow indicates unbalanced split/revert accounting */
+ if (page_private(p) == 0) { /* nothing in this table is counted any more */
+ save_page(p); /* hand the table page to save_page(); its visible tail queues it on deferred_pages */
+ reset_large_pte((pte_t *)pud, address & PUD_PAGE_MASK, ref_prot); /* remap the whole PUD-sized region as one large entry */
+ }
+}
+
/*
* No more special protections in this 2MB area - revert to a
- * large page again.
+ * large or GB page again.
*/
+
static void revert_page(unsigned long address, pgprot_t ref_prot)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
- pte_t large_pte;
- unsigned long pfn;
+
+ Cprintk("cpa revert %lx\n", address);
pgd = pgd_offset_k(address);
BUG_ON(pgd_none(*pgd));
@@ -150,10 +238,9 @@ static void revert_page(unsigned long ad
BUG_ON(pud_none(*pud));
pmd = pmd_offset(pud, address);
BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
- pfn = (__pa(address) & PMD_PAGE_MASK) >> PAGE_SHIFT;
- large_pte = pfn_pte(pfn, ref_prot);
- large_pte = pte_mkhuge(large_pte);
- set_pte((pte_t *)pmd, large_pte);
+ reset_large_pte((pte_t *)pmd, address & PMD_PAGE_MASK, ref_prot);
+
+ revert_gb(address, pud, pmd, ref_prot);
}
/*
@@ -189,6 +276,7 @@ static void set_tlb_flush(unsigned long
static unsigned short pat_bit[5] = {
[4] = _PAGE_PAT,
[3] = _PAGE_PAT_LARGE,
+ [2] = _PAGE_PAT_LARGE, /* new slot for level 2 (GB pages, per lookup_address), same bit as the level-3 entry; presumably the table is indexed by level - confirm */
};
static int cache_attr_changed(pte_t pte, pgprot_t prot, int level)
@@ -224,15 +312,14 @@ __change_page_attr(unsigned long address
page_private(kpte_page)++;
set_pte(kpte, pfn_pte(pfn, prot));
} else {
- /*
- * split_large_page will take the reference for this
- * change_page_attr on the split page.
- */
struct page *split;
ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
- split = split_large_page(address, prot, ref_prot2);
+ split = split_large_page(address, prot, ref_prot2,
+ level);
if (!split)
return -ENOMEM;
+ if (level == 3 && !PageReserved(kpte_page))
+ page_private(kpte_page)++;
pgprot_val(ref_prot2) &= ~_PAGE_NX;
set_pte(kpte, mk_pte(split, ref_prot2));
kpte_page = split;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists