Straightforward port of pat-conflict.patch to the x86 tree. Use a linear
list to keep track of all reserved region mappings. Only UC access is
allowed for RAM regions for now.

Signed-off-by: Venkatesh Pallipadi
Signed-off-by: Suresh Siddha
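Illustration of the intended semantics from a caller's point of view
(hypothetical driver code, example address only; not part of the patch):

	void __iomem *a, *b;

	/* First user maps the region UC; reserve_mattr() records the range. */
	a = ioremap_nocache(0xfed00000, 0x1000);

	/*
	 * A plain ioremap() of the same range passes flags == 0, so
	 * reserve_mattr() hands back the attributes already recorded for
	 * the range and the kernel reuses them instead of creating a
	 * conflicting alias.
	 */
	b = ioremap(0xfed00000, 0x1000);

	/*
	 * A request with explicitly different attributes for a range that
	 * is already tracked is refused: reserve_mattr() returns -EBUSY
	 * and __ioremap() returns NULL.
	 */

	/* Teardown goes through iounmap(), which calls free_mattr() when
	 * it resets the direct mapping. */
	iounmap(b);
	iounmap(a);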
Index: linux-2.6.git/arch/x86/mm/ioremap_64.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/ioremap_64.c	2008-01-08 12:43:13.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/ioremap_64.c	2008-01-08 12:44:20.000000000 -0800
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include <asm/pat.h>

 unsigned long __phys_addr(unsigned long x)
 {
@@ -105,12 +106,23 @@
 		remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
 		return NULL;
 	}
+
+	/* For plain ioremap() get the existing attributes. Otherwise
+	   check against the existing ones. */
+	if (reserve_mattr(phys_addr, phys_addr + size, flags,
+			  flags ? NULL : &flags) < 0)
+		goto out;
+
 	if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) {
-		area->flags &= 0xffffff;
-		vunmap(addr);
-		return NULL;
+		free_mattr(phys_addr, phys_addr + size, flags);
+		goto out;
 	}
 	return (__force void __iomem *) (offset + (char *)addr);
+
+out:
+	area->flags &= 0xffffff;
+	vunmap(addr);
+	return NULL;
 }
 EXPORT_SYMBOL(__ioremap);
@@ -178,8 +190,11 @@
 	}

 	/* Reset the direct mapping. Can block */
-	if (p->flags >> 20)
-		ioremap_change_attr(p->phys_addr, p->size, 0);
+	if (p->flags >> 20) {
+		free_mattr(p->phys_addr, p->phys_addr + get_vm_area_size(p),
+			   p->flags>>20);
+		ioremap_change_attr(p->phys_addr, get_vm_area_size(p), 0);
+	}

 	/* Finally remove it */
 	o = remove_vm_area((void *)addr);

Index: linux-2.6.git/arch/x86/mm/pat.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/pat.c	2008-01-08 12:43:13.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/pat.c	2008-01-08 12:45:05.000000000 -0800
@@ -5,6 +5,9 @@
 #include
 #include
 #include
+#include
+#include
+#include

 static u64 boot_pat_state;
 int pat_wc_enabled = 0;
@@ -68,3 +71,116 @@
 {
 }

+/* The global memattr list keeps track of caching attributes for specific
+   physical memory areas. Conflicting caching attributes in different
+   mappings can cause CPU cache corruption, so we keep track of them here.
+
+   The list is sorted and can contain multiple entries for each address
+   (this allows reference counting for overlapping areas). All the aliases
+   have the same cache attributes of course. Zero attributes are represented
+   as holes.
+
+   Currently the data structure is a list because the number of mappings
+   is expected to be relatively small. If this should become a problem it
+   could be changed to an rbtree or similar.
+
+   mattr_lock protects the whole list.
+*/
+
+struct memattr {
+	u64 start;
+	u64 end;
+	unsigned long attr;
+	struct list_head nd;
+};
+
+static LIST_HEAD(mattr_list);
+static DEFINE_SPINLOCK(mattr_lock);	/* protects memattr list */
+
+int reserve_mattr(u64 start, u64 end, unsigned long attr, unsigned long *fattr)
+{
+	struct memattr *ma = NULL, *ml;
+	int err = 0;
+
+	if (fattr)
+		*fattr = attr;
+
+	if (is_memory_any_valid(start, end)) {
+		if (!is_memory_all_valid(start, end) && !fattr)
+			return -EINVAL;
+
+		if (attr & _PAGE_WC) {
+			if (!fattr)
+				return -EINVAL;
+			else
+				*fattr = _PAGE_PCD;
+		}
+
+		return 0;
+	}
+
+	ma = kmalloc(sizeof(struct memattr), GFP_KERNEL);
+	if (!ma)
+		return -ENOMEM;
+	ma->start = start;
+	ma->end = end;
+	ma->attr = attr;
+
+	spin_lock(&mattr_lock);
+	list_for_each_entry(ml, &mattr_list, nd) {
+		if (ml->start <= start && ml->end >= end) {
+			if (fattr)
+				ma->attr = *fattr = ml->attr;
+
+			if (!fattr && attr != ml->attr) {
+				printk(KERN_DEBUG
+			"%s:%d conflicting cache attribute %Lx-%Lx %lx<->%lx\n",
+					current->comm, current->pid,
+					start, end, attr, ml->attr);
+				err = -EBUSY;
+				break;
+			}
+		} else if (ml->start >= end) {
+			list_add(&ma->nd, ml->nd.prev);
+			ma = NULL;
+			break;
+		}
+	}
+
+	if (err)
+		kfree(ma);
+	else if (ma)
+		list_add_tail(&ma->nd, &mattr_list);
+
+	spin_unlock(&mattr_lock);
+	return err;
+}
+
+int free_mattr(u64 start, u64 end, unsigned long attr)
+{
+	struct memattr *ml;
+	int err = attr ? -EBUSY : 0;
+
+	if (is_memory_any_valid(start, end))
+		return 0;
+
+	spin_lock(&mattr_lock);
+	list_for_each_entry(ml, &mattr_list, nd) {
+		if (ml->start == start && ml->end == end) {
+			if (ml->attr != attr)
+				printk(KERN_DEBUG
+		"%s:%d conflicting cache attributes on free %Lx-%Lx %lx<->%lx\n",
+					current->comm, current->pid,
+					start, end, attr, ml->attr);
+			list_del(&ml->nd);
+			kfree(ml);
+			err = 0;
+			break;
+		}
+	}
+	spin_unlock(&mattr_lock);
+
+	if (err)
+		printk(KERN_DEBUG "%s:%d freeing invalid mattr %Lx-%Lx %lx\n",
+			current->comm, current->pid,
+			start, end, attr);
+	return err;
+}

Index: linux-2.6.git/include/asm-x86/pat.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.git/include/asm-x86/pat.h	2008-01-08 12:44:20.000000000 -0800
@@ -0,0 +1,12 @@
+#ifndef _ASM_PAT_H
+#define _ASM_PAT_H 1
+
+#include <linux/types.h>
+
+/* Handle the page attribute table (PAT) of the CPU */
+
+int reserve_mattr(u64 start, u64 end, unsigned long attr, unsigned long *fattr);
+int free_mattr(u64 start, u64 end, unsigned long attr);
+
+#endif
+

Index: linux-2.6.git/arch/x86/mm/ioremap_32.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/ioremap_32.c	2008-01-08 12:43:13.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/ioremap_32.c	2008-01-08 12:44:20.000000000 -0800
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include <asm/pat.h>

 #define ISA_START_ADDRESS	0xa0000
 #define ISA_END_ADDRESS		0x100000
@@ -26,6 +27,42 @@
  */

 /*
+ * Fix up the linear direct mapping of the kernel to avoid cache attribute
+ * conflicts.
+ */
+static int
+ioremap_change_attr(unsigned long phys_addr, unsigned long size,
+		    unsigned long flags)
+{
+	unsigned long last_addr;
+	int err = 0;
+
+	/* Guaranteed to be > phys_addr, as per __ioremap() */
+	last_addr = phys_addr + size - 1;
+	if (last_addr < virt_to_phys(high_memory) - 1) {
+		unsigned long vaddr = (unsigned long)__va(phys_addr);
+		unsigned long npages;
+
+		phys_addr &= PAGE_MASK;
+
+		/* This might overflow and become zero.. */
+		last_addr = PAGE_ALIGN(last_addr);
+
+		/* .. but that's ok, because modulo-2**n arithmetic will make
+		 * the page-aligned "last - first" come out right.
+		 */
+		npages = (last_addr - phys_addr) >> PAGE_SHIFT;
+
+		err = change_page_attr_addr(vaddr, npages,
+					    __pgprot(__PAGE_KERNEL|flags));
+		if (!err)
+			global_flush_tlb();
+	}
+
+	return err;
+}
+
+/*
  * Remap an arbitrary physical address space into the kernel virtual
  * address space. Needed when the kernel wants to access high addresses
  * directly.
@@ -90,7 +127,25 @@
 		vunmap((void __force *) addr);
 		return NULL;
 	}
+
+	/*
+	 * For plain ioremap() get the existing attributes. Otherwise
+	 * check against the existing ones.
+	 */
+	if (reserve_mattr(phys_addr, phys_addr + size, flags,
+			  flags ? NULL : &flags) < 0)
+		goto out;
+
+	if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) {
+		free_mattr(phys_addr, phys_addr + size, flags);
+		goto out;
+	}
 	return (void __iomem *) (offset + (char __iomem *)addr);
+
+out:
+	area->flags &= 0xffffff;
+	vunmap(addr);
+	return NULL;
 }
 EXPORT_SYMBOL(__ioremap);
@@ -118,36 +173,7 @@

 void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
 {
-	unsigned long last_addr;
-	void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
-	if (!p)
-		return p;
-
-	/* Guaranteed to be > phys_addr, as per __ioremap() */
-	last_addr = phys_addr + size - 1;
-
-	if (last_addr < virt_to_phys(high_memory) - 1) {
-		struct page *ppage = virt_to_page(__va(phys_addr));
-		unsigned long npages;
-
-		phys_addr &= PAGE_MASK;
-
-		/* This might overflow and become zero.. */
-		last_addr = PAGE_ALIGN(last_addr);
-
-		/* .. but that's ok, because modulo-2**n arithmetic will make
-		 * the page-aligned "last - first" come out right.
-		 */
-		npages = (last_addr - phys_addr) >> PAGE_SHIFT;
-
-		if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
-			iounmap(p);
-			p = NULL;
-		}
-		global_flush_tlb();
-	}
-
-	return p;
+	return __ioremap(phys_addr, size, _PAGE_PCD);
 }
 EXPORT_SYMBOL(ioremap_nocache);
@@ -194,12 +220,11 @@
 	}

 	/* Reset the direct mapping. Can block */
-	if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
-		change_page_attr(virt_to_page(__va(p->phys_addr)),
-				 get_vm_area_size(p) >> PAGE_SHIFT,
-				 PAGE_KERNEL);
-		global_flush_tlb();
-	}
+	if (p->flags >> 20) {
+		free_mattr(p->phys_addr, p->phys_addr + get_vm_area_size(p),
+			   p->flags>>20);
+		ioremap_change_attr(p->phys_addr, get_vm_area_size(p), 0);
+	}

 	/* Finally remove it */
 	o = remove_vm_area((void *)addr);
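For completeness, a minimal sketch of how the reserve_mattr()/free_mattr()
interface from asm-x86/pat.h could be driven directly by a mapping path
other than ioremap(); the helper names and error handling are made up for
illustration:

	/* Hypothetical caller: claim a non-RAM range as uncached. */
	static int example_claim_uc(u64 start, u64 end)
	{
		/*
		 * A NULL fattr means "exactly these attributes or nothing":
		 * reserve_mattr() fails with -EBUSY if the range is already
		 * tracked with different attributes, and with -EINVAL for a
		 * range that only partially covers RAM.
		 */
		return reserve_mattr(start, end, _PAGE_PCD, NULL);
	}

	static void example_release_uc(u64 start, u64 end)
	{
		/* Drops the matching list entry added by reserve_mattr(). */
		free_mattr(start, end, _PAGE_PCD);
	}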