[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20080201220952.GA3875@sgi.com>
Date: Fri, 1 Feb 2008 16:09:52 -0600
From: Robin Holt <holt@....com>
To: Christoph Lameter <clameter@....com>
Cc: Andrea Arcangeli <andrea@...ranet.com>, Robin Holt <holt@....com>,
Avi Kivity <avi@...ranet.com>, Izik Eidus <izike@...ranet.com>,
kvm-devel@...ts.sourceforge.net,
Peter Zijlstra <a.p.zijlstra@...llo.nl>, steiner@....com,
linux-kernel@...r.kernel.org, linux-mm@...ck.org,
daniel.blueman@...drics.com
Subject: Re: [patch 2/4] mmu_notifier: Callbacks to invalidate address
ranges
Christoph,
The following code in do_wp_page is a problem.
We are getting this callout when we transition the pte from read-only
to read-write. Jack and I cannot see a reason we would need that
callout. It is causing problems for xpmem in that a write fault goes
to get_user_pages, which gets back to do_wp_page, which does the callout.
XPMEM allows an mm to be either faulting or invalidating, but not both.
As you can see, the case above needs it to be in both states.
Thanks,
Robin
> @@ -1630,6 +1646,8 @@ gotten:
> goto oom;
> cow_user_page(new_page, old_page, address, vma);
>
> + mmu_notifier(invalidate_range_begin, mm, address,
> + address + PAGE_SIZE, 0);
> /*
> * Re-check the pte - we dropped the lock
> */
> @@ -1668,6 +1686,8 @@ gotten:
> page_cache_release(old_page);
> unlock:
> pte_unmap_unlock(page_table, ptl);
> + mmu_notifier(invalidate_range_end, mm,
> + address, address + PAGE_SIZE, 0);
> if (dirty_page) {
> if (vma->vm_file)
> file_update_time(vma->vm_file);
> Index: linux-2.6/mm/mmap.c
> ===================================================================
> --- linux-2.6.orig/mm/mmap.c 2008-01-31 20:58:05.000000000 -0800
> +++ linux-2.6/mm/mmap.c 2008-01-31 20:59:14.000000000 -0800
> @@ -1744,11 +1744,13 @@ static void unmap_region(struct mm_struc
> lru_add_drain();
> tlb = tlb_gather_mmu(mm, 0);
> update_hiwater_rss(mm);
> + mmu_notifier(invalidate_range_begin, mm, start, end, 0);
> unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
> vm_unacct_memory(nr_accounted);
> free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
> next? next->vm_start: 0);
> tlb_finish_mmu(tlb, start, end);
> + mmu_notifier(invalidate_range_end, mm, start, end, 0);
> }
>
> /*
> Index: linux-2.6/mm/hugetlb.c
> ===================================================================
> --- linux-2.6.orig/mm/hugetlb.c 2008-01-31 20:56:03.000000000 -0800
> +++ linux-2.6/mm/hugetlb.c 2008-01-31 20:59:14.000000000 -0800
> @@ -14,6 +14,7 @@
> #include <linux/mempolicy.h>
> #include <linux/cpuset.h>
> #include <linux/mutex.h>
> +#include <linux/mmu_notifier.h>
>
> #include <asm/page.h>
> #include <asm/pgtable.h>
> @@ -743,6 +744,7 @@ void __unmap_hugepage_range(struct vm_ar
> BUG_ON(start & ~HPAGE_MASK);
> BUG_ON(end & ~HPAGE_MASK);
>
> + mmu_notifier(invalidate_range_begin, mm, start, end, 1);
> spin_lock(&mm->page_table_lock);
> for (address = start; address < end; address += HPAGE_SIZE) {
> ptep = huge_pte_offset(mm, address);
> @@ -763,6 +765,7 @@ void __unmap_hugepage_range(struct vm_ar
> }
> spin_unlock(&mm->page_table_lock);
> flush_tlb_range(vma, start, end);
> + mmu_notifier(invalidate_range_end, mm, start, end, 1);
> list_for_each_entry_safe(page, tmp, &page_list, lru) {
> list_del(&page->lru);
> put_page(page);
> Index: linux-2.6/mm/filemap_xip.c
> ===================================================================
> --- linux-2.6.orig/mm/filemap_xip.c 2008-01-31 20:56:03.000000000 -0800
> +++ linux-2.6/mm/filemap_xip.c 2008-01-31 20:59:14.000000000 -0800
> @@ -13,6 +13,7 @@
> #include <linux/module.h>
> #include <linux/uio.h>
> #include <linux/rmap.h>
> +#include <linux/mmu_notifier.h>
> #include <linux/sched.h>
> #include <asm/tlbflush.h>
>
> @@ -189,6 +190,8 @@ __xip_unmap (struct address_space * mapp
> address = vma->vm_start +
> ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
> BUG_ON(address < vma->vm_start || address >= vma->vm_end);
> + mmu_notifier(invalidate_range_begin, mm, address,
> + address + PAGE_SIZE, 1);
> pte = page_check_address(page, mm, address, &ptl);
> if (pte) {
> /* Nuke the page table entry. */
> @@ -200,6 +203,8 @@ __xip_unmap (struct address_space * mapp
> pte_unmap_unlock(pte, ptl);
> page_cache_release(page);
> }
> + mmu_notifier(invalidate_range_end, mm,
> + address, address + PAGE_SIZE, 1);
> }
> spin_unlock(&mapping->i_mmap_lock);
> }
> Index: linux-2.6/mm/mremap.c
> ===================================================================
> --- linux-2.6.orig/mm/mremap.c 2008-01-31 20:56:03.000000000 -0800
> +++ linux-2.6/mm/mremap.c 2008-01-31 20:59:14.000000000 -0800
> @@ -18,6 +18,7 @@
> #include <linux/highmem.h>
> #include <linux/security.h>
> #include <linux/syscalls.h>
> +#include <linux/mmu_notifier.h>
>
> #include <asm/uaccess.h>
> #include <asm/cacheflush.h>
> @@ -124,12 +125,15 @@ unsigned long move_page_tables(struct vm
> unsigned long old_addr, struct vm_area_struct *new_vma,
> unsigned long new_addr, unsigned long len)
> {
> - unsigned long extent, next, old_end;
> + unsigned long extent, next, old_start, old_end;
> pmd_t *old_pmd, *new_pmd;
>
> + old_start = old_addr;
> old_end = old_addr + len;
> flush_cache_range(vma, old_addr, old_end);
>
> + mmu_notifier(invalidate_range_begin, vma->vm_mm,
> + old_addr, old_end, 0);
> for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
> cond_resched();
> next = (old_addr + PMD_SIZE) & PMD_MASK;
> @@ -150,6 +154,7 @@ unsigned long move_page_tables(struct vm
> move_ptes(vma, old_pmd, old_addr, old_addr + extent,
> new_vma, new_pmd, new_addr);
> }
> + mmu_notifier(invalidate_range_end, vma->vm_mm, old_start, old_end, 0);
>
> return len + old_addr - old_end; /* how much done */
> }
>
> --
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@...ck.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@...ck.org"> email@...ck.org </a>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists