linux-kernel - [PATCHv6 2.6.35-rc3-tip 1/12] mm: Move replace_page() / write_protect

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20100628055750.6869.16288.sendpatchset@localhost6.localdomain6>
Date:	Mon, 28 Jun 2010 11:27:50 +0530
From:	Srikar Dronamraju <srikar@...ux.vnet.ibm.com>
To:	Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...e.hu>
Cc:	Masami Hiramatsu <mhiramat@...hat.com>, Mel Gorman <mel@....ul.ie>,
	Srikar Dronamraju <srikar@...ux.vnet.ibm.com>,
	Roland McGrath <roland@...hat.com>,
	Randy Dunlap <rdunlap@...otime.net>,
	Arnaldo Carvalho de Melo <acme@...radead.org>,
	Steven Rostedt <rostedt@...dmis.org>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	"H. Peter Anvin" <hpa@...or.com>,
	Christoph Hellwig <hch@...radead.org>,
	Ananth N Mavinakayanahalli <ananth@...ibm.com>,
	Oleg Nesterov <oleg@...hat.com>,
	Mark Wielaard <mjw@...hat.com>,
	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Jim Keniston <jkenisto@...ux.vnet.ibm.com>,
	Frederic Weisbecker <fweisbec@...il.com>,
	"Rafael J. Wysocki" <rjw@...k.pl>,
	"Frank Ch. Eigler" <fche@...hat.com>,
	LKML <linux-kernel@...r.kernel.org>,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Subject: [PATCHv6 2.6.35-rc3-tip 1/12]  mm: Move replace_page() / write_protect_page() to mm/memory.c


mm: Move replace_page() / write_protect_page() to mm/memory.c

User bkpt will use background page replacement approach to insert/delete
breakpoints. Background page replacement approach will be based on
replace_page and write_protect_page.
Now replace_page() loses its static attribute.

Signed-off-by: Srikar Dronamraju <srikar@...ux.vnet.ibm.com>
Signed-off-by: Ananth N Mavinakayanahalli <ananth@...ibm.com>
---

 include/linux/mm.h |    4 ++
 mm/ksm.c           |  112 -------------------------------------------------
 mm/memory.c        |  120 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 124 insertions(+), 112 deletions(-)


diff --git a/include/linux/mm.h b/include/linux/mm.h
index b969efb..206008e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -858,6 +858,10 @@ void account_page_dirtied(struct page *page, struct address_space *mapping);
 int set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
 int clear_page_dirty_for_io(struct page *page);
+int replace_page(struct vm_area_struct *vma, struct page *page,
+					struct page *kpage, pte_t orig_pte);
+int write_protect_page(struct vm_area_struct *vma, struct page *page,
+						      pte_t *orig_pte);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
diff --git a/mm/ksm.c b/mm/ksm.c
index 6c3e99b..ce432e1 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -713,118 +713,6 @@ static inline int pages_identical(struct page *page1, struct page *page2)
 	return !memcmp_pages(page1, page2);
 }
 
-static int write_protect_page(struct vm_area_struct *vma, struct page *page,
-			      pte_t *orig_pte)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	unsigned long addr;
-	pte_t *ptep;
-	spinlock_t *ptl;
-	int swapped;
-	int err = -EFAULT;
-
-	addr = page_address_in_vma(page, vma);
-	if (addr == -EFAULT)
-		goto out;
-
-	ptep = page_check_address(page, mm, addr, &ptl, 0);
-	if (!ptep)
-		goto out;
-
-	if (pte_write(*ptep)) {
-		pte_t entry;
-
-		swapped = PageSwapCache(page);
-		flush_cache_page(vma, addr, page_to_pfn(page));
-		/*
-		 * Ok this is tricky, when get_user_pages_fast() run it doesnt
-		 * take any lock, therefore the check that we are going to make
-		 * with the pagecount against the mapcount is racey and
-		 * O_DIRECT can happen right after the check.
-		 * So we clear the pte and flush the tlb before the check
-		 * this assure us that no O_DIRECT can happen after the check
-		 * or in the middle of the check.
-		 */
-		entry = ptep_clear_flush(vma, addr, ptep);
-		/*
-		 * Check that no O_DIRECT or similar I/O is in progress on the
-		 * page
-		 */
-		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
-			set_pte_at(mm, addr, ptep, entry);
-			goto out_unlock;
-		}
-		entry = pte_wrprotect(entry);
-		set_pte_at_notify(mm, addr, ptep, entry);
-	}
-	*orig_pte = *ptep;
-	err = 0;
-
-out_unlock:
-	pte_unmap_unlock(ptep, ptl);
-out:
-	return err;
-}
-
-/**
- * replace_page - replace page in vma by new ksm page
- * @vma:      vma that holds the pte pointing to page
- * @page:     the page we are replacing by kpage
- * @kpage:    the ksm page we replace page by
- * @orig_pte: the original value of the pte
- *
- * Returns 0 on success, -EFAULT on failure.
- */
-static int replace_page(struct vm_area_struct *vma, struct page *page,
-			struct page *kpage, pte_t orig_pte)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *ptep;
-	spinlock_t *ptl;
-	unsigned long addr;
-	int err = -EFAULT;
-
-	addr = page_address_in_vma(page, vma);
-	if (addr == -EFAULT)
-		goto out;
-
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_present(*pgd))
-		goto out;
-
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		goto out;
-
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		goto out;
-
-	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
-	if (!pte_same(*ptep, orig_pte)) {
-		pte_unmap_unlock(ptep, ptl);
-		goto out;
-	}
-
-	get_page(kpage);
-	page_add_anon_rmap(kpage, vma, addr);
-
-	flush_cache_page(vma, addr, pte_pfn(*ptep));
-	ptep_clear_flush(vma, addr, ptep);
-	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
-
-	page_remove_rmap(page);
-	put_page(page);
-
-	pte_unmap_unlock(ptep, ptl);
-	err = 0;
-out:
-	return err;
-}
-
 /*
  * try_to_merge_one_page - take two pages and merge them into one
  * @vma: the vma that holds the pte pointing to page
diff --git a/mm/memory.c b/mm/memory.c
index 119b7cc..3fb2b9d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2582,6 +2582,126 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
+/**
+ * replace_page - replace page in vma by new ksm page
+ * @vma:      vma that holds the pte pointing to page
+ * @page:     the page we are replacing by kpage
+ * @kpage:    the ksm page we replace page by
+ * @orig_pte: the original value of the pte
+ *
+ * Returns 0 on success, -EFAULT on failure.
+ */
+int replace_page(struct vm_area_struct *vma, struct page *page,
+			struct page *kpage, pte_t orig_pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep;
+	spinlock_t *ptl;
+	unsigned long addr;
+	int err = -EFAULT;
+
+	addr = page_address_in_vma(page, vma);
+	if (addr == -EFAULT)
+		goto out;
+
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		goto out;
+
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		goto out;
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd))
+		goto out;
+
+	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	if (!pte_same(*ptep, orig_pte)) {
+		pte_unmap_unlock(ptep, ptl);
+		goto out;
+	}
+
+	get_page(kpage);
+	page_add_anon_rmap(kpage, vma, addr);
+
+	flush_cache_page(vma, addr, pte_pfn(*ptep));
+	ptep_clear_flush(vma, addr, ptep);
+	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
+
+	page_remove_rmap(page);
+	put_page(page);
+
+	pte_unmap_unlock(ptep, ptl);
+	err = 0;
+out:
+	return err;
+}
+
+/**
+ * write_protect_page - mark the page readonly
+ * @vma:      vma that holds the page we want to mark
+ * @page:     page that needs to be marked readonly
+ * @orig_pte: pte for the protected page.
+ *
+ * Returns 0 on success, -EFAULT on failure.
+ */
+int write_protect_page(struct vm_area_struct *vma, struct page *page,
+						      pte_t *orig_pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long addr;
+	pte_t *ptep;
+	spinlock_t *ptl;
+	int swapped;
+	int err = -EFAULT;
+
+	addr = page_address_in_vma(page, vma);
+	if (addr == -EFAULT)
+		goto out;
+
+	ptep = page_check_address(page, mm, addr, &ptl, 0);
+	if (!ptep)
+		goto out;
+
+	if (pte_write(*ptep)) {
+		pte_t entry;
+
+		swapped = PageSwapCache(page);
+		flush_cache_page(vma, addr, page_to_pfn(page));
+		/*
+		 * Ok this is tricky, when get_user_pages_fast() run it doesnt
+		 * take any lock, therefore the check that we are going to make
+		 * with the pagecount against the mapcount is racey and
+		 * O_DIRECT can happen right after the check.
+		 * So we clear the pte and flush the tlb before the check
+		 * this assure us that no O_DIRECT can happen after the check
+		 * or in the middle of the check.
+		 */
+		entry = ptep_clear_flush(vma, addr, ptep);
+		/*
+		 * Check that no O_DIRECT or similar I/O is in progress on the
+		 * page
+		 */
+		if (page_mapcount(page) + 1 + swapped != page_count(page)) {
+			set_pte_at(mm, addr, ptep, entry);
+			goto out_unlock;
+		}
+		entry = pte_wrprotect(entry);
+		set_pte_at_notify(mm, addr, ptep, entry);
+	}
+	*orig_pte = *ptep;
+	err = 0;
+
+out_unlock:
+	pte_unmap_unlock(ptep, ptl);
+out:
+	return err;
+}
+
 int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 {
 	struct address_space *mapping = inode->i_mapping;
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/