lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230922193639.10158-3-vishal.moola@gmail.com>
Date:   Fri, 22 Sep 2023 12:36:39 -0700
From:   "Vishal Moola (Oracle)" <vishal.moola@...il.com>
To:     linux-mm@...ck.org
Cc:     akpm@...ux-foundation.org, linux-kernel@...r.kernel.org,
        "Vishal Moola (Oracle)" <vishal.moola@...il.com>
Subject: [RFC PATCH 2/2] mm/khugepaged: Remove compound_pagelist

Currently, khugepaged builds a compound_pagelist while scanning, which
is used to properly account for compound pages. We can now account
for a compound page as a single folio instead, so remove this list.

Large folios are guaranteed to have consecutive ptes and addresses, so
once the first pte of a large folio is found, skip over the rest.

This helps convert khugepaged to use folios. It removes 3 compound_head()
calls in __collapse_huge_page_copy_succeeded() and 980 bytes of kernel
text.

Signed-off-by: Vishal Moola (Oracle) <vishal.moola@...il.com>
---
 mm/khugepaged.c | 76 ++++++++++++-------------------------------------
 1 file changed, 18 insertions(+), 58 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index f46a7a7c489f..b6c7d55a8231 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -498,10 +498,9 @@ static void release_pte_page(struct page *page)
 	release_pte_folio(page_folio(page));
 }
 
-static void release_pte_pages(pte_t *pte, pte_t *_pte,
-		struct list_head *compound_pagelist)
+static void release_pte_folios(pte_t *pte, pte_t *_pte)
 {
-	struct folio *folio, *tmp;
+	struct folio *folio;
 
 	while (--_pte >= pte) {
 		pte_t pteval = ptep_get(_pte);
@@ -514,12 +513,7 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte,
 			continue;
 		folio = pfn_folio(pfn);
 		if (folio_test_large(folio))
-			continue;
-		release_pte_folio(folio);
-	}
-
-	list_for_each_entry_safe(folio, tmp, compound_pagelist, lru) {
-		list_del(&folio->lru);
+			_pte -= folio_nr_pages(folio) - 1;
 		release_pte_folio(folio);
 	}
 }
@@ -538,8 +532,7 @@ static bool is_refcount_suitable(struct page *page)
 static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 					unsigned long address,
 					pte_t *pte,
-					struct collapse_control *cc,
-					struct list_head *compound_pagelist)
+					struct collapse_control *cc)
 {
 	struct folio *folio = NULL;
 	pte_t *_pte;
@@ -588,19 +581,6 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 			}
 		}
 
-		if (folio_test_large(folio)) {
-			struct folio *f;
-
-			/*
-			 * Check if we have dealt with the compound page
-			 * already
-			 */
-			list_for_each_entry(f, compound_pagelist, lru) {
-				if (folio == f)
-					goto next;
-			}
-		}
-
 		/*
 		 * We can do it before isolate_lru_page because the
 		 * page can't be freed from under us. NOTE: PG_lock
@@ -644,9 +624,6 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 		VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
 
-		if (folio_test_large(folio))
-			list_add_tail(&folio->lru, compound_pagelist);
-next:
 		/*
 		 * If collapse was initiated by khugepaged, check that there is
 		 * enough young pte to justify collapsing the page
@@ -660,6 +637,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		if (pte_write(pteval))
 			writable = true;
 
+		if (folio_test_large(folio)) {
+			_pte += folio_nr_pages(folio) - 1;
+			address += folio_size(folio) - PAGE_SIZE;
+		}
 	}
 
 	if (unlikely(!writable)) {
@@ -673,7 +654,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		return result;
 	}
 out:
-	release_pte_pages(pte, _pte, compound_pagelist);
+	release_pte_folios(pte, _pte);
 	trace_mm_collapse_huge_page_isolate(&folio->page, none_or_zero,
 					    referenced, writable, result);
 	return result;
@@ -682,11 +663,9 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 static void __collapse_huge_page_copy_succeeded(pte_t *pte,
 						struct vm_area_struct *vma,
 						unsigned long address,
-						spinlock_t *ptl,
-						struct list_head *compound_pagelist)
+						spinlock_t *ptl)
 {
 	struct page *src_page;
-	struct page *tmp;
 	pte_t *_pte;
 	pte_t pteval;
 
@@ -706,8 +685,7 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
 			}
 		} else {
 			src_page = pte_page(pteval);
-			if (!PageCompound(src_page))
-				release_pte_page(src_page);
+			release_pte_page(src_page);
 			/*
 			 * ptl mostly unnecessary, but preempt has to
 			 * be disabled to update the per-cpu stats
@@ -720,23 +698,12 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
 			free_page_and_swap_cache(src_page);
 		}
 	}
-
-	list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
-		list_del(&src_page->lru);
-		mod_node_page_state(page_pgdat(src_page),
-				    NR_ISOLATED_ANON + page_is_file_lru(src_page),
-				    -compound_nr(src_page));
-		unlock_page(src_page);
-		free_swap_cache(src_page);
-		putback_lru_page(src_page);
-	}
 }
 
 static void __collapse_huge_page_copy_failed(pte_t *pte,
 					     pmd_t *pmd,
 					     pmd_t orig_pmd,
-					     struct vm_area_struct *vma,
-					     struct list_head *compound_pagelist)
+					     struct vm_area_struct *vma)
 {
 	spinlock_t *pmd_ptl;
 
@@ -753,7 +720,7 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
 	 * Release both raw and compound pages isolated
 	 * in __collapse_huge_page_isolate.
 	 */
-	release_pte_pages(pte, pte + HPAGE_PMD_NR, compound_pagelist);
+	release_pte_folios(pte, pte + HPAGE_PMD_NR);
 }
 
 /*
@@ -769,7 +736,6 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
  * @vma: the original raw pages' virtual memory area
  * @address: starting address to copy
  * @ptl: lock on raw pages' PTEs
- * @compound_pagelist: list that stores compound pages
  */
 static int __collapse_huge_page_copy(pte_t *pte,
 				     struct page *page,
@@ -777,8 +743,7 @@ static int __collapse_huge_page_copy(pte_t *pte,
 				     pmd_t orig_pmd,
 				     struct vm_area_struct *vma,
 				     unsigned long address,
-				     spinlock_t *ptl,
-				     struct list_head *compound_pagelist)
+				     spinlock_t *ptl)
 {
 	struct page *src_page;
 	pte_t *_pte;
@@ -804,11 +769,9 @@ static int __collapse_huge_page_copy(pte_t *pte,
 	}
 
 	if (likely(result == SCAN_SUCCEED))
-		__collapse_huge_page_copy_succeeded(pte, vma, address, ptl,
-						    compound_pagelist);
+		__collapse_huge_page_copy_succeeded(pte, vma, address, ptl);
 	else
-		__collapse_huge_page_copy_failed(pte, pmd, orig_pmd, vma,
-						 compound_pagelist);
+		__collapse_huge_page_copy_failed(pte, pmd, orig_pmd, vma);
 
 	return result;
 }
@@ -1081,7 +1044,6 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 			      int referenced, int unmapped,
 			      struct collapse_control *cc)
 {
-	LIST_HEAD(compound_pagelist);
 	pmd_t *pmd, _pmd;
 	pte_t *pte;
 	pgtable_t pgtable;
@@ -1168,8 +1130,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 
 	pte = pte_offset_map_lock(mm, &_pmd, address, &pte_ptl);
 	if (pte) {
-		result = __collapse_huge_page_isolate(vma, address, pte, cc,
-						      &compound_pagelist);
+		result = __collapse_huge_page_isolate(vma, address, pte, cc);
 		spin_unlock(pte_ptl);
 	} else {
 		result = SCAN_PMD_NULL;
@@ -1198,8 +1159,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 	anon_vma_unlock_write(vma->anon_vma);
 
 	result = __collapse_huge_page_copy(pte, hpage, pmd, _pmd,
-					   vma, address, pte_ptl,
-					   &compound_pagelist);
+					   vma, address, pte_ptl);
 	pte_unmap(pte);
 	if (unlikely(result != SCAN_SUCCEED))
 		goto out_up_write;
-- 
2.40.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ