Message-Id: <1424696322-21952-7-git-send-email-vbabka@suse.cz>
Date:	Mon, 23 Feb 2015 13:58:42 +0100
From:	Vlastimil Babka <vbabka@...e.cz>
To:	linux-mm@...ck.org
Cc:	linux-kernel@...r.kernel.org,
	Andrew Morton <akpm@...ux-foundation.org>,
	Hugh Dickins <hughd@...gle.com>,
	Andrea Arcangeli <aarcange@...hat.com>,
	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
	Rik van Riel <riel@...hat.com>, Mel Gorman <mgorman@...e.de>,
	Michal Hocko <mhocko@...e.cz>,
	Ebru Akagunduz <ebru.akagunduz@...il.com>,
	Alex Thorlton <athorlton@....com>,
	David Rientjes <rientjes@...gle.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...nel.org>,
	Vlastimil Babka <vbabka@...e.cz>
Subject: [RFC 6/6] mm, thp: remove no longer needed khugepaged code

With collapse scanning moved to the processes themselves, we can remove a lot
of code from khugepaged, mostly related to the maintenance of mm_slots, through
which khugepaged used to track which mm's to scan.
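
For context, the bookkeeping being deleted centers on two structures, a
per-mm slot and a single global scan cursor (reproduced, with their
kernel-doc condensed into comments, from the mm/huge_memory.c hunk below):

  struct mm_slot {
  	struct hlist_node hash;		/* chain in the mm_slots_hash lookup table */
  	struct list_head mm_node;	/* entry in khugepaged_scan.mm_head */
  	struct mm_struct *mm;		/* the mm this slot tracks */
  };

  struct khugepaged_scan {
  	struct list_head mm_head;	/* head of the mm list to scan */
  	struct mm_slot *mm_slot;	/* the current mm_slot being scanned */
  	unsigned long address;		/* next address inside it to be scanned */
  };

All of this goes away, together with the mm_slot kmem cache and the
khugepaged_mm_lock spinlock that kept these structures consistent.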

We keep the hooks for vma operations such as khugepaged_enter() only to set
the MMF_VM_HUGEPAGE bit, which enables scanning for the given mm.
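
As an illustration, this is how the retained fork hook reads after the patch
(assembled from the first include/linux/khugepaged.h hunk below); it merely
propagates the flag to the child mm instead of registering a new mm_slot:

  static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
  {
  	/* The parent mm was eligible for collapse scanning, so mark the
  	 * child mm too; no mm_slot allocation or registration anymore. */
  	if (test_bit(MMF_VM_HUGEPAGE, &oldmm->flags))
  		set_bit(MMF_VM_HUGEPAGE, &mm->flags);
  	return 0;
  }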

Signed-off-by: Vlastimil Babka <vbabka@...e.cz>
---
 include/linux/khugepaged.h |  14 +---
 kernel/fork.c              |   1 -
 mm/huge_memory.c           | 193 +--------------------------------------------
 3 files changed, 3 insertions(+), 205 deletions(-)

diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 51b2cc5..5af0f35 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -31,16 +31,10 @@ extern bool khugepaged_scan_mm(struct mm_struct *mm,
 static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
 	if (test_bit(MMF_VM_HUGEPAGE, &oldmm->flags))
-		return __khugepaged_enter(mm);
+		set_bit(MMF_VM_HUGEPAGE, &mm->flags);
 	return 0;
 }
 
-static inline void khugepaged_exit(struct mm_struct *mm)
-{
-	if (test_bit(MMF_VM_HUGEPAGE, &mm->flags))
-		__khugepaged_exit(mm);
-}
-
 static inline int khugepaged_enter(struct vm_area_struct *vma,
 				   unsigned long vm_flags)
 {
@@ -48,8 +42,7 @@ static inline int khugepaged_enter(struct vm_area_struct *vma,
 		if ((khugepaged_always() ||
 		     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
 		    !(vm_flags & VM_NOHUGEPAGE))
-			if (__khugepaged_enter(vma->vm_mm))
-				return -ENOMEM;
+			set_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags);
 	return 0;
 }
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -57,9 +50,6 @@ static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
 	return 0;
 }
-static inline void khugepaged_exit(struct mm_struct *mm)
-{
-}
 static inline int khugepaged_enter(struct vm_area_struct *vma,
 				   unsigned long vm_flags)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index cf65139..5541a9f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -659,7 +659,6 @@ void mmput(struct mm_struct *mm)
 		uprobe_clear_state(mm);
 		exit_aio(mm);
 		ksm_exit(mm);
-		khugepaged_exit(mm); /* must run before exit_mmap */
 		exit_mmap(mm);
 		set_mm_exe_file(mm, NULL);
 		if (!list_empty(&mm->mmlist)) {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9172c7f..f497e6b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -56,7 +56,6 @@ unsigned int khugepaged_scan_sleep_millisecs __read_mostly = 10000;
 static unsigned int khugepaged_alloc_sleep_millisecs __read_mostly = 60000;
 static struct task_struct *khugepaged_thread __read_mostly;
 static DEFINE_MUTEX(khugepaged_mutex);
-static DEFINE_SPINLOCK(khugepaged_mm_lock);
 static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
 /*
  * default collapse hugepages if there is at least one pte mapped like
@@ -66,41 +65,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
 static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1;
 
 static int khugepaged(void *none);
-static int khugepaged_slab_init(void);
 
-#define MM_SLOTS_HASH_BITS 10
-static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
-
-static struct kmem_cache *mm_slot_cache __read_mostly;
-
-/**
- * struct mm_slot - hash lookup from mm to mm_slot
- * @hash: hash collision list
- * @mm_node: khugepaged scan list headed in khugepaged_scan.mm_head
- * @mm: the mm that this information is valid for
- */
-struct mm_slot {
-	struct hlist_node hash;
-	struct list_head mm_node;
-	struct mm_struct *mm;
-};
-
-/**
- * struct khugepaged_scan - cursor for scanning
- * @mm_head: the head of the mm list to scan
- * @mm_slot: the current mm_slot we are scanning
- * @address: the next address inside that to be scanned
- *
- * There is only the one khugepaged_scan instance of this cursor structure.
- */
-struct khugepaged_scan {
-	struct list_head mm_head;
-	struct mm_slot *mm_slot;
-	unsigned long address;
-};
-static struct khugepaged_scan khugepaged_scan = {
-	.mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
-};
 static nodemask_t thp_avail_nodes = NODE_MASK_ALL;
 
 static int set_recommended_min_free_kbytes(void)
@@ -601,21 +566,12 @@ delete_obj:
 	return err;
 }
 
-static void __init hugepage_exit_sysfs(struct kobject *hugepage_kobj)
-{
-	sysfs_remove_group(hugepage_kobj, &khugepaged_attr_group);
-	sysfs_remove_group(hugepage_kobj, &hugepage_attr_group);
-	kobject_put(hugepage_kobj);
-}
 #else
 static inline int hugepage_init_sysfs(struct kobject **hugepage_kobj)
 {
 	return 0;
 }
 
-static inline void hugepage_exit_sysfs(struct kobject *hugepage_kobj)
-{
-}
 #endif /* CONFIG_SYSFS */
 
 static int __init hugepage_init(void)
@@ -632,10 +588,6 @@ static int __init hugepage_init(void)
 	if (err)
 		return err;
 
-	err = khugepaged_slab_init();
-	if (err)
-		goto out;
-
 	register_shrinker(&huge_zero_page_shrinker);
 
 	/*
@@ -649,9 +601,6 @@ static int __init hugepage_init(void)
 	start_khugepaged();
 
 	return 0;
-out:
-	hugepage_exit_sysfs(hugepage_kobj);
-	return err;
 }
 subsys_initcall(hugepage_init);
 
@@ -1979,83 +1928,6 @@ int hugepage_madvise(struct vm_area_struct *vma,
 	return 0;
 }
 
-static int __init khugepaged_slab_init(void)
-{
-	mm_slot_cache = kmem_cache_create("khugepaged_mm_slot",
-					  sizeof(struct mm_slot),
-					  __alignof__(struct mm_slot), 0, NULL);
-	if (!mm_slot_cache)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static inline struct mm_slot *alloc_mm_slot(void)
-{
-	if (!mm_slot_cache)	/* initialization failed */
-		return NULL;
-	return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
-}
-
-static inline void free_mm_slot(struct mm_slot *mm_slot)
-{
-	kmem_cache_free(mm_slot_cache, mm_slot);
-}
-
-static struct mm_slot *get_mm_slot(struct mm_struct *mm)
-{
-	struct mm_slot *mm_slot;
-
-	hash_for_each_possible(mm_slots_hash, mm_slot, hash, (unsigned long)mm)
-		if (mm == mm_slot->mm)
-			return mm_slot;
-
-	return NULL;
-}
-
-static void insert_to_mm_slots_hash(struct mm_struct *mm,
-				    struct mm_slot *mm_slot)
-{
-	mm_slot->mm = mm;
-	hash_add(mm_slots_hash, &mm_slot->hash, (long)mm);
-}
-
-static inline int khugepaged_test_exit(struct mm_struct *mm)
-{
-	return atomic_read(&mm->mm_users) == 0;
-}
-
-int __khugepaged_enter(struct mm_struct *mm)
-{
-	struct mm_slot *mm_slot;
-	int wakeup;
-
-	mm_slot = alloc_mm_slot();
-	if (!mm_slot)
-		return -ENOMEM;
-
-	/* __khugepaged_exit() must not run from under us */
-	VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
-	if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
-		free_mm_slot(mm_slot);
-		return 0;
-	}
-
-	spin_lock(&khugepaged_mm_lock);
-	insert_to_mm_slots_hash(mm, mm_slot);
-	/*
-	 * Insert just behind the scanning cursor, to let the area settle
-	 * down a little.
-	 */
-	wakeup = list_empty(&khugepaged_scan.mm_head);
-	list_add_tail(&mm_slot->mm_node, &khugepaged_scan.mm_head);
-	spin_unlock(&khugepaged_mm_lock);
-
-	atomic_inc(&mm->mm_count);
-
-	return 0;
-}
-
 int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 			       unsigned long vm_flags)
 {
@@ -2077,38 +1949,6 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 	return 0;
 }
 
-void __khugepaged_exit(struct mm_struct *mm)
-{
-	struct mm_slot *mm_slot;
-	int free = 0;
-
-	spin_lock(&khugepaged_mm_lock);
-	mm_slot = get_mm_slot(mm);
-	if (mm_slot && khugepaged_scan.mm_slot != mm_slot) {
-		hash_del(&mm_slot->hash);
-		list_del(&mm_slot->mm_node);
-		free = 1;
-	}
-	spin_unlock(&khugepaged_mm_lock);
-
-	if (free) {
-		clear_bit(MMF_VM_HUGEPAGE, &mm->flags);
-		free_mm_slot(mm_slot);
-		mmdrop(mm);
-	} else if (mm_slot) {
-		/*
-		 * This is required to serialize against
-		 * khugepaged_test_exit() (which is guaranteed to run
-		 * under mmap sem read mode). Stop here (after we
-		 * return all pagetables will be destroyed) until
-		 * khugepaged has finished working on the pagetables
-		 * under the mmap_sem.
-		 */
-		down_write(&mm->mmap_sem);
-		up_write(&mm->mmap_sem);
-	}
-}
-
 static void release_pte_page(struct page *page)
 {
 	/* 0 stands for page_is_file_cache(page) == false */
@@ -2450,8 +2290,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 * handled by the anon_vma lock + PG_lock.
 	 */
 	down_write(&mm->mmap_sem);
-	if (unlikely(khugepaged_test_exit(mm)))
-		goto out;
+	VM_BUG_ON(atomic_read(&mm->mm_users) == 0);
 
 	vma = find_vma(mm, address);
 	if (!vma)
@@ -2629,29 +2468,6 @@ out:
 	return ret;
 }
 
-static void collect_mm_slot(struct mm_slot *mm_slot)
-{
-	struct mm_struct *mm = mm_slot->mm;
-
-	VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock));
-
-	if (khugepaged_test_exit(mm)) {
-		/* free mm_slot */
-		hash_del(&mm_slot->hash);
-		list_del(&mm_slot->mm_node);
-
-		/*
-		 * Not strictly needed because the mm exited already.
-		 *
-		 * clear_bit(MMF_VM_HUGEPAGE, &mm->flags);
-		 */
-
-		/* khugepaged_mm_lock actually not necessary for the below */
-		free_mm_slot(mm_slot);
-		mmdrop(mm);
-	}
-}
-
 bool khugepaged_scan_mm(struct mm_struct *mm, unsigned long *start, long pages)
 {
 	struct vm_area_struct *vma;
@@ -2750,7 +2566,6 @@ static void khugepaged_wait_work(bool did_alloc)
 
 static int khugepaged(void *none)
 {
-	struct mm_slot *mm_slot;
 	bool did_alloc;
 
 	set_freezable();
@@ -2761,12 +2576,6 @@ static int khugepaged(void *none)
 		khugepaged_wait_work(did_alloc);
 	}
 
-	spin_lock(&khugepaged_mm_lock);
-	mm_slot = khugepaged_scan.mm_slot;
-	khugepaged_scan.mm_slot = NULL;
-	if (mm_slot)
-		collect_mm_slot(mm_slot);
-	spin_unlock(&khugepaged_mm_lock);
 	return 0;
 }
 
-- 
2.1.4
