lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon,  2 Nov 2020 16:28:39 -0800
From:   Mike Kravetz <mike.kravetz@...cle.com>
To:     linux-mm@...ck.org, linux-kernel@...r.kernel.org
Cc:     Hugh Dickins <hughd@...gle.com>,
        Naoya Horiguchi <n-horiguchi@...jp.nec.com>,
        Michal Hocko <mhocko@...nel.org>,
        "Aneesh Kumar K . V" <aneesh.kumar@...ux.vnet.ibm.com>,
        Andrea Arcangeli <aarcange@...hat.com>,
        "Kirill A . Shutemov" <kirill.shutemov@...ux.intel.com>,
        Davidlohr Bueso <dave@...olabs.net>,
        Prakash Sangappa <prakash.sangappa@...cle.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Mike Kravetz <mike.kravetz@...cle.com>, stable@...r.kernel.org
Subject: [PATCH 2/4] hugetlbfs: add hinode_rwsem to hugetlb specific inode

The hugetlb pmd sharing code needs additional synchronization.  This is
because sharing established via a call to huge_pte_alloc() could be undone
before control is returned to the caller.  As a result, the returned
value may be invalid.  Ideally, i_mmap_rwsem would be used for this type
of synchronization.  However, previous attempts at using i_mmap_rwsem
have failed.  This is partly due to conflicts with the existing uses
of i_mmap_rwsem that force a locking order not compatible with its use
for pmd sharing.

Introduce a rwsem (hinode_rwsem) that resides in the hugetlb specific inode
for the purpose of pmd sharing synchronization.  This patch adds the
semaphore to the inode and also provides routines for using the semaphore.
To minimize performance impacts, the routines only acquire the semaphore
if pmd sharing is possible.  In addition, routines which can be used with
lockdep to help ensure proper locking are also added.

Use of the new semaphore and supporting routines will be provided in a
later patch.

Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization")
Cc: <stable@...r.kernel.org>
Signed-off-by: Mike Kravetz <mike.kravetz@...cle.com>
---
 fs/hugetlbfs/inode.c    |  12 ++++
 include/linux/hugetlb.h | 121 ++++++++++++++++++++++++++++++++++++++++
 mm/hugetlb.c            |  13 -----
 3 files changed, 133 insertions(+), 13 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c1057378dbf4..4f1404b9f354 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -85,6 +85,17 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
 	{}
 };
 
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+static inline void init_hinode_rwsem(struct hugetlbfs_inode_info *info)
+{
+	init_rwsem(&info->hinode_rwsem);
+}
+#else
+static inline void init_hinode_rwsem(struct hugetlbfs_inode_info *info)
+{
+}
+#endif
+
 #ifdef CONFIG_NUMA
 static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
 					struct inode *inode, pgoff_t index)
@@ -831,6 +842,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
 		inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
 		inode->i_mapping->private_data = resv_map;
 		info->seals = F_SEAL_SEAL;
+		init_hinode_rwsem(info);
 		switch (mode & S_IFMT) {
 		default:
 			init_special_inode(inode, mode, dev);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ebca2ef02212..c6a59c2dbc30 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -424,6 +424,9 @@ struct hugetlbfs_inode_info {
 	struct shared_policy policy;
 	struct inode vfs_inode;
 	unsigned int seals;
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+	struct rw_semaphore hinode_rwsem;
+#endif
 };
 
 static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
@@ -449,6 +452,101 @@ static inline struct hstate *hstate_inode(struct inode *i)
 {
 	return HUGETLBFS_SB(i->i_sb)->hstate;
 }
+
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+static inline bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+{
+	unsigned long base = addr & PUD_MASK;
+	unsigned long end = base + PUD_SIZE;
+
+	/* check on proper vm_flags and page table alignment */
+	if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
+		return true;
+	return false;
+}
+
+/*
+ * hugetlb specific hinode_rwsem is used for pmd sharing synchronization.
+ * This routine takes the semaphore in read mode if necessary.  If vma is
+ * NULL, the semaphore is always acquired.  If vma is supplied, it is only
+ * acquired when vma_shareable() reports that pmd sharing is possible at
+ * addr; an addr of 0 selects vma->vm_start rounded up to PUD_SIZE.
+ * Returns true if lock was acquired, otherwise false.
+ */
+static inline bool hinode_lock_read(struct inode *inode,
+					struct vm_area_struct *vma,
+					unsigned long addr)
+{
+	if (vma && !addr)
+		addr = round_up(vma->vm_start, PUD_SIZE);
+	if (vma && !vma_shareable(vma, addr))
+		return false;
+
+	down_read(&HUGETLBFS_I(inode)->hinode_rwsem);
+	return true;
+}
+
+static inline void hinode_unlock_read(struct inode *inode)
+{
+	up_read(&HUGETLBFS_I(inode)->hinode_rwsem);
+}
+
+/*
+ * Take hinode_rwsem semaphore in write mode if necessary.  See
+ * hinode_lock_read for details.
+ * Returns true if lock was acquired, otherwise false.
+ */
+static inline bool hinode_lock_write(struct inode *inode,
+					struct vm_area_struct *vma,
+					unsigned long addr)
+{
+	if (vma && !addr)
+		addr = round_up(vma->vm_start, PUD_SIZE);
+	if (vma && !vma_shareable(vma, addr))
+		return false;
+
+	down_write(&HUGETLBFS_I(inode)->hinode_rwsem);
+	return true;
+}
+
+static inline void hinode_unlock_write(struct inode *inode)
+{
+	up_write(&HUGETLBFS_I(inode)->hinode_rwsem);
+}
+
+static inline void hinode_assert_locked(struct address_space *mapping)
+{
+	lockdep_assert_held(&HUGETLBFS_I(mapping->host)->hinode_rwsem);
+}
+
+static inline void hinode_assert_write_locked(struct address_space *mapping)
+{
+	lockdep_assert_held_write(&HUGETLBFS_I(mapping->host)->hinode_rwsem);
+}
+#else
+static inline bool hinode_lock_read(struct inode *inode,
+					struct vm_area_struct *vma,
+					unsigned long addr)
+{
+		return false;
+}
+
+static inline void hinode_unlock_read(struct inode *inode)
+{
+}
+
+static inline bool hinode_lock_write(struct inode *inode,
+					struct vm_area_struct *vma,
+					unsigned long addr)
+{
+	return false;
+}
+
+static inline void hinode_unlock_write(struct inode *inode)
+{
+}
+#endif
+
 #else /* !CONFIG_HUGETLBFS */
 
 #define is_file_hugepages(file)			false
@@ -923,6 +1021,29 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
 					pte_t *ptep, pte_t pte, unsigned long sz)
 {
 }
+
+static inline bool hinode_lock_read(struct inode *inode,
+					struct vm_area_struct *vma,
+					unsigned long addr)
+{
+		return false;
+}
+
+static inline void hinode_unlock_read(struct inode *inode)
+{
+}
+
+static inline bool hinode_lock_write(struct inode *inode,
+					struct vm_area_struct *vma,
+					unsigned long addr)
+{
+	return false;
+}
+
+static inline void hinode_unlock_write(struct inode *inode)
+{
+}
+
 #endif	/* CONFIG_HUGETLB_PAGE */
 
 static inline spinlock_t *huge_pte_lock(struct hstate *h,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8a82b90ca3ee..da57018926e4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5296,19 +5296,6 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
 	return saddr;
 }
 
-static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
-{
-	unsigned long base = addr & PUD_MASK;
-	unsigned long end = base + PUD_SIZE;
-
-	/*
-	 * check on proper vm_flags and page table alignment
-	 */
-	if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
-		return true;
-	return false;
-}
-
 /*
  * Determine if start,end range within vma could be mapped by shared pmd.
  * If yes, adjust start and end to cover range associated with possible
-- 
2.28.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ