[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <4834724ec012da591863371cf4423d1971771d99.1649370874.git.khalid.aziz@oracle.com>
Date: Mon, 11 Apr 2022 10:05:58 -0600
From: Khalid Aziz <khalid.aziz@...cle.com>
To: akpm@...ux-foundation.org, willy@...radead.org
Cc: Khalid Aziz <khalid.aziz@...cle.com>, aneesh.kumar@...ux.ibm.com,
arnd@...db.de, 21cnbao@...il.com, corbet@....net,
dave.hansen@...ux.intel.com, david@...hat.com,
ebiederm@...ssion.com, hagen@...u.net, jack@...e.cz,
keescook@...omium.org, kirill@...temov.name, kucharsk@...il.com,
linkinjeon@...nel.org, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-mm@...ck.org,
longpeng2@...wei.com, luto@...nel.org, markhemm@...glemail.com,
pcc@...gle.com, rppt@...nel.org, sieberf@...zon.com,
sjpark@...zon.de, surenb@...gle.com, tst@...oebel-theuer.de,
yzaikin@...gle.com
Subject: [PATCH v1 14/14] mm/mshare: Copy PTEs to host mm
VMAs for shared addresses are hosted by a separate host mm. Copy over
original PTEs from the donor process to host mm so the PTEs are
maintained independent of donor process.
Signed-off-by: Khalid Aziz <khalid.aziz@...cle.com>
---
include/linux/mm.h | 2 ++
mm/memory.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++
mm/mshare.c | 14 +++++---------
3 files changed, 55 insertions(+), 9 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d9456d424202..78c22891a792 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1845,6 +1845,8 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor, unsigned long ceiling);
int
copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
+int
+mshare_copy_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
int follow_invalidate_pte(struct mm_struct *mm, unsigned long address,
struct mmu_notifier_range *range, pte_t **ptepp,
pmd_t **pmdpp, spinlock_t **ptlp);
diff --git a/mm/memory.c b/mm/memory.c
index e7c5bc6f8836..9010d68f053a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1234,6 +1234,54 @@ copy_p4d_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
return 0;
}
+/*
+ * Copy PTEs for mshare'd pages.
+ * This code is based upon copy_page_range()
+ */
+int
+mshare_copy_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
+{
+ pgd_t *src_pgd, *dst_pgd;
+ unsigned long next;
+ unsigned long addr = src_vma->vm_start;
+ unsigned long end = src_vma->vm_end;
+ struct mm_struct *dst_mm = dst_vma->vm_mm;
+ struct mm_struct *src_mm = src_vma->vm_mm;
+ struct mmu_notifier_range range;
+ int ret = 0;
+
+ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
+ 0, src_vma, src_mm, addr, end);
+ mmu_notifier_invalidate_range_start(&range);
+ /*
+ * Disabling preemption is not needed for the write side, as
+ * the read side doesn't spin, but goes to the mmap_lock.
+ *
+ * Use the raw variant of the seqcount_t write API to avoid
+ * lockdep complaining about preemptibility.
+ */
+ mmap_assert_write_locked(src_mm);
+ raw_write_seqcount_begin(&src_mm->write_protect_seq);
+
+ dst_pgd = pgd_offset(dst_mm, addr);
+ src_pgd = pgd_offset(src_mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(src_pgd))
+ continue;
+ if (unlikely(copy_p4d_range(dst_vma, src_vma, dst_pgd, src_pgd,
+ addr, next))) {
+ ret = -ENOMEM;
+ break;
+ }
+ } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+
+ raw_write_seqcount_end(&src_mm->write_protect_seq);
+ mmu_notifier_invalidate_range_end(&range);
+
+ return ret;
+}
+
int
copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
{
diff --git a/mm/mshare.c b/mm/mshare.c
index fba31f3c190f..a399234bf106 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -385,7 +385,6 @@ SYSCALL_DEFINE5(mshare, const char __user *, name, unsigned long, addr,
* Copy this vma over to host mm
*/
vma->vm_private_data = info;
- vma->vm_mm = new_mm;
vma->vm_flags |= VM_SHARED_PT;
new_vma = vm_area_dup(vma);
if (!new_vma) {
@@ -394,6 +393,7 @@ SYSCALL_DEFINE5(mshare, const char __user *, name, unsigned long, addr,
err = -ENOMEM;
goto free_info;
}
+ new_vma->vm_mm = new_mm;
err = insert_vm_struct(new_mm, new_vma);
if (err) {
mmap_write_unlock(new_mm);
@@ -402,17 +402,13 @@ SYSCALL_DEFINE5(mshare, const char __user *, name, unsigned long, addr,
goto free_info;
}
+ /* Copy over current PTEs */
+ err = mshare_copy_ptes(new_vma, vma);
+ if (err != 0)
+ goto free_info;
vma = vma->vm_next;
}
- /*
- * Copy over current PTEs
- */
- myaddr = addr;
- while (myaddr < new_mm->task_size) {
- *pgd_offset(new_mm, myaddr) = *pgd_offset(old_mm, myaddr);
- myaddr += PGDIR_SIZE;
- }
/*
* TODO: Free the corresponding page table in calling
* process
--
2.32.0
Powered by blists - more mailing lists