Message-ID: <CAJuCfpGb5Amo9Sk0yyruJt9NKaYe9-y+5jmU442NSf3+VT5-dA@mail.gmail.com>
Date: Wed, 27 Sep 2023 11:07:48 -0700
From: Suren Baghdasaryan <surenb@...gle.com>
To: Jann Horn <jannh@...gle.com>
Cc: akpm@...ux-foundation.org, viro@...iv.linux.org.uk,
brauner@...nel.org, shuah@...nel.org, aarcange@...hat.com,
lokeshgidra@...gle.com, peterx@...hat.com, david@...hat.com,
hughd@...gle.com, mhocko@...e.com, axelrasmussen@...gle.com,
rppt@...nel.org, willy@...radead.org, Liam.Howlett@...cle.com,
zhangpeng362@...wei.com, bgeffon@...gle.com,
kaleshsingh@...gle.com, ngeoffray@...gle.com, jdduke@...gle.com,
linux-mm@...ck.org, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-kselftest@...r.kernel.org,
kernel-team@...roid.com
Subject: Re: [PATCH v2 2/3] userfaultfd: UFFDIO_REMAP uABI
On Wed, Sep 27, 2023 at 5:47 AM Jann Horn <jannh@...gle.com> wrote:
>
> On Sat, Sep 23, 2023 at 3:31 AM Suren Baghdasaryan <surenb@...gle.com> wrote:
> > From: Andrea Arcangeli <aarcange@...hat.com>
> >
> > This implements the uABI of UFFDIO_REMAP.
> >
> > Notably one mode bitflag is also forwarded (and in turn known) by the
> > lowlevel remap_pages method.
> >
> > Signed-off-by: Andrea Arcangeli <aarcange@...hat.com>
> > Signed-off-by: Suren Baghdasaryan <surenb@...gle.com>
> [...]
> > +int remap_pages_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
> > + pmd_t *dst_pmd, pmd_t *src_pmd, pmd_t dst_pmdval,
> > + struct vm_area_struct *dst_vma,
> > + struct vm_area_struct *src_vma,
> > + unsigned long dst_addr, unsigned long src_addr)
> > +{
> > + pmd_t _dst_pmd, src_pmdval;
> > + struct page *src_page;
> > + struct folio *src_folio;
> > + struct anon_vma *src_anon_vma, *dst_anon_vma;
> > + spinlock_t *src_ptl, *dst_ptl;
> > + pgtable_t src_pgtable, dst_pgtable;
> > + struct mmu_notifier_range range;
> > + int err = 0;
> > +
> > + src_pmdval = *src_pmd;
> > + src_ptl = pmd_lockptr(src_mm, src_pmd);
> > +
> > + BUG_ON(!spin_is_locked(src_ptl));
> > + mmap_assert_locked(src_mm);
> > + mmap_assert_locked(dst_mm);
> > +
> > + BUG_ON(!pmd_trans_huge(src_pmdval));
> > + BUG_ON(!pmd_none(dst_pmdval));
> > + BUG_ON(src_addr & ~HPAGE_PMD_MASK);
> > + BUG_ON(dst_addr & ~HPAGE_PMD_MASK);
> > +
> > + src_page = pmd_page(src_pmdval);
> > + if (unlikely(!PageAnonExclusive(src_page))) {
> > + spin_unlock(src_ptl);
> > + return -EBUSY;
> > + }
> > +
> > + src_folio = page_folio(src_page);
> > + folio_get(src_folio);
> > + spin_unlock(src_ptl);
> > +
> > + /* preallocate dst_pgtable if needed */
> > + if (dst_mm != src_mm) {
> > + dst_pgtable = pte_alloc_one(dst_mm);
> > + if (unlikely(!dst_pgtable)) {
> > + err = -ENOMEM;
> > + goto put_folio;
> > + }
> > + } else {
> > + dst_pgtable = NULL;
> > + }
> > +
> > + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, src_mm, src_addr,
> > + src_addr + HPAGE_PMD_SIZE);
> > + mmu_notifier_invalidate_range_start(&range);
> > +
> > + /* block all concurrent rmap walks */
> > + folio_lock(src_folio);
> > +
> > + /*
> > + * split_huge_page walks the anon_vma chain without the page
> > + * lock. Serialize against it with the anon_vma lock, the page
> > + * lock is not enough.
> > + */
> > + src_anon_vma = folio_get_anon_vma(src_folio);
> > + if (!src_anon_vma) {
> > + err = -EAGAIN;
> > + goto unlock_folio;
> > + }
> > + anon_vma_lock_write(src_anon_vma);
> > +
> > + dst_ptl = pmd_lockptr(dst_mm, dst_pmd);
> > + double_pt_lock(src_ptl, dst_ptl);
> > + if (unlikely(!pmd_same(*src_pmd, src_pmdval) ||
> > + !pmd_same(*dst_pmd, dst_pmdval) ||
> > + folio_mapcount(src_folio) != 1)) {
>
> I think this is also supposed to be PageAnonExclusive()?
Yes. Will fix.
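Roughly like this, I suppose (untested sketch; the pmd_same() checks
stay as they are and the exclusivity re-check under both PT locks
becomes a separate -EBUSY case):

	double_pt_lock(src_ptl, dst_ptl);
	if (unlikely(!pmd_same(*src_pmd, src_pmdval) ||
		     !pmd_same(*dst_pmd, dst_pmdval))) {
		double_pt_unlock(src_ptl, dst_ptl);
		err = -EAGAIN;
		goto put_anon_vma;
	}
	/* Re-check that the page is still mapped exclusively. */
	if (!PageAnonExclusive(src_page)) {
		double_pt_unlock(src_ptl, dst_ptl);
		err = -EBUSY;
		goto put_anon_vma;
	}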
>
> > + double_pt_unlock(src_ptl, dst_ptl);
> > + err = -EAGAIN;
> > + goto put_anon_vma;
> > + }
> > +
> > + BUG_ON(!folio_test_head(src_folio));
> > + BUG_ON(!folio_test_anon(src_folio));
> > +
> > + dst_anon_vma = (void *)dst_vma->anon_vma + PAGE_MAPPING_ANON;
> > + WRITE_ONCE(src_folio->mapping, (struct address_space *) dst_anon_vma);
> > + WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
> > +
> > + src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd);
> > + _dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot);
> > + _dst_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma);
> > + set_pmd_at(dst_mm, dst_addr, dst_pmd, _dst_pmd);
> > +
> > + src_pgtable = pgtable_trans_huge_withdraw(src_mm, src_pmd);
> > + if (dst_pgtable) {
> > + pgtable_trans_huge_deposit(dst_mm, dst_pmd, dst_pgtable);
> > + pte_free(src_mm, src_pgtable);
> > + dst_pgtable = NULL;
> > +
> > + mm_inc_nr_ptes(dst_mm);
> > + mm_dec_nr_ptes(src_mm);
> > + add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
> > + add_mm_counter(src_mm, MM_ANONPAGES, -HPAGE_PMD_NR);
> > + } else {
> > + pgtable_trans_huge_deposit(dst_mm, dst_pmd, src_pgtable);
> > + }
> > + double_pt_unlock(src_ptl, dst_ptl);
> > +
> > +put_anon_vma:
> > + anon_vma_unlock_write(src_anon_vma);
> > + put_anon_vma(src_anon_vma);
> > +unlock_folio:
> > + /* unblock rmap walks */
> > + folio_unlock(src_folio);
> > + mmu_notifier_invalidate_range_end(&range);
> > + if (dst_pgtable)
> > + pte_free(dst_mm, dst_pgtable);
> > +put_folio:
> > + folio_put(src_folio);
> > +
> > + return err;
> > +}
> > +#endif /* CONFIG_USERFAULTFD */
> [...]
> > +static int remap_anon_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
> > + struct vm_area_struct *dst_vma,
> > + struct vm_area_struct *src_vma,
> > + unsigned long dst_addr, unsigned long src_addr,
> > + pte_t *dst_pte, pte_t *src_pte,
> > + pte_t orig_dst_pte, pte_t orig_src_pte,
> > + spinlock_t *dst_ptl, spinlock_t *src_ptl,
> > + struct folio *src_folio)
> > +{
> > + struct anon_vma *dst_anon_vma;
> > +
> > + double_pt_lock(dst_ptl, src_ptl);
> > +
> > + if (!pte_same(*src_pte, orig_src_pte) ||
> > + !pte_same(*dst_pte, orig_dst_pte) ||
> > + folio_test_large(src_folio) ||
> > + folio_estimated_sharers(src_folio) != 1) {
> > + double_pt_unlock(dst_ptl, src_ptl);
> > + return -EAGAIN;
> > + }
> > +
> > + BUG_ON(!folio_test_anon(src_folio));
> > +
> > + dst_anon_vma = (void *)dst_vma->anon_vma + PAGE_MAPPING_ANON;
> > + WRITE_ONCE(src_folio->mapping,
> > + (struct address_space *) dst_anon_vma);
> > + WRITE_ONCE(src_folio->index, linear_page_index(dst_vma,
> > + dst_addr));
> > +
> > + orig_src_pte = ptep_clear_flush(src_vma, src_addr, src_pte);
> > + orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
> > + orig_dst_pte = maybe_mkwrite(pte_mkdirty(orig_dst_pte),
> > + dst_vma);
>
> I think there's still a theoretical issue here that you could fix by
> checking for the AnonExclusive flag, similar to the huge page case.
>
> Consider the following scenario:
>
> 1. process P1 does a write fault in a private anonymous VMA, creating
> and mapping a new anonymous page A1
> 2. process P1 forks and creates two children P2 and P3. afterwards, A1
> is mapped in P1, P2 and P3 as a COW page, with mapcount 3.
> 3. process P1 removes its mapping of A1, dropping its mapcount to 2.
> 4. process P2 uses vmsplice() to grab a reference to A1 with get_user_pages()
> 5. process P2 removes its mapping of A1, dropping its mapcount to 1.
>
> If at this point P3 does a write fault on its mapping of A1, it will
> still trigger copy-on-write thanks to the AnonExclusive mechanism; and
> this is necessary to avoid P3 mapping A1 as writable and writing data
> into it that will become visible to P2, if P2 and P3 are in different
> security contexts.
>
> But if P3 instead moves its mapping of A1 to another address with
> remap_anon_pte() which only does a page mapcount check, the
> maybe_mkwrite() will directly make the mapping writable, circumventing
> the AnonExclusive mechanism.
I see. Thanks for the detailed explanation! I will add a
PageAnonExclusive() check in this path to prevent this scenario.
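I'm thinking of something along these lines in remap_anon_pte(), right
after the existing pte_same()/mapcount checks (untested sketch,
mirroring the -EBUSY handling of the huge pmd path):

	/* The folio must still be mapped exclusively at src_addr. */
	if (!PageAnonExclusive(&src_folio->page)) {
		double_pt_unlock(dst_ptl, src_ptl);
		return -EBUSY;
	}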
>
> > + set_pte_at(dst_mm, dst_addr, dst_pte, orig_dst_pte);
> > +
> > + if (dst_mm != src_mm) {
> > + inc_mm_counter(dst_mm, MM_ANONPAGES);
> > + dec_mm_counter(src_mm, MM_ANONPAGES);
> > + }
> > +
> > + double_pt_unlock(dst_ptl, src_ptl);
> > +
> > + return 0;
> > +}
> > +
> > +static int remap_swap_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
> > + unsigned long dst_addr, unsigned long src_addr,
> > + pte_t *dst_pte, pte_t *src_pte,
> > + pte_t orig_dst_pte, pte_t orig_src_pte,
> > + spinlock_t *dst_ptl, spinlock_t *src_ptl)
> > +{
> > + if (!pte_swp_exclusive(orig_src_pte))
> > + return -EBUSY;
> > +
> > + double_pt_lock(dst_ptl, src_ptl);
> > +
> > + if (!pte_same(*src_pte, orig_src_pte) ||
> > + !pte_same(*dst_pte, orig_dst_pte)) {
> > + double_pt_unlock(dst_ptl, src_ptl);
> > + return -EAGAIN;
> > + }
> > +
> > + orig_src_pte = ptep_get_and_clear(src_mm, src_addr, src_pte);
> > + set_pte_at(dst_mm, dst_addr, dst_pte, orig_src_pte);
> > +
> > + if (dst_mm != src_mm) {
> > + inc_mm_counter(dst_mm, MM_ANONPAGES);
> > + dec_mm_counter(src_mm, MM_ANONPAGES);
>
> I think this is the wrong counter. Looking at zap_pte_range(), in the
> "Genuine swap entry" case, we modify the MM_SWAPENTS counter, not
> MM_ANONPAGES.
Oops, my bad. Will fix.
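IOW mirror what zap_pte_range() does for a genuine swap entry,
something like (untested):

	if (dst_mm != src_mm) {
		inc_mm_counter(dst_mm, MM_SWAPENTS);
		dec_mm_counter(src_mm, MM_SWAPENTS);
	}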
>
> > + }
> > +
> > + double_pt_unlock(dst_ptl, src_ptl);
> > +
> > + return 0;
> > +}
> > +
> > +/*
> > + * The mmap_lock for reading is held by the caller. Just move the page
> > + * from src_pmd to dst_pmd if possible, and return true if succeeded
> > + * in moving the page.
> > + */
> > +static int remap_pages_pte(struct mm_struct *dst_mm,
> > + struct mm_struct *src_mm,
> > + pmd_t *dst_pmd,
> > + pmd_t *src_pmd,
> > + struct vm_area_struct *dst_vma,
> > + struct vm_area_struct *src_vma,
> > + unsigned long dst_addr,
> > + unsigned long src_addr,
> > + __u64 mode)
> > +{
> > + swp_entry_t entry;
> > + pte_t orig_src_pte, orig_dst_pte;
> > + spinlock_t *src_ptl, *dst_ptl;
> > + pte_t *src_pte = NULL;
> > + pte_t *dst_pte = NULL;
> > +
> > + struct folio *src_folio = NULL;
> > + struct anon_vma *src_anon_vma = NULL;
> > + struct mmu_notifier_range range;
> > + int err = 0;
> > +
> > + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, src_mm,
> > + src_addr, src_addr + PAGE_SIZE);
> > + mmu_notifier_invalidate_range_start(&range);
> > +retry:
>
> This retry looks a bit dodgy. On this retry label, we restart almost
> the entire operation, including re-reading the source PTE; the only
> variables that carry state forward from the previous retry loop
> iteration are src_folio and src_anon_vma.
>
> > + dst_pte = pte_offset_map_nolock(dst_mm, dst_pmd, dst_addr, &dst_ptl);
> > +
> > + /* If an huge pmd materialized from under us fail */
> > + if (unlikely(!dst_pte)) {
> > + err = -EFAULT;
> > + goto out;
> > + }
> [...]
> > + spin_lock(dst_ptl);
> > + orig_dst_pte = *dst_pte;
> > + spin_unlock(dst_ptl);
> > + if (!pte_none(orig_dst_pte)) {
> > + err = -EEXIST;
> > + goto out;
> > + }
> > +
> > + spin_lock(src_ptl);
> > + orig_src_pte = *src_pte;
>
> Here we read an entirely new orig_src_pte value. Something like a
> concurrent MADV_DONTNEED+pagefault could have made the PTE point to a
> different page between loop iterations.
>
> > + spin_unlock(src_ptl);
>
> I think you have to insert something like the following here:
>
> if (src_folio && !pte_same(orig_src_pte, previous_src_pte)) {
>         err = -EAGAIN;
>         goto out;
> }
> previous_src_pte = orig_src_pte;
Yes, this definitely needs to be rechecked. Will fix.
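Something along the lines of your suggestion (untested;
previous_src_pte would be a new pte_t local next to orig_src_pte),
placed right after re-reading the source PTE:

	spin_lock(src_ptl);
	orig_src_pte = *src_pte;
	spin_unlock(src_ptl);

	/*
	 * If we are retrying with a pinned src_folio/src_anon_vma, the PTE
	 * must still be the one they were obtained from; otherwise drop
	 * everything and let the caller retry from scratch.
	 */
	if (src_folio && !pte_same(orig_src_pte, previous_src_pte)) {
		err = -EAGAIN;
		goto out;
	}
	previous_src_pte = orig_src_pte;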
>
> Otherwise:
>
> > + if (pte_none(orig_src_pte)) {
> > + if (!(mode & UFFDIO_REMAP_MODE_ALLOW_SRC_HOLES))
> > + err = -ENOENT;
> > + else /* nothing to do to remap a hole */
> > + err = 0;
> > + goto out;
> > + }
> > +
> > + if (pte_present(orig_src_pte)) {
> > + /*
> > + * Pin and lock both source folio and anon_vma. Since we are in
> > + * RCU read section, we can't block, so on contention have to
> > + * unmap the ptes, obtain the lock and retry.
> > + */
> > + if (!src_folio) {
>
> If we already found a src_folio in the previous iteration (but the
> trylock failed), we keep using the same src_folio without rechecking
> if the current PTE still points to the same folio.
>
> > + struct folio *folio;
> > +
> > + /*
> > + * Pin the page while holding the lock to be sure the
> > + * page isn't freed under us
> > + */
> > + spin_lock(src_ptl);
> > + if (!pte_same(orig_src_pte, *src_pte)) {
> > + spin_unlock(src_ptl);
> > + err = -EAGAIN;
> > + goto out;
> > + }
> > +
> > + folio = vm_normal_folio(src_vma, src_addr, orig_src_pte);
> > + if (!folio || !folio_test_anon(folio) ||
> > + folio_test_large(folio) ||
> > + folio_estimated_sharers(folio) != 1) {
> > + spin_unlock(src_ptl);
> > + err = -EBUSY;
> > + goto out;
> > + }
> > +
> > + folio_get(folio);
> > + src_folio = folio;
> > + spin_unlock(src_ptl);
> > +
> > + /* block all concurrent rmap walks */
> > + if (!folio_trylock(src_folio)) {
> > + pte_unmap(&orig_src_pte);
> > + pte_unmap(&orig_dst_pte);
> > + src_pte = dst_pte = NULL;
> > + /* now we can block and wait */
> > + folio_lock(src_folio);
> > + goto retry;
> > + }
> > + }
> > +
> > + if (!src_anon_vma) {
>
> (And here, if we already saw a src_anon_vma but the trylock failed,
> we'll keep using that src_anon_vma.)
Ack. The check for previous_src_pte should handle that as well.
>
> > + /*
> > + * folio_referenced walks the anon_vma chain
> > + * without the folio lock. Serialize against it with
> > + * the anon_vma lock, the folio lock is not enough.
> > + */
> > + src_anon_vma = folio_get_anon_vma(src_folio);
> > + if (!src_anon_vma) {
> > + /* page was unmapped from under us */
> > + err = -EAGAIN;
> > + goto out;
> > + }
> > + if (!anon_vma_trylock_write(src_anon_vma)) {
> > + pte_unmap(&orig_src_pte);
> > + pte_unmap(&orig_dst_pte);
> > + src_pte = dst_pte = NULL;
> > + /* now we can block and wait */
> > + anon_vma_lock_write(src_anon_vma);
> > + goto retry;
> > + }
> > + }
>
> So at this point we have:
>
> - the current src_pte
> - some referenced+locked src_folio that used to be mapped exclusively
> at src_addr
> - (the anon_vma associated with the src_folio)
>
> > + err = remap_anon_pte(dst_mm, src_mm, dst_vma, src_vma,
> > + dst_addr, src_addr, dst_pte, src_pte,
> > + orig_dst_pte, orig_src_pte,
> > + dst_ptl, src_ptl, src_folio);
>
> And then this will, without touching folio mapcounts/refcounts, delete
> the current PTE at src_addr, and create a PTE at dst_addr pointing to
> the old src_folio, leading to incorrect refcounts/mapcounts?
I assume this still refers to the missing previous_src_pte check
discussed in the previous comments. Is that correct, or is there yet
another issue?
>
> > + } else {
> [...]
> > + }
> > +
> > +out:
> > + if (src_anon_vma) {
> > + anon_vma_unlock_write(src_anon_vma);
> > + put_anon_vma(src_anon_vma);
> > + }
> > + if (src_folio) {
> > + folio_unlock(src_folio);
> > + folio_put(src_folio);
> > + }
> > + if (dst_pte)
> > + pte_unmap(dst_pte);
> > + if (src_pte)
> > + pte_unmap(src_pte);
> > + mmu_notifier_invalidate_range_end(&range);
> > +
> > + return err;
> > +}
> [...]
> > +ssize_t remap_pages(struct mm_struct *dst_mm, struct mm_struct *src_mm,
> > + unsigned long dst_start, unsigned long src_start,
> > + unsigned long len, __u64 mode)
> > +{
> > + struct vm_area_struct *src_vma, *dst_vma;
> > + unsigned long src_addr, dst_addr;
> > + pmd_t *src_pmd, *dst_pmd;
> > + long err = -EINVAL;
> > + ssize_t moved = 0;
> > +
> > + /*
> > + * Sanitize the command parameters:
> > + */
> > + BUG_ON(src_start & ~PAGE_MASK);
> > + BUG_ON(dst_start & ~PAGE_MASK);
> > + BUG_ON(len & ~PAGE_MASK);
> > +
> > + /* Does the address range wrap, or is the span zero-sized? */
> > + BUG_ON(src_start + len <= src_start);
> > + BUG_ON(dst_start + len <= dst_start);
> > +
> > + /*
> > + * Because these are read sempahores there's no risk of lock
> > + * inversion.
> > + */
> > + mmap_read_lock(dst_mm);
> > + if (dst_mm != src_mm)
> > + mmap_read_lock(src_mm);
> > +
> > + /*
> > + * Make sure the vma is not shared, that the src and dst remap
> > + * ranges are both valid and fully within a single existing
> > + * vma.
> > + */
> > + src_vma = find_vma(src_mm, src_start);
> > + if (!src_vma || (src_vma->vm_flags & VM_SHARED))
> > + goto out;
> > + if (src_start < src_vma->vm_start ||
> > + src_start + len > src_vma->vm_end)
> > + goto out;
> > +
> > + dst_vma = find_vma(dst_mm, dst_start);
> > + if (!dst_vma || (dst_vma->vm_flags & VM_SHARED))
> > + goto out;
> > + if (dst_start < dst_vma->vm_start ||
> > + dst_start + len > dst_vma->vm_end)
> > + goto out;
> > +
> > + err = validate_remap_areas(src_vma, dst_vma);
> > + if (err)
> > + goto out;
> > +
> > + for (src_addr = src_start, dst_addr = dst_start;
> > + src_addr < src_start + len;) {
> > + spinlock_t *ptl;
> > + pmd_t dst_pmdval;
> > + unsigned long step_size;
> > +
> > + BUG_ON(dst_addr >= dst_start + len);
> > + /*
> > + * Below works because anonymous area would not have a
> > + * transparent huge PUD. If file-backed support is added,
> > + * that case would need to be handled here.
> > + */
> > + src_pmd = mm_find_pmd(src_mm, src_addr);
> > + if (unlikely(!src_pmd)) {
> > + if (!(mode & UFFDIO_REMAP_MODE_ALLOW_SRC_HOLES)) {
> > + err = -ENOENT;
> > + break;
> > + }
> > + src_pmd = mm_alloc_pmd(src_mm, src_addr);
> > + if (unlikely(!src_pmd)) {
> > + err = -ENOMEM;
> > + break;
> > + }
> > + }
> > + dst_pmd = mm_alloc_pmd(dst_mm, dst_addr);
> > + if (unlikely(!dst_pmd)) {
> > + err = -ENOMEM;
> > + break;
> > + }
> > +
> > + dst_pmdval = pmdp_get_lockless(dst_pmd);
> > + /*
> > + * If the dst_pmd is mapped as THP don't override it and just
> > + * be strict. If dst_pmd changes into TPH after this check, the
> > + * remap_pages_huge_pmd() will detect the change and retry
> > + * while remap_pages_pte() will detect the change and fail.
> > + */
> > + if (unlikely(pmd_trans_huge(dst_pmdval))) {
> > + err = -EEXIST;
> > + break;
> > + }
> > +
> > + ptl = pmd_trans_huge_lock(src_pmd, src_vma);
> > + if (ptl && !pmd_trans_huge(*src_pmd)) {
> > + spin_unlock(ptl);
> > + ptl = NULL;
> > + }
>
> This still looks wrong - we do still have to split_huge_pmd()
> somewhere so that remap_pages_pte() works.
Hmm, I guess this extra check is not even needed...
>
> > + if (ptl) {
> > + /*
> > + * Check if we can move the pmd without
> > + * splitting it. First check the address
> > + * alignment to be the same in src/dst. These
> > + * checks don't actually need the PT lock but
> > + * it's good to do it here to optimize this
> > + * block away at build time if
> > + * CONFIG_TRANSPARENT_HUGEPAGE is not set.
> > + */
> > + if ((src_addr & ~HPAGE_PMD_MASK) || (dst_addr & ~HPAGE_PMD_MASK) ||
> > + src_start + len - src_addr < HPAGE_PMD_SIZE || !pmd_none(dst_pmdval)) {
> > + spin_unlock(ptl);
> > + split_huge_pmd(src_vma, src_pmd, src_addr);
> > + continue;
> > + }
> > +
> > + err = remap_pages_huge_pmd(dst_mm, src_mm,
> > + dst_pmd, src_pmd,
> > + dst_pmdval,
> > + dst_vma, src_vma,
> > + dst_addr, src_addr);
> > + step_size = HPAGE_PMD_SIZE;
> > + } else {
> > + if (pmd_none(*src_pmd)) {
> > + if (!(mode & UFFDIO_REMAP_MODE_ALLOW_SRC_HOLES)) {
> > + err = -ENOENT;
> > + break;
> > + }
> > + if (unlikely(__pte_alloc(src_mm, src_pmd))) {
> > + err = -ENOMEM;
> > + break;
> > + }
> > + }
> > +
> > + if (unlikely(pte_alloc(dst_mm, dst_pmd))) {
> > + err = -ENOMEM;
> > + break;
> > + }
> > +
> > + err = remap_pages_pte(dst_mm, src_mm,
> > + dst_pmd, src_pmd,
> > + dst_vma, src_vma,
> > + dst_addr, src_addr,
> > + mode);
> > + step_size = PAGE_SIZE;
> > + }
> > +
> > + cond_resched();
> > +
> > + if (!err) {
> > + dst_addr += step_size;
> > + src_addr += step_size;
> > + moved += step_size;
> > + }
> > +
> > + if ((!err || err == -EAGAIN) &&
> > + fatal_signal_pending(current))
> > + err = -EINTR;
> > +
> > + if (err && err != -EAGAIN)
> > + break;
> > + }
> > +
> > +out:
> > + mmap_read_unlock(dst_mm);
> > + if (dst_mm != src_mm)
> > + mmap_read_unlock(src_mm);
> > + BUG_ON(moved < 0);
> > + BUG_ON(err > 0);
> > + BUG_ON(!moved && !err);
> > + return moved ? moved : err;
> > +}
> > --
> > 2.42.0.515.g380fc7ccd1-goog
> >