[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1373673d-721e-a7a2-166f-244c16f236a3@nvidia.com>
Date: Wed, 20 Feb 2019 15:47:50 -0800
From: John Hubbard <jhubbard@...dia.com>
To: <jglisse@...hat.com>, <linux-mm@...ck.org>
CC: <linux-kernel@...r.kernel.org>,
Ralph Campbell <rcampbell@...dia.com>,
Andrew Morton <akpm@...ux-foundation.org>
Subject: Re: [PATCH 01/10] mm/hmm: use reference counting for HMM struct
On 1/29/19 8:54 AM, jglisse@...hat.com wrote:
> From: Jérôme Glisse <jglisse@...hat.com>
>
> Every time I read the code to check that the HMM structure does not
> vanish before it should, thanks to the many locks protecting its removal,
> I get a headache. Switch to reference counting instead; it is much
> easier to follow and harder to break. This also removes some code that
> is no longer needed with refcounting.
Hi Jerome,
That is an excellent idea. Some review comments below:
[snip]
> static int hmm_invalidate_range_start(struct mmu_notifier *mn,
> const struct mmu_notifier_range *range)
> {
> struct hmm_update update;
> - struct hmm *hmm = range->mm->hmm;
> + struct hmm *hmm = hmm_get(range->mm);
> + int ret;
>
> VM_BUG_ON(!hmm);
>
> + /* Check if hmm_mm_destroy() was call. */
> + if (hmm->mm == NULL)
> + return 0;
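Let's delete that NULL check. It can't provide true protection: it is made
without holding any lock, so nothing visible here prevents hmm->mm from
becoming NULL right after the check passes. If there is a way for that to
race, we need to take another look at the refcounting.
Is there a need for mmgrab()/mmdrop() to keep the mm around while HMM
is using it?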
> +
> update.start = range->start;
> update.end = range->end;
> update.event = HMM_UPDATE_INVALIDATE;
> update.blockable = range->blockable;
> - return hmm_invalidate_range(hmm, true, &update);
> + ret = hmm_invalidate_range(hmm, true, &update);
> + hmm_put(hmm);
> + return ret;
> }
>
> static void hmm_invalidate_range_end(struct mmu_notifier *mn,
> const struct mmu_notifier_range *range)
> {
> struct hmm_update update;
> - struct hmm *hmm = range->mm->hmm;
> + struct hmm *hmm = hmm_get(range->mm);
>
> VM_BUG_ON(!hmm);
>
> + /* Check if hmm_mm_destroy() was call. */
> + if (hmm->mm == NULL)
> + return;
> +
Another one to delete, same reasoning as above.
[snip]
> @@ -717,14 +746,18 @@ int hmm_vma_get_pfns(struct hmm_range *range)
> hmm = hmm_register(vma->vm_mm);
> if (!hmm)
> return -ENOMEM;
> - /* Caller must have registered a mirror, via hmm_mirror_register() ! */
> - if (!hmm->mmu_notifier.ops)
> +
> + /* Check if hmm_mm_destroy() was call. */
> + if (hmm->mm == NULL) {
> + hmm_put(hmm);
> return -EINVAL;
> + }
>
Another hmm->mm NULL check to remove.
[snip]
> @@ -802,25 +842,27 @@ EXPORT_SYMBOL(hmm_vma_get_pfns);
> */
> bool hmm_vma_range_done(struct hmm_range *range)
> {
> - unsigned long npages = (range->end - range->start) >> PAGE_SHIFT;
> - struct hmm *hmm;
> + bool ret = false;
>
> - if (range->end <= range->start) {
> + /* Sanity check this really should not happen. */
> + if (range->hmm == NULL || range->end <= range->start) {
> BUG();
> return false;
> }
>
> - hmm = hmm_register(range->vma->vm_mm);
> - if (!hmm) {
> - memset(range->pfns, 0, sizeof(*range->pfns) * npages);
> - return false;
> - }
> -
> - spin_lock(&hmm->lock);
> + spin_lock(&range->hmm->lock);
> list_del_rcu(&range->list);
> - spin_unlock(&hmm->lock);
> + ret = range->valid;
> + spin_unlock(&range->hmm->lock);
>
> - return range->valid;
> + /* Is the mm still alive ? */
> + if (range->hmm->mm == NULL)
> + ret = false;
And another hmm->mm NULL check to remove here.
> +
> + /* Drop reference taken by hmm_vma_fault() or hmm_vma_get_pfns() */
> + hmm_put(range->hmm);
> + range->hmm = NULL;
> + return ret;
> }
> EXPORT_SYMBOL(hmm_vma_range_done);
>
> @@ -880,6 +922,8 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
> struct hmm *hmm;
> int ret;
>
> + range->hmm = NULL;
> +
> /* Sanity check, this really should not happen ! */
> if (range->start < vma->vm_start || range->start >= vma->vm_end)
> return -EINVAL;
> @@ -891,14 +935,18 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
> hmm_pfns_clear(range, range->pfns, range->start, range->end);
> return -ENOMEM;
> }
> - /* Caller must have registered a mirror using hmm_mirror_register() */
> - if (!hmm->mmu_notifier.ops)
> +
> + /* Check if hmm_mm_destroy() was call. */
> + if (hmm->mm == NULL) {
> + hmm_put(hmm);
> return -EINVAL;
> + }
And another hmm->mm NULL check to remove here as well.
>
> /* FIXME support hugetlb fs */
> if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
> vma_is_dax(vma)) {
> hmm_pfns_special(range);
> + hmm_put(hmm);
> return -EINVAL;
> }
>
> @@ -910,6 +958,7 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
> * operations such has atomic access would not work.
> */
> hmm_pfns_clear(range, range->pfns, range->start, range->end);
> + hmm_put(hmm);
> return -EPERM;
> }
>
> @@ -945,7 +994,16 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
> hmm_pfns_clear(range, &range->pfns[i], hmm_vma_walk.last,
> range->end);
> hmm_vma_range_done(range);
> + hmm_put(hmm);
> + } else {
> + /*
> + * Transfer hmm reference to the range struct it will be drop
> + * inside the hmm_vma_range_done() function (which _must_ be
> + * call if this function return 0).
> + */
> + range->hmm = hmm;
Is that thread-safe? Is there anything preventing two or more threads from
changing range->hmm at the same time?
thanks,
--
John Hubbard
NVIDIA
Powered by blists - more mailing lists