[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Y5fDwH6XiM808oUM@monkey>
Date: Mon, 12 Dec 2022 16:13:52 -0800
From: Mike Kravetz <mike.kravetz@...cle.com>
To: James Houghton <jthoughton@...gle.com>
Cc: Muchun Song <songmuchun@...edance.com>,
Peter Xu <peterx@...hat.com>,
David Hildenbrand <david@...hat.com>,
David Rientjes <rientjes@...gle.com>,
Axel Rasmussen <axelrasmussen@...gle.com>,
Mina Almasry <almasrymina@...gle.com>,
Zach O'Keefe <zokeefe@...gle.com>,
Manish Mishra <manish.mishra@...anix.com>,
Naoya Horiguchi <naoya.horiguchi@....com>,
"Dr . David Alan Gilbert" <dgilbert@...hat.com>,
"Matthew Wilcox (Oracle)" <willy@...radead.org>,
Vlastimil Babka <vbabka@...e.cz>,
Baolin Wang <baolin.wang@...ux.alibaba.com>,
Miaohe Lin <linmiaohe@...wei.com>,
Yang Shi <shy828301@...il.com>,
Andrew Morton <akpm@...ux-foundation.org>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH v2 08/47] hugetlb: add HGM enablement functions
On 10/21/22 16:36, James Houghton wrote:
> Currently it is possible for all shared VMAs to use HGM, but it must be
> enabled first. This is because with HGM, we lose PMD sharing, and page
> table walks require additional synchronization (we need to take the VMA
> lock).
Not sure yet, but I expect Peter's series will help with locking for
hugetlb specific page table walks.
>
> Signed-off-by: James Houghton <jthoughton@...gle.com>
> ---
> include/linux/hugetlb.h | 22 +++++++++++++
> mm/hugetlb.c | 69 +++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 91 insertions(+)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 534958499ac4..6e0c36b08a0c 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -123,6 +123,9 @@ struct hugetlb_vma_lock {
>
> struct hugetlb_shared_vma_data {
> struct hugetlb_vma_lock vma_lock;
> +#ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING
> + bool hgm_enabled;
> +#endif
> };
>
> extern struct resv_map *resv_map_alloc(void);
> @@ -1179,6 +1182,25 @@ static inline void hugetlb_unregister_node(struct node *node)
> }
> #endif /* CONFIG_HUGETLB_PAGE */
>
> +#ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING
> +bool hugetlb_hgm_enabled(struct vm_area_struct *vma);
> +bool hugetlb_hgm_eligible(struct vm_area_struct *vma);
> +int enable_hugetlb_hgm(struct vm_area_struct *vma);
> +#else
> +static inline bool hugetlb_hgm_enabled(struct vm_area_struct *vma)
> +{
> + return false;
> +}
> +static inline bool hugetlb_hgm_eligible(struct vm_area_struct *vma)
> +{
> + return false;
> +}
> +static inline int enable_hugetlb_hgm(struct vm_area_struct *vma)
> +{
> + return -EINVAL;
> +}
> +#endif
> +
> static inline spinlock_t *huge_pte_lock(struct hstate *h,
> struct mm_struct *mm, pte_t *pte)
> {
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 5ae8bc8c928e..a18143add956 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -6840,6 +6840,10 @@ static bool pmd_sharing_possible(struct vm_area_struct *vma)
> #ifdef CONFIG_USERFAULTFD
> if (uffd_disable_huge_pmd_share(vma))
> return false;
> +#endif
> +#ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING
> + if (hugetlb_hgm_enabled(vma))
> + return false;
> #endif
> /*
> * Only shared VMAs can share PMDs.
> @@ -7033,6 +7037,9 @@ static int hugetlb_vma_data_alloc(struct vm_area_struct *vma)
> kref_init(&data->vma_lock.refs);
> init_rwsem(&data->vma_lock.rw_sema);
> data->vma_lock.vma = vma;
> +#ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING
> + data->hgm_enabled = false;
> +#endif
> vma->vm_private_data = data;
> return 0;
> }
> @@ -7290,6 +7297,68 @@ __weak unsigned long hugetlb_mask_last_page(struct hstate *h)
>
> #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
>
> +#ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING
> +bool hugetlb_hgm_eligible(struct vm_area_struct *vma)
> +{
> + /*
> + * All shared VMAs may have HGM.
> + *
> + * HGM requires using the VMA lock, which only exists for shared VMAs.
> + * To make HGM work for private VMAs, we would need to use another
> + * scheme to prevent collapsing/splitting from invalidating other
> + * threads' page table walks.
> + */
> + return vma && (vma->vm_flags & VM_MAYSHARE);
I am not yet 100% convinced you can/will take care of all possible code
paths where hugetlb_vma_data allocation may fail. If not, then you
should be checking vm_private_data here as well.
> +}
> +bool hugetlb_hgm_enabled(struct vm_area_struct *vma)
> +{
> + struct hugetlb_shared_vma_data *data = vma->vm_private_data;
> +
> + if (!vma || !(vma->vm_flags & VM_MAYSHARE))
> + return false;
> +
> + return data && data->hgm_enabled;
> +}
> +
> +/*
> + * Enable high-granularity mapping (HGM) for this VMA. Once enabled, HGM
> + * cannot be turned off.
> + *
> + * PMDs cannot be shared in HGM VMAs.
> + */
> +int enable_hugetlb_hgm(struct vm_area_struct *vma)
> +{
> + int ret;
> + struct hugetlb_shared_vma_data *data;
> +
> + if (!hugetlb_hgm_eligible(vma))
> + return -EINVAL;
> +
> + if (hugetlb_hgm_enabled(vma))
> + return 0;
> +
> + /*
> + * We must hold the mmap lock for writing so that callers can rely on
> + * hugetlb_hgm_enabled returning a consistent result while holding
> + * the mmap lock for reading.
> + */
> + mmap_assert_write_locked(vma->vm_mm);
> +
> + /* HugeTLB HGM requires the VMA lock to synchronize collapsing. */
> + ret = hugetlb_vma_data_alloc(vma);
> + if (ret)
> + return ret;
> +
> + data = vma->vm_private_data;
> + BUG_ON(!data);
Would rather have hugetlb_hgm_eligible check for vm_private_data as
suggested above instead of the BUG here.
--
Mike Kravetz
> + data->hgm_enabled = true;
> +
> + /* We don't support PMD sharing with HGM. */
> + hugetlb_unshare_all_pmds(vma);
> + return 0;
> +}
> +#endif /* CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING */
> +
> /*
> * These functions are overwritable if your architecture needs its own
> * behavior.
> --
> 2.38.0.135.g90850a2211-goog
>
Powered by blists - more mailing lists