[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a77781a4-2d3c-403a-88a6-57242c7f0246@intel.com>
Date: Mon, 25 Dec 2023 10:59:16 +0800
From: "Liu, Jingqi" <jingqi.liu@...el.com>
To: Pasha Tatashin <pasha.tatashin@...een.com>, <akpm@...ux-foundation.org>,
<linux-mm@...ck.org>, <linux-kernel@...r.kernel.org>, <rientjes@...gle.com>,
<dwmw2@...radead.org>, <baolu.lu@...ux.intel.com>, <joro@...tes.org>,
<will@...nel.org>, <robin.murphy@....com>, <iommu@...ts.linux.dev>
Subject: Re: [RFC 1/3] iommu/intel: Use page->refcount to count number of
entries in IOMMU
On 12/21/2023 11:19 AM, Pasha Tatashin wrote:
> In order to be able to efficiently free empty page table levels, count the
> number of entries in each page table my incremeanting and decremeanting
s/incremeanting/incrementing
s/decremeanting/decrementing
> refcount every time a PTE is inserted or removed form the page table.
s/form/from
>
> For this to work correctly, add two helper function:
> dma_clear_pte and dma_set_pte where counting is performed,
>
> Also, modify the code so every page table entry is always updated using the
> two new functions.
>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@...een.com>
> ---
> drivers/iommu/intel/iommu.c | 40 +++++++++++++++++++++---------------
> drivers/iommu/intel/iommu.h | 41 +++++++++++++++++++++++++++++++------
> 2 files changed, 58 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index 897159dba47d..4688ef797161 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -949,7 +949,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
> if (domain->use_first_level)
> pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
>
> - if (cmpxchg64(&pte->val, 0ULL, pteval))
> + if (dma_set_pte(pte, pteval))
> /* Someone else set it while we were thinking; use theirs. */
> free_pgtable_page(tmp_page);
> else
> @@ -1021,7 +1021,8 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
> continue;
> }
> do {
> - dma_clear_pte(pte);
> + if (dma_pte_present(pte))
> + dma_clear_pte(pte);
> start_pfn += lvl_to_nr_pages(large_page);
> pte++;
> } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
> @@ -1062,7 +1063,8 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level,
> */
> if (level < retain_level && !(start_pfn > level_pfn ||
> last_pfn < level_pfn + level_size(level) - 1)) {
> - dma_clear_pte(pte);
> + if (dma_pte_present(pte))
> + dma_clear_pte(pte);
> domain_flush_cache(domain, pte, sizeof(*pte));
> free_pgtable_page(level_pte);
> }
> @@ -1093,12 +1095,13 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
> }
> }
>
> -/* When a page at a given level is being unlinked from its parent, we don't
> - need to *modify* it at all. All we need to do is make a list of all the
> - pages which can be freed just as soon as we've flushed the IOTLB and we
> - know the hardware page-walk will no longer touch them.
> - The 'pte' argument is the *parent* PTE, pointing to the page that is to
> - be freed. */
> +/*
> + * A given page at a given level is being unlinked from its parent.
> + * We need to make a list of all the pages which can be freed just as soon as
> + * we've flushed the IOTLB and we know the hardware page-walk will no longer
> + * touch them. The 'pte' argument is the *parent* PTE, pointing to the page
> + * that is to be freed.
> + */
> static void dma_pte_list_pagetables(struct dmar_domain *domain,
> int level, struct dma_pte *pte,
> struct list_head *freelist)
> @@ -1106,17 +1109,20 @@ static void dma_pte_list_pagetables(struct dmar_domain *domain,
> struct page *pg;
>
> pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
> - list_add_tail(&pg->lru, freelist);
> -
> - if (level == 1)
> - return;
> -
> pte = page_address(pg);
> +
> do {
> - if (dma_pte_present(pte) && !dma_pte_superpage(pte))
> - dma_pte_list_pagetables(domain, level - 1, pte, freelist);
> + if (dma_pte_present(pte)) {
> + if (level > 1 && !dma_pte_superpage(pte)) {
> + dma_pte_list_pagetables(domain, level - 1, pte,
> + freelist);
> + }
> + dma_clear_pte(pte);
> + }
> pte++;
> } while (!first_pte_in_page(pte));
> +
> + list_add_tail(&pg->lru, freelist);
> }
>
How about calculating the page decrement when the pages in the freelist
are really freed in iommu_free_pgtbl_pages() ?
Thanks,
Jingqi
Powered by blists - more mailing lists