[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <zyqk4zyxpcde7sjzu5xgo7yyntk3w6opoqdspvff4tyud4p6qn@wcnzwwq7d3b6>
Date: Thu, 8 May 2025 16:03:45 +0300
From: "kirill.shutemov@...ux.intel.com" <kirill.shutemov@...ux.intel.com>
To: "Huang, Kai" <kai.huang@...el.com>
Cc: "pbonzini@...hat.com" <pbonzini@...hat.com>,
"seanjc@...gle.com" <seanjc@...gle.com>, "Edgecombe, Rick P" <rick.p.edgecombe@...el.com>,
"bp@...en8.de" <bp@...en8.de>, "dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>,
"x86@...nel.org" <x86@...nel.org>, "mingo@...hat.com" <mingo@...hat.com>,
"Zhao, Yan Y" <yan.y.zhao@...el.com>, "tglx@...utronix.de" <tglx@...utronix.de>,
"kvm@...r.kernel.org" <kvm@...r.kernel.org>, "linux-coco@...ts.linux.dev" <linux-coco@...ts.linux.dev>,
"Yamahata, Isaku" <isaku.yamahata@...el.com>, "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [RFC, PATCH 02/12] x86/virt/tdx: Allocate reference counters for
PAMT memory
On Mon, May 05, 2025 at 11:05:12AM +0000, Huang, Kai wrote:
>
> > +static atomic_t *pamt_refcounts;
> > +
> > static enum tdx_module_status_t tdx_module_status;
> > static DEFINE_MUTEX(tdx_module_lock);
> >
> > @@ -1035,9 +1038,108 @@ static int config_global_keyid(void)
> > return ret;
> > }
> >
> > +atomic_t *tdx_get_pamt_refcount(unsigned long hpa)
> > +{
> > + return &pamt_refcounts[hpa / PMD_SIZE];
> > +}
> > +EXPORT_SYMBOL_GPL(tdx_get_pamt_refcount);
>
> It's not quite clear why this function needs to be exported in this patch. IMO
> it's better to move the export to the patch which actually needs it.
>
> Looking at patch 5, tdx_pamt_get()/put() use it, and they are in KVM code. But
> I think we should just put them here in this file. tdx_alloc_page() and
> tdx_free_page() should be in this file too.
>
> And instead of exporting tdx_get_pamt_refcount(), the TDX core code here can
> export tdx_alloc_page() and tdx_free_page(), providing two high level helpers to
> allow the TDX users (e.g., KVM) to allocate/free TDX private pages. How PAMT
> pages are allocated is then hidden in the core TDX code.
We would still need tdx_get_pamt_refcount() to handle case when we need to
bump refcount for page allocated elsewhere.
> > +
> > +static int pamt_refcount_populate(pte_t *pte, unsigned long addr, void *data)
> > +{
> > + unsigned long vaddr;
> > + pte_t entry;
> > +
> > + if (!pte_none(ptep_get(pte)))
> > + return 0;
> > +
> > + vaddr = __get_free_page(GFP_KERNEL | __GFP_ZERO);
> > + if (!vaddr)
> > + return -ENOMEM;
> > +
> > + entry = pfn_pte(PFN_DOWN(__pa(vaddr)), PAGE_KERNEL);
> > +
> > + spin_lock(&init_mm.page_table_lock);
> > + if (pte_none(ptep_get(pte)))
> > + set_pte_at(&init_mm, addr, pte, entry);
> > + else
> > + free_page(vaddr);
> > + spin_unlock(&init_mm.page_table_lock);
> > +
> > + return 0;
> > +}
> > +
> > +static int pamt_refcount_depopulate(pte_t *pte, unsigned long addr,
> > + void *data)
> > +{
> > + unsigned long vaddr;
> > +
> > + vaddr = (unsigned long)__va(PFN_PHYS(pte_pfn(ptep_get(pte))));
> > +
> > + spin_lock(&init_mm.page_table_lock);
> > + if (!pte_none(ptep_get(pte))) {
> > + pte_clear(&init_mm, addr, pte);
> > + free_page(vaddr);
> > + }
> > + spin_unlock(&init_mm.page_table_lock);
> > +
> > + return 0;
> > +}
> > +
> > +static int alloc_tdmr_pamt_refcount(struct tdmr_info *tdmr)
> > +{
> > + unsigned long start, end;
> > +
> > + start = (unsigned long)tdx_get_pamt_refcount(tdmr->base);
> > + end = (unsigned long)tdx_get_pamt_refcount(tdmr->base + tdmr->size);
> > + start = round_down(start, PAGE_SIZE);
> > + end = round_up(end, PAGE_SIZE);
> > +
> > + return apply_to_page_range(&init_mm, start, end - start,
> > + pamt_refcount_populate, NULL);
> > +}
>
> IIUC, populating refcount based on TDMR will slightly waste memory. The reason
> is IIUC we don't need to populate the refcount for a 2M range if the range is
> completely marked as reserved in TDMR, because it's not possible for the kernel
> to use such range for TDX.
>
> Populating based on the list of TDX memory blocks should be better. In
> practice, the difference should be unnoticeable, but conceptually, using TDX
> memory blocks is better.
Okay, I will look into this after dealing with huge pages.
> > +
> > +static int init_pamt_metadata(void)
> > +{
> > + size_t size = max_pfn / PTRS_PER_PTE * sizeof(*pamt_refcounts);
> > + struct vm_struct *area;
> > +
> > + if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
> > + return 0;
> > +
> > + /*
> > + * Reserve vmalloc range for PAMT reference counters. It covers all
> > + * physical address space up to max_pfn. It is going to be populated
> > + * from init_tdmr() only for present memory that available for TDX use.
> > + */
> > + area = get_vm_area(size, VM_IOREMAP);
> > + if (!area)
> > + return -ENOMEM;
> > +
> > + pamt_refcounts = area->addr;
> > + return 0;
> > +}
> > +
> > +static void free_pamt_metadata(void)
> > +{
> > + size_t size = max_pfn / PTRS_PER_PTE * sizeof(*pamt_refcounts);
> > +
> > + size = round_up(size, PAGE_SIZE);
> > + apply_to_existing_page_range(&init_mm,
> > + (unsigned long)pamt_refcounts,
> > + size, pamt_refcount_depopulate,
> > + NULL);
> > + vfree(pamt_refcounts);
> > + pamt_refcounts = NULL;
> > +}
> > +
> > static int init_tdmr(struct tdmr_info *tdmr)
> > {
> > u64 next;
> > + int ret;
> > +
> > + ret = alloc_tdmr_pamt_refcount(tdmr);
> > + if (ret)
> > + return ret;
> >
> > /*
> > * Initializing a TDMR can be time consuming. To avoid long
> > @@ -1048,7 +1150,6 @@ static int init_tdmr(struct tdmr_info *tdmr)
> > struct tdx_module_args args = {
> > .rcx = tdmr->base,
> > };
> > - int ret;
> >
> > ret = seamcall_prerr_ret(TDH_SYS_TDMR_INIT, &args);
> > if (ret)
> > @@ -1134,10 +1235,15 @@ static int init_tdx_module(void)
> > if (ret)
> > goto err_reset_pamts;
> >
> > + /* Reserve vmalloc range for PAMT reference counters */
> > + ret = init_pamt_metadata();
> > + if (ret)
> > + goto err_reset_pamts;
> > +
> > /* Initialize TDMRs to complete the TDX module initialization */
> > ret = init_tdmrs(&tdx_tdmr_list);
> > if (ret)
> > - goto err_reset_pamts;
> > + goto err_free_pamt_metadata;
> >
> > pr_info("%lu KB allocated for PAMT\n", tdmrs_count_pamt_kb(&tdx_tdmr_list));
> >
> > @@ -1149,6 +1255,9 @@ static int init_tdx_module(void)
> > put_online_mems();
> > return ret;
> >
> > +err_free_pamt_metadata:
> > + free_pamt_metadata();
> > +
> > err_reset_pamts:
> > /*
> > * Part of PAMTs may already have been initialized by the
>
--
Kiryl Shutsemau / Kirill A. Shutemov
Powered by blists - more mailing lists