lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <zyqk4zyxpcde7sjzu5xgo7yyntk3w6opoqdspvff4tyud4p6qn@wcnzwwq7d3b6>
Date: Thu, 8 May 2025 16:03:45 +0300
From: "kirill.shutemov@...ux.intel.com" <kirill.shutemov@...ux.intel.com>
To: "Huang, Kai" <kai.huang@...el.com>
Cc: "pbonzini@...hat.com" <pbonzini@...hat.com>, 
	"seanjc@...gle.com" <seanjc@...gle.com>, "Edgecombe, Rick P" <rick.p.edgecombe@...el.com>, 
	"bp@...en8.de" <bp@...en8.de>, "dave.hansen@...ux.intel.com" <dave.hansen@...ux.intel.com>, 
	"x86@...nel.org" <x86@...nel.org>, "mingo@...hat.com" <mingo@...hat.com>, 
	"Zhao, Yan Y" <yan.y.zhao@...el.com>, "tglx@...utronix.de" <tglx@...utronix.de>, 
	"kvm@...r.kernel.org" <kvm@...r.kernel.org>, "linux-coco@...ts.linux.dev" <linux-coco@...ts.linux.dev>, 
	"Yamahata, Isaku" <isaku.yamahata@...el.com>, "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [RFC, PATCH 02/12] x86/virt/tdx: Allocate reference counters for
 PAMT memory

On Mon, May 05, 2025 at 11:05:12AM +0000, Huang, Kai wrote:
> 
> > +static atomic_t *pamt_refcounts;
> > +
> >  static enum tdx_module_status_t tdx_module_status;
> >  static DEFINE_MUTEX(tdx_module_lock);
> >  
> > @@ -1035,9 +1038,108 @@ static int config_global_keyid(void)
> >  	return ret;
> >  }
> >  
> > +atomic_t *tdx_get_pamt_refcount(unsigned long hpa)
> > +{
> > +	return &pamt_refcounts[hpa / PMD_SIZE];
> > +}
> > +EXPORT_SYMBOL_GPL(tdx_get_pamt_refcount);
> 
> It's not quite clear why this function needs to be exported in this patch.  IMO
> it's better to move the export to the patch which actually needs it.
> 
> Looking at patch 5, tdx_pamt_get()/put() use it, and they are in KVM code.  But
> I think we should just put them here in this file.  tdx_alloc_page() and
> tdx_free_page() should be in this file too.
> 
> And instead of exporting tdx_get_pamt_refcount(), the TDX core code here can
> export tdx_alloc_page() and tdx_free_page(), providing two high level helpers to
> allow the TDX users (e.g., KVM) to allocate/free TDX private pages.  How PAMT
> pages are allocated is then hidden in the core TDX code.

We would still need tdx_get_pamt_refcount() to handle the case when we need to
bump the refcount for a page allocated elsewhere.

> > +
> > +static int pamt_refcount_populate(pte_t *pte, unsigned long addr, void *data)
> > +{
> > +	unsigned long vaddr;
> > +	pte_t entry;
> > +
> > +	if (!pte_none(ptep_get(pte)))
> > +		return 0;
> > +
> > +	vaddr = __get_free_page(GFP_KERNEL | __GFP_ZERO);
> > +	if (!vaddr)
> > +		return -ENOMEM;
> > +
> > +	entry = pfn_pte(PFN_DOWN(__pa(vaddr)), PAGE_KERNEL);
> > +
> > +	spin_lock(&init_mm.page_table_lock);
> > +	if (pte_none(ptep_get(pte)))
> > +		set_pte_at(&init_mm, addr, pte, entry);
> > +	else
> > +		free_page(vaddr);
> > +	spin_unlock(&init_mm.page_table_lock);
> > +
> > +	return 0;
> > +}
> > +
> > +static int pamt_refcount_depopulate(pte_t *pte, unsigned long addr,
> > +				    void *data)
> > +{
> > +	unsigned long vaddr;
> > +
> > +	vaddr = (unsigned long)__va(PFN_PHYS(pte_pfn(ptep_get(pte))));
> > +
> > +	spin_lock(&init_mm.page_table_lock);
> > +	if (!pte_none(ptep_get(pte))) {
> > +		pte_clear(&init_mm, addr, pte);
> > +		free_page(vaddr);
> > +	}
> > +	spin_unlock(&init_mm.page_table_lock);
> > +
> > +	return 0;
> > +}
> > +
> > +static int alloc_tdmr_pamt_refcount(struct tdmr_info *tdmr)
> > +{
> > +	unsigned long start, end;
> > +
> > +	start = (unsigned long)tdx_get_pamt_refcount(tdmr->base);
> > +	end = (unsigned long)tdx_get_pamt_refcount(tdmr->base + tdmr->size);
> > +	start = round_down(start, PAGE_SIZE);
> > +	end = round_up(end, PAGE_SIZE);
> > +
> > +	return apply_to_page_range(&init_mm, start, end - start,
> > +				   pamt_refcount_populate, NULL);
> > +}
> 
> IIUC, populating refcount based on TDMR will slightly waste memory.  The reason
> is IIUC we don't need to populate the refcount for a 2M range if the range is
> completely marked as reserved in TDMR, because it's not possible for the kernel
> to use such range for TDX.
> 
> Populating based on the list of TDX memory blocks should be better.  In
> practice, the difference should be unnoticeable, but conceptually, using TDX
> memory blocks is better.

Okay, I will look into this after dealing with huge pages.

> > +
> > +static int init_pamt_metadata(void)
> > +{
> > +	size_t size = max_pfn / PTRS_PER_PTE * sizeof(*pamt_refcounts);
> > +	struct vm_struct *area;
> > +
> > +	if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
> > +		return 0;
> > +
> > +	/*
> > +	 * Reserve vmalloc range for PAMT reference counters. It covers all
> > +	 * physical address space up to max_pfn. It is going to be populated
> > +	 * from init_tdmr() only for present memory that available for TDX use.
> > +	 */
> > +	area = get_vm_area(size, VM_IOREMAP);
> > +	if (!area)
> > +		return -ENOMEM;
> > +
> > +	pamt_refcounts = area->addr;
> > +	return 0;
> > +}
> > +
> > +static void free_pamt_metadata(void)
> > +{
> > +	size_t size = max_pfn / PTRS_PER_PTE * sizeof(*pamt_refcounts);
> > +
> > +	size = round_up(size, PAGE_SIZE);
> > +	apply_to_existing_page_range(&init_mm,
> > +				     (unsigned long)pamt_refcounts,
> > +				     size, pamt_refcount_depopulate,
> > +				     NULL);
> > +	vfree(pamt_refcounts);
> > +	pamt_refcounts = NULL;
> > +}
> > +
> >  static int init_tdmr(struct tdmr_info *tdmr)
> >  {
> >  	u64 next;
> > +	int ret;
> > +
> > +	ret = alloc_tdmr_pamt_refcount(tdmr);
> > +	if (ret)
> > +		return ret;
> >  
> >  	/*
> >  	 * Initializing a TDMR can be time consuming.  To avoid long
> > @@ -1048,7 +1150,6 @@ static int init_tdmr(struct tdmr_info *tdmr)
> >  		struct tdx_module_args args = {
> >  			.rcx = tdmr->base,
> >  		};
> > -		int ret;
> >  
> >  		ret = seamcall_prerr_ret(TDH_SYS_TDMR_INIT, &args);
> >  		if (ret)
> > @@ -1134,10 +1235,15 @@ static int init_tdx_module(void)
> >  	if (ret)
> >  		goto err_reset_pamts;
> >  
> > +	/* Reserve vmalloc range for PAMT reference counters */
> > +	ret = init_pamt_metadata();
> > +	if (ret)
> > +		goto err_reset_pamts;
> > +
> >  	/* Initialize TDMRs to complete the TDX module initialization */
> >  	ret = init_tdmrs(&tdx_tdmr_list);
> >  	if (ret)
> > -		goto err_reset_pamts;
> > +		goto err_free_pamt_metadata;
> >  
> >  	pr_info("%lu KB allocated for PAMT\n", tdmrs_count_pamt_kb(&tdx_tdmr_list));
> >  
> > @@ -1149,6 +1255,9 @@ static int init_tdx_module(void)
> >  	put_online_mems();
> >  	return ret;
> >  
> > +err_free_pamt_metadata:
> > +	free_pamt_metadata();
> > +
> >  err_reset_pamts:
> >  	/*
> >  	 * Part of PAMTs may already have been initialized by the
> 

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ