Date: Mon, 24 Jun 2024 20:26:15 +0200
From: Borislav Petkov <bp@...en8.de>
To: Ashish Kalra <Ashish.Kalra@....com>
Cc: tglx@...utronix.de, mingo@...hat.com, dave.hansen@...ux.intel.com,
	x86@...nel.org, rafael@...nel.org, hpa@...or.com,
	peterz@...radead.org, adrian.hunter@...el.com,
	sathyanarayanan.kuppuswamy@...ux.intel.com, jun.nakajima@...el.com,
	rick.p.edgecombe@...el.com, thomas.lendacky@....com,
	michael.roth@....com, seanjc@...gle.com, kai.huang@...el.com,
	bhe@...hat.com, kirill.shutemov@...ux.intel.com, bdas@...hat.com,
	vkuznets@...hat.com, dionnaglaze@...gle.com, anisinha@...hat.com,
	jroedel@...e.de, ardb@...nel.org, dyoung@...hat.com,
	kexec@...ts.infradead.org, linux-coco@...ts.linux.dev,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH v9 3/3] x86/snp: Convert shared memory back to private on
 kexec

On Thu, Jun 20, 2024 at 10:23:59PM +0000, Ashish Kalra wrote:
> From: Ashish Kalra <ashish.kalra@....com>
> 
> SNP guests allocate shared buffers to perform I/O. It is done by
> allocating pages normally from the buddy allocator and converting them
> to shared with set_memory_decrypted().
> 
> The second kernel has no idea what memory is converted this way. It only

The second, kexec-ed, kernel...

> sees E820_TYPE_RAM.

...
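
Side note, for readers following along: the conversion the commit message
refers to is the usual buddy allocation followed by a C-bit flip, roughly
like this (illustrative only, not code from this patch):

	struct page *page = alloc_pages(GFP_KERNEL, order);
	void *vaddr = page_address(page);

	/* Clear the C-bit so the hypervisor can access the buffer. */
	set_memory_decrypted((unsigned long)vaddr, 1 << order);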

> @@ -92,6 +94,9 @@ static struct ghcb *boot_ghcb __section(".data");
>  /* Bitmap of SEV features supported by the hypervisor */
>  static u64 sev_hv_features __ro_after_init;
>  
> +/* Last address to be switched to private during kexec */
> +static unsigned long kexec_last_addr_to_make_private;

This is particularly yucky.

AFAIU, you want to:

1. Skip GHCB addresses when making all pages private

2. Once you're done converting, in snp_kexec_finish() go over the GHCB
   addresses and convert them explicitly, one-by-one, without this silly
   variable. Roughly like the sketch below.
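
Untested, just to show the shape (this assumes the per-CPU runtime_data
pointers are still valid at this point and reuses the patch's
set_pte_enc() helper):

	static void snp_kexec_finish(void)
	{
		struct sev_es_runtime_data *data;
		unsigned int level, cpu;
		unsigned long addr;
		pte_t *pte;

		/* ... after all other shared memory has been converted ... */

		for_each_possible_cpu(cpu) {
			data = per_cpu(runtime_data, cpu);
			addr = (unsigned long)&data->ghcb_page;

			pte = lookup_address(addr, &level);
			if (!pte)
				continue;

			/* Convert each GHCB back to private, one at a time. */
			set_pte_enc(pte, level, (void *)addr);
			snp_set_memory_private(addr, 1);
		}
	}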

>  /* #VC handler runtime per-CPU data */
>  struct sev_es_runtime_data {
>  	struct ghcb ghcb_page;
> @@ -1010,6 +1015,169 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
>  	set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
>  }
>  
> +static bool set_pte_enc(pte_t *kpte, int level, void *va)

Code duplication: __set_clr_pte_enc().

You need to refactor the code instead of adding yet another generic-looking
helper just because it is easier this way.
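
I.e., one common helper both paths can use. Untested sketch - the name
set_pte_enc_mask() is made up and the real __set_clr_pte_enc() does a
bit more work, but something along these lines:

	static void set_pte_enc_mask(pte_t *kpte, int level, void *va, bool enc)
	{
		pte_t new_pte;

		if (pte_none(*kpte))
			return;

		/*
		 * Flush caches so the data is accessed with the correct
		 * C-bit after the attribute change.
		 */
		if (pte_present(*kpte))
			clflush_cache_range(va, page_level_size(level));

		new_pte = __pte(enc ? cc_mkenc(pte_val(*kpte)) :
				      cc_mkdec(pte_val(*kpte)));
		set_pte_atomic(kpte, new_pte);
	}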

> +{
> +	pte_t new_pte;
> +
> +	if (pte_none(*kpte))
> +		return false;
> +
> +	/*
> +	 * Change the physical page attribute from C=0 to C=1. Flush the
> +	 * caches to ensure that data gets accessed with the correct C-bit.
> +	 */
> +	if (pte_present(*kpte))
> +		clflush_cache_range(va, page_level_size(level));
> +
> +	new_pte = __pte(cc_mkenc(pte_val(*kpte)));
> +	set_pte_atomic(kpte, new_pte);
> +
> +	return true;
> +}
> +
> +static bool make_pte_private(pte_t *pte, unsigned long addr, int pages, int level)
> +{
> +	struct sev_es_runtime_data *data;
> +	struct ghcb *ghcb;
> +
> +	data = this_cpu_read(runtime_data);
> +	ghcb = &data->ghcb_page;
> +
> +	/* Check for GHCB for being part of a PMD range. */
> +	if ((unsigned long)ghcb >= addr &&
> +	    (unsigned long)ghcb <= (addr + (pages * PAGE_SIZE))) {
> +		/*
> +		 * Ensure that the current cpu's GHCB is made private

s/cpu/CPU/g

There are multiple places.

> +		 * at the end of unshared loop so that we continue to use the
> +		 * optimized GHCB protocol and not force the switch to
> +		 * MSR protocol till the very end.
> +		 */
> +		pr_debug("setting boot_ghcb to NULL for this cpu ghcb\n");
> +		kexec_last_addr_to_make_private = addr;
> +		return true;
> +	}
> +
> +	if (!set_pte_enc(pte, level, (void *)addr))
> +		return false;
> +
> +	snp_set_memory_private(addr, pages);
> +
> +	return true;
> +}
> +
> +static void unshare_all_memory(void)
> +{
> +	unsigned long addr, end;
> +
> +	/*
> +	 * Walk direct mapping and convert all shared memory back to private,

This ends with a ','. Is anything more coming?

> +	 */
> +
> +	addr = PAGE_OFFSET;
> +	end  = PAGE_OFFSET + get_max_mapped();
> +
> +	while (addr < end) {
> +		unsigned long size;
> +		unsigned int level;
> +		pte_t *pte;
> +
> +		pte = lookup_address(addr, &level);
> +		size = page_level_size(level);
> +
> +		/*
> +		 * pte_none() check is required to skip physical memory holes in direct mapped.

This sentence needs to be written for humans. And come to think of it, you
can simply drop it: lookup_address() can return NULL, so you just need to
check its retval.
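
I.e., something along these lines (sketch only), which also avoids
calling page_level_size() with a potentially bogus level:

		pte = lookup_address(addr, &level);
		if (!pte) {
			/* Hole in the direct map, nothing to convert. */
			addr += PAGE_SIZE;
			continue;
		}
		size = page_level_size(level);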

> +		 */
> +		if (pte && pte_decrypted(*pte) && !pte_none(*pte)) {
> +			int pages = size / PAGE_SIZE;
> +
> +			if (!make_pte_private(pte, addr, pages, level)) {
> +				pr_err("Failed to unshare range %#lx-%#lx\n",
> +				       addr, addr + size);

Might as well terminate the guest here.
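
E.g. (GHCB_TERM_KEXEC is a made-up reason code, pick whatever fits):

		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_KEXEC);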

> +			}
> +
> +		}
> +
> +		addr += size;
> +	}
> +	__flush_tlb_all();
> +
> +}
> +
> +static void unshare_all_bss_decrypted_memory(void)

Why is this a separate function and not part of unshare_all_memory()?
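
If there's no reason for the split, unshare_all_memory() could simply
handle that range too at the end, e.g. (sketch, reusing the patch's
helpers):

	vaddr     = (unsigned long)__start_bss_decrypted;
	vaddr_end = (unsigned long)__start_bss_decrypted_unused;

	while (vaddr < vaddr_end) {
		pte = lookup_address(vaddr, &level);
		if (pte && !pte_none(*pte) && pte_decrypted(*pte))
			set_pte_enc(pte, level, (void *)vaddr);

		vaddr += PAGE_SIZE;
	}

	snp_set_memory_private((unsigned long)__start_bss_decrypted,
			       (vaddr_end - (unsigned long)__start_bss_decrypted) >> PAGE_SHIFT);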

> +{
> +	unsigned long vaddr, vaddr_end;
> +	unsigned int level;
> +	unsigned int npages;
> +	pte_t *pte;
> +
> +	vaddr = (unsigned long)__start_bss_decrypted;
> +	vaddr_end = (unsigned long)__start_bss_decrypted_unused;
> +	npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
> +	for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) {
> +		pte = lookup_address(vaddr, &level);
> +		if (!pte || !pte_decrypted(*pte) || pte_none(*pte))
> +			continue;
> +
> +		set_pte_enc(pte, level, (void *)vaddr);
> +	}
> +	vaddr = (unsigned long)__start_bss_decrypted;
> +	snp_set_memory_private(vaddr, npages);
> +}

...

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
