Message-ID: <3a04995a-524c-4d07-8c8b-82930f9bca72@arm.com>
Date: Mon, 19 May 2025 18:35:58 +0100
From: Suzuki K Poulose <suzuki.poulose@....com>
To: Steven Price <steven.price@....com>, kvm@...r.kernel.org,
 kvmarm@...ts.linux.dev
Cc: Catalin Marinas <catalin.marinas@....com>, Marc Zyngier <maz@...nel.org>,
 Will Deacon <will@...nel.org>, James Morse <james.morse@....com>,
 Oliver Upton <oliver.upton@...ux.dev>, Zenghui Yu <yuzenghui@...wei.com>,
 linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org,
 Joey Gouly <joey.gouly@....com>, Alexandru Elisei
 <alexandru.elisei@....com>, Christoffer Dall <christoffer.dall@....com>,
 Fuad Tabba <tabba@...gle.com>, linux-coco@...ts.linux.dev,
 Ganapatrao Kulkarni <gankulkarni@...amperecomputing.com>,
 Gavin Shan <gshan@...hat.com>, Shanker Donthineni <sdonthineni@...dia.com>,
 Alper Gun <alpergun@...gle.com>, "Aneesh Kumar K . V"
 <aneesh.kumar@...nel.org>
Subject: Re: [PATCH v8 20/43] arm64: RME: Runtime faulting of memory

Hi Steven

On 16/04/2025 14:41, Steven Price wrote:
> At runtime if the realm guest accesses memory which hasn't yet been
> mapped then KVM needs to either populate the region or fault the guest.
> 
> For memory in the lower (protected) region of IPA a fresh page is
> provided to the RMM which will zero the contents. For memory in the
> upper (shared) region of IPA, the memory from the memslot is mapped
> into the realm VM non secure.
> 
> Signed-off-by: Steven Price <steven.price@....com>

Please find some comments below.

...

> diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
> index f6af3ea6ea8a..b6959cd17a6c 100644
> --- a/arch/arm64/kvm/rme.c
> +++ b/arch/arm64/kvm/rme.c
> @@ -714,6 +714,186 @@ static int realm_create_protected_data_page(struct realm *realm,
>   	return -ENXIO;
>   }
>   
> +static int fold_rtt(struct realm *realm, unsigned long addr, int level)
> +{
> +	phys_addr_t rtt_addr;
> +	int ret;
> +
> +	ret = realm_rtt_fold(realm, addr, level, &rtt_addr);
> +	if (ret)
> +		return ret;
> +
> +	free_delegated_granule(rtt_addr);
> +
> +	return 0;
> +}
> +
> +int realm_map_protected(struct realm *realm,
> +			unsigned long ipa,
> +			kvm_pfn_t pfn,
> +			unsigned long map_size,
> +			struct kvm_mmu_memory_cache *memcache)
> +{
> +	phys_addr_t phys = __pfn_to_phys(pfn);
> +	phys_addr_t rd = virt_to_phys(realm->rd);
> +	unsigned long base_ipa = ipa;
> +	unsigned long size;
> +	int map_level;
> +	int ret = 0;
> +
> +	if (WARN_ON(!IS_ALIGNED(map_size, RMM_PAGE_SIZE)))
> +		return -EINVAL;
> +
> +	if (WARN_ON(!IS_ALIGNED(ipa, map_size)))
> +		return -EINVAL;
> +
> +	if (IS_ALIGNED(map_size, RMM_L2_BLOCK_SIZE))
> +		map_level = 2;

minor nit: RMM_RTT_BLOCK_LEVEL

> +	else
> +		map_level = 3;

minor nit: RMM_RTT_MAX_LEVEL?

> +
> +	if (map_level < RMM_RTT_MAX_LEVEL) {
> +		/*
> +		 * A temporary RTT is needed during the map, precreate it,
> +		 * however if there is an error (e.g. missing parent tables)
> +		 * this will be handled below.
> +		 */
> +		realm_create_rtt_levels(realm, ipa, map_level,
> +					RMM_RTT_MAX_LEVEL, memcache);
> +	}
> +
> +	for (size = 0; size < map_size; size += RMM_PAGE_SIZE) {
> +		if (rmi_granule_delegate(phys)) {
> +			/*
> +			 * It's likely we raced with another VCPU on the same
> +			 * fault. Assume the other VCPU has handled the fault
> +			 * and return to the guest.
> +			 */
> +			return 0;
> +		}
> +
> +		ret = rmi_data_create_unknown(rd, phys, ipa);
> +
> +		if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) {
> +			/* Create missing RTTs and retry */
> +			int level = RMI_RETURN_INDEX(ret);
> +
> +			WARN_ON(level == RMM_RTT_MAX_LEVEL);
> +
> +			ret = realm_create_rtt_levels(realm, ipa, level,
> +						      RMM_RTT_MAX_LEVEL,
> +						      memcache);
> +			if (ret)
> +				goto err_undelegate;
> +
> +			ret = rmi_data_create_unknown(rd, phys, ipa);
> +		}
> +
> +		if (WARN_ON(ret))
> +			goto err_undelegate;
> +
> +		phys += RMM_PAGE_SIZE;
> +		ipa += RMM_PAGE_SIZE;
> +	}
> +
> +	if (map_size == RMM_L2_BLOCK_SIZE) {
> +		ret = fold_rtt(realm, base_ipa, map_level + 1);
> +		if (WARN_ON(ret))
> +			goto err;
> +	}
> +
> +	return 0;
> +
> +err_undelegate:
> +	if (WARN_ON(rmi_granule_undelegate(phys))) {
> +		/* Page can't be returned to NS world so is lost */
> +		get_page(phys_to_page(phys));
> +	}
> +err:
> +	while (size > 0) {
> +		unsigned long data, top;
> +
> +		phys -= RMM_PAGE_SIZE;
> +		size -= RMM_PAGE_SIZE;
> +		ipa -= RMM_PAGE_SIZE;
> +
> +		WARN_ON(rmi_data_destroy(rd, ipa, &data, &top));
> +
> +		if (WARN_ON(rmi_granule_undelegate(phys))) {
> +			/* Page can't be returned to NS world so is lost */
> +			get_page(phys_to_page(phys));
> +		}
> +	}
> +	return -ENXIO;
> +}
> +
> +int realm_map_non_secure(struct realm *realm,
> +			 unsigned long ipa,
> +			 kvm_pfn_t pfn,
> +			 unsigned long size,
> +			 struct kvm_mmu_memory_cache *memcache)
> +{
> +	phys_addr_t rd = virt_to_phys(realm->rd);
> +	phys_addr_t phys = __pfn_to_phys(pfn);
> +	unsigned long offset;
> +	int map_size, map_level;
> +	int ret = 0;
> +
> +	if (WARN_ON(!IS_ALIGNED(size, RMM_PAGE_SIZE)))
> +		return -EINVAL;
> +
> +	if (WARN_ON(!IS_ALIGNED(ipa, size)))
> +		return -EINVAL;
> +
> +	if (IS_ALIGNED(size, RMM_L2_BLOCK_SIZE)) {
> +		map_level = 2;
> +		map_size = RMM_L2_BLOCK_SIZE;

Same here, stick to the symbols rather than the digits.

> +	} else {
> +		map_level = 3;
> +		map_size = RMM_PAGE_SIZE;
> +	}
> +
> +	for (offset = 0; offset < size; offset += map_size) {
> +		/*
> +		 * realm_map_ipa() enforces that the memory is writable,

The function name seems to be obsolete, please fix.

> +		 * so for now we permit both read and write.
> +		 */
> +		unsigned long desc = phys |
> +				     PTE_S2_MEMATTR(MT_S2_FWB_NORMAL) |
> +				     KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R |
> +				     KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
> +		ret = rmi_rtt_map_unprotected(rd, ipa, map_level, desc);
> +
> +		if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) {

Could we hit the following case and end up in a failure?

* Initially a single page is shared and the S2 is mapped at L3.
* Later the Realm shares the entire L2 block and encounters a fault
  at a new IPA within that L2 block.

In this case we may try to create an L2 block mapping where an L3
mapping already exists, and we could encounter (RMI_ERROR_RTT, 2).
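
To make that concrete, a minimal hypothetical sequence (assuming 4K
RMM granules and a 2MB L2 block starting at some shared IPA B):

	/*
	 * 1. Guest shares one page at IPA B + 0x1000:
	 *      rmi_rtt_map_unprotected(rd, B + 0x1000, 3, desc)
	 *    -> succeeds; an L3 table is now live under B's L2 RTTE.
	 *
	 * 2. Guest shares the whole block; we fault at a new IPA in B
	 *    and, the range now being 2MB aligned, pick map_level = 2:
	 *      rmi_rtt_map_unprotected(rd, B, 2, desc)
	 *    -> (RMI_ERROR_RTT, 2): the L2 RTTE already points at the
	 *       L3 table. With level == map_level, the
	 *       realm_create_rtt_levels() call creates nothing, the
	 *       retry fails the same way, and the error is then
	 *       treated as a benign race and ignored, so it looks like
	 *       the fault is never resolved.
	 */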


> +			/* Create missing RTTs and retry */
> +			int level = RMI_RETURN_INDEX(ret);

So we should probably only go down the RTT create step with the
following check:

			if (level < map_level) {

> +
> +			ret = realm_create_rtt_levels(realm, ipa, level,
> +						      map_level, memcache);
> +			if (ret)
> +				return -ENXIO;
> +
> +			ret = rmi_rtt_map_unprotected(rd, ipa, map_level, desc);


		} else {

Otherwise, maybe we need to do some more hard work to fix it up:

1. If map_level == 3, something is terribly wrong, or we raced with
another thread?

2. If map_level < 3 and we didn't race:

   a. Go one level down, create the mappings there, and then fold.
But we could end up dealing with (RMI_ERROR_RTT, 3) as in (1).

   b. Easiest is to destroy the table at "map_level + 1" and retry the map.
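
A rough sketch of (b) for the else branch above — untested, and
realm_rtt_destroy() is an assumed helper wrapping RMI_RTT_DESTROY, so
the real name and signature in this series may differ:

		} else if (map_level < RMM_RTT_MAX_LEVEL) {
			/*
			 * A live table already hangs off the RTTE at
			 * map_level (e.g. from an earlier single-page
			 * share). Destroy the table at map_level + 1
			 * and retry the block mapping.
			 */
			ret = realm_rtt_destroy(realm, ipa, map_level + 1);
			if (ret)
				return -ENXIO;

			ret = rmi_rtt_map_unprotected(rd, ipa, map_level, desc);
		}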


Suzuki



> +		}
> +		/*
> +		 * RMI_ERROR_RTT can be reported for two reasons: either the
> +		 * RTT tables are not there, or there is an RTTE already
> +		 * present for the address.  The call to
> +		 * realm_create_rtt_levels() above handles the first case, and
> +		 * in the second case this indicates that another thread has
> +		 * already populated the RTTE for us, so we can ignore the
> +		 * error and continue.
> +		 */
> +		if (ret && RMI_RETURN_STATUS(ret) != RMI_ERROR_RTT)
> +			return -ENXIO;
> +
> +		ipa += map_size;
> +		phys += map_size;
> +	}
> +
> +	return 0;
> +}
> +
>   static int populate_region(struct kvm *kvm,
>   			   phys_addr_t ipa_base,
>   			   phys_addr_t ipa_end,

