linux-kernel - Re: [PATCH v3 12/14] arm64: realm: Support nonsecure ITS emulation shared

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <86a5jzld9g.wl-maz@kernel.org>
Date: Wed, 05 Jun 2024 14:39:39 +0100
From: Marc Zyngier <maz@...nel.org>
To: Steven Price <steven.price@....com>
Cc: kvm@...r.kernel.org,
	kvmarm@...ts.linux.dev,
	Catalin Marinas <catalin.marinas@....com>,
	Will Deacon <will@...nel.org>,
	James Morse <james.morse@....com>,
	Oliver Upton <oliver.upton@...ux.dev>,
	Suzuki K Poulose <suzuki.poulose@....com>,
	Zenghui Yu <yuzenghui@...wei.com>,
	linux-arm-kernel@...ts.infradead.org,
	linux-kernel@...r.kernel.org,
	Joey Gouly <joey.gouly@....com>,
	Alexandru Elisei <alexandru.elisei@....com>,
	Christoffer Dall <christoffer.dall@....com>,
	Fuad Tabba <tabba@...gle.com>,
	linux-coco@...ts.linux.dev,
	Ganapatrao Kulkarni <gankulkarni@...amperecomputing.com>
Subject: Re: [PATCH v3 12/14] arm64: realm: Support nonsecure ITS emulation shared

The subject line is... odd. I'd expect something like:

"irqchip/gic-v3-its: Share ITS tables with a non-trusted hypervisor"

because nothing here should be CCA specific.

On Wed, 05 Jun 2024 10:30:04 +0100,
Steven Price <steven.price@....com> wrote:
> 
> Within a realm guest the ITS is emulated by the host. This means the
> allocations must have been made available to the host by a call to
> set_memory_decrypted(). Introduce an allocation function which performs
> this extra call.

This doesn't mention that this patch radically changes the allocation
of some tables.

> 
> Co-developed-by: Suzuki K Poulose <suzuki.poulose@....com>
> Signed-off-by: Suzuki K Poulose <suzuki.poulose@....com>
> Signed-off-by: Steven Price <steven.price@....com>
> ---
> Changes since v2:
>  * Drop 'shared' from the new its_xxx function names as they are used
>    for non-realm guests too.
>  * Don't handle the NUMA_NO_NODE case specially - alloc_pages_node()
>    should do the right thing.
>  * Drop a pointless (void *) cast.
> ---
>  drivers/irqchip/irq-gic-v3-its.c | 90 ++++++++++++++++++++++++--------
>  1 file changed, 67 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
> index 40ebf1726393..ca72f830f4cc 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -18,6 +18,7 @@
>  #include <linux/irqdomain.h>
>  #include <linux/list.h>
>  #include <linux/log2.h>
> +#include <linux/mem_encrypt.h>
>  #include <linux/memblock.h>
>  #include <linux/mm.h>
>  #include <linux/msi.h>
> @@ -27,6 +28,7 @@
>  #include <linux/of_pci.h>
>  #include <linux/of_platform.h>
>  #include <linux/percpu.h>
> +#include <linux/set_memory.h>
>  #include <linux/slab.h>
>  #include <linux/syscore_ops.h>
>  
> @@ -163,6 +165,7 @@ struct its_device {
>  	struct its_node		*its;
>  	struct event_lpi_map	event_map;
>  	void			*itt;
> +	u32			itt_order;
>  	u32			nr_ites;
>  	u32			device_id;
>  	bool			shared;
> @@ -198,6 +201,30 @@ static DEFINE_IDA(its_vpeid_ida);
>  #define gic_data_rdist_rd_base()	(gic_data_rdist()->rd_base)
>  #define gic_data_rdist_vlpi_base()	(gic_data_rdist_rd_base() + SZ_128K)
>  
> +static struct page *its_alloc_pages_node(int node, gfp_t gfp,
> +					 unsigned int order)
> +{
> +	struct page *page;
> +
> +	page = alloc_pages_node(node, gfp, order);
> +
> +	if (page)
> +		set_memory_decrypted((unsigned long)page_address(page),
> +				     1 << order);

Please use BIT(order).

> +	return page;
> +}
> +
> +static struct page *its_alloc_pages(gfp_t gfp, unsigned int order)
> +{
> +	return its_alloc_pages_node(NUMA_NO_NODE, gfp, order);
> +}
> +
> +static void its_free_pages(void *addr, unsigned int order)
> +{
> +	set_memory_encrypted((unsigned long)addr, 1 << order);
> +	free_pages((unsigned long)addr, order);
> +}
> +
>  /*
>   * Skip ITSs that have no vLPIs mapped, unless we're on GICv4.1, as we
>   * always have vSGIs mapped.
> @@ -2212,7 +2239,8 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags)
>  {
>  	struct page *prop_page;
>  
> -	prop_page = alloc_pages(gfp_flags, get_order(LPI_PROPBASE_SZ));
> +	prop_page = its_alloc_pages(gfp_flags,
> +				    get_order(LPI_PROPBASE_SZ));
>  	if (!prop_page)
>  		return NULL;
>  
> @@ -2223,8 +2251,8 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags)
>  
>  static void its_free_prop_table(struct page *prop_page)
>  {
> -	free_pages((unsigned long)page_address(prop_page),
> -		   get_order(LPI_PROPBASE_SZ));
> +	its_free_pages(page_address(prop_page),
> +		       get_order(LPI_PROPBASE_SZ));
>  }
>  
>  static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size)
> @@ -2346,7 +2374,8 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser,
>  		order = get_order(GITS_BASER_PAGES_MAX * psz);
>  	}
>  
> -	page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, order);
> +	page = its_alloc_pages_node(its->numa_node,
> +				    GFP_KERNEL | __GFP_ZERO, order);
>  	if (!page)
>  		return -ENOMEM;
>  
> @@ -2359,7 +2388,7 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser,
>  		/* 52bit PA is supported only when PageSize=64K */
>  		if (psz != SZ_64K) {
>  			pr_err("ITS: no 52bit PA support when psz=%d\n", psz);
> -			free_pages((unsigned long)base, order);
> +			its_free_pages(base, order);
>  			return -ENXIO;
>  		}
>  
> @@ -2415,7 +2444,7 @@ static int its_setup_baser(struct its_node *its, struct its_baser *baser,
>  		pr_err("ITS@%pa: %s doesn't stick: %llx %llx\n",
>  		       &its->phys_base, its_base_type_string[type],
>  		       val, tmp);
> -		free_pages((unsigned long)base, order);
> +		its_free_pages(base, order);
>  		return -ENXIO;
>  	}
>  
> @@ -2554,8 +2583,8 @@ static void its_free_tables(struct its_node *its)
>  
>  	for (i = 0; i < GITS_BASER_NR_REGS; i++) {
>  		if (its->tables[i].base) {
> -			free_pages((unsigned long)its->tables[i].base,
> -				   its->tables[i].order);
> +			its_free_pages(its->tables[i].base,
> +				       its->tables[i].order);
>  			its->tables[i].base = NULL;
>  		}
>  	}
> @@ -2821,7 +2850,8 @@ static bool allocate_vpe_l2_table(int cpu, u32 id)
>  
>  	/* Allocate memory for 2nd level table */
>  	if (!table[idx]) {
> -		page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(psz));
> +		page = its_alloc_pages(GFP_KERNEL | __GFP_ZERO,
> +				       get_order(psz));
>  		if (!page)
>  			return false;
>  
> @@ -2940,7 +2970,8 @@ static int allocate_vpe_l1_table(void)
>  
>  	pr_debug("np = %d, npg = %lld, psz = %d, epp = %d, esz = %d\n",
>  		 np, npg, psz, epp, esz);
> -	page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, get_order(np * PAGE_SIZE));
> +	page = its_alloc_pages(GFP_ATOMIC | __GFP_ZERO,
> +			       get_order(np * PAGE_SIZE));
>  	if (!page)
>  		return -ENOMEM;
>  
> @@ -2986,8 +3017,8 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags)
>  {
>  	struct page *pend_page;
>  
> -	pend_page = alloc_pages(gfp_flags | __GFP_ZERO,
> -				get_order(LPI_PENDBASE_SZ));
> +	pend_page = its_alloc_pages(gfp_flags | __GFP_ZERO,
> +				    get_order(LPI_PENDBASE_SZ));
>  	if (!pend_page)
>  		return NULL;
>  
> @@ -2999,7 +3030,7 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags)
>  
>  static void its_free_pending_table(struct page *pt)
>  {
> -	free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ));
> +	its_free_pages(page_address(pt), get_order(LPI_PENDBASE_SZ));
>  }
>  
>  /*
> @@ -3334,8 +3365,9 @@ static bool its_alloc_table_entry(struct its_node *its,
>  
>  	/* Allocate memory for 2nd level table */
>  	if (!table[idx]) {
> -		page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO,
> -					get_order(baser->psz));
> +		page = its_alloc_pages_node(its->numa_node,
> +					    GFP_KERNEL | __GFP_ZERO,
> +					    get_order(baser->psz));
>  		if (!page)
>  			return false;
>  
> @@ -3418,7 +3450,9 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
>  	unsigned long *lpi_map = NULL;
>  	unsigned long flags;
>  	u16 *col_map = NULL;
> +	struct page *page;
>  	void *itt;
> +	int itt_order;
>  	int lpi_base;
>  	int nr_lpis;
>  	int nr_ites;
> @@ -3430,7 +3464,6 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
>  	if (WARN_ON(!is_power_of_2(nvecs)))
>  		nvecs = roundup_pow_of_two(nvecs);
>  
> -	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
>  	/*
>  	 * Even if the device wants a single LPI, the ITT must be
>  	 * sized as a power of two (and you need at least one bit...).
> @@ -3438,7 +3471,16 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
>  	nr_ites = max(2, nvecs);
>  	sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1);
>  	sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
> -	itt = kzalloc_node(sz, GFP_KERNEL, its->numa_node);
> +	itt_order = get_order(sz);
> +	page = its_alloc_pages_node(its->numa_node,
> +				    GFP_KERNEL | __GFP_ZERO,
> +				    itt_order);

So we go from an allocation that was so far measured in *bytes* to
something that is now at least a page. Per device. This seems a bit
excessive to me, specially when it isn't conditioned on anything and
is now imposed on all platforms, including the non-CCA systems (which
are exactly 100% of the machines).

Another thing is that if we go with page alignment, then the 256 byte
alignment can obviously be removed everywhere (hint: MAPD needs to
change).

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.