lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240822161226.00001736.zhiw@nvidia.com>
Date: Thu, 22 Aug 2024 16:12:26 +0300
From: Zhi Wang <zhiw@...dia.com>
To: <alejandro.lucero-palau@....com>
CC: <linux-cxl@...r.kernel.org>, <netdev@...r.kernel.org>,
	<dan.j.williams@...el.com>, <martin.habets@...inx.com>,
	<edward.cree@....com>, <davem@...emloft.net>, <kuba@...nel.org>,
	<pabeni@...hat.com>, <edumazet@...gle.com>, <richard.hughes@....com>,
	Alejandro Lucero <alucerop@....com>, <targupta@...dia.com>,
	<zhiwang@...nel.org>
Subject: Re: [PATCH v2 12/15] cxl: allow region creation by type2 drivers

On Mon, 15 Jul 2024 18:28:32 +0100
<alejandro.lucero-palau@....com> wrote:

> From: Alejandro Lucero <alucerop@....com>
> 
> Creating a CXL region requires userspace intervention through the cxl
> sysfs files. Type2 support should allow accelerator drivers to create
> such a CXL region from kernel code.
> 
> Add that functionality and integrate it with the current support for
> memory expanders.
> 
> Based on
> https://lore.kernel.org/linux-cxl/168592149709.1948938.8663425987110396027.stgit@dwillia2-xfh.jf.intel.com/T/#m84598b534cc5664f5bb31521ba6e41c7bc213758
> Signed-off-by: Alejandro Lucero <alucerop@....com>
> Signed-off-by: Dan Williams <dan.j.williams@...el.com>
> ---
>  drivers/cxl/core/region.c          | 265 ++++++++++++++++++++++-------
>  drivers/cxl/cxl.h                  |   1 +
>  drivers/cxl/cxlmem.h               |   4 +-
>  drivers/net/ethernet/sfc/efx_cxl.c |  15 +-
>  include/linux/cxl_accel_mem.h      |   5 +
>  5 files changed, 231 insertions(+), 59 deletions(-)
> 
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 5cc71b8868bc..697c8df83a4b 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -479,22 +479,14 @@ static ssize_t interleave_ways_show(struct
> device *dev, 
>  static const struct attribute_group
> *get_cxl_region_target_group(void); 
> -static ssize_t interleave_ways_store(struct device *dev,
> -				     struct device_attribute *attr,
> -				     const char *buf, size_t len)
> +static int set_interleave_ways(struct cxl_region *cxlr, int val)
>  {
> -	struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(dev->parent);
> +	struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld =
> &cxlrd->cxlsd.cxld;
> -	struct cxl_region *cxlr = to_cxl_region(dev);
>  	struct cxl_region_params *p = &cxlr->params;
> -	unsigned int val, save;
> -	int rc;
> +	int save, rc;
>  	u8 iw;
>  
> -	rc = kstrtouint(buf, 0, &val);
> -	if (rc)
> -		return rc;
> -
>  	rc = ways_to_eiw(val, &iw);
>  	if (rc)
>  		return rc;
> @@ -509,25 +501,42 @@ static ssize_t interleave_ways_store(struct
> device *dev, return -EINVAL;
>  	}
>  
> -	rc = down_write_killable(&cxl_region_rwsem);
> -	if (rc)
> -		return rc;
> -	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
> -		rc = -EBUSY;
> -		goto out;
> -	}
> +	lockdep_assert_held_write(&cxl_region_rwsem);
> +	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
> +		return -EBUSY;
>  
>  	save = p->interleave_ways;
>  	p->interleave_ways = val;
>  	rc = sysfs_update_group(&cxlr->dev.kobj,
> get_cxl_region_target_group()); if (rc)
>  		p->interleave_ways = save;
> -out:
> +
> +	return rc;
> +}
> +
> +static ssize_t interleave_ways_store(struct device *dev,
> +				     struct device_attribute *attr,
> +				     const char *buf, size_t len)
> +{
> +	struct cxl_region *cxlr = to_cxl_region(dev);
> +	unsigned int val;
> +	int rc;
> +
> +	rc = kstrtouint(buf, 0, &val);
> +	if (rc)
> +		return rc;
> +
> +	rc = down_write_killable(&cxl_region_rwsem);
> +	if (rc)
> +		return rc;
> +
> +	rc = set_interleave_ways(cxlr, val);
>  	up_write(&cxl_region_rwsem);
>  	if (rc)
>  		return rc;
>  	return len;
>  }
> +
>  static DEVICE_ATTR_RW(interleave_ways);
>  
>  static ssize_t interleave_granularity_show(struct device *dev,
> @@ -547,21 +556,14 @@ static ssize_t
> interleave_granularity_show(struct device *dev, return rc;
>  }
>  
> -static ssize_t interleave_granularity_store(struct device *dev,
> -					    struct device_attribute
> *attr,
> -					    const char *buf, size_t
> len) +static int set_interleave_granularity(struct cxl_region *cxlr,
> int val) {
> -	struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(dev->parent);
> +	struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld =
> &cxlrd->cxlsd.cxld;
> -	struct cxl_region *cxlr = to_cxl_region(dev);
>  	struct cxl_region_params *p = &cxlr->params;
> -	int rc, val;
> +	int rc;
>  	u16 ig;
>  
> -	rc = kstrtoint(buf, 0, &val);
> -	if (rc)
> -		return rc;
> -
>  	rc = granularity_to_eig(val, &ig);
>  	if (rc)
>  		return rc;
> @@ -577,21 +579,36 @@ static ssize_t
> interleave_granularity_store(struct device *dev, if
> (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
> return -EINVAL; 
> +	lockdep_assert_held_write(&cxl_region_rwsem);
> +	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
> +		return -EBUSY;
> +
> +	p->interleave_granularity = val;
> +	return 0;
> +}
> +
> +static ssize_t interleave_granularity_store(struct device *dev,
> +					    struct device_attribute
> *attr,
> +					    const char *buf, size_t
> len) +{
> +	struct cxl_region *cxlr = to_cxl_region(dev);
> +	int rc, val;
> +
> +	rc = kstrtoint(buf, 0, &val);
> +	if (rc)
> +		return rc;
> +
>  	rc = down_write_killable(&cxl_region_rwsem);
>  	if (rc)
>  		return rc;
> -	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
> -		rc = -EBUSY;
> -		goto out;
> -	}
>  
> -	p->interleave_granularity = val;
> -out:
> +	rc = set_interleave_granularity(cxlr, val);
>  	up_write(&cxl_region_rwsem);
>  	if (rc)
>  		return rc;
>  	return len;
>  }
> +
>  static DEVICE_ATTR_RW(interleave_granularity);
>  
>  static ssize_t resource_show(struct device *dev, struct
> device_attribute *attr, @@ -2193,7 +2210,7 @@ static int
> cxl_region_attach(struct cxl_region *cxlr, return 0;
>  }
>  
> -static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
> +int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
>  {
>  	struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
>  	struct cxl_region *cxlr = cxled->cxld.region;
> @@ -2252,6 +2269,7 @@ static int cxl_region_detach(struct
> cxl_endpoint_decoder *cxled) put_device(&cxlr->dev);
>  	return rc;
>  }
> +EXPORT_SYMBOL_NS_GPL(cxl_region_detach, CXL);
>  
>  void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
>  {
> @@ -2746,6 +2764,14 @@ cxl_find_region_by_name(struct
> cxl_root_decoder *cxlrd, const char *name) return
> to_cxl_region(region_dev); }
>  
> +static void drop_region(struct cxl_region *cxlr)
> +{
> +	struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(cxlr->dev.parent);
> +	struct cxl_port *port = cxlrd_to_port(cxlrd);
> +
> +	devm_release_action(port->uport_dev, unregister_region,
> cxlr); +}
> +
>  static ssize_t delete_region_store(struct device *dev,
>  				   struct device_attribute *attr,
>  				   const char *buf, size_t len)
> @@ -3353,17 +3379,18 @@ static int match_region_by_range(struct
> device *dev, void *data) return rc;
>  }
>  
> -/* Establish an empty region covering the given HPA range */
> -static struct cxl_region *construct_region(struct cxl_root_decoder
> *cxlrd,
> -					   struct
> cxl_endpoint_decoder *cxled) +static void construct_region_end(void)
> +{
> +	up_write(&cxl_region_rwsem);
> +}
> +
> +static struct cxl_region *construct_region_begin(struct
> cxl_root_decoder *cxlrd,
> +						 struct
> cxl_endpoint_decoder *cxled) {
>  	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> -	struct cxl_port *port = cxlrd_to_port(cxlrd);
> -	struct range *hpa = &cxled->cxld.hpa_range;
>  	struct cxl_region_params *p;
>  	struct cxl_region *cxlr;
> -	struct resource *res;
> -	int rc;
> +	int err = 0;
>  
>  	do {
>  		cxlr = __create_region(cxlrd, cxled->mode,
> @@ -3372,8 +3399,7 @@ static struct cxl_region
> *construct_region(struct cxl_root_decoder *cxlrd, } while
> (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); 
>  	if (IS_ERR(cxlr)) {
> -		dev_err(cxlmd->dev.parent,
> -			"%s:%s: %s failed assign region: %ld\n",
> +		dev_err(cxlmd->dev.parent,"%s:%s: %s failed assign
> region: %ld\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>  			__func__, PTR_ERR(cxlr));
>  		return cxlr;
> @@ -3383,23 +3409,47 @@ static struct cxl_region
> *construct_region(struct cxl_root_decoder *cxlrd, p = &cxlr->params;
>  	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>  		dev_err(cxlmd->dev.parent,
> -			"%s:%s: %s autodiscovery interrupted\n",
> +			"%s:%s: %s region setup interrupted\n",
>  			dev_name(&cxlmd->dev),
> dev_name(&cxled->cxld.dev), __func__);
> -		rc = -EBUSY;
> -		goto err;
> +		err = -EBUSY;
> +	}
> +
> +	if (err) {
> +		construct_region_end();
> +		drop_region(cxlr);
> +		return ERR_PTR(err);
>  	}
> +	return cxlr;
> +}
> +
> +
> +/* Establish an empty region covering the given HPA range */
> +static struct cxl_region *construct_region(struct cxl_root_decoder
> *cxlrd,
> +					   struct
> cxl_endpoint_decoder *cxled) +{
> +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> +	struct range *hpa = &cxled->cxld.hpa_range;
> +	struct cxl_region_params *p;
> +	struct cxl_region *cxlr;
> +	struct resource *res;
> +	int rc;
> +
> +	cxlr = construct_region_begin(cxlrd, cxled);
> +	if (IS_ERR(cxlr))
> +		return cxlr;
>  
>  	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>  
>  	res = kmalloc(sizeof(*res), GFP_KERNEL);
>  	if (!res) {
>  		rc = -ENOMEM;
> -		goto err;
> +		goto out;
>  	}
>  
>  	*res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
>  				    dev_name(&cxlr->dev));
> +
>  	rc = insert_resource(cxlrd->res, res);
>  	if (rc) {
>  		/*
> @@ -3412,6 +3462,7 @@ static struct cxl_region
> *construct_region(struct cxl_root_decoder *cxlrd, __func__,
> dev_name(&cxlr->dev)); }
>  
> +	p = &cxlr->params;
>  	p->res = res;
>  	p->interleave_ways = cxled->cxld.interleave_ways;
>  	p->interleave_granularity =
> cxled->cxld.interleave_granularity; @@ -3419,24 +3470,124 @@ static
> struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, 
>  	rc = sysfs_update_group(&cxlr->dev.kobj,
> get_cxl_region_target_group()); if (rc)
> -		goto err;
> +		goto out;
>  
>  	dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig:
> %d\n",
> -		dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
> __func__,
> -		dev_name(&cxlr->dev), p->res, p->interleave_ways,
> -		p->interleave_granularity);
> +				   dev_name(&cxlmd->dev),
> +				   dev_name(&cxled->cxld.dev),
> __func__,
> +				   dev_name(&cxlr->dev), p->res,
> +				   p->interleave_ways,
> +				   p->interleave_granularity);
>  
>  	/* ...to match put_device() in cxl_add_to_region() */
>  	get_device(&cxlr->dev);
>  	up_write(&cxl_region_rwsem);
> +out:
> +	construct_region_end();
> +	if (rc) {
> +		drop_region(cxlr);
> +		return ERR_PTR(rc);
> +	}
> +	return cxlr;
> +}
> +
> +static struct cxl_region *
> +__construct_new_region(struct cxl_root_decoder *cxlrd,
> +		       struct cxl_endpoint_decoder **cxled, int ways)
> +{
> +	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
> +	struct cxl_region_params *p;
> +	resource_size_t size = 0;
> +	struct cxl_region *cxlr;
> +	int rc, i;
> +
> +	/* If interleaving is not supported, why does ways need to
> be at least 1? */
> +	if (ways < 1)
> +		return ERR_PTR(-EINVAL);
> +
> +	cxlr = construct_region_begin(cxlrd, cxled[0]);
> +	if (IS_ERR(cxlr))
> +		return cxlr;
> +
> +	rc = set_interleave_ways(cxlr, ways);
> +	if (rc)
> +		goto out;
> +
> +	rc = set_interleave_granularity(cxlr,
> cxld->interleave_granularity);
> +	if (rc)
> +		goto out;
> +
> +	down_read(&cxl_dpa_rwsem);
> +	for (i = 0; i < ways; i++) {
> +		if (!cxled[i]->dpa_res)
> +			break;
> +		size += resource_size(cxled[i]->dpa_res);
> +	}
> +	up_read(&cxl_dpa_rwsem);
> +
> +	if (i < ways)
> +		goto out;
> +
> +	rc = alloc_hpa(cxlr, size);
> +	if (rc)
> +		goto out;
> +
> +	down_read(&cxl_dpa_rwsem);
> +	for (i = 0; i < ways; i++) {
> +		rc = cxl_region_attach(cxlr, cxled[i], i);
> +		if (rc)
> +			break;
> +	}
> +	up_read(&cxl_dpa_rwsem);
> +
> +	if (rc)
> +		goto out;
> +
> +	rc = cxl_region_decode_commit(cxlr);
> +	if (rc)
> +		goto out;
>  
> +	p = &cxlr->params;
> +	p->state = CXL_CONFIG_COMMIT;
> +out:
> +	construct_region_end();
> +	if (rc) {
> +		drop_region(cxlr);
> +		return ERR_PTR(rc);
> +	}
>  	return cxlr;
> +}
>  
> -err:
> -	up_write(&cxl_region_rwsem);
> -	devm_release_action(port->uport_dev, unregister_region,
> cxlr);
> -	return ERR_PTR(rc);
> +/**
> + * cxl_create_region - Establish a region given an array of endpoint
> decoders
> + * @cxlrd: root decoder to allocate HPA
> + * @cxled: array of endpoint decoders with reserved DPA capacity
> + * @ways: size of @cxled array
> + *
> + * Returns a fully formed region in the commit state and attached to
> the
> + * cxl_region driver.
> + */
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> +				     struct cxl_endpoint_decoder
> **cxled,
> +				     int ways)
> +{
> +	struct cxl_region *cxlr;
> +
> +	mutex_lock(&cxlrd->range_lock);
> +	cxlr = __construct_new_region(cxlrd, cxled, ways);
> +	mutex_unlock(&cxlrd->range_lock);
> +
> +	if (IS_ERR(cxlr))
> +		return cxlr;
> +
> +	if (device_attach(&cxlr->dev) <= 0) {
> +		dev_err(&cxlr->dev, "failed to create region\n");
> +		drop_region(cxlr);
> +		return ERR_PTR(-ENODEV);
> +	}
> +	return cxlr;
>  }
> +EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
>  
>  int cxl_add_to_region(struct cxl_port *root, struct
> cxl_endpoint_decoder *cxled) {
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index d3fdd2c1e066..1bf3b74ff959 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -905,6 +905,7 @@ void cxl_coordinates_combine(struct
> access_coordinate *out, 
>  bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
>  
> +int cxl_region_detach(struct cxl_endpoint_decoder *cxled);
>  /*
>   * Unit test builds overrides this to __weak, find the 'strong'
> version
>   * of these symbols in tools/testing/cxl/.
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index a0e0795ec064..377bb3cd2d47 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -881,5 +881,7 @@ struct cxl_root_decoder
> *cxl_get_hpa_freespace(struct cxl_port *endpoint, int interleave_ways,
>  					       unsigned long flags,
>  					       resource_size_t *max);
> -
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> +				     struct cxl_endpoint_decoder
> **cxled,
> +				     int ways);
>  #endif /* __CXL_MEM_H__ */
> diff --git a/drivers/net/ethernet/sfc/efx_cxl.c
> b/drivers/net/ethernet/sfc/efx_cxl.c index b5626d724b52..4012e3faa298
> 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c
> +++ b/drivers/net/ethernet/sfc/efx_cxl.c
> @@ -92,8 +92,18 @@ void efx_cxl_init(struct efx_nic *efx)
>  
>  	cxl->cxled = cxl_request_dpa(cxl->endpoint, true,
> EFX_CTPIO_BUFFER_SIZE, EFX_CTPIO_BUFFER_SIZE);
> -	if (IS_ERR(cxl->cxled))
> +	if (IS_ERR(cxl->cxled)) {
>  		pci_info(pci_dev, "CXL accel request DPA failed");
> +		return;
> +	}
> +
> +	cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled,
> 1);
> +	if (!cxl->efx_region) {

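This check should be:

	if (IS_ERR(cxl->efx_region))

cxl_create_region() reports failure with ERR_PTR(), not NULL, so the
"!cxl->efx_region" test above never catches an error.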

> +		pci_info(pci_dev, "CXL accel create region failed");
> +		cxl_dpa_free(cxl->cxled);
> +		return;
> +	}
> +
>  out:
>  	cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
>  }
> @@ -102,6 +112,9 @@ void efx_cxl_exit(struct efx_nic *efx)
>  {
>  	struct efx_cxl *cxl = efx->cxl;
>  
> +	if (cxl->efx_region)
> +		cxl_region_detach(cxl->cxled);
> +
>  	if (cxl->cxled)
>  		cxl_dpa_free(cxl->cxled);
>   
> diff --git a/include/linux/cxl_accel_mem.h
> b/include/linux/cxl_accel_mem.h index d4ecb5bb4fc8..a5f9ffc24509
> 100644 --- a/include/linux/cxl_accel_mem.h
> +++ b/include/linux/cxl_accel_mem.h
> @@ -48,4 +48,9 @@ struct cxl_endpoint_decoder *cxl_request_dpa(struct
> cxl_port *endpoint, resource_size_t min,
>  					     resource_size_t max);
>  int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> +				     struct cxl_endpoint_decoder
> **cxled,
> +				     int ways);
> +
> +int cxl_region_detach(struct cxl_endpoint_decoder *cxled);
>  #endif


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ