[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240822161226.00001736.zhiw@nvidia.com>
Date: Thu, 22 Aug 2024 16:12:26 +0300
From: Zhi Wang <zhiw@...dia.com>
To: <alejandro.lucero-palau@....com>
CC: <linux-cxl@...r.kernel.org>, <netdev@...r.kernel.org>,
<dan.j.williams@...el.com>, <martin.habets@...inx.com>,
<edward.cree@....com>, <davem@...emloft.net>, <kuba@...nel.org>,
<pabeni@...hat.com>, <edumazet@...gle.com>, <richard.hughes@....com>,
Alejandro Lucero <alucerop@....com>, <targupta@...dia.com>,
<zhiwang@...nel.org>
Subject: Re: [PATCH v2 12/15] cxl: allow region creation by type2 drivers
On Mon, 15 Jul 2024 18:28:32 +0100
<alejandro.lucero-palau@....com> wrote:
> From: Alejandro Lucero <alucerop@....com>
>
> Creating a CXL region requires userspace intervention through the cxl
> sysfs files. Type2 support should allow accelerator drivers to create
> such a CXL region from kernel code.
>
> Add that functionality and integrate it with the current support for
> memory expanders.
>
> Based on
> https://lore.kernel.org/linux-cxl/168592149709.1948938.8663425987110396027.stgit@dwillia2-xfh.jf.intel.com/T/#m84598b534cc5664f5bb31521ba6e41c7bc213758
> Signed-off-by: Alejandro Lucero <alucerop@....com>
> Signed-off-by: Dan Williams <dan.j.williams@...el.com>
> ---
>  drivers/cxl/core/region.c          | 265 ++++++++++++++++++++++-------
>  drivers/cxl/cxl.h                  |   1 +
>  drivers/cxl/cxlmem.h               |   4 +-
>  drivers/net/ethernet/sfc/efx_cxl.c |  15 +-
>  include/linux/cxl_accel_mem.h      |   5 +
>  5 files changed, 231 insertions(+), 59 deletions(-)
>
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 5cc71b8868bc..697c8df83a4b 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -479,22 +479,14 @@ static ssize_t interleave_ways_show(struct
> device *dev,
> static const struct attribute_group
> *get_cxl_region_target_group(void);
> -static ssize_t interleave_ways_store(struct device *dev,
> - struct device_attribute *attr,
> - const char *buf, size_t len)
> +static int set_interleave_ways(struct cxl_region *cxlr, int val)
> {
> - struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(dev->parent);
> + struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld =
> &cxlrd->cxlsd.cxld;
> - struct cxl_region *cxlr = to_cxl_region(dev);
> struct cxl_region_params *p = &cxlr->params;
> - unsigned int val, save;
> - int rc;
> + int save, rc;
> u8 iw;
>
> - rc = kstrtouint(buf, 0, &val);
> - if (rc)
> - return rc;
> -
> rc = ways_to_eiw(val, &iw);
> if (rc)
> return rc;
> @@ -509,25 +501,42 @@ static ssize_t interleave_ways_store(struct
> device *dev, return -EINVAL;
> }
>
> - rc = down_write_killable(&cxl_region_rwsem);
> - if (rc)
> - return rc;
> - if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
> - rc = -EBUSY;
> - goto out;
> - }
> + lockdep_assert_held_write(&cxl_region_rwsem);
> + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
> + return -EBUSY;
>
> save = p->interleave_ways;
> p->interleave_ways = val;
> rc = sysfs_update_group(&cxlr->dev.kobj,
> get_cxl_region_target_group()); if (rc)
> p->interleave_ways = save;
> -out:
> +
> + return rc;
> +}
> +
> +static ssize_t interleave_ways_store(struct device *dev,
> + struct device_attribute *attr,
> + const char *buf, size_t len)
> +{
> + struct cxl_region *cxlr = to_cxl_region(dev);
> + unsigned int val;
> + int rc;
> +
> + rc = kstrtouint(buf, 0, &val);
> + if (rc)
> + return rc;
> +
> + rc = down_write_killable(&cxl_region_rwsem);
> + if (rc)
> + return rc;
> +
> + rc = set_interleave_ways(cxlr, val);
> up_write(&cxl_region_rwsem);
> if (rc)
> return rc;
> return len;
> }
> +
> static DEVICE_ATTR_RW(interleave_ways);
>
> static ssize_t interleave_granularity_show(struct device *dev,
> @@ -547,21 +556,14 @@ static ssize_t
> interleave_granularity_show(struct device *dev, return rc;
> }
>
> -static ssize_t interleave_granularity_store(struct device *dev,
> - struct device_attribute
> *attr,
> - const char *buf, size_t
> len) +static int set_interleave_granularity(struct cxl_region *cxlr,
> int val) {
> - struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(dev->parent);
> + struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld =
> &cxlrd->cxlsd.cxld;
> - struct cxl_region *cxlr = to_cxl_region(dev);
> struct cxl_region_params *p = &cxlr->params;
> - int rc, val;
> + int rc;
> u16 ig;
>
> - rc = kstrtoint(buf, 0, &val);
> - if (rc)
> - return rc;
> -
> rc = granularity_to_eig(val, &ig);
> if (rc)
> return rc;
> @@ -577,21 +579,36 @@ static ssize_t
> interleave_granularity_store(struct device *dev, if
> (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
> return -EINVAL;
> + lockdep_assert_held_write(&cxl_region_rwsem);
> + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
> + return -EBUSY;
> +
> + p->interleave_granularity = val;
> + return 0;
> +}
> +
> +static ssize_t interleave_granularity_store(struct device *dev,
> + struct device_attribute
> *attr,
> + const char *buf, size_t
> len) +{
> + struct cxl_region *cxlr = to_cxl_region(dev);
> + int rc, val;
> +
> + rc = kstrtoint(buf, 0, &val);
> + if (rc)
> + return rc;
> +
> rc = down_write_killable(&cxl_region_rwsem);
> if (rc)
> return rc;
> - if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
> - rc = -EBUSY;
> - goto out;
> - }
>
> - p->interleave_granularity = val;
> -out:
> + rc = set_interleave_granularity(cxlr, val);
> up_write(&cxl_region_rwsem);
> if (rc)
> return rc;
> return len;
> }
> +
> static DEVICE_ATTR_RW(interleave_granularity);
>
> static ssize_t resource_show(struct device *dev, struct
> device_attribute *attr, @@ -2193,7 +2210,7 @@ static int
> cxl_region_attach(struct cxl_region *cxlr, return 0;
> }
>
> -static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
> +int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
> {
> struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
> struct cxl_region *cxlr = cxled->cxld.region;
> @@ -2252,6 +2269,7 @@ static int cxl_region_detach(struct
> cxl_endpoint_decoder *cxled) put_device(&cxlr->dev);
> return rc;
> }
> +EXPORT_SYMBOL_NS_GPL(cxl_region_detach, CXL);
>
> void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
> {
> @@ -2746,6 +2764,14 @@ cxl_find_region_by_name(struct
> cxl_root_decoder *cxlrd, const char *name) return
> to_cxl_region(region_dev); }
>
> +static void drop_region(struct cxl_region *cxlr)
> +{
> + struct cxl_root_decoder *cxlrd =
> to_cxl_root_decoder(cxlr->dev.parent);
> + struct cxl_port *port = cxlrd_to_port(cxlrd);
> +
> + devm_release_action(port->uport_dev, unregister_region,
> cxlr); +}
> +
> static ssize_t delete_region_store(struct device *dev,
> struct device_attribute *attr,
> const char *buf, size_t len)
> @@ -3353,17 +3379,18 @@ static int match_region_by_range(struct
> device *dev, void *data) return rc;
> }
>
> -/* Establish an empty region covering the given HPA range */
> -static struct cxl_region *construct_region(struct cxl_root_decoder
> *cxlrd,
> - struct
> cxl_endpoint_decoder *cxled) +static void construct_region_end(void)
> +{
> + up_write(&cxl_region_rwsem);
> +}
> +
> +static struct cxl_region *construct_region_begin(struct
> cxl_root_decoder *cxlrd,
> + struct
> cxl_endpoint_decoder *cxled) {
> struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> - struct cxl_port *port = cxlrd_to_port(cxlrd);
> - struct range *hpa = &cxled->cxld.hpa_range;
> struct cxl_region_params *p;
> struct cxl_region *cxlr;
> - struct resource *res;
> - int rc;
> + int err = 0;
>
> do {
> cxlr = __create_region(cxlrd, cxled->mode,
> @@ -3372,8 +3399,7 @@ static struct cxl_region
> *construct_region(struct cxl_root_decoder *cxlrd, } while
> (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
> if (IS_ERR(cxlr)) {
> - dev_err(cxlmd->dev.parent,
> - "%s:%s: %s failed assign region: %ld\n",
> + dev_err(cxlmd->dev.parent,"%s:%s: %s failed assign
> region: %ld\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
> __func__, PTR_ERR(cxlr));
> return cxlr;
> @@ -3383,23 +3409,47 @@ static struct cxl_region
> *construct_region(struct cxl_root_decoder *cxlrd, p = &cxlr->params;
> if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
> dev_err(cxlmd->dev.parent,
> - "%s:%s: %s autodiscovery interrupted\n",
> + "%s:%s: %s region setup interrupted\n",
> dev_name(&cxlmd->dev),
> dev_name(&cxled->cxld.dev), __func__);
> - rc = -EBUSY;
> - goto err;
> + err = -EBUSY;
> + }
> +
> + if (err) {
> + construct_region_end();
> + drop_region(cxlr);
> + return ERR_PTR(err);
> }
> + return cxlr;
> +}
> +
> +
> +/* Establish an empty region covering the given HPA range */
> +static struct cxl_region *construct_region(struct cxl_root_decoder
> *cxlrd,
> + struct
> cxl_endpoint_decoder *cxled) +{
> + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> + struct range *hpa = &cxled->cxld.hpa_range;
> + struct cxl_region_params *p;
> + struct cxl_region *cxlr;
> + struct resource *res;
> + int rc;
> +
> + cxlr = construct_region_begin(cxlrd, cxled);
> + if (IS_ERR(cxlr))
> + return cxlr;
>
> set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>
> res = kmalloc(sizeof(*res), GFP_KERNEL);
> if (!res) {
> rc = -ENOMEM;
> - goto err;
> + goto out;
> }
>
> *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
> dev_name(&cxlr->dev));
> +
> rc = insert_resource(cxlrd->res, res);
> if (rc) {
> /*
> @@ -3412,6 +3462,7 @@ static struct cxl_region
> *construct_region(struct cxl_root_decoder *cxlrd, __func__,
> dev_name(&cxlr->dev)); }
>
> + p = &cxlr->params;
> p->res = res;
> p->interleave_ways = cxled->cxld.interleave_ways;
> p->interleave_granularity =
> cxled->cxld.interleave_granularity; @@ -3419,24 +3470,124 @@ static
> struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
> rc = sysfs_update_group(&cxlr->dev.kobj,
> get_cxl_region_target_group()); if (rc)
> - goto err;
> + goto out;
>
> dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig:
> %d\n",
> - dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
> __func__,
> - dev_name(&cxlr->dev), p->res, p->interleave_ways,
> - p->interleave_granularity);
> + dev_name(&cxlmd->dev),
> + dev_name(&cxled->cxld.dev),
> __func__,
> + dev_name(&cxlr->dev), p->res,
> + p->interleave_ways,
> + p->interleave_granularity);
>
> /* ...to match put_device() in cxl_add_to_region() */
> get_device(&cxlr->dev);
> up_write(&cxl_region_rwsem);
> +out:
> + construct_region_end();
> + if (rc) {
> + drop_region(cxlr);
> + return ERR_PTR(rc);
> + }
> + return cxlr;
> +}
> +
> +static struct cxl_region *
> +__construct_new_region(struct cxl_root_decoder *cxlrd,
> + struct cxl_endpoint_decoder **cxled, int ways)
> +{
> + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
> + struct cxl_region_params *p;
> + resource_size_t size = 0;
> + struct cxl_region *cxlr;
> + int rc, i;
> +
> + /* If interleaving is not supported, why does ways need to
> be at least 1? */
> + if (ways < 1)
> + return ERR_PTR(-EINVAL);
> +
> + cxlr = construct_region_begin(cxlrd, cxled[0]);
> + if (IS_ERR(cxlr))
> + return cxlr;
> +
> + rc = set_interleave_ways(cxlr, ways);
> + if (rc)
> + goto out;
> +
> + rc = set_interleave_granularity(cxlr,
> cxld->interleave_granularity);
> + if (rc)
> + goto out;
> +
> + down_read(&cxl_dpa_rwsem);
> + for (i = 0; i < ways; i++) {
> + if (!cxled[i]->dpa_res)
> + break;
> + size += resource_size(cxled[i]->dpa_res);
> + }
> + up_read(&cxl_dpa_rwsem);
> +
> + if (i < ways)
> + goto out;
> +
> + rc = alloc_hpa(cxlr, size);
> + if (rc)
> + goto out;
> +
> + down_read(&cxl_dpa_rwsem);
> + for (i = 0; i < ways; i++) {
> + rc = cxl_region_attach(cxlr, cxled[i], i);
> + if (rc)
> + break;
> + }
> + up_read(&cxl_dpa_rwsem);
> +
> + if (rc)
> + goto out;
> +
> + rc = cxl_region_decode_commit(cxlr);
> + if (rc)
> + goto out;
>
> + p = &cxlr->params;
> + p->state = CXL_CONFIG_COMMIT;
> +out:
> + construct_region_end();
> + if (rc) {
> + drop_region(cxlr);
> + return ERR_PTR(rc);
> + }
> return cxlr;
> +}
>
> -err:
> - up_write(&cxl_region_rwsem);
> - devm_release_action(port->uport_dev, unregister_region,
> cxlr);
> - return ERR_PTR(rc);
> +/**
> + * cxl_create_region - Establish a region given an array of endpoint
> decoders
> + * @cxlrd: root decoder to allocate HPA
> + * @cxled: array of endpoint decoders with reserved DPA capacity
> + * @ways: size of @cxled array
> + *
> + * Returns a fully formed region in the commit state and attached to
> the
> + * cxl_region driver.
> + */
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> + struct cxl_endpoint_decoder
> **cxled,
> + int ways)
> +{
> + struct cxl_region *cxlr;
> +
> + mutex_lock(&cxlrd->range_lock);
> + cxlr = __construct_new_region(cxlrd, cxled, ways);
> + mutex_unlock(&cxlrd->range_lock);
> +
> + if (IS_ERR(cxlr))
> + return cxlr;
> +
> + if (device_attach(&cxlr->dev) <= 0) {
> + dev_err(&cxlr->dev, "failed to create region\n");
> + drop_region(cxlr);
> + return ERR_PTR(-ENODEV);
> + }
> + return cxlr;
> }
> +EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
>
> int cxl_add_to_region(struct cxl_port *root, struct
> cxl_endpoint_decoder *cxled) {
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index d3fdd2c1e066..1bf3b74ff959 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -905,6 +905,7 @@ void cxl_coordinates_combine(struct
> access_coordinate *out,
> bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
>
> +int cxl_region_detach(struct cxl_endpoint_decoder *cxled);
> /*
> * Unit test builds overrides this to __weak, find the 'strong'
> version
> * of these symbols in tools/testing/cxl/.
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index a0e0795ec064..377bb3cd2d47 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -881,5 +881,7 @@ struct cxl_root_decoder
> *cxl_get_hpa_freespace(struct cxl_port *endpoint, int interleave_ways,
> unsigned long flags,
> resource_size_t *max);
> -
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> + struct cxl_endpoint_decoder
> **cxled,
> + int ways);
> #endif /* __CXL_MEM_H__ */
> diff --git a/drivers/net/ethernet/sfc/efx_cxl.c
> b/drivers/net/ethernet/sfc/efx_cxl.c index b5626d724b52..4012e3faa298
> 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c
> +++ b/drivers/net/ethernet/sfc/efx_cxl.c
> @@ -92,8 +92,18 @@ void efx_cxl_init(struct efx_nic *efx)
>
> cxl->cxled = cxl_request_dpa(cxl->endpoint, true,
> EFX_CTPIO_BUFFER_SIZE, EFX_CTPIO_BUFFER_SIZE);
> - if (IS_ERR(cxl->cxled))
> + if (IS_ERR(cxl->cxled)) {
> pci_info(pci_dev, "CXL accel request DPA failed");
> + return;
> + }
> +
> + cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled,
> 1);
> + if (!cxl->efx_region) {
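This check should be if (IS_ERR(cxl->efx_region)): cxl_create_region() reports failure with an ERR_PTR() rather than NULL, so the !cxl->efx_region test above will not catch the error.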
> + pci_info(pci_dev, "CXL accel create region failed");
> + cxl_dpa_free(cxl->cxled);
> + return;
> + }
> +
> out:
> cxl_release_endpoint(cxl->cxlmd, cxl->endpoint);
> }
> @@ -102,6 +112,9 @@ void efx_cxl_exit(struct efx_nic *efx)
> {
> struct efx_cxl *cxl = efx->cxl;
>
> + if (cxl->efx_region)
> + cxl_region_detach(cxl->cxled);
> +
> if (cxl->cxled)
> cxl_dpa_free(cxl->cxled);
>
> diff --git a/include/linux/cxl_accel_mem.h
> b/include/linux/cxl_accel_mem.h index d4ecb5bb4fc8..a5f9ffc24509
> 100644 --- a/include/linux/cxl_accel_mem.h
> +++ b/include/linux/cxl_accel_mem.h
> @@ -48,4 +48,9 @@ struct cxl_endpoint_decoder *cxl_request_dpa(struct
> cxl_port *endpoint, resource_size_t min,
> resource_size_t max);
> int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> + struct cxl_endpoint_decoder
> **cxled,
> + int ways);
> +
> +int cxl_region_detach(struct cxl_endpoint_decoder *cxled);
> #endif
Powered by blists - more mailing lists