[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <17e5cf38-39f2-4136-fe2e-6936d8f45633@amd.com>
Date: Fri, 23 Aug 2024 10:31:20 +0100
From: Alejandro Lucero Palau <alucerop@....com>
To: Zhi Wang <zhiw@...dia.com>, alejandro.lucero-palau@....com
Cc: linux-cxl@...r.kernel.org, netdev@...r.kernel.org,
dan.j.williams@...el.com, martin.habets@...inx.com, edward.cree@....com,
davem@...emloft.net, kuba@...nel.org, pabeni@...hat.com,
edumazet@...gle.com, richard.hughes@....com, targupta@...dia.com,
zhiwang@...nel.org
Subject: Re: [PATCH v2 12/15] cxl: allow region creation by type2 drivers
On 8/22/24 14:12, Zhi Wang wrote:
> On Mon, 15 Jul 2024 18:28:32 +0100
> <alejandro.lucero-palau@....com> wrote:
>
>> From: Alejandro Lucero <alucerop@....com>
>>
>> Creating a CXL region requires userspace intervention through the cxl
>> sysfs files. Type2 support should allow accelerator drivers to create
>> such a CXL region from kernel code.
>>
>> Add that functionality and integrate it with the current support for
>> memory expanders.
>>
>> Based on
>> https://lore.kernel.org/linux-cxl/168592149709.1948938.8663425987110396027.stgit@dwillia2-xfh.jf.intel.com/T/#m84598b534cc5664f5bb31521ba6e41c7bc213758
>> Signed-off-by: Alejandro Lucero <alucerop@....com>
>> Signed-off-by: Dan Williams <dan.j.williams@...el.com>
>> ---
>>  drivers/cxl/core/region.c          | 265 ++++++++++++++++++++++-------
>>  drivers/cxl/cxl.h                  |   1 +
>>  drivers/cxl/cxlmem.h               |   4 +-
>>  drivers/net/ethernet/sfc/efx_cxl.c |  15 +-
>>  include/linux/cxl_accel_mem.h      |   5 +
>>  5 files changed, 231 insertions(+), 59 deletions(-)
>>
>> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>> index 5cc71b8868bc..697c8df83a4b 100644
>> --- a/drivers/cxl/core/region.c
>> +++ b/drivers/cxl/core/region.c
>> @@ -479,22 +479,14 @@ static ssize_t interleave_ways_show(struct
>> device *dev,
>> static const struct attribute_group
>> *get_cxl_region_target_group(void);
>> -static ssize_t interleave_ways_store(struct device *dev,
>> - struct device_attribute *attr,
>> - const char *buf, size_t len)
>> +static int set_interleave_ways(struct cxl_region *cxlr, int val)
>> {
>> - struct cxl_root_decoder *cxlrd =
>> to_cxl_root_decoder(dev->parent);
>> + struct cxl_root_decoder *cxlrd =
>> to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld =
>> &cxlrd->cxlsd.cxld;
>> - struct cxl_region *cxlr = to_cxl_region(dev);
>> struct cxl_region_params *p = &cxlr->params;
>> - unsigned int val, save;
>> - int rc;
>> + int save, rc;
>> u8 iw;
>>
>> - rc = kstrtouint(buf, 0, &val);
>> - if (rc)
>> - return rc;
>> -
>> rc = ways_to_eiw(val, &iw);
>> if (rc)
>> return rc;
>> @@ -509,25 +501,42 @@ static ssize_t interleave_ways_store(struct
>> device *dev, return -EINVAL;
>> }
>>
>> - rc = down_write_killable(&cxl_region_rwsem);
>> - if (rc)
>> - return rc;
>> - if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>> - rc = -EBUSY;
>> - goto out;
>> - }
>> + lockdep_assert_held_write(&cxl_region_rwsem);
>> + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
>> + return -EBUSY;
>>
>> save = p->interleave_ways;
>> p->interleave_ways = val;
>> rc = sysfs_update_group(&cxlr->dev.kobj,
>> get_cxl_region_target_group()); if (rc)
>> p->interleave_ways = save;
>> -out:
>> +
>> + return rc;
>> +}
>> +
>> +static ssize_t interleave_ways_store(struct device *dev,
>> + struct device_attribute *attr,
>> + const char *buf, size_t len)
>> +{
>> + struct cxl_region *cxlr = to_cxl_region(dev);
>> + unsigned int val;
>> + int rc;
>> +
>> + rc = kstrtouint(buf, 0, &val);
>> + if (rc)
>> + return rc;
>> +
>> + rc = down_write_killable(&cxl_region_rwsem);
>> + if (rc)
>> + return rc;
>> +
>> + rc = set_interleave_ways(cxlr, val);
>> up_write(&cxl_region_rwsem);
>> if (rc)
>> return rc;
>> return len;
>> }
>> +
>> static DEVICE_ATTR_RW(interleave_ways);
>>
>> static ssize_t interleave_granularity_show(struct device *dev,
>> @@ -547,21 +556,14 @@ static ssize_t
>> interleave_granularity_show(struct device *dev, return rc;
>> }
>>
>> -static ssize_t interleave_granularity_store(struct device *dev,
>> - struct device_attribute
>> *attr,
>> - const char *buf, size_t
>> len) +static int set_interleave_granularity(struct cxl_region *cxlr,
>> int val) {
>> - struct cxl_root_decoder *cxlrd =
>> to_cxl_root_decoder(dev->parent);
>> + struct cxl_root_decoder *cxlrd =
>> to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld =
>> &cxlrd->cxlsd.cxld;
>> - struct cxl_region *cxlr = to_cxl_region(dev);
>> struct cxl_region_params *p = &cxlr->params;
>> - int rc, val;
>> + int rc;
>> u16 ig;
>>
>> - rc = kstrtoint(buf, 0, &val);
>> - if (rc)
>> - return rc;
>> -
>> rc = granularity_to_eig(val, &ig);
>> if (rc)
>> return rc;
>> @@ -577,21 +579,36 @@ static ssize_t
>> interleave_granularity_store(struct device *dev, if
>> (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
>> return -EINVAL;
>> + lockdep_assert_held_write(&cxl_region_rwsem);
>> + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
>> + return -EBUSY;
>> +
>> + p->interleave_granularity = val;
>> + return 0;
>> +}
>> +
>> +static ssize_t interleave_granularity_store(struct device *dev,
>> + struct device_attribute
>> *attr,
>> + const char *buf, size_t
>> len) +{
>> + struct cxl_region *cxlr = to_cxl_region(dev);
>> + int rc, val;
>> +
>> + rc = kstrtoint(buf, 0, &val);
>> + if (rc)
>> + return rc;
>> +
>> rc = down_write_killable(&cxl_region_rwsem);
>> if (rc)
>> return rc;
>> - if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>> - rc = -EBUSY;
>> - goto out;
>> - }
>>
>> - p->interleave_granularity = val;
>> -out:
>> + rc = set_interleave_granularity(cxlr, val);
>> up_write(&cxl_region_rwsem);
>> if (rc)
>> return rc;
>> return len;
>> }
>> +
>> static DEVICE_ATTR_RW(interleave_granularity);
>>
>> static ssize_t resource_show(struct device *dev, struct
>> device_attribute *attr, @@ -2193,7 +2210,7 @@ static int
>> cxl_region_attach(struct cxl_region *cxlr, return 0;
>> }
>>
>> -static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
>> +int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
>> {
>> struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
>> struct cxl_region *cxlr = cxled->cxld.region;
>> @@ -2252,6 +2269,7 @@ static int cxl_region_detach(struct
>> cxl_endpoint_decoder *cxled) put_device(&cxlr->dev);
>> return rc;
>> }
>> +EXPORT_SYMBOL_NS_GPL(cxl_region_detach, CXL);
>>
>> void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
>> {
>> @@ -2746,6 +2764,14 @@ cxl_find_region_by_name(struct
>> cxl_root_decoder *cxlrd, const char *name) return
>> to_cxl_region(region_dev); }
>>
>> +static void drop_region(struct cxl_region *cxlr)
>> +{
>> + struct cxl_root_decoder *cxlrd =
>> to_cxl_root_decoder(cxlr->dev.parent);
>> + struct cxl_port *port = cxlrd_to_port(cxlrd);
>> +
>> + devm_release_action(port->uport_dev, unregister_region,
>> cxlr); +}
>> +
>> static ssize_t delete_region_store(struct device *dev,
>> struct device_attribute *attr,
>> const char *buf, size_t len)
>> @@ -3353,17 +3379,18 @@ static int match_region_by_range(struct
>> device *dev, void *data) return rc;
>> }
>>
>> -/* Establish an empty region covering the given HPA range */
>> -static struct cxl_region *construct_region(struct cxl_root_decoder
>> *cxlrd,
>> - struct
>> cxl_endpoint_decoder *cxled) +static void construct_region_end(void)
>> +{
>> + up_write(&cxl_region_rwsem);
>> +}
>> +
>> +static struct cxl_region *construct_region_begin(struct
>> cxl_root_decoder *cxlrd,
>> + struct
>> cxl_endpoint_decoder *cxled) {
>> struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>> - struct cxl_port *port = cxlrd_to_port(cxlrd);
>> - struct range *hpa = &cxled->cxld.hpa_range;
>> struct cxl_region_params *p;
>> struct cxl_region *cxlr;
>> - struct resource *res;
>> - int rc;
>> + int err = 0;
>>
>> do {
>> cxlr = __create_region(cxlrd, cxled->mode,
>> @@ -3372,8 +3399,7 @@ static struct cxl_region
>> *construct_region(struct cxl_root_decoder *cxlrd, } while
>> (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
>> if (IS_ERR(cxlr)) {
>> - dev_err(cxlmd->dev.parent,
>> - "%s:%s: %s failed assign region: %ld\n",
>> + dev_err(cxlmd->dev.parent,"%s:%s: %s failed assign
>> region: %ld\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>> __func__, PTR_ERR(cxlr));
>> return cxlr;
>> @@ -3383,23 +3409,47 @@ static struct cxl_region
>> *construct_region(struct cxl_root_decoder *cxlrd, p = &cxlr->params;
>> if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>> dev_err(cxlmd->dev.parent,
>> - "%s:%s: %s autodiscovery interrupted\n",
>> + "%s:%s: %s region setup interrupted\n",
>> dev_name(&cxlmd->dev),
>> dev_name(&cxled->cxld.dev), __func__);
>> - rc = -EBUSY;
>> - goto err;
>> + err = -EBUSY;
>> + }
>> +
>> + if (err) {
>> + construct_region_end();
>> + drop_region(cxlr);
>> + return ERR_PTR(err);
>> }
>> + return cxlr;
>> +}
>> +
>> +
>> +/* Establish an empty region covering the given HPA range */
>> +static struct cxl_region *construct_region(struct cxl_root_decoder
>> *cxlrd,
>> + struct
>> cxl_endpoint_decoder *cxled) +{
>> + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>> + struct range *hpa = &cxled->cxld.hpa_range;
>> + struct cxl_region_params *p;
>> + struct cxl_region *cxlr;
>> + struct resource *res;
>> + int rc;
>> +
>> + cxlr = construct_region_begin(cxlrd, cxled);
>> + if (IS_ERR(cxlr))
>> + return cxlr;
>>
>> set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>>
>> res = kmalloc(sizeof(*res), GFP_KERNEL);
>> if (!res) {
>> rc = -ENOMEM;
>> - goto err;
>> + goto out;
>> }
>>
>> *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
>> dev_name(&cxlr->dev));
>> +
>> rc = insert_resource(cxlrd->res, res);
>> if (rc) {
>> /*
>> @@ -3412,6 +3462,7 @@ static struct cxl_region
>> *construct_region(struct cxl_root_decoder *cxlrd, __func__,
>> dev_name(&cxlr->dev)); }
>>
>> + p = &cxlr->params;
>> p->res = res;
>> p->interleave_ways = cxled->cxld.interleave_ways;
>> p->interleave_granularity =
>> cxled->cxld.interleave_granularity; @@ -3419,24 +3470,124 @@ static
>> struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>> rc = sysfs_update_group(&cxlr->dev.kobj,
>> get_cxl_region_target_group()); if (rc)
>> - goto err;
>> + goto out;
>>
>> dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig:
>> %d\n",
>> - dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>> __func__,
>> - dev_name(&cxlr->dev), p->res, p->interleave_ways,
>> - p->interleave_granularity);
>> + dev_name(&cxlmd->dev),
>> + dev_name(&cxled->cxld.dev),
>> __func__,
>> + dev_name(&cxlr->dev), p->res,
>> + p->interleave_ways,
>> + p->interleave_granularity);
>>
>> /* ...to match put_device() in cxl_add_to_region() */
>> get_device(&cxlr->dev);
>> up_write(&cxl_region_rwsem);
>> +out:
>> + construct_region_end();
>> + if (rc) {
>> + drop_region(cxlr);
>> + return ERR_PTR(rc);
>> + }
>> + return cxlr;
>> +}
>> +
>> +static struct cxl_region *
>> +__construct_new_region(struct cxl_root_decoder *cxlrd,
>> + struct cxl_endpoint_decoder **cxled, int ways)
>> +{
>> + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
>> + struct cxl_region_params *p;
>> + resource_size_t size = 0;
>> + struct cxl_region *cxlr;
>> + int rc, i;
>> +
>> + /* If interleaving is not supported, why does ways need to
>> be at least 1? */
>> + if (ways < 1)
>> + return ERR_PTR(-EINVAL);
>> +
>> + cxlr = construct_region_begin(cxlrd, cxled[0]);
>> + if (IS_ERR(cxlr))
>> + return cxlr;
>> +
>> + rc = set_interleave_ways(cxlr, ways);
>> + if (rc)
>> + goto out;
>> +
>> + rc = set_interleave_granularity(cxlr,
>> cxld->interleave_granularity);
>> + if (rc)
>> + goto out;
>> +
>> + down_read(&cxl_dpa_rwsem);
>> + for (i = 0; i < ways; i++) {
>> + if (!cxled[i]->dpa_res)
>> + break;
>> + size += resource_size(cxled[i]->dpa_res);
>> + }
>> + up_read(&cxl_dpa_rwsem);
>> +
>> + if (i < ways)
>> + goto out;
>> +
>> + rc = alloc_hpa(cxlr, size);
>> + if (rc)
>> + goto out;
>> +
>> + down_read(&cxl_dpa_rwsem);
>> + for (i = 0; i < ways; i++) {
>> + rc = cxl_region_attach(cxlr, cxled[i], i);
>> + if (rc)
>> + break;
>> + }
>> + up_read(&cxl_dpa_rwsem);
>> +
>> + if (rc)
>> + goto out;
>> +
>> + rc = cxl_region_decode_commit(cxlr);
>> + if (rc)
>> + goto out;
>>
>> + p = &cxlr->params;
>> + p->state = CXL_CONFIG_COMMIT;
>> +out:
>> + construct_region_end();
>> + if (rc) {
>> + drop_region(cxlr);
>> + return ERR_PTR(rc);
>> + }
>> return cxlr;
>> +}
>>
>> -err:
>> - up_write(&cxl_region_rwsem);
>> - devm_release_action(port->uport_dev, unregister_region,
>> cxlr);
>> - return ERR_PTR(rc);
>> +/**
>> + * cxl_create_region - Establish a region given an array of endpoint
>> decoders
>> + * @cxlrd: root decoder to allocate HPA
>> + * @cxled: array of endpoint decoders with reserved DPA capacity
>> + * @ways: size of @cxled array
>> + *
>> + * Returns a fully formed region in the commit state and attached to
>> the
>> + * cxl_region driver.
>> + */
>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>> + struct cxl_endpoint_decoder
>> **cxled,
>> + int ways)
>> +{
>> + struct cxl_region *cxlr;
>> +
>> + mutex_lock(&cxlrd->range_lock);
>> + cxlr = __construct_new_region(cxlrd, cxled, ways);
>> + mutex_unlock(&cxlrd->range_lock);
>> +
>> + if (IS_ERR(cxlr))
>> + return cxlr;
>> +
>> + if (device_attach(&cxlr->dev) <= 0) {
>> + dev_err(&cxlr->dev, "failed to create region\n");
>> + drop_region(cxlr);
>> + return ERR_PTR(-ENODEV);
>> + }
>> + return cxlr;
>> }
>> +EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
>>
>> int cxl_add_to_region(struct cxl_port *root, struct
>> cxl_endpoint_decoder *cxled) {
>> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
>> index d3fdd2c1e066..1bf3b74ff959 100644
>> --- a/drivers/cxl/cxl.h
>> +++ b/drivers/cxl/cxl.h
>> @@ -905,6 +905,7 @@ void cxl_coordinates_combine(struct
>> access_coordinate *out,
>> bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
>>
>> +int cxl_region_detach(struct cxl_endpoint_decoder *cxled);
>> /*
>> * Unit test builds overrides this to __weak, find the 'strong'
>> version
>> * of these symbols in tools/testing/cxl/.
>> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
>> index a0e0795ec064..377bb3cd2d47 100644
>> --- a/drivers/cxl/cxlmem.h
>> +++ b/drivers/cxl/cxlmem.h
>> @@ -881,5 +881,7 @@ struct cxl_root_decoder
>> *cxl_get_hpa_freespace(struct cxl_port *endpoint, int interleave_ways,
>> unsigned long flags,
>> resource_size_t *max);
>> -
>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>> + struct cxl_endpoint_decoder
>> **cxled,
>> + int ways);
>> #endif /* __CXL_MEM_H__ */
>> diff --git a/drivers/net/ethernet/sfc/efx_cxl.c
>> b/drivers/net/ethernet/sfc/efx_cxl.c index b5626d724b52..4012e3faa298
>> 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c
>> +++ b/drivers/net/ethernet/sfc/efx_cxl.c
>> @@ -92,8 +92,18 @@ void efx_cxl_init(struct efx_nic *efx)
>>
>> cxl->cxled = cxl_request_dpa(cxl->endpoint, true,
>> EFX_CTPIO_BUFFER_SIZE, EFX_CTPIO_BUFFER_SIZE);
>> - if (IS_ERR(cxl->cxled))
>> + if (IS_ERR(cxl->cxled)) {
>> pci_info(pci_dev, "CXL accel request DPA failed");
>> + return;
>> + }
>> +
>> + cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled,
>> 1);
>> + if (!cxl->efx_region) {
> if (IS_ERR(cxl->efx_region))
>
I'll fix it.
Thanks
Powered by blists - more mailing lists