[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ab8b5f77-7a53-a716-a1e6-bf1f7e06eb50@amd.com>
Date: Mon, 19 Aug 2024 17:11:45 +0100
From: Alejandro Lucero Palau <alucerop@....com>
To: Jonathan Cameron <Jonathan.Cameron@...wei.com>,
alejandro.lucero-palau@....com
Cc: linux-cxl@...r.kernel.org, netdev@...r.kernel.org,
dan.j.williams@...el.com, martin.habets@...inx.com, edward.cree@....com,
davem@...emloft.net, kuba@...nel.org, pabeni@...hat.com,
edumazet@...gle.com, richard.hughes@....com
Subject: Re: [PATCH v2 12/15] cxl: allow region creation by type2 drivers
On 8/4/24 19:29, Jonathan Cameron wrote:
> On Mon, 15 Jul 2024 18:28:32 +0100
> alejandro.lucero-palau@....com wrote:
>
>> From: Alejandro Lucero <alucerop@....com>
>>
>> Creating a CXL region requires userspace intervention through the cxl
>> sysfs files. Type2 support should allow accelerator drivers to create
>> such a cxl region from kernel code.
>>
>> Add that functionality and integrate it with the current support for
>> memory expanders.
>>
>> Based on https://lore.kernel.org/linux-cxl/168592149709.1948938.8663425987110396027.stgit@dwillia2-xfh.jf.intel.com/T/#m84598b534cc5664f5bb31521ba6e41c7bc213758
>> Signed-off-by: Alejandro Lucero <alucerop@....com>
>> Signed-off-by: Dan Williams <dan.j.williams@...el.com>
> Needs a Co-developed-by tag or similar, given Dan didn't email this patch
> (which this sign-off list suggests he did).
Yes, I'll fix it.
>
> I'll take another look at the locking, but my main comment is
> that it is really confusing so I have no idea if it's right.
> Consider different ways of breaking up the code you need
> to try and keep the locking obvious.
I have to agree, and this means I need to work on it. I know it works for
my case, which was my main focus for the RFC, but I have not looked at it
with the right mindset.
I'll take your following comments as valuable input for the required work.
Thanks!
> Jonathan
>
>> +
>> +static ssize_t interleave_ways_store(struct device *dev,
>> + struct device_attribute *attr,
>> + const char *buf, size_t len)
>> +{
>> + struct cxl_region *cxlr = to_cxl_region(dev);
>> + unsigned int val;
>> + int rc;
>> +
>> + rc = kstrtouint(buf, 0, &val);
>> + if (rc)
>> + return rc;
>> +
>> + rc = down_write_killable(&cxl_region_rwsem);
>> + if (rc)
>> + return rc;
>> +
>> + rc = set_interleave_ways(cxlr, val);
>> up_write(&cxl_region_rwsem);
>> if (rc)
>> return rc;
>> return len;
>> }
>> +
> The missing blank line was probably intentional. It is common to group a
> macro like this with the function it uses by not having a blank line.
>> static DEVICE_ATTR_RW(interleave_ways);
>>
>> static ssize_t interleave_granularity_show(struct device *dev,
>> @@ -547,21 +556,14 @@ static ssize_t interleave_granularity_show(struct device *dev,
>> return rc;
>> }
>> +static ssize_t interleave_granularity_store(struct device *dev,
>> + struct device_attribute *attr,
>> + const char *buf, size_t len)
>> +{
>> + struct cxl_region *cxlr = to_cxl_region(dev);
>> + int rc, val;
>> +
>> + rc = kstrtoint(buf, 0, &val);
>> + if (rc)
>> + return rc;
>> +
>> rc = down_write_killable(&cxl_region_rwsem);
>> if (rc)
>> return rc;
>> - if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>> - rc = -EBUSY;
>> - goto out;
>> - }
>>
>> - p->interleave_granularity = val;
>> -out:
>> + rc = set_interleave_granularity(cxlr, val);
>> up_write(&cxl_region_rwsem);
>> if (rc)
>> return rc;
>> return len;
>> }
>> +
> grump.
>
>> static DEVICE_ATTR_RW(interleave_granularity);
>> +/* Establish an empty region covering the given HPA range */
>> +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>> + struct cxl_endpoint_decoder *cxled)
>> +{
>> + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>> + struct range *hpa = &cxled->cxld.hpa_range;
>> + struct cxl_region_params *p;
>> + struct cxl_region *cxlr;
>> + struct resource *res;
>> + int rc;
>> +
>> + cxlr = construct_region_begin(cxlrd, cxled);
>> + if (IS_ERR(cxlr))
>> + return cxlr;
>>
>> set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>>
>> res = kmalloc(sizeof(*res), GFP_KERNEL);
>> if (!res) {
>> rc = -ENOMEM;
>> - goto err;
>> + goto out;
>> }
>>
>> *res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
>> dev_name(&cxlr->dev));
>> +
>> rc = insert_resource(cxlrd->res, res);
>> if (rc) {
>> /*
>> @@ -3412,6 +3462,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>> __func__, dev_name(&cxlr->dev));
>> }
>>
>> + p = &cxlr->params;
>> p->res = res;
>> p->interleave_ways = cxled->cxld.interleave_ways;
>> p->interleave_granularity = cxled->cxld.interleave_granularity;
>> @@ -3419,24 +3470,124 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>
>> rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
>> if (rc)
>> - goto err;
>> + goto out;
>>
>> dev_dbg(cxlmd->dev.parent, "%s:%s: %s %s res: %pr iw: %d ig: %d\n",
>> - dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__,
>> - dev_name(&cxlr->dev), p->res, p->interleave_ways,
>> - p->interleave_granularity);
>> + dev_name(&cxlmd->dev),
>> + dev_name(&cxled->cxld.dev), __func__,
>> + dev_name(&cxlr->dev), p->res,
>> + p->interleave_ways,
>> + p->interleave_granularity);
>>
>> /* ...to match put_device() in cxl_add_to_region() */
>> get_device(&cxlr->dev);
>> up_write(&cxl_region_rwsem);
>> +out:
>> + construct_region_end();
> two calls to up_write(&cxl_region_rwsem) next to each other?
>
>> + if (rc) {
>> + drop_region(cxlr);
>> + return ERR_PTR(rc);
>> + }
>> + return cxlr;
>> +}
>> +
>> +static struct cxl_region *
>> +__construct_new_region(struct cxl_root_decoder *cxlrd,
>> + struct cxl_endpoint_decoder **cxled, int ways)
>> +{
>> + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
>> + struct cxl_region_params *p;
>> + resource_size_t size = 0;
>> + struct cxl_region *cxlr;
>> + int rc, i;
>> +
>> + /* If interleaving is not supported, why does ways need to be at least 1? */
> I think 1 means no interleave. It's simpler to do this than have 0 and 1 both
> mean no interleave because 1 works for programmable decoders.
>
>> + if (ways < 1)
>> + return ERR_PTR(-EINVAL);
>> +
>> + cxlr = construct_region_begin(cxlrd, cxled[0]);
> Rethink how this is broken up. Taking the cxl_dpa_rwsem
> inside this function is really hard to follow. Ideally
> manage it with scoped_guard().
>
>
>> + if (IS_ERR(cxlr))
>> + return cxlr;
>> +
>> + rc = set_interleave_ways(cxlr, ways);
>> + if (rc)
>> + goto out;
>> +
>> + rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
>> + if (rc)
> here I think cxl_dpa_rwsem is held.
>> + goto out;
>> +
>> + down_read(&cxl_dpa_rwsem);
>> + for (i = 0; i < ways; i++) {
>> + if (!cxled[i]->dpa_res)
>> + break;
>> + size += resource_size(cxled[i]->dpa_res);
>> + }
>> + up_read(&cxl_dpa_rwsem);
>> +
>> + if (i < ways)
> but not here and they go to the same place.
>
>> + goto out;
>> +
>> + rc = alloc_hpa(cxlr, size);
>> + if (rc)
>> + goto out;
>> +
>> + down_read(&cxl_dpa_rwsem);
>> + for (i = 0; i < ways; i++) {
>> + rc = cxl_region_attach(cxlr, cxled[i], i);
>> + if (rc)
>> + break;
>> + }
>> + up_read(&cxl_dpa_rwsem);
>> +
>> + if (rc)
>> + goto out;
>> +
>> + rc = cxl_region_decode_commit(cxlr);
>> + if (rc)
>> + goto out;
>>
>> + p = &cxlr->params;
>> + p->state = CXL_CONFIG_COMMIT;
>> +out:
>> + construct_region_end();
>> + if (rc) {
>> + drop_region(cxlr);
>> + return ERR_PTR(rc);
>> + }
>> return cxlr;
>> +}
>> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
>> index a0e0795ec064..377bb3cd2d47 100644
>> --- a/drivers/cxl/cxlmem.h
>> +++ b/drivers/cxl/cxlmem.h
>> @@ -881,5 +881,7 @@ struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_port *endpoint,
>> int interleave_ways,
>> unsigned long flags,
>> resource_size_t *max);
>> -
> Avoid whitespace noise.
>
>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>> + struct cxl_endpoint_decoder **cxled,
>> + int ways);
>> #endif /* __CXL_MEM_H__ */
Powered by blists - more mailing lists