[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aa942655-d740-4052-8ddc-13540b06ef14@intel.com>
Date: Wed, 15 Oct 2025 14:42:03 -0700
From: Dave Jiang <dave.jiang@...el.com>
To: "Cheatham, Benjamin" <benjamin.cheatham@....com>,
alejandro.lucero-palau@....com
Cc: Jonathan Cameron <Jonathan.Cameron@...wei.com>,
linux-cxl@...r.kernel.org, netdev@...r.kernel.org, dan.j.williams@...el.com,
edward.cree@....com, davem@...emloft.net, kuba@...nel.org,
pabeni@...hat.com, edumazet@...gle.com
Subject: Re: [PATCH v19 18/22] cxl: Allow region creation by type2 drivers
On 10/9/25 1:56 PM, Cheatham, Benjamin wrote:
> On 10/6/2025 5:01 AM, alejandro.lucero-palau@....com wrote:
>> From: Alejandro Lucero <alucerop@....com>
>>
>> Creating a CXL region requires userspace intervention through the cxl
>> sysfs files. Type2 support should allow accelerator drivers to create
>> such cxl region from kernel code.
>>
>> Adding that functionality and integrating it with current support for
>> memory expanders.
>>
>> Support an action by the type2 driver to be linked to the created region
>> for unwinding the resources allocated properly.
>>
>> Based on https://lore.kernel.org/linux-cxl/168592159835.1948938.1647215579839222774.stgit@dwillia2-xfh.jf.intel.com/
>>
>> Signed-off-by: Alejandro Lucero <alucerop@....com>
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@...wei.com>
>> ---
>
> Fix for this one should be split between 13/22 and this patch, but the majority of it is in this one. The idea is
> if we don't find a free decoder we check for pre-programmed decoders and use that instead. Unfortunately, this
> invalidates some of the assumptions made by __construct_new_region().
Wouldn't you look for a pre-programmed decoder first and construct the auto region before you try to manually create one? Also for a type 2 device, would the driver know what it wants and what the region configuration should look like? Would it be a single region either it's auto or manual, or would there be a configuration of multiple regions possible? To me a type 2 region is more intentional where the driver would know exactly what it needs and thus trying to get that from the cxl core.
DJ
>
> __construct_new_region() assumes that 1) the underlying HPA is unallocated and 2) the HDM decoders aren't programmed. Neither
> of those are true for a decoder that's programmed by BIOS. The HPA is allocated as part of endpoint_port_probe()
> (see devm_cxl_enumerate_decoders() in cxl/core/hdm.c specifically) and the HDM decoders are enabled and committed by BIOS before
> we ever see them. So the idea here is to split the second half of __construct_new_region() into the 2 cases: un-programmed decoders
> (__setup_new_region()) and pre-programmed decoders (__setup_new_auto_region()). The main differences between the two is we don't
> allocate the HPA region or commit the HDM decoders and just insert the region resource below the CXL window instead in the auto case.
>
> I'm not sure if I've done everything correctly, but I don't see any errors and get the following iomem tree:
> 1050000000-144fffffff : CXL Window 0
> 1050000000-144fffffff : region0
> 1050000000-144fffffff : Soft Reserved
>
> ---
>
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 4af5de5e0a44..a5fa8dd0e63f 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -137,6 +137,8 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
> struct cxl_endpoint_dvsec_info *info);
> int cxl_port_get_possible_dports(struct cxl_port *port);
>
> +bool is_auto_decoder(struct cxl_endpoint_decoder *cxled);
> +
> #ifdef CONFIG_CXL_FEATURES
> struct cxl_feat_entry *
> cxl_feature_info(struct cxl_features_state *cxlfs, const uuid_t *uuid);
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 1f7aa79c1541..8f6236a88c0b 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -712,16 +712,33 @@ static int find_free_decoder(struct device *dev, const void *data)
> return 1;
> }
>
> +bool is_auto_decoder(struct cxl_endpoint_decoder *cxled)
> +{
> + return cxled->state == CXL_DECODER_STATE_AUTO && cxled->pos < 0 &&
> + (cxled->cxld.flags & CXL_DECODER_F_ENABLE);
> +}
> +
> +static int find_auto_decoder(struct device *dev, const void *data)
> +{
> + if (!is_endpoint_decoder(dev))
> + return 0;
> +
> + return is_auto_decoder(to_cxl_endpoint_decoder(dev));
> +}
> +
> static struct cxl_endpoint_decoder *
> cxl_find_free_decoder(struct cxl_memdev *cxlmd)
> {
> struct cxl_port *endpoint = cxlmd->endpoint;
> struct device *dev;
>
> - scoped_guard(rwsem_read, &cxl_rwsem.dpa) {
> - dev = device_find_child(&endpoint->dev, NULL,
> - find_free_decoder);
> - }
> + guard(rwsem_read)(&cxl_rwsem.dpa);
> + dev = device_find_child(&endpoint->dev, NULL,
> + find_free_decoder);
> + if (dev)
> + return to_cxl_endpoint_decoder(dev);
> +
> + dev = device_find_child(&endpoint->dev, NULL, find_auto_decoder);
> if (dev)
> return to_cxl_endpoint_decoder(dev);
>
> @@ -761,6 +778,9 @@ struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd,
> if (!cxled)
> return ERR_PTR(-ENODEV);
>
> + if (is_auto_decoder(cxled))
> + return_ptr(cxled);
> +
> rc = cxl_dpa_set_part(cxled, mode);
> if (rc)
> return ERR_PTR(rc);
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 2d60131edff3..004e01ad0e5f 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -3699,48 +3699,74 @@ cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa)
> }
>
> static struct cxl_region *
> -__construct_new_region(struct cxl_root_decoder *cxlrd,
> - struct cxl_endpoint_decoder **cxled, int ways)
> +__setup_new_auto_region(struct cxl_region *cxlr, struct cxl_root_decoder *cxlrd,
> + struct cxl_endpoint_decoder **cxled, int ways)
> {
> - struct cxl_memdev *cxlmd = cxled_to_memdev(cxled[0]);
> - struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
> - struct cxl_region_params *p;
> + struct range *hpa = &cxled[0]->cxld.hpa_range;
> + struct cxl_region_params *p = &cxlr->params;
> resource_size_t size = 0;
> - struct cxl_region *cxlr;
> - int rc, i;
> + struct resource *res;
> + int rc = -EINVAL, i = 0;
>
> - cxlr = construct_region_begin(cxlrd, cxled[0]);
> - if (IS_ERR(cxlr))
> - return cxlr;
> + scoped_guard(rwsem_read, &cxl_rwsem.dpa)
> + {
> + for (i = 0; i < ways; i++) {
> + if (!cxled[i]->dpa_res)
> + goto err;
>
> - guard(rwsem_write)(&cxl_rwsem.region);
> + if (!is_auto_decoder(cxled[i]))
> + goto err;
>
> - /*
> - * Sanity check. This should not happen with an accel driver handling
> - * the region creation.
> - */
> - p = &cxlr->params;
> - if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
> - dev_err(cxlmd->dev.parent,
> - "%s:%s: %s unexpected region state\n",
> - dev_name(&cxlmd->dev), dev_name(&cxled[0]->cxld.dev),
> - __func__);
> - rc = -EBUSY;
> - goto err;
> + size += resource_size(cxled[i]->dpa_res);
> + }
> }
>
> - rc = set_interleave_ways(cxlr, ways);
> - if (rc)
> + set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
> +
> + p->res = kmalloc(sizeof(*res), GFP_KERNEL);
> + if (!p->res) {
> + rc = -ENOMEM;
> goto err;
> + }
>
> - rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
> + *p->res = DEFINE_RES_MEM_NAMED(hpa->start, range_len(hpa),
> + dev_name(&cxlr->dev));
> +
> + rc = insert_resource(cxlrd->res, p->res);
> if (rc)
> - goto err;
> + dev_warn(&cxlr->dev, "Could not insert resource\n");
> +
> + p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
> + scoped_guard(rwsem_read, &cxl_rwsem.dpa)
> + {
> + for (i = 0; i < ways; i++) {
> + rc = cxl_region_attach(cxlr, cxled[i], -1);
> + if (rc)
> + goto err;
> + }
> + }
> +
> + return cxlr;
> +
> +err:
> + drop_region(cxlr);
> + return ERR_PTR(rc);
> +}
> +
> +static struct cxl_region *
> +__setup_new_region(struct cxl_region *cxlr, struct cxl_root_decoder *cxlrd,
> + struct cxl_endpoint_decoder **cxled, int ways)
> +{
> + struct cxl_region_params *p = &cxlr->params;
> + resource_size_t size = 0;
> + int rc = -EINVAL, i = 0;
>
> - scoped_guard(rwsem_read, &cxl_rwsem.dpa) {
> + scoped_guard(rwsem_read, &cxl_rwsem.dpa)
> + {
> for (i = 0; i < ways; i++) {
> if (!cxled[i]->dpa_res)
> break;
> +
> size += resource_size(cxled[i]->dpa_res);
> }
> }
> @@ -3752,7 +3778,8 @@ __construct_new_region(struct cxl_root_decoder *cxlrd,
> if (rc)
> goto err;
>
> - scoped_guard(rwsem_read, &cxl_rwsem.dpa) {
> + scoped_guard(rwsem_read, &cxl_rwsem.dpa)
> + {
> for (i = 0; i < ways; i++) {
> rc = cxl_region_attach(cxlr, cxled[i], 0);
> if (rc)
> @@ -3760,16 +3787,61 @@ __construct_new_region(struct cxl_root_decoder *cxlrd,
> }
> }
>
> + rc = cxl_region_decode_commit(cxlr);
> if (rc)
> goto err;
>
> - rc = cxl_region_decode_commit(cxlr);
> + p->state = CXL_CONFIG_COMMIT;
> + return cxlr;
> +
> +err:
> + drop_region(cxlr);
> + return ERR_PTR(rc);
> +}
> +
> +static struct cxl_region *
> +__construct_new_region(struct cxl_root_decoder *cxlrd,
> + struct cxl_endpoint_decoder **cxled, int ways)
> +{
> + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled[0]);
> + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
> + struct cxl_region_params *p;
> + struct cxl_region *cxlr;
> + int rc;
> +
> + cxlr = construct_region_begin(cxlrd, cxled[0]);
> + if (IS_ERR(cxlr))
> + return cxlr;
> +
> + guard(rwsem_write)(&cxl_rwsem.region);
> +
> + /*
> + * Sanity check. This should not happen with an accel driver handling
> + * the region creation.
> + */
> + p = &cxlr->params;
> + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
> + dev_err(cxlmd->dev.parent,
> + "%s:%s: %s unexpected region state\n",
> + dev_name(&cxlmd->dev), dev_name(&cxled[0]->cxld.dev),
> + __func__);
> + rc = -EBUSY;
> + goto err;
> + }
> +
> + rc = set_interleave_ways(cxlr, ways);
> if (rc)
> goto err;
>
> - p->state = CXL_CONFIG_COMMIT;
> + rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
> + if (rc)
> + goto err;
> +
> + if (is_auto_decoder(cxled[0]))
> + return __setup_new_auto_region(cxlr, cxlrd, cxled, ways);
> + else
> + return __setup_new_region(cxlr, cxlrd, cxled, ways);
>
> - return cxlr;
> err:
> drop_region(cxlr);
> return ERR_PTR(rc);
Powered by blists - more mailing lists