[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4b699955-8131-48d8-a698-999d90523261@amd.com>
Date: Thu, 17 Oct 2024 16:49:59 -0500
From: Ben Cheatham <benjamin.cheatham@....com>
To: <alejandro.lucero-palau@....com>
CC: <linux-cxl@...r.kernel.org>, <netdev@...r.kernel.org>,
<dan.j.williams@...el.com>, <martin.habets@...inx.com>,
<edward.cree@....com>, <davem@...emloft.net>, <kuba@...nel.org>,
<pabeni@...hat.com>, <edumazet@...gle.com>
Subject: Re: [PATCH v4 22/26] cxl: allow region creation by type2 drivers
On 10/17/24 11:52 AM, alejandro.lucero-palau@....com wrote:
> From: Alejandro Lucero <alucerop@....com>
>
> Creating a CXL region requires userspace intervention through the cxl
> sysfs files. Type2 support should allow accelerator drivers to create
> such cxl region from kernel code.
>
> Adding that functionality and integrating it with current support for
> memory expanders.
>
> Based on https://lore.kernel.org/linux-cxl/168592159835.1948938.1647215579839222774.stgit@dwillia2-xfh.jf.intel.com/
>
So I ran into an issue at this point when using v3 as a base for my own testing. The problem is that
you are doing manual region management while not explicitly preventing auto region discovery when
devm_cxl_add_memdev() is called (patch 14/26 in this series). This caused some resource allocation
conflicts which then caused both the auto region and the manual region set up to fail. To make it more
concrete, here's the flow I encountered (I tried something new here, let me know if the ascii
is all mangled):
devm_cxl_add_memdev() is called
│
├───► cxl_mem probes new memdev
│ │
│ ├─► cxl_mem probe adds new endpoint port
│ │
│ └─► cxl_mem probe finishes
├───────────────────────────────────────────────► Manual region set up starts (finding free space, etc.)
├───► cxl_port probes the new endpoint port │
│ │ │
│ ├─► cxl_port probe sets up new endpoint ├─► create_new_region() is called
│ │ │
│ ├─► cxl_port calls discover_region() │
│ │ │
│ ├─► discover_region() creates new auto ├─► create_new_region() creates
│ │ discoveredregion │ new manual region
│◄────◄────────────────────────────────────────────────┘
│
└─► Region creation fails due to resource contention/race (DPA resource, RAM resource, etc.)
The timeline is a little off here I think, but it should be close enough to illustrate the point.
The easy solution here to not allow auto region discovery for CXL type 2 devices, like so:
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 22a9ba89cf5a..07b991e2c05b 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -34,6 +34,7 @@ static void schedule_detach(void *cxlmd)
static int discover_region(struct device *dev, void *root)
{
struct cxl_endpoint_decoder *cxled;
+ struct cxl_memdev *cxlmd;
int rc;
dev_err(dev, "%s:%d: Enter\n", __func__, __LINE__);
@@ -45,7 +46,9 @@ static int discover_region(struct device *dev, void *root)
if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0)
return 0;
- if (cxled->state != CXL_DECODER_STATE_AUTO)
+ cxlmd = cxled_to_memdev(cxled);
+ if (cxled->state != CXL_DECODER_STATE_AUTO ||
+ cxlmd->cxlds->type == CXL_DEVTYPE_DEVMEM)
return 0;
I think there's a better way to go about this, more to say about it in patch 24/26. I've
dropped this here just in case you don't like my ideas there ;).
> Signed-off-by: Alejandro Lucero <alucerop@....com>
> Signed-off-by: Dan Williams <dan.j.williams@...el.com>
> ---
> drivers/cxl/core/region.c | 147 ++++++++++++++++++++++++++++++++++----
> drivers/cxl/cxlmem.h | 2 +
> include/linux/cxl/cxl.h | 4 ++
> 3 files changed, 138 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index d08a2a848ac9..04c270a29e96 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2253,6 +2253,18 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
> return rc;
> }
>
> +int cxl_accel_region_detach(struct cxl_endpoint_decoder *cxled)
> +{
> + int rc;
> +
> + down_write(&cxl_region_rwsem);
> + cxled->mode = CXL_DECODER_DEAD;
> + rc = cxl_region_detach(cxled);
> + up_write(&cxl_region_rwsem);
> + return rc;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_accel_region_detach, CXL);
> +
> void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
> {
> down_write(&cxl_region_rwsem);
> @@ -2781,6 +2793,14 @@ cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
> return to_cxl_region(region_dev);
> }
>
> +static void drop_region(struct cxl_region *cxlr)
> +{
> + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
> + struct cxl_port *port = cxlrd_to_port(cxlrd);
> +
> + devm_release_action(port->uport_dev, unregister_region, cxlr);
> +}
> +
> static ssize_t delete_region_store(struct device *dev,
> struct device_attribute *attr,
> const char *buf, size_t len)
> @@ -3386,17 +3406,18 @@ static int match_region_by_range(struct device *dev, void *data)
> return rc;
> }
>
> -/* Establish an empty region covering the given HPA range */
> -static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
> - struct cxl_endpoint_decoder *cxled)
> +static void construct_region_end(void)
> +{
> + up_write(&cxl_region_rwsem);
> +}
> +
> +static struct cxl_region *construct_region_begin(struct cxl_root_decoder *cxlrd,
> + struct cxl_endpoint_decoder *cxled)
> {
> struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> - struct cxl_port *port = cxlrd_to_port(cxlrd);
> - struct range *hpa = &cxled->cxld.hpa_range;
> struct cxl_region_params *p;
> struct cxl_region *cxlr;
> - struct resource *res;
> - int rc;
> + int err;
>
> do {
> cxlr = __create_region(cxlrd, cxled->mode,
> @@ -3405,8 +3426,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
> } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
>
> if (IS_ERR(cxlr)) {
> - dev_err(cxlmd->dev.parent,
> - "%s:%s: %s failed assign region: %ld\n",
> + dev_err(cxlmd->dev.parent, "%s:%s: %s failed assign region: %ld\n",
> dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
> __func__, PTR_ERR(cxlr));
> return cxlr;
> @@ -3416,13 +3436,33 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
> p = &cxlr->params;
> if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
> dev_err(cxlmd->dev.parent,
> - "%s:%s: %s autodiscovery interrupted\n",
> + "%s:%s: %s region setup interrupted\n",
> dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
> __func__);
> - rc = -EBUSY;
> - goto err;
> + err = -EBUSY;
> + construct_region_end();
> + drop_region(cxlr);
> + return ERR_PTR(err);
> }
>
> + return cxlr;
> +}
> +
> +/* Establish an empty region covering the given HPA range */
> +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
> + struct cxl_endpoint_decoder *cxled)
> +{
> + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> + struct range *hpa = &cxled->cxld.hpa_range;
> + struct cxl_region_params *p;
> + struct cxl_region *cxlr;
> + struct resource *res;
> + int rc;
> +
> + cxlr = construct_region_begin(cxlrd, cxled);
> + if (IS_ERR(cxlr))
> + return cxlr;
> +
> set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>
> res = kmalloc(sizeof(*res), GFP_KERNEL);
> @@ -3445,6 +3485,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
> __func__, dev_name(&cxlr->dev));
> }
>
> + p = &cxlr->params;
> p->res = res;
> p->interleave_ways = cxled->cxld.interleave_ways;
> p->interleave_granularity = cxled->cxld.interleave_granularity;
> @@ -3462,15 +3503,91 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
> /* ...to match put_device() in cxl_add_to_region() */
> get_device(&cxlr->dev);
> up_write(&cxl_region_rwsem);
> -
> + construct_region_end();
> return cxlr;
>
> err:
> - up_write(&cxl_region_rwsem);
> - devm_release_action(port->uport_dev, unregister_region, cxlr);
> + construct_region_end();
> + drop_region(cxlr);
> + return ERR_PTR(rc);
> +}
> +
> +static struct cxl_region *
> +__construct_new_region(struct cxl_root_decoder *cxlrd,
> + struct cxl_endpoint_decoder *cxled)
> +{
> + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
> + struct cxl_region_params *p;
> + struct cxl_region *cxlr;
> + int rc;
> +
> + cxlr = construct_region_begin(cxlrd, cxled);
> + if (IS_ERR(cxlr))
> + return cxlr;
> +
> + rc = set_interleave_ways(cxlr, 1);
> + if (rc)
> + goto err;
> +
> + rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
> + if (rc)
> + goto err;
> +
> + rc = alloc_hpa(cxlr, resource_size(cxled->dpa_res));
> + if (rc)
> + goto err;
> +
> + down_read(&cxl_dpa_rwsem);
> + rc = cxl_region_attach(cxlr, cxled, 0);
> + up_read(&cxl_dpa_rwsem);
> +
> + if (rc)
> + goto err;
> +
> + rc = cxl_region_decode_commit(cxlr);
> + if (rc)
> + goto err;
> +
> + p = &cxlr->params;
> + p->state = CXL_CONFIG_COMMIT;
> +
> + construct_region_end();
> + return cxlr;
> +err:
> + construct_region_end();
> + drop_region(cxlr);
> return ERR_PTR(rc);
> }
>
> +/**
> + * cxl_create_region - Establish a region given an endpoint decoder
> + * @cxlrd: root decoder to allocate HPA
> + * @cxled: endpoint decoder with reserved DPA capacity
> + *
> + * Returns a fully formed region in the commit state and attached to the
> + * cxl_region driver.
> + */
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> + struct cxl_endpoint_decoder *cxled)
> +{
> + struct cxl_region *cxlr;
> +
> + mutex_lock(&cxlrd->range_lock);
> + cxlr = __construct_new_region(cxlrd, cxled);
> + mutex_unlock(&cxlrd->range_lock);
> +
> + if (IS_ERR(cxlr))
> + return cxlr;
> +
> + if (device_attach(&cxlr->dev) <= 0) {
> + dev_err(&cxlr->dev, "failed to create region\n");
> + drop_region(cxlr);
> + return ERR_PTR(-ENODEV);
> + }
> + return cxlr;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
> +
> int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
> {
> struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 68d28eab3696..0f5c71909fd1 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -875,4 +875,6 @@ struct cxl_hdm {
> struct seq_file;
> struct dentry *cxl_debugfs_create_dir(const char *dir);
> void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds);
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> + struct cxl_endpoint_decoder *cxled);
> #endif /* __CXL_MEM_H__ */
> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
> index 45b6badb8048..c544339c2baf 100644
> --- a/include/linux/cxl/cxl.h
> +++ b/include/linux/cxl/cxl.h
> @@ -72,4 +72,8 @@ struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd,
> resource_size_t min,
> resource_size_t max);
> int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> + struct cxl_endpoint_decoder *cxled);
> +
> +int cxl_accel_region_detach(struct cxl_endpoint_decoder *cxled);
> #endif
Powered by blists - more mailing lists