[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9f3678e8-8e7f-479f-a367-868f634adec8@amd.com>
Date: Wed, 24 Sep 2025 17:11:32 +0100
From: Alejandro Lucero Palau <alucerop@....com>
To: Dave Jiang <dave.jiang@...el.com>, alejandro.lucero-palau@....com,
linux-cxl@...r.kernel.org, netdev@...r.kernel.org, dan.j.williams@...el.com,
edward.cree@....com, davem@...emloft.net, kuba@...nel.org,
pabeni@...hat.com, edumazet@...gle.com
Subject: Re: [PATCH v18 08/20] cx/memdev: Indicate probe deferral
On 9/19/25 18:53, Dave Jiang wrote:
>
> On 9/18/25 2:17 AM, alejandro.lucero-palau@....com wrote:
>> From: Alejandro Lucero <alucerop@....com>
>>
>> The first step for a CXL accelerator driver that wants to establish new
>> CXL.mem regions is to register a 'struct cxl_memdev'. That kicks off
>> cxl_mem_probe() to enumerate all 'struct cxl_port' instances in the
>> topology up to the root.
>>
>> If the port driver has not attached yet the expectation is that the
>> driver waits until that link is established. The common cxl_pci driver
>> has reason to keep the 'struct cxl_memdev' device attached to the bus
>> until the root driver attaches. An accelerator may want to instead defer
>> probing until CXL resources can be acquired.
>>
>> Use the @endpoint attribute of a 'struct cxl_memdev' to convey when a
>> accelerator driver probing should be deferred vs failed. Provide that
>> indication via a new cxl_acquire_endpoint() API that can retrieve the
>> probe status of the memdev.
> So the -EPROBE_DEFER actually goes to the caller (accelerator driver) in this instance right? In the situation where the CXL resources never show up, does this particular accelerator driver never completes probe successfully or does it just punt CXL and completes probe without CXL support? This question is just for my understanding.
As I said in previous version, I kept this as a protection against cxl
modules removal while initialization. I think after Dan's patches the
problem is now reduced to cxl_acpi, so I am tempted to remove this from
next v19 which I want to send asap including those Dan patches.
In any case, the original goal is not there anymore, or maybe it is but
it requires to describe it properly. I did cover all this problematic
with in the cover letter.
>> Signed-off-by: Alejandro Lucero <alucerop@....com>
>> ---
>> drivers/cxl/core/memdev.c | 42 +++++++++++++++++++++++++++++++++++++++
>> drivers/cxl/core/port.c | 2 +-
>> drivers/cxl/mem.c | 7 +++++--
>> include/cxl/cxl.h | 2 ++
>> 4 files changed, 50 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
>> index 3228287bf3f0..10d21996598a 100644
>> --- a/drivers/cxl/core/memdev.c
>> +++ b/drivers/cxl/core/memdev.c
>> @@ -1164,6 +1164,48 @@ struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
>> }
>> EXPORT_SYMBOL_NS_GPL(cxl_memdev_alloc, "CXL");
>>
>> +/*
>> + * Try to get a locked reference on a memdev's CXL port topology
>> + * connection. Be careful to observe when cxl_mem_probe() has deposited
>> + * a probe deferral awaiting the arrival of the CXL root driver.
>> + */
>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd)
>> +{
>> + struct cxl_port *endpoint;
>> + int rc = -ENXIO;
>> +
>> + device_lock(&cxlmd->dev);
>> +
>> + endpoint = cxlmd->endpoint;
>> + if (!endpoint)
>> + goto err;
>> +
>> + if (IS_ERR(endpoint)) {
>> + rc = PTR_ERR(endpoint);
>> + goto err;
>> + }
>> +
>> + device_lock(&endpoint->dev);
>> + if (!endpoint->dev.driver)
>> + goto err_endpoint;
>> +
>> + return endpoint;
>> +
>> +err_endpoint:
>> + device_unlock(&endpoint->dev);
>> +err:
>> + device_unlock(&cxlmd->dev);
>> + return ERR_PTR(rc);
>> +}
>> +EXPORT_SYMBOL_NS_GPL(cxl_acquire_endpoint, "CXL");
>> +
>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
>> +{
>> + device_unlock(&endpoint->dev);
>> + device_unlock(&cxlmd->dev);
>> +}
>> +EXPORT_SYMBOL_NS_GPL(cxl_release_endpoint, "CXL");
> We may want to annotate the locking to help out lockdep debug
>
> static struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd)
> __acquires(&cxlmd->dev.mutex)
> __acquires(&cxlmd->endpoint->dev.mutex)
> {
> ...
> }
>
> static void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
> __releases(&endpoint->dev.mutex)
> __releases(&cxlmd->dev.mutex)
> {
> ...
> }
>
> DJ
>
>> +
>> static void sanitize_teardown_notifier(void *data)
>> {
>> struct cxl_memdev_state *mds = data;
>> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
>> index 240c3c5bcdc8..4c3fecd4c8ea 100644
>> --- a/drivers/cxl/core/port.c
>> +++ b/drivers/cxl/core/port.c
>> @@ -1557,7 +1557,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd,
>> */
>> dev_dbg(&cxlmd->dev, "%s is a root dport\n",
>> dev_name(dport_dev));
>> - return -ENXIO;
>> + return -EPROBE_DEFER;
>> }
>>
>> struct cxl_port *parent_port __free(put_cxl_port) =
>> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
>> index 9ffee09fcb50..f103e2003add 100644
>> --- a/drivers/cxl/mem.c
>> +++ b/drivers/cxl/mem.c
>> @@ -122,14 +122,17 @@ static int cxl_mem_probe(struct device *dev)
>> return rc;
>>
>> rc = devm_cxl_enumerate_ports(cxlmd);
>> - if (rc)
>> + if (rc) {
>> + cxlmd->endpoint = ERR_PTR(rc);
>> return rc;
>> + }
>>
>> struct cxl_port *parent_port __free(put_cxl_port) =
>> cxl_mem_find_port(cxlmd, &dport);
>> if (!parent_port) {
>> dev_err(dev, "CXL port topology not found\n");
>> - return -ENXIO;
>> + cxlmd->endpoint = ERR_PTR(-EPROBE_DEFER);
>> + return -EPROBE_DEFER;
>> }
>>
>> if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) {
>> diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
>> index 401a59185608..64946e698f5f 100644
>> --- a/include/cxl/cxl.h
>> +++ b/include/cxl/cxl.h
>> @@ -251,4 +251,6 @@ int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity);
>> struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
>> struct cxl_dev_state *cxlds,
>> const struct cxl_memdev_ops *ops);
>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd);
>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *endpoint);
>> #endif /* __CXL_CXL_H__ */
Powered by blists - more mailing lists