[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f43a3b62-2f36-6165-c142-b0bfc6fc32e6@amd.com>
Date: Mon, 16 Sep 2024 11:08:17 +0100
From: Alejandro Lucero Palau <alucerop@....com>
To: Zhi Wang <zhiw@...dia.com>, alejandro.lucero-palau@....com
Cc: linux-cxl@...r.kernel.org, netdev@...r.kernel.org,
dan.j.williams@...el.com, martin.habets@...inx.com, edward.cree@....com,
davem@...emloft.net, kuba@...nel.org, pabeni@...hat.com, edumazet@...gle.com
Subject: Re: [PATCH v3 10/20] cxl: indicate probe deferral
On 9/12/24 10:19, Zhi Wang wrote:
> On Sat, 7 Sep 2024 09:18:26 +0100
> <alejandro.lucero-palau@....com> wrote:
>
>> From: Alejandro Lucero <alucerop@....com>
>>
> Hi Alejandro:
>
> When working with V2, I noticed that if CONFIG_CXL_MEM=m and cxl_mem.ko
> is not loaded, loading the type-2 driver would fail on
> cxl_acquire_endpoint(). Not sure if you met the same problem.
I think there are some problems with the kernel build depending on whether
the CXL code is configured as modules, and even when CXL is not configured
at all, which is what the kernel build robot reported.
I'll work on this for v4.
Thanks!
> Now we are waiting for it to be loaded, it seems not ideal with the
> problem.
>
> Thanks,
> Zhi.
>
>> The first stop for a CXL accelerator driver that wants to establish
>> new CXL.mem regions is to register a 'struct cxl_memdev'. That kicks
>> off cxl_mem_probe() to enumerate all 'struct cxl_port' instances in
>> the topology up to the root.
>>
>> If the root driver has not attached yet the expectation is that the
>> driver waits until that link is established. The common cxl_pci_driver
>> has reason to keep the 'struct cxl_memdev' device attached to the bus
>> until the root driver attaches. An accelerator may want to instead
>> defer probing until CXL resources can be acquired.
>>
>> Use the @endpoint attribute of a 'struct cxl_memdev' to convey when
>> accelerator driver probing should be deferred vs failed. Provide that
>> indication via a new cxl_acquire_endpoint() API that can retrieve the
>> probe status of the memdev.
>>
>> Based on
>> https://lore.kernel.org/linux-cxl/168592155270.1948938.11536845108449547920.stgit@dwillia2-xfh.jf.intel.com/
>>
>> Signed-off-by: Alejandro Lucero <alucerop@....com>
>> Co-developed-by: Dan Williams <dan.j.williams@...el.com>
>> ---
>> drivers/cxl/core/memdev.c | 67
>> +++++++++++++++++++++++++++++++++++++++ drivers/cxl/core/port.c |
>> 2 +- drivers/cxl/mem.c | 4 ++-
>> include/linux/cxl/cxl.h | 2 ++
>> 4 files changed, 73 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
>> index 5f8418620b70..d4406cf3ed32 100644
>> --- a/drivers/cxl/core/memdev.c
>> +++ b/drivers/cxl/core/memdev.c
>> @@ -5,6 +5,7 @@
>> #include <linux/io-64-nonatomic-lo-hi.h>
>> #include <linux/firmware.h>
>> #include <linux/device.h>
>> +#include <linux/delay.h>
>> #include <linux/slab.h>
>> #include <linux/idr.h>
>> #include <linux/pci.h>
>> @@ -23,6 +24,8 @@ static DECLARE_RWSEM(cxl_memdev_rwsem);
>> static int cxl_mem_major;
>> static DEFINE_IDA(cxl_memdev_ida);
>>
>> +static unsigned short endpoint_ready_timeout = HZ;
>> +
>> static void cxl_memdev_release(struct device *dev)
>> {
>> struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>> @@ -1163,6 +1166,70 @@ struct cxl_memdev *devm_cxl_add_memdev(struct
>> device *host, }
>> EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
>>
>> +/*
>> + * Try to get a locked reference on a memdev's CXL port topology
>> + * connection. Be careful to observe when cxl_mem_probe() has
>> deposited
>> + * a probe deferral awaiting the arrival of the CXL root driver.
>> + */
>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd)
>> +{
>> + struct cxl_port *endpoint;
>> + unsigned long timeout;
>> + int rc = -ENXIO;
>> +
>> + /*
>> + * A memdev creation triggers ports creation through the
>> kernel
>> + * device object model. An endpoint port could not be
>> created yet
>> + * but coming. Wait here for a gentle space of time for
>> ensuring
>> + * an endpoint port not there is due to some error and not
>> because
>> + * the race described.
>> + *
>> + * Note this is a similar case this function is implemented
>> for, but
>> + * instead of the race with the root port, this is against
>> its own
>> + * endpoint port.
>> + */
>> + timeout = jiffies + endpoint_ready_timeout;
>> + do {
>> + device_lock(&cxlmd->dev);
>> + endpoint = cxlmd->endpoint;
>> + if (endpoint)
>> + break;
>> + device_unlock(&cxlmd->dev);
>> + if (msleep_interruptible(100)) {
>> + device_lock(&cxlmd->dev);
>> + break;
>> + }
>> + } while (!time_after(jiffies, timeout));
>> +
>> + if (!endpoint)
>> + goto err;
>> +
>> + if (IS_ERR(endpoint)) {
>> + rc = PTR_ERR(endpoint);
>> + goto err;
>> + }
>> +
>> + device_lock(&endpoint->dev);
>> + if (!endpoint->dev.driver)
>> + goto err_endpoint;
>> +
>> + return endpoint;
>> +
>> +err_endpoint:
>> + device_unlock(&endpoint->dev);
>> +err:
>> + device_unlock(&cxlmd->dev);
>> + return ERR_PTR(rc);
>> +}
>> +EXPORT_SYMBOL_NS(cxl_acquire_endpoint, CXL);
>> +
>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port
>> *endpoint) +{
>> + device_unlock(&endpoint->dev);
>> + device_unlock(&cxlmd->dev);
>> +}
>> +EXPORT_SYMBOL_NS(cxl_release_endpoint, CXL);
>> +
>> static void sanitize_teardown_notifier(void *data)
>> {
>> struct cxl_memdev_state *mds = data;
>> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
>> index 39b20ddd0296..ca2c993faa9c 100644
>> --- a/drivers/cxl/core/port.c
>> +++ b/drivers/cxl/core/port.c
>> @@ -1554,7 +1554,7 @@ static int add_port_attach_ep(struct cxl_memdev
>> *cxlmd, */
>> dev_dbg(&cxlmd->dev, "%s is a root dport\n",
>> dev_name(dport_dev));
>> - return -ENXIO;
>> + return -EPROBE_DEFER;
>> }
>>
>> parent_port = find_cxl_port(dparent, &parent_dport);
>> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
>> index 5c7ad230bccb..56fd7a100c2f 100644
>> --- a/drivers/cxl/mem.c
>> +++ b/drivers/cxl/mem.c
>> @@ -145,8 +145,10 @@ static int cxl_mem_probe(struct device *dev)
>> return rc;
>>
>> rc = devm_cxl_enumerate_ports(cxlmd);
>> - if (rc)
>> + if (rc) {
>> + cxlmd->endpoint = ERR_PTR(rc);
>> return rc;
>> + }
>>
>> parent_port = cxl_mem_find_port(cxlmd, &dport);
>> if (!parent_port) {
>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>> index fc0859f841dc..7e4580fb8659 100644
>> --- a/include/linux/cxl/cxl.h
>> +++ b/include/linux/cxl/cxl.h
>> @@ -57,4 +57,6 @@ int cxl_release_resource(struct cxl_dev_state
>> *cxlds, enum cxl_resource type); void cxl_set_media_ready(struct
>> cxl_dev_state *cxlds); struct cxl_memdev *devm_cxl_add_memdev(struct
>> device *host, struct cxl_dev_state *cxlds);
>> +struct cxl_port *cxl_acquire_endpoint(struct cxl_memdev *cxlmd);
>> +void cxl_release_endpoint(struct cxl_memdev *cxlmd, struct cxl_port
>> *endpoint); #endif
Powered by blists - more mailing lists