[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <682c2c6f8398_2b1610050@dwillia2-mobl4.notmuch>
Date: Tue, 20 May 2025 00:17:03 -0700
From: <dan.j.williams@...el.com>
To: <alejandro.lucero-palau@....com>, <linux-cxl@...r.kernel.org>,
<netdev@...r.kernel.org>, <dan.j.williams@...el.com>, <edward.cree@....com>,
<davem@...emloft.net>, <kuba@...nel.org>, <pabeni@...hat.com>,
<edumazet@...gle.com>, <dave.jiang@...el.com>
CC: Alejandro Lucero <alucerop@....com>, Jonathan Cameron
<Jonathan.Cameron@...wei.com>
Subject: Re: [PATCH v16 01/22] cxl: Add type2 device basic support
alejandro.lucero-palau@ wrote:
> From: Alejandro Lucero <alucerop@....com>
>
> Differentiate CXL memory expanders (type 3) from CXL device accelerators
> (type 2) with a new function for initializing cxl_dev_state and a macro
> for helping accel drivers to embed cxl_dev_state inside a private
> struct.
>
> Move structs to include/cxl as the size of the accel driver private
> struct embedding cxl_dev_state needs to know the size of this struct.
>
> Use same new initialization with the type3 pci driver.
>
> Signed-off-by: Alejandro Lucero <alucerop@....com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@...wei.com>
> Reviewed-by: Dave Jiang <dave.jiang@...el.com>
> ---
> drivers/cxl/core/mbox.c | 11 +-
> drivers/cxl/core/memdev.c | 32 +++++
> drivers/cxl/core/pci.c | 1 +
> drivers/cxl/core/regs.c | 1 +
> drivers/cxl/cxl.h | 97 +--------------
> drivers/cxl/cxlmem.h | 88 +-------------
> drivers/cxl/cxlpci.h | 21 ----
> drivers/cxl/pci.c | 17 +--
> include/cxl/cxl.h | 226 +++++++++++++++++++++++++++++++++++
> include/cxl/pci.h | 23 ++++
> tools/testing/cxl/test/mem.c | 3 +-
> 11 files changed, 305 insertions(+), 215 deletions(-)
> create mode 100644 include/cxl/cxl.h
> create mode 100644 include/cxl/pci.h
>
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index d72764056ce6..ab994d459f46 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -1484,23 +1484,20 @@ int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host)
> }
> EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL");
>
> -struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
> +struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial,
> + u16 dvsec)
> {
> struct cxl_memdev_state *mds;
> int rc;
>
> - mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL);
> + mds = cxl_dev_state_create(dev, CXL_DEVTYPE_CLASSMEM, serial, dvsec,
> + struct cxl_memdev_state, cxlds, true);
Existing cxl_memdev_state_create() callers expect that any state
allocation is managed by devres.
It is ok to make cxl_memdev_state_create() manually allocate, but then
you still need to take care of existing caller expectations.
> if (!mds) {
> dev_err(dev, "No memory available\n");
> return ERR_PTR(-ENOMEM);
> }
>
> mutex_init(&mds->event.log_lock);
> - mds->cxlds.dev = dev;
> - mds->cxlds.reg_map.host = dev;
> - mds->cxlds.cxl_mbox.host = dev;
> - mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
> - mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
>
> rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier);
Ugh, but this function is now confused as some resources are devm, and
some are manual alloc. this is a bit of a mess. Like why does this need
to dev_warn() every boot on MCE-less archs like ARM64?
I was trying to keep the incremental fixup small, but this makes it
bigger, and likely means we need to clean this up before this patch.
Ugh2, looks like this current arrangment will cause a NULL pointer
de-reference if an MCE fires between cxl_memdev_state_create() and
devm_cxl_add_memdev().
Ugh3, looks like the MCE is registered once per memdev, but triggers
memory_failure() once per spa match. That really wants to be registered
once per-region.
That whole situation needs a rethink, but for now make the other
cleanups a TODO.
> if (rc == -EOPNOTSUPP)
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index a16a5886d40a..6cc732aeb9de 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -633,6 +633,38 @@ static void detach_memdev(struct work_struct *work)
>
> static struct lock_class_key cxl_memdev_key;
>
> +void cxl_dev_state_init(struct cxl_dev_state *cxlds, struct device *dev,
> + enum cxl_devtype type, u64 serial, u16 dvsec,
> + bool has_mbox)
As far as I can see this can be static as _cxl_dev_state_create() is the
only caller in this whole series. Fixup included below:
> +{
> + *cxlds = (struct cxl_dev_state) {
> + .dev = dev,
> + .type = type,
> + .serial = serial,
> + .cxl_dvsec = dvsec,
> + .reg_map.host = dev,
> + .reg_map.resource = CXL_RESOURCE_NONE,
> + };
> +
> + if (has_mbox)
> + cxlds->cxl_mbox.host = dev;
> +}
> +
> +struct cxl_dev_state *_cxl_dev_state_create(struct device *dev,
> + enum cxl_devtype type, u64 serial,
> + u16 dvsec, size_t size,
> + bool has_mbox)
> +{
> + struct cxl_dev_state *cxlds __free(kfree) = kzalloc(size, GFP_KERNEL);
> +
> + if (!cxlds)
> + return NULL;
> +
> + cxl_dev_state_init(cxlds, dev, type, serial, dvsec, has_mbox);
> + return_ptr(cxlds);
This function is so simple, there is no need to use scope-based cleanup.
-- 8< --
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index ab994d459f46..5664514dfb83 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -1488,27 +1488,48 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial,
u16 dvsec)
{
struct cxl_memdev_state *mds;
- int rc;
mds = cxl_dev_state_create(dev, CXL_DEVTYPE_CLASSMEM, serial, dvsec,
struct cxl_memdev_state, cxlds, true);
- if (!mds) {
- dev_err(dev, "No memory available\n");
+ if (!mds)
return ERR_PTR(-ENOMEM);
- }
mutex_init(&mds->event.log_lock);
- rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier);
- if (rc == -EOPNOTSUPP)
- dev_warn(dev, "CXL MCE unsupported\n");
- else if (rc)
- return ERR_PTR(rc);
+ /* TODO: move this registration to cxl_region_probe() */
+ cxl_register_mce_notifier(mds);
return mds;
}
EXPORT_SYMBOL_NS_GPL(cxl_memdev_state_create, "CXL");
+void cxl_memdev_state_destroy(struct cxl_memdev_state *mds)
+{
+ cxl_unregister_mce_notifier(mds);
+ kfree(mds);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_memdev_state_destroy, "CXL");
+
+static void mds_destroy(void *mds)
+{
+ cxl_memdev_state_destroy(mds);
+}
+
+struct cxl_memdev_state *devm_cxl_memdev_state_create(struct device *dev,
+ u64 serial, u16 dvsec)
+{
+ struct cxl_memdev_state *mds = cxl_memdev_state_create(dev, serial, dvsec);
+ int rc;
+
+ if (IS_ERR(mds))
+ return mds;
+ rc = devm_add_action_or_reset(dev, mds_destroy, mds);
+ if (rc)
+ return ERR_PTR(rc);
+ return mds;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_state_create, "CXL");
+
void __init cxl_mbox_init(void)
{
struct dentry *mbox_debugfs;
diff --git a/drivers/cxl/core/mce.c b/drivers/cxl/core/mce.c
index ff8d078c6ca1..71cc650f54ae 100644
--- a/drivers/cxl/core/mce.c
+++ b/drivers/cxl/core/mce.c
@@ -48,18 +48,16 @@ static int cxl_handle_mce(struct notifier_block *nb, unsigned long val,
return NOTIFY_OK;
}
-static void cxl_unregister_mce_notifier(void *mce_notifier)
+void cxl_unregister_mce_notifier(struct cxl_memdev_state *mds)
{
- mce_unregister_decode_chain(mce_notifier);
+ mce_unregister_decode_chain(&mds->mce_notifier);
}
-int devm_cxl_register_mce_notifier(struct device *dev,
- struct notifier_block *mce_notifier)
+void cxl_register_mce_notifier(struct cxl_memdev_state *mds)
{
+ struct notifier_block *mce_notifier = &mds->mce_notifier;
+
mce_notifier->notifier_call = cxl_handle_mce;
mce_notifier->priority = MCE_PRIO_UC;
- mce_register_decode_chain(mce_notifier);
-
- return devm_add_action_or_reset(dev, cxl_unregister_mce_notifier,
- mce_notifier);
+ mce_register_decode_chain(&mds->mce_notifier);
}
diff --git a/drivers/cxl/core/mce.h b/drivers/cxl/core/mce.h
index ace73424eeb6..b7930761b0d6 100644
--- a/drivers/cxl/core/mce.h
+++ b/drivers/cxl/core/mce.h
@@ -4,16 +4,17 @@
#define _CXL_CORE_MCE_H_
#include <linux/notifier.h>
+#include <asm/mce.h>
#ifdef CONFIG_CXL_MCE
-int devm_cxl_register_mce_notifier(struct device *dev,
- struct notifier_block *mce_notifer);
+void cxl_unregister_mce_notifier(struct cxl_memdev_state *mds);
+void cxl_register_mce_notifier(struct cxl_memdev_state *mds);
#else
-static inline int
-devm_cxl_register_mce_notifier(struct device *dev,
- struct notifier_block *mce_notifier)
+static inline void cxl_unregister_mce_notifier(struct cxl_memdev_state *mds)
+{
+}
+void cxl_register_mce_notifier(struct cxl_memdev_state *mds)
{
- return -EOPNOTSUPP;
}
#endif
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 6cc732aeb9de..3baf5b4502d0 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -633,7 +633,7 @@ static void detach_memdev(struct work_struct *work)
static struct lock_class_key cxl_memdev_key;
-void cxl_dev_state_init(struct cxl_dev_state *cxlds, struct device *dev,
+static void cxl_dev_state_init(struct cxl_dev_state *cxlds, struct device *dev,
enum cxl_devtype type, u64 serial, u16 dvsec,
bool has_mbox)
{
@@ -655,13 +655,13 @@ struct cxl_dev_state *_cxl_dev_state_create(struct device *dev,
u16 dvsec, size_t size,
bool has_mbox)
{
- struct cxl_dev_state *cxlds __free(kfree) = kzalloc(size, GFP_KERNEL);
+ struct cxl_dev_state *cxlds = kzalloc(size, GFP_KERNEL);
if (!cxlds)
return NULL;
cxl_dev_state_init(cxlds, dev, type, serial, dvsec, has_mbox);
- return_ptr(cxlds);
+ return cxlds;
}
EXPORT_SYMBOL_NS_GPL(_cxl_dev_state_create, "CXL");
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index e7cd31b9f107..897589a6c6ca 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -416,7 +416,9 @@ struct cxl_memdev_state {
struct cxl_poison_state poison;
struct cxl_security_state security;
struct cxl_fw_state fw;
+#ifdef CONFIG_CXL_MCE
struct notifier_block mce_notifier;
+#endif
};
static inline struct cxl_memdev_state *
@@ -755,9 +757,11 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info);
struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial,
u16 dvsec);
-void cxl_dev_state_init(struct cxl_dev_state *cxlds, struct device *dev,
- enum cxl_devtype type, u64 serial, u16 dvsec,
- bool has_mbox);
+void cxl_memdev_state_destroy(struct cxl_memdev_state *mds);
+
+struct cxl_memdev_state *devm_cxl_memdev_state_create(struct device *dev,
+ u64 serial, u16 dvsec);
+
void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
unsigned long *cmds);
void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 0d3c67867965..d5447c7d540f 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -933,7 +933,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev_warn(&pdev->dev,
"Device DVSEC not present, skip CXL.mem init\n");
- mds = cxl_memdev_state_create(&pdev->dev, pci_get_dsn(pdev), dvsec);
+ mds = devm_cxl_memdev_state_create(&pdev->dev, pci_get_dsn(pdev), dvsec);
if (IS_ERR(mds))
return PTR_ERR(mds);
cxlds = &mds->cxlds;
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index e62cb5049cf5..c40afc743451 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -1717,7 +1717,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
- mds = cxl_memdev_state_create(dev, pdev->id + 1, 0);
+ mds = devm_cxl_memdev_state_create(dev, pdev->id + 1, 0);
if (IS_ERR(mds))
return PTR_ERR(mds);
Powered by blists - more mailing lists