[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e507443b-71cc-4c48-a193-f5361a1f9086@amd.com>
Date: Thu, 20 Nov 2025 12:27:54 -0800
From: "Koralahalli Channabasappa, Smita" <skoralah@....com>
To: alejandro.lucero-palau@....com, linux-cxl@...r.kernel.org,
netdev@...r.kernel.org, dan.j.williams@...el.com, edward.cree@....com,
davem@...emloft.net, kuba@...nel.org, pabeni@...hat.com,
edumazet@...gle.com, dave.jiang@...el.com
Cc: Alejandro Lucero <alucerop@....com>
Subject: Re: [PATCH v21 01/23] cxl/mem: refactor memdev allocation
Hi Alejandro,
On 11/19/2025 11:22 AM, alejandro.lucero-palau@....com wrote:
> From: Alejandro Lucero <alucerop@....com>
>
> In preparation for always-synchronous memdev attach, refactor memdev
> allocation and fix release bug in devm_cxl_add_memdev() when error after
> a successful allocation.
>
> The diff is busy as this moves cxl_memdev_alloc() down below the definition
> of cxl_memdev_fops and introduces devm_cxl_memdev_add_or_reset() to
> preclude needing to export more symbols from the cxl_core.
>
> Fixes: 1c3333a28d45 ("cxl/mem: Do not rely on device_add() side effects for dev_set_name() failures")
>
> Signed-off-by: Dan Williams <dan.j.williams@...el.com>
> Signed-off-by: Alejandro Lucero <alucerop@....com>
> ---
> drivers/cxl/core/memdev.c | 134 +++++++++++++++++++++-----------------
> drivers/cxl/private.h | 10 +++
> 2 files changed, 86 insertions(+), 58 deletions(-)
> create mode 100644 drivers/cxl/private.h
>
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index e370d733e440..8de19807ac7b 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -8,6 +8,7 @@
> #include <linux/idr.h>
> #include <linux/pci.h>
> #include <cxlmem.h>
> +#include "private.h"
> #include "trace.h"
> #include "core.h"
>
> @@ -648,42 +649,25 @@ static void detach_memdev(struct work_struct *work)
>
> static struct lock_class_key cxl_memdev_key;
>
> -static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
> - const struct file_operations *fops)
> +int devm_cxl_memdev_add_or_reset(struct device *host, struct cxl_memdev *cxlmd)
> {
> - struct cxl_memdev *cxlmd;
> - struct device *dev;
> - struct cdev *cdev;
> + struct device *dev = &cxlmd->dev;
> + struct cdev *cdev = &cxlmd->cdev;
> int rc;
>
> - cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
> - if (!cxlmd)
> - return ERR_PTR(-ENOMEM);
> -
> - rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL);
> - if (rc < 0)
> - goto err;
> - cxlmd->id = rc;
> - cxlmd->depth = -1;
> -
> - dev = &cxlmd->dev;
> - device_initialize(dev);
> - lockdep_set_class(&dev->mutex, &cxl_memdev_key);
> - dev->parent = cxlds->dev;
> - dev->bus = &cxl_bus_type;
> - dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
> - dev->type = &cxl_memdev_type;
> - device_set_pm_not_required(dev);
> - INIT_WORK(&cxlmd->detach_work, detach_memdev);
> -
> - cdev = &cxlmd->cdev;
> - cdev_init(cdev, fops);
> - return cxlmd;
> + rc = cdev_device_add(cdev, dev);
> + if (rc) {
> + /*
> + * The cdev was briefly live, shutdown any ioctl operations that
> + * saw that state.
> + */
> + cxl_memdev_shutdown(dev);
> + return rc;
> + }
>
> -err:
> - kfree(cxlmd);
> - return ERR_PTR(rc);
> + return devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd);
> }
> +EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_add_or_reset, "CXL");
>
> static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
> unsigned long arg)
> @@ -1051,48 +1035,82 @@ static const struct file_operations cxl_memdev_fops = {
> .llseek = noop_llseek,
> };
>
> -struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
> - struct cxl_dev_state *cxlds)
> +struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds)
> {
> - struct cxl_memdev *cxlmd;
> + struct cxl_memdev *cxlmd __free(kfree) =
> + kzalloc(sizeof(*cxlmd), GFP_KERNEL);
> struct device *dev;
> struct cdev *cdev;
> int rc;
>
> - cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
> - if (IS_ERR(cxlmd))
> - return cxlmd;
> -
> - dev = &cxlmd->dev;
> - rc = dev_set_name(dev, "mem%d", cxlmd->id);
> - if (rc)
> - goto err;
> + if (!cxlmd)
> + return ERR_PTR(-ENOMEM);
>
> - /*
> - * Activate ioctl operations, no cxl_memdev_rwsem manipulation
> - * needed as this is ordered with cdev_add() publishing the device.
> - */
> + rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL);
> + if (rc < 0)
> + return ERR_PTR(rc);
> + cxlmd->id = rc;
> + cxlmd->depth = -1;
> cxlmd->cxlds = cxlds;
> cxlds->cxlmd = cxlmd;
>
> + dev = &cxlmd->dev;
> + device_initialize(dev);
> + lockdep_set_class(&dev->mutex, &cxl_memdev_key);
> + dev->parent = cxlds->dev;
> + dev->bus = &cxl_bus_type;
> + dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
> + dev->type = &cxl_memdev_type;
> + device_set_pm_not_required(dev);
> + INIT_WORK(&cxlmd->detach_work, detach_memdev);
> +
> cdev = &cxlmd->cdev;
> - rc = cdev_device_add(cdev, dev);
> + cdev_init(cdev, &cxl_memdev_fops);
> + return_ptr(cxlmd);
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_memdev_alloc, "CXL");
> +
> +static void __cxlmd_free(struct cxl_memdev *cxlmd)
> +{
> + if (IS_ERR(cxlmd))
> + return;
> +
> + if (cxlmd->cxlds)
> + cxlmd->cxlds->cxlmd = NULL;
> +
This series caused a NULL deref in devm_cxl_add_memdev().
__cxlmd_free() only checks IS_ERR(cxlmd) and proceeds to dereference
cxlmd->cxlds.
Adding a NULL check for cxlmd fixed the crash in my setup.
BUG: kernel NULL pointer dereference, address: 0000000000000358
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 1553a7067 P4D 0
Oops: Oops: 0000 [#1] SMP NOPTI
RIP: 0010:devm_cxl_add_memdev+0x71/0xb0 [cxl_mem]
Code: 89 c4 e8 c2 c8 be f8 85 c0 75 17 48 89 de 4c 89 ef e8 b3 08 f9 ff
85 c0 75 08 45 31 e4 45 31 ed eb 08 48 98 49 89 dd 48 89 c3 <49> 8b 85
58 03 00 00 48 85 c0 74 08 48 c7 40 08 00 00 00 00 4c 89
CR2: 0000000000000358 CR3: 00000001553a6002 CR4: 0000000000771ef0
PKRU: 55555554
Call Trace:
<TASK>
cxl_pci_probe+0x409/0xb00 [cxl_pci]
? update_load_avg+0x83/0x780
local_pci_probe+0x4d/0xb0
work_for_cpu_fn+0x1e/0x30
process_scheduled_works+0xa9/0x420
? __pfx_worker_thread+0x10/0x10
worker_thread+0x127/0x270
...
Thanks
Smita
> + put_device(&cxlmd->dev);
> + kfree(cxlmd);
> +}
> +
> +DEFINE_FREE(cxlmd_free, struct cxl_memdev *, __cxlmd_free(_T))
> +
> +/**
> + * devm_cxl_add_memdev - Add a CXL memory device
> + * @host: devres alloc/release context and parent for the memdev
> + * @cxlds: CXL device state to associate with the memdev
> + *
> + * Upon return the device will have had a chance to attach to the
> + * cxl_mem driver, but may fail if the CXL topology is not ready
> + * (hardware CXL link down, or software platform CXL root not attached)
> + */
> +struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
> + struct cxl_dev_state *cxlds)
> +{
> + struct cxl_memdev *cxlmd __free(cxlmd_free) = cxl_memdev_alloc(cxlds);
> + int rc;
> +
> + if (IS_ERR(cxlmd))
> + return cxlmd;
> +
> + rc = dev_set_name(&cxlmd->dev, "mem%d", cxlmd->id);
> if (rc)
> - goto err;
> + return ERR_PTR(rc);
>
> - rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd);
> + rc = devm_cxl_memdev_add_or_reset(host, cxlmd);
> if (rc)
> return ERR_PTR(rc);
> - return cxlmd;
>
> -err:
> - /*
> - * The cdev was briefly live, shutdown any ioctl operations that
> - * saw that state.
> - */
> - cxl_memdev_shutdown(dev);
> - put_device(dev);
> - return ERR_PTR(rc);
> + return no_free_ptr(cxlmd);
> }
> EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL");
>
> diff --git a/drivers/cxl/private.h b/drivers/cxl/private.h
> new file mode 100644
> index 000000000000..50c2ac57afb5
> --- /dev/null
> +++ b/drivers/cxl/private.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* Copyright(c) 2025 Intel Corporation. */
> +
> +/* Private interfaces betwen common drivers ("cxl_mem") and the cxl_core */
> +
> +#ifndef __CXL_PRIVATE_H__
> +#define __CXL_PRIVATE_H__
> +struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds);
> +int devm_cxl_memdev_add_or_reset(struct device *host, struct cxl_memdev *cxlmd);
> +#endif /* __CXL_PRIVATE_H__ */
Powered by blists - more mailing lists