lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6zhoh5mqzrzqf4mq7lbahwtqvw45dpaqsqd7kj3tkfvencaftu@3r7yculmy4ex>
Date: Sat, 23 Nov 2024 17:49:37 +0100
From: Krzysztof Kozlowski <krzk@...nel.org>
To: Shubhrajyoti Datta <shubhrajyoti.datta@....com>
Cc: linux-kernel@...r.kernel.org, devicetree@...r.kernel.org, 
	linux-edac@...r.kernel.org, git@....com, krzk@...nel.or, robh@...nel.org, 
	conor+dt@...nel.org, bp@...en8.de, tony.luck@...el.com, james.morse@....com, 
	mchehab@...nel.org, rric@...nel.org
Subject: Re: [PATCH 3/3] EDAC: Versal NET: Add support for error notification

On Fri, Nov 22, 2024 at 03:36:25PM +0530, Shubhrajyoti Datta wrote:
> The Versal NET EDAC driver listens for notifications from the NMC (Network
> Management Controller) over rpmsg. The driver registers on the error_edac
> channel and sends a RAS event trace upon a notification. On reading
> the notification, the user space application can decide on the response.
> A sysfs reset entry is created for this purpose; it sends an acknowledgment
> back to the NMC. To report events, the driver registers with the RAS
> framework: MC events are used for memory errors, and non-standard events
> are used for all other errors.
> 
> Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@....com>
> ---
> 
>  .../ABI/testing/sysfs-driver-versalnet-edac   |   11 +
>  drivers/edac/Kconfig                          |    9 +
>  drivers/edac/Makefile                         |    1 +
>  drivers/edac/versalnet_rpmsg_edac.c           | 1321 +++++++++++++++++
>  4 files changed, 1342 insertions(+)
>  create mode 100644 Documentation/ABI/testing/sysfs-driver-versalnet-edac
>  create mode 100644 drivers/edac/versalnet_rpmsg_edac.c
> 
> diff --git a/Documentation/ABI/testing/sysfs-driver-versalnet-edac b/Documentation/ABI/testing/sysfs-driver-versalnet-edac
> new file mode 100644
> index 000000000000..598a6c6cef39
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-driver-versalnet-edac
> @@ -0,0 +1,11 @@
> +What:		/sys/devices/system/edac/.../reset
> +Date:		Nov 2024
> +Contact:	shubhrajyoti.datta@....com
> +Description:
> +		Writing 1/2 to this file will send a reset request to the NMC (Network
> +		Management Controller). 1 will request a SRST (Soft reset) and 2 will
> +		request a POR (Power-On Reset).
> +
> +		For example::
> +
> +		  # echo 1 > /sys/devices/system/edac/.../reset

ABI docs are always separate patches.

Anyway, a sysfs entry to reset memory is a NAK.

NAK.

> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index 81af6c344d6b..c3e0644aed0d 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -564,5 +564,14 @@ config EDAC_VERSAL
>  	  Support injecting both correctable and uncorrectable errors
>  	  for debugging purposes.
>  
> +config EDAC_VERSALNET
> +	tristate "AMD Versal NET EDAC"

No dependency on ARCH? Is this for OF platforms?

> +	depends on CDX_CONTROLLER
> +	help
> +	  Support for error detection and correction on the AMD Versal NET DDR
> +	  memory controller.
> +
> +	  The memory controller supports single bit error correction, double bit
> +	  error detection. Report various errors to the userspace.
>  
>  endif # EDAC
> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
> index faf310eec4a6..7b1a8525c021 100644
> --- a/drivers/edac/Makefile
> +++ b/drivers/edac/Makefile
> @@ -88,3 +88,4 @@ obj-$(CONFIG_EDAC_DMC520)		+= dmc520_edac.o
>  obj-$(CONFIG_EDAC_NPCM)			+= npcm_edac.o
>  obj-$(CONFIG_EDAC_ZYNQMP)		+= zynqmp_edac.o
>  obj-$(CONFIG_EDAC_VERSAL)		+= versal_edac.o
> +obj-$(CONFIG_EDAC_VERSALNET)		+= versalnet_rpmsg_edac.o

...

> +};
> +
> +/* The driver should have only one instance */
> +static int probe_once;

Oh, no, this is just poor coding and design style. Such stuff is a big
warning sign. How, really, how could an OF platform have two instances?

...

> +static int mc_probe(struct platform_device *pdev)
> +{
> +	struct edac_mc_layer layers[2];
> +	u32 num_chans, rank, dwidth;
> +	struct mem_ctl_info *mci;
> +	struct edac_priv *priv;
> +	int rc;
> +
> +	rc = device_property_read_u32(&pdev->dev, "amd,rank", &rank);
> +	if (rc < 0) {
> +		dev_err(&pdev->dev, "unable to read rank property");
> +		return rc;
> +	}
> +
> +	rc = device_property_read_u32(&pdev->dev, "amd,num-chans", &num_chans);
> +	if (rc < 0) {
> +		dev_err(&pdev->dev, "unable to read num-chans property");
> +		return rc;
> +	}
> +
> +	rc = device_property_read_u32(&pdev->dev, "amd,dwidth", &dwidth);
> +	if (rc < 0) {
> +		dev_err(&pdev->dev, "unable to read dwidth property");
> +		return rc;
> +	}
> +
> +	mutex_lock(&vnet_edac_lock);
> +	if (probe_once) {

Not possible. Drop. Fix your DTS (and the binding) instead.

> +		rc = -ENODEV;
> +		goto free_lock;
> +	}
> +
> +	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
> +	layers[0].size = rank;
> +	layers[0].is_virt_csrow = true;
> +	layers[1].type = EDAC_MC_LAYER_CHANNEL;
> +	layers[1].size = num_chans;
> +	layers[1].is_virt_csrow = false;
> +
> +	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
> +			    sizeof(struct edac_priv));
> +	if (!mci) {
> +		edac_printk(KERN_ERR, EDAC_MC,
> +			    "Failed memory allocation for mc instance\n");
> +		rc = -ENOMEM;
> +		goto free_lock;
> +	}
> +
> +	priv = mci->pvt_info;
> +	priv->dwidth = dwidth;
> +
> +	mc_init(mci, pdev);
> +
> +	rc = edac_mc_add_mc(mci);
> +	if (rc) {
> +		edac_printk(KERN_ERR, EDAC_MC,
> +			    "Failed to register with EDAC core\n");
> +		goto free_edac_mc;
> +	}
> +
> +	amd_rpmsg_id_table[0].driver_data = (kernel_ulong_t)mci;
> +	INIT_WORK(&priv->work, amd_rpmsg_post_probe_work);
> +	rc = register_rpmsg_driver(&amd_rpmsg_driver);
> +	if (rc) {
> +		edac_printk(KERN_ERR, EDAC_MC,
> +			    "Failed to register RPMsg driver: %d\n", rc);
> +		goto del_edac_mc;
> +	}
> +
> +	rc = device_create_file(&mci->dev, &dev_attr_reset);
> +	if (rc < 0)
> +		goto unregister;
> +
> +	probe_once = 1;
> +	mutex_unlock(&vnet_edac_lock);
> +
> +	return 0;
> +
> +unregister:
> +	unregister_rpmsg_driver(&amd_rpmsg_driver);
> +del_edac_mc:
> +	edac_mc_del_mc(&pdev->dev);
> +free_edac_mc:
> +	edac_mc_free(mci);
> +free_lock:
> +	mutex_unlock(&vnet_edac_lock);
> +
> +	return rc;
> +}
> +
> +static void mc_remove(struct platform_device *pdev)
> +{
> +	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
> +
> +	mutex_lock(&vnet_edac_lock);
> +	probe_once = 0;
> +	unregister_rpmsg_driver(&amd_rpmsg_driver);
> +	edac_mc_del_mc(&pdev->dev);
> +	edac_mc_free(mci);
> +	mutex_unlock(&vnet_edac_lock);
> +}
> +
> +static const struct of_device_id amd_edac_match[] = {
> +	{ .compatible = "amd,versalnet-edac", },
> +	{
> +		/* end of table */

Really? No, drop.

> +	}
> +};
> +MODULE_DEVICE_TABLE(of, amd_edac_match);
> +
> +static struct platform_driver amd_ddr_edac_mc_driver = {
> +	.driver = {
> +		.name = "amd-ddrmc-edac",
> +		.of_match_table = amd_edac_match,
> +	},
> +	.probe = mc_probe,
> +	.remove = mc_remove,
> +};
> +
> +module_platform_driver(amd_ddr_edac_mc_driver);
> +
> +MODULE_AUTHOR("AMD Inc");
> +MODULE_DESCRIPTION("AMD DDRMC ECC driver");
> +MODULE_LICENSE("GPL");
> -- 
> 2.17.1
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ