linux-kernel - Re: [PATCH v3 RESEND] PCI: Introduce Cray ClusterStor E1000 NVMe slot LED driver

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <2d2fc30c-62bd-4243-b53a-8a477b153cd6@oracle.com>
Date: Wed, 21 May 2025 13:52:00 +0530
From: ALOK TIWARI <alok.a.tiwari@...cle.com>
To: Tony Hutter <hutter2@...l.gov>, Bjorn Helgaas <helgaas@...nel.org>,
        Lukas Wunner <lukas@...ner.de>, mariusz.tkaczyk@...ux.intel.com,
        minyard@....org
Cc: linux-pci@...r.kernel.org, openipmi-developer@...ts.sourceforge.net,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v3 RESEND] PCI: Introduce Cray ClusterStor E1000 NVMe slot
 LED driver


> diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
> index 123c4c7c2ab5..75c77cec0b21 100644
> --- a/drivers/pci/hotplug/Kconfig
> +++ b/drivers/pci/hotplug/Kconfig
> @@ -183,4 +183,14 @@ config HOTPLUG_PCI_S390
>   
>   	  When in doubt, say Y.
>   
> +config HOTPLUG_PCI_PCIE_CRAY_E1000
> +	bool "PCIe Hotplug extensions for Cray ClusterStor E1000"
> +	depends on HOTPLUG_PCI_PCIE && IPMI_HANDLER=y
> +	help
> +	  Say Y here if you have a Cray ClusterStor E1000 and want to control
> +	  your NVMe slot LEDs.  Without this driver is it not possible

Typo: "is it" -> "it is"

> +	  to control the fault and locate LEDs on the E1000's 24 NVMe slots.
> +
> +	  When in doubt, say N.
> +
>   endif # HOTPLUG_PCI
> diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile
> index 40aaf31fe338..82a1f0592d0a 100644
> --- a/drivers/pci/hotplug/Makefile
> +++ b/drivers/pci/hotplug/Makefile
> @@ -66,6 +66,9 @@ pciehp-objs		:=	pciehp_core.o	\
>   				pciehp_ctrl.o	\
>   				pciehp_pci.o	\
>   				pciehp_hpc.o
> +ifdef CONFIG_HOTPLUG_PCI_PCIE_CRAY_E1000
> +pciehp-objs		+=	pciehp_craye1k.o
> +endif
>   
>   shpchp-objs		:=	shpchp_core.o	\
>   				shpchp_ctrl.o	\
> diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
> index 273dd8c66f4e..ea68ae041547 100644
> --- a/drivers/pci/hotplug/pciehp.h
> +++ b/drivers/pci/hotplug/pciehp.h
> @@ -198,6 +198,13 @@ int pciehp_get_raw_indicator_status(struct hotplug_slot *h_slot, u8 *status);
>   
>   int pciehp_slot_reset(struct pcie_device *dev);
>   
> +#ifdef CONFIG_HOTPLUG_PCI_PCIE_CRAY_E1000
> +int craye1k_get_attention_status(struct hotplug_slot *hotplug_slot, u8 *status);
> +int craye1k_set_attention_status(struct hotplug_slot *hotplug_slot, u8 status);
> +bool is_craye1k_slot(struct controller *ctrl);
> +int craye1k_init(void);
> +#endif
> +
>   static inline const char *slot_name(struct controller *ctrl)
>   {
>   	return hotplug_slot_name(&ctrl->hotplug_slot);
> diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
> index ff458e692fed..9a7a7b320810 100644
> --- a/drivers/pci/hotplug/pciehp_core.c
> +++ b/drivers/pci/hotplug/pciehp_core.c
> @@ -73,6 +73,13 @@ static int init_slot(struct controller *ctrl)
>   		ops->get_attention_status = pciehp_get_raw_indicator_status;
>   		ops->set_attention_status = pciehp_set_raw_indicator_status;
>   	}
> +#ifdef CONFIG_HOTPLUG_PCI_PCIE_CRAY_E1000
> +	if (is_craye1k_slot(ctrl)) {
> +		/* slots 1-24 on Cray E1000s are controlled differently */
> +		ops->get_attention_status = craye1k_get_attention_status;
> +		ops->set_attention_status = craye1k_set_attention_status;
> +	}
> +#endif
>   
>   	/* register this slot with the hotplug pci core */
>   	ctrl->hotplug_slot.ops = ops;
> @@ -404,6 +411,11 @@ int __init pcie_hp_init(void)
>   	pr_debug("pcie_port_service_register = %d\n", retval);
>   	if (retval)
>   		pr_debug("Failure to register service\n");
> +#ifdef CONFIG_HOTPLUG_PCI_PCIE_CRAY_E1000
> +	retval = craye1k_init();
> +	if (retval)
> +		pr_debug("Failure to register Cray E1000 extensions");
> +#endif
>   
>   	return retval;
>   }
> diff --git a/drivers/pci/hotplug/pciehp_craye1k.c b/drivers/pci/hotplug/pciehp_craye1k.c
> new file mode 100644
> index 000000000000..844b36882316
> --- /dev/null
> +++ b/drivers/pci/hotplug/pciehp_craye1k.c
> @@ -0,0 +1,659 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright 2022-2024 Lawrence Livermore National Security, LLC
> + */
> +/*
> + * Cray ClusterStor E1000 hotplug slot LED driver extensions
> + *
> + * This driver controls the NVMe slot LEDs on the Cray ClusterStore E1000.
> + * It provides hotplug attention status callbacks for the 24 NVMe slots on
> + * the E1000.  This allows users to access the E1000's locate and fault
> + * LEDs via the normal /sys/bus/pci/slots/<slot>/attention sysfs entries.
> + * This driver uses IPMI to communicate with the E1000 controller to toggle
> + * the LEDs.
> + *
> + * This driver is based off of ibmpex.c
> + */
> +
> +#include <linux/debugfs.h>
> +#include <linux/delay.h>
> +#include <linux/errno.h>
> +#include <linux/dmi.h>
> +#include <linux/ipmi.h>
> +#include <linux/ipmi_smi.h>
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <linux/pci_hotplug.h>
> +#include <linux/random.h>
> +#include "pciehp.h"
> +
> +/* Cray E1000 commands */
> +#define CRAYE1K_CMD_NETFN       0x3c
> +#define CRAYE1K_CMD_PRIMARY     0x33
> +#define CRAYE1K_CMD_FAULT_LED   0x39
> +#define CRAYE1K_CMD_LOCATE_LED  0x22
> +
> +/* Subcommands */
> +#define CRAYE1K_GET_LED		0x0
> +#define CRAYE1K_SET_LED		0x1
> +#define CRAYE1K_SET_PRIMARY		0x1

Both are defined as 0x1.
This is likely intentional since they belong to different contexts, so
that is OK.
How about using a CRAYE1K_SUBCMD_ prefix for the subcommands?

> +
> +/*
> + * Milliseconds to wait after get/set LED command.  200ms value found though
> + * experimentation
> + */
> +#define	CRAYE1K_POST_CMD_WAIT_MS	200
> +
> +struct craye1k {
> +	struct device *dev;   /* BMC device */
> +	struct mutex lock;
> +	struct completion read_complete;
> +	struct ipmi_addr address;
> +	struct ipmi_user *user;
> +	int iface;
> +
> +	long tx_msg_id;
> +	struct kernel_ipmi_msg tx_msg;
> +	unsigned char tx_msg_data[IPMI_MAX_MSG_LENGTH];
> +	unsigned char rx_msg_data[IPMI_MAX_MSG_LENGTH];
> +	unsigned long rx_msg_len;
> +	unsigned char rx_result;	/* IPMI completion code */
> +
> +	/* Parent dir for all our debugfs entries */
> +	struct dentry *parent;
> +
> +	/* debugfs stats */
> +	u64 check_primary;
> +	u64 check_primary_failed;
> +	u64 was_already_primary;
> +	u64 was_not_already_primary;
> +	u64 set_primary;
> +	u64 set_initial_primary_failed;
> +	u64 set_primary_failed;
> +	u64 set_led_locate_failed;
> +	u64 set_led_fault_failed;
> +	u64 set_led_readback_failed;
> +	u64 set_led_failed;
> +	u64 get_led_failed;
> +	u64 completion_timeout;
> +	u64 wrong_msgid;
> +	u64 request_failed;
> +
> +	/* debugfs configuration options */
> +
> +	/* Print info on spurious IPMI messages */
> +	bool print_errors;
> +
> +	/* Retries for kernel IPMI layer */
> +	u32 ipmi_retries;
> +
> +	/* Timeout in ms for IPMI (0 = use IPMI default_retry_ms) */
> +	u32 ipmi_timeout_ms;
> +
> +	/* Timeout in ms to wait for E1000 message completion */
> +	u32 completion_timeout_ms;
> +};
> +
> +/*
> + * Make our craye1k a global so get/set_attention_status() can access it.
> + * This is safe since there's only one node controller on the board, and so it's
> + * impossible to instantiate more than one craye1k.
> + */
> +static struct craye1k *craye1k_global;
> +
> +/* Return parent dir dentry */
> +static struct dentry *
> +craye1k_debugfs_init(struct craye1k *craye1k)
> +{
> +	umode_t mode = 0644;
> +	struct dentry *parent = debugfs_create_dir("pciehp_craye1k", NULL);
> +

The if (!parent) check is correct, but debugfs_create_dir() can also return an ERR_PTR.
How about also checking IS_ERR(parent) to make this fully robust?

> +	if (!parent)
> +		return NULL;
> +
> +	debugfs_create_x64("check_primary", mode, parent,
> +			   &craye1k->check_primary);
> +	debugfs_create_x64("check_primary_failed", mode, parent,
> +			   &craye1k->check_primary_failed);
> +	debugfs_create_x64("was_already_primary", mode, parent,
> +			   &craye1k->was_already_primary);
> +	debugfs_create_x64("was_not_already_primary", mode, parent,
> +			   &craye1k->was_not_already_primary);
> +	debugfs_create_x64("set_primary", mode, parent,
> +			   &craye1k->set_primary);
> +	debugfs_create_x64("set_initial_primary_failed", mode, parent,
> +			   &craye1k->set_initial_primary_failed);
> +	debugfs_create_x64("set_primary_failed", mode, parent,
> +			   &craye1k->set_primary_failed);
> +	debugfs_create_x64("set_led_locate_failed", mode, parent,
> +			   &craye1k->set_led_locate_failed);
> +	debugfs_create_x64("set_led_fault_failed", mode, parent,
> +			   &craye1k->set_led_fault_failed);
> +	debugfs_create_x64("set_led_readback_failed", mode, parent,
> +			   &craye1k->set_led_readback_failed);
> +	debugfs_create_x64("set_led_failed", mode, parent,
> +			   &craye1k->set_led_failed);
> +	debugfs_create_x64("get_led_failed", mode, parent,
> +			   &craye1k->get_led_failed);
> +	debugfs_create_x64("completion_timeout", mode, parent,
> +			   &craye1k->completion_timeout);
> +	debugfs_create_x64("wrong_msgid", mode, parent,
> +			   &craye1k->wrong_msgid);
> +	debugfs_create_x64("request_failed", mode, parent,
> +			   &craye1k->request_failed);
> +
> +	debugfs_create_x32("ipmi_retries", mode, parent,
> +			   &craye1k->ipmi_retries);
> +	debugfs_create_x32("ipmi_timeout_ms", mode, parent,
> +			   &craye1k->ipmi_timeout_ms);
> +	debugfs_create_x32("completion_timeout_ms", mode, parent,
> +			   &craye1k->completion_timeout_ms);
> +	debugfs_create_bool("print_errors", mode, parent,
> +			    &craye1k->print_errors);
> +
> +	return parent;
> +}
[clip]
> +static int __craye1k_get_attention_status(struct hotplug_slot *hotplug_slot,
> +					  u8 *status, bool set_primary)
> +{
> +	unsigned char slot;
> +	int locate, fault;
> +	struct craye1k *craye1k;
> +
> +	craye1k = craye1k_global;
> +	slot = PSN(to_ctrl(hotplug_slot));
> +
> +	if (set_primary) {
> +		if (craye1k_set_primary(craye1k) != 0) {
> +			craye1k->get_led_failed++;
> +			return -EIO;
> +		}
> +	}
> +

This returns -EIO when craye1k_set_primary() fails, but -EINVAL when the LED reads fail.
Aren't both of these hardware I/O failure cases?

> +	locate = craye1k_get_slot_led(craye1k, slot, true);
> +	if (locate == -1) {
> +		craye1k->get_led_failed++;
> +		return -EINVAL;
> +	}
> +	msleep(CRAYE1K_POST_CMD_WAIT_MS);
> +
> +	fault = craye1k_get_slot_led(craye1k, slot, false);
> +	if (fault == -1) {
> +		craye1k->get_led_failed++;
> +		return -EINVAL;
> +	}
> +	msleep(CRAYE1K_POST_CMD_WAIT_MS);
> +
> +	*status = locate << 1 | fault;
> +
> +	return 0;
> +}


Thanks,
Alok