[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <2d2fc30c-62bd-4243-b53a-8a477b153cd6@oracle.com>
Date: Wed, 21 May 2025 13:52:00 +0530
From: ALOK TIWARI <alok.a.tiwari@...cle.com>
To: Tony Hutter <hutter2@...l.gov>, Bjorn Helgaas <helgaas@...nel.org>,
Lukas Wunner <lukas@...ner.de>, mariusz.tkaczyk@...ux.intel.com,
minyard@....org
Cc: linux-pci@...r.kernel.org, openipmi-developer@...ts.sourceforge.net,
Linux Kernel Mailing List <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v3 RESEND] PCI: Introduce Cray ClusterStor E1000 NVMe slot
LED driver
> diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
> index 123c4c7c2ab5..75c77cec0b21 100644
> --- a/drivers/pci/hotplug/Kconfig
> +++ b/drivers/pci/hotplug/Kconfig
> @@ -183,4 +183,14 @@ config HOTPLUG_PCI_S390
>
> When in doubt, say Y.
>
> +config HOTPLUG_PCI_PCIE_CRAY_E1000
> + bool "PCIe Hotplug extensions for Cray ClusterStor E1000"
> + depends on HOTPLUG_PCI_PCIE && IPMI_HANDLER=y
> + help
> + Say Y here if you have a Cray ClusterStor E1000 and want to control
> + your NVMe slot LEDs. Without this driver is it not possible
Typo: "is it" -> "it is" ("Without this driver it is not possible").
> + to control the fault and locate LEDs on the E1000's 24 NVMe slots.
> +
> + When in doubt, say N.
> +
> endif # HOTPLUG_PCI
> diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile
> index 40aaf31fe338..82a1f0592d0a 100644
> --- a/drivers/pci/hotplug/Makefile
> +++ b/drivers/pci/hotplug/Makefile
> @@ -66,6 +66,9 @@ pciehp-objs := pciehp_core.o \
> pciehp_ctrl.o \
> pciehp_pci.o \
> pciehp_hpc.o
> +ifdef CONFIG_HOTPLUG_PCI_PCIE_CRAY_E1000
> +pciehp-objs += pciehp_craye1k.o
> +endif
>
> shpchp-objs := shpchp_core.o \
> shpchp_ctrl.o \
> diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
> index 273dd8c66f4e..ea68ae041547 100644
> --- a/drivers/pci/hotplug/pciehp.h
> +++ b/drivers/pci/hotplug/pciehp.h
> @@ -198,6 +198,13 @@ int pciehp_get_raw_indicator_status(struct hotplug_slot *h_slot, u8 *status);
>
> int pciehp_slot_reset(struct pcie_device *dev);
>
> +#ifdef CONFIG_HOTPLUG_PCI_PCIE_CRAY_E1000
> +int craye1k_get_attention_status(struct hotplug_slot *hotplug_slot, u8 *status);
> +int craye1k_set_attention_status(struct hotplug_slot *hotplug_slot, u8 status);
> +bool is_craye1k_slot(struct controller *ctrl);
> +int craye1k_init(void);
> +#endif
> +
> static inline const char *slot_name(struct controller *ctrl)
> {
> return hotplug_slot_name(&ctrl->hotplug_slot);
> diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
> index ff458e692fed..9a7a7b320810 100644
> --- a/drivers/pci/hotplug/pciehp_core.c
> +++ b/drivers/pci/hotplug/pciehp_core.c
> @@ -73,6 +73,13 @@ static int init_slot(struct controller *ctrl)
> ops->get_attention_status = pciehp_get_raw_indicator_status;
> ops->set_attention_status = pciehp_set_raw_indicator_status;
> }
> +#ifdef CONFIG_HOTPLUG_PCI_PCIE_CRAY_E1000
> + if (is_craye1k_slot(ctrl)) {
> + /* slots 1-24 on Cray E1000s are controlled differently */
> + ops->get_attention_status = craye1k_get_attention_status;
> + ops->set_attention_status = craye1k_set_attention_status;
> + }
> +#endif
>
> /* register this slot with the hotplug pci core */
> ctrl->hotplug_slot.ops = ops;
> @@ -404,6 +411,11 @@ int __init pcie_hp_init(void)
> pr_debug("pcie_port_service_register = %d\n", retval);
> if (retval)
> pr_debug("Failure to register service\n");
> +#ifdef CONFIG_HOTPLUG_PCI_PCIE_CRAY_E1000
> + retval = craye1k_init();
> + if (retval)
> + pr_debug("Failure to register Cray E1000 extensions");
> +#endif
>
> return retval;
> }
> diff --git a/drivers/pci/hotplug/pciehp_craye1k.c b/drivers/pci/hotplug/pciehp_craye1k.c
> new file mode 100644
> index 000000000000..844b36882316
> --- /dev/null
> +++ b/drivers/pci/hotplug/pciehp_craye1k.c
> @@ -0,0 +1,659 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright 2022-2024 Lawrence Livermore National Security, LLC
> + */
> +/*
> + * Cray ClusterStor E1000 hotplug slot LED driver extensions
> + *
> + * This driver controls the NVMe slot LEDs on the Cray ClusterStore E1000.
> + * It provides hotplug attention status callbacks for the 24 NVMe slots on
> + * the E1000. This allows users to access the E1000's locate and fault
> + * LEDs via the normal /sys/bus/pci/slots/<slot>/attention sysfs entries.
> + * This driver uses IPMI to communicate with the E1000 controller to toggle
> + * the LEDs.
> + *
> + * This driver is based off of ibmpex.c
> + */
> +
> +#include <linux/debugfs.h>
> +#include <linux/delay.h>
> +#include <linux/errno.h>
> +#include <linux/dmi.h>
> +#include <linux/ipmi.h>
> +#include <linux/ipmi_smi.h>
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <linux/pci_hotplug.h>
> +#include <linux/random.h>
> +#include "pciehp.h"
> +
> +/* Cray E1000 commands */
> +#define CRAYE1K_CMD_NETFN 0x3c
> +#define CRAYE1K_CMD_PRIMARY 0x33
> +#define CRAYE1K_CMD_FAULT_LED 0x39
> +#define CRAYE1K_CMD_LOCATE_LED 0x22
> +
> +/* Subcommands */
> +#define CRAYE1K_GET_LED 0x0
> +#define CRAYE1K_SET_LED 0x1
> +#define CRAYE1K_SET_PRIMARY 0x1
CRAYE1K_SET_LED and CRAYE1K_SET_PRIMARY are both defined as 0x1.
This is likely intentional because they belong to different contexts, so
it is OK.
What about using a CRAYE1K_SUBCMD_ prefix for the subcommands?
> +
> +/*
> + * Milliseconds to wait after get/set LED command. 200ms value found though
> + * experimentation
> + */
> +#define CRAYE1K_POST_CMD_WAIT_MS 200
> +
> +struct craye1k {
> + struct device *dev; /* BMC device */
> + struct mutex lock;
> + struct completion read_complete;
> + struct ipmi_addr address;
> + struct ipmi_user *user;
> + int iface;
> +
> + long tx_msg_id;
> + struct kernel_ipmi_msg tx_msg;
> + unsigned char tx_msg_data[IPMI_MAX_MSG_LENGTH];
> + unsigned char rx_msg_data[IPMI_MAX_MSG_LENGTH];
> + unsigned long rx_msg_len;
> + unsigned char rx_result; /* IPMI completion code */
> +
> + /* Parent dir for all our debugfs entries */
> + struct dentry *parent;
> +
> + /* debugfs stats */
> + u64 check_primary;
> + u64 check_primary_failed;
> + u64 was_already_primary;
> + u64 was_not_already_primary;
> + u64 set_primary;
> + u64 set_initial_primary_failed;
> + u64 set_primary_failed;
> + u64 set_led_locate_failed;
> + u64 set_led_fault_failed;
> + u64 set_led_readback_failed;
> + u64 set_led_failed;
> + u64 get_led_failed;
> + u64 completion_timeout;
> + u64 wrong_msgid;
> + u64 request_failed;
> +
> + /* debugfs configuration options */
> +
> + /* Print info on spurious IPMI messages */
> + bool print_errors;
> +
> + /* Retries for kernel IPMI layer */
> + u32 ipmi_retries;
> +
> + /* Timeout in ms for IPMI (0 = use IPMI default_retry_ms) */
> + u32 ipmi_timeout_ms;
> +
> + /* Timeout in ms to wait for E1000 message completion */
> + u32 completion_timeout_ms;
> +};
> +
> +/*
> + * Make our craye1k a global so get/set_attention_status() can access it.
> + * This is safe since there's only one node controller on the board, and so it's
> + * impossible to instantiate more than one craye1k.
> + */
> +static struct craye1k *craye1k_global;
> +
> +/* Return parent dir dentry */
> +static struct dentry *
> +craye1k_debugfs_init(struct craye1k *craye1k)
> +{
> + umode_t mode = 0644;
> + struct dentry *parent = debugfs_create_dir("pciehp_craye1k", NULL);
> +
The if (!parent) check is correct, but debugfs_create_dir() can also return an ERR_PTR.
What about also checking IS_ERR(parent) to make this fully robust?
> + if (!parent)
> + return NULL;
> +
> + debugfs_create_x64("check_primary", mode, parent,
> + &craye1k->check_primary);
> + debugfs_create_x64("check_primary_failed", mode, parent,
> + &craye1k->check_primary_failed);
> + debugfs_create_x64("was_already_primary", mode, parent,
> + &craye1k->was_already_primary);
> + debugfs_create_x64("was_not_already_primary", mode, parent,
> + &craye1k->was_not_already_primary);
> + debugfs_create_x64("set_primary", mode, parent,
> + &craye1k->set_primary);
> + debugfs_create_x64("set_initial_primary_failed", mode, parent,
> + &craye1k->set_initial_primary_failed);
> + debugfs_create_x64("set_primary_failed", mode, parent,
> + &craye1k->set_primary_failed);
> + debugfs_create_x64("set_led_locate_failed", mode, parent,
> + &craye1k->set_led_locate_failed);
> + debugfs_create_x64("set_led_fault_failed", mode, parent,
> + &craye1k->set_led_fault_failed);
> + debugfs_create_x64("set_led_readback_failed", mode, parent,
> + &craye1k->set_led_readback_failed);
> + debugfs_create_x64("set_led_failed", mode, parent,
> + &craye1k->set_led_failed);
> + debugfs_create_x64("get_led_failed", mode, parent,
> + &craye1k->get_led_failed);
> + debugfs_create_x64("completion_timeout", mode, parent,
> + &craye1k->completion_timeout);
> + debugfs_create_x64("wrong_msgid", mode, parent,
> + &craye1k->wrong_msgid);
> + debugfs_create_x64("request_failed", mode, parent,
> + &craye1k->request_failed);
> +
> + debugfs_create_x32("ipmi_retries", mode, parent,
> + &craye1k->ipmi_retries);
> + debugfs_create_x32("ipmi_timeout_ms", mode, parent,
> + &craye1k->ipmi_timeout_ms);
> + debugfs_create_x32("completion_timeout_ms", mode, parent,
> + &craye1k->completion_timeout_ms);
> + debugfs_create_bool("print_errors", mode, parent,
> + &craye1k->print_errors);
> +
> + return parent;
> +}
[clip]
> +static int __craye1k_get_attention_status(struct hotplug_slot *hotplug_slot,
> + u8 *status, bool set_primary)
> +{
> + unsigned char slot;
> + int locate, fault;
> + struct craye1k *craye1k;
> +
> + craye1k = craye1k_global;
> + slot = PSN(to_ctrl(hotplug_slot));
> +
> + if (set_primary) {
> + if (craye1k_set_primary(craye1k) != 0) {
> + craye1k->get_led_failed++;
> + return -EIO;
> + }
> + }
> +
This returns -EIO when craye1k_set_primary() fails, but -EINVAL when the LED reads fail.
Aren't both cases hardware I/O failures?
> + locate = craye1k_get_slot_led(craye1k, slot, true);
> + if (locate == -1) {
> + craye1k->get_led_failed++;
> + return -EINVAL;
> + }
> + msleep(CRAYE1K_POST_CMD_WAIT_MS);
> +
> + fault = craye1k_get_slot_led(craye1k, slot, false);
> + if (fault == -1) {
> + craye1k->get_led_failed++;
> + return -EINVAL;
> + }
> + msleep(CRAYE1K_POST_CMD_WAIT_MS);
> +
> + *status = locate << 1 | fault;
> +
> + return 0;
> +}
Thanks,
Alok
Powered by blists - more mailing lists