linux-kernel - RE: [PATCH V2 08/11] cxl/mem: Wire up event interrupts

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6389ab5156083_c9572947c@dwillia2-mobl3.amr.corp.intel.com.notmuch>
Date:   Thu, 1 Dec 2022 23:37:53 -0800
From:   Dan Williams <dan.j.williams@...el.com>
To:     <ira.weiny@...el.com>, Dan Williams <dan.j.williams@...el.com>
CC:     Ira Weiny <ira.weiny@...el.com>,
        Alison Schofield <alison.schofield@...el.com>,
        Vishal Verma <vishal.l.verma@...el.com>,
        "Ben Widawsky" <bwidawsk@...nel.org>,
        Steven Rostedt <rostedt@...dmis.org>,
        Jonathan Cameron <Jonathan.Cameron@...wei.com>,
        Davidlohr Bueso <dave@...olabs.net>,
        Dave Jiang <dave.jiang@...el.com>,
        <linux-kernel@...r.kernel.org>, <linux-cxl@...r.kernel.org>
Subject: RE: [PATCH V2 08/11] cxl/mem: Wire up event interrupts

ira.weiny@ wrote:
> From: Ira Weiny <ira.weiny@...el.com>
> 
> CXL device events are signaled via interrupts.  Each event log may have
> a different interrupt message number.  These message numbers are
> reported in the Get Event Interrupt Policy mailbox command.
> 
> Add interrupt support for event logs.  Interrupts are allocated as
> shared interrupts.  Therefore, all or some event logs can share the same
> message number.

Definitely squash patch1 with this one, especially because this shows
that the ->msi_enabled portion of patch1 was unnecessary, see below.

> 
> Signed-off-by: Ira Weiny <ira.weiny@...el.com>
> 
> ---
> Changes from V1:
> 	Remove unneeded evt_int_policy from struct cxl_dev_state
> 	defer Dynamic Capacity support
> 	Dave Jiang
> 		s/irq/rc
> 		use IRQ_NONE to signal the irq was not for us.
> 	Jonathan
> 		use msi_enabled rather than nr_irq_vec
> 		On failure explicitly set CXL_INT_NONE
> 		Add comment for Get Event Interrupt Policy
> 		use devm_request_threaded_irq()
> 		Use individual handler/thread functions for each of the
> 		logs rather than struct cxl_event_irq_id.
> 
> Changes from RFC v2
> 	Adjust to new irq 16 vector allocation
> 	Jonathan
> 		Remove CXL_INT_RES
> 	Use irq threads to ensure mailbox commands are executed outside irq context
> 	Adjust for optional Dynamic Capacity log
> ---
>  drivers/cxl/core/mbox.c      |  44 +++++++++++-
>  drivers/cxl/cxlmem.h         |  30 ++++++++
>  drivers/cxl/pci.c            | 130 +++++++++++++++++++++++++++++++++++
>  include/uapi/linux/cxl_mem.h |   2 +
>  4 files changed, 204 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 30840b711381..1e00b49d8b06 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -53,6 +53,8 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
>  	CXL_CMD(GET_SUPPORTED_LOGS, 0, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE),
>  	CXL_CMD(GET_EVENT_RECORD, 1, CXL_VARIABLE_PAYLOAD, 0),
>  	CXL_CMD(CLEAR_EVENT_RECORD, CXL_VARIABLE_PAYLOAD, 0, 0),
> +	CXL_CMD(GET_EVT_INT_POLICY, 0, 0x5, 0),
> +	CXL_CMD(SET_EVT_INT_POLICY, 0x5, 0, 0),
>  	CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
>  	CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
>  	CXL_CMD(GET_LSA, 0x8, CXL_VARIABLE_PAYLOAD, 0),
> @@ -806,8 +808,8 @@ static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
>  	return 0;
>  }
>  
> -static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
> -				    enum cxl_event_log_type type)
> +void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
> +			     enum cxl_event_log_type type)
>  {
>  	struct cxl_get_event_payload *payload;
>  	u16 nr_rec;
> @@ -857,6 +859,7 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
>  unlock_buffer:
>  	mutex_unlock(&cxlds->event_buf_lock);
>  }
> +EXPORT_SYMBOL_NS_GPL(cxl_mem_get_records_log, CXL);
>  
>  static void cxl_mem_free_event_buffer(void *data)
>  {
> @@ -916,6 +919,43 @@ void cxl_mem_get_event_records(struct cxl_dev_state *cxlds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
>  
> +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds,
> +			     struct cxl_event_interrupt_policy *policy)
> +{
> +	int rc;
> +
> +	policy->info_settings = CXL_INT_MSI_MSIX;
> +	policy->warn_settings = CXL_INT_MSI_MSIX;
> +	policy->failure_settings = CXL_INT_MSI_MSIX;
> +	policy->fatal_settings = CXL_INT_MSI_MSIX;

I think this needs to be careful not to undo events that the BIOS
steered to itself in firmware-first mode, which raises another question:
does firmware-first mean the OS needs to back off on some event-log
handling as well?

> +
> +	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_EVT_INT_POLICY,
> +			       policy, sizeof(*policy), NULL, 0);
> +	if (rc < 0) {
> +		dev_err(cxlds->dev, "Failed to set event interrupt policy : %d",
> +			rc);
> +
> +		policy->info_settings = CXL_INT_NONE;
> +		policy->warn_settings = CXL_INT_NONE;
> +		policy->failure_settings = CXL_INT_NONE;
> +		policy->fatal_settings = CXL_INT_NONE;
> +
> +		return rc;
> +	}
> +
> +	/* Retrieve interrupt message numbers */
> +	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_EVT_INT_POLICY, NULL, 0,
> +			       policy, sizeof(*policy));
> +	if (rc < 0) {
> +		dev_err(cxlds->dev, "Failed to get event interrupt policy : %d",
> +			rc);
> +		return rc;
> +	}
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_event_config_msgnums, CXL);
> +
>  /**
>   * cxl_mem_get_partition_info - Get partition info
>   * @cxlds: The device data for the operation
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 450b410f29f6..2d384b0fc2b3 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -179,6 +179,30 @@ struct cxl_endpoint_dvsec_info {
>  	struct range dvsec_range[2];
>  };
>  
> +/**
> + * Event Interrupt Policy
> + *
> + * CXL rev 3.0 section 8.2.9.2.4; Table 8-52
> + */
> +enum cxl_event_int_mode {
> +	CXL_INT_NONE		= 0x00,
> +	CXL_INT_MSI_MSIX	= 0x01,
> +	CXL_INT_FW		= 0x02
> +};
> +#define CXL_EVENT_INT_MODE_MASK 0x3
> +#define CXL_EVENT_INT_MSGNUM(setting) (((setting) & 0xf0) >> 4)
> +struct cxl_event_interrupt_policy {
> +	u8 info_settings;
> +	u8 warn_settings;
> +	u8 failure_settings;
> +	u8 fatal_settings;
> +} __packed;
> +
> +static inline bool cxl_evt_int_is_msi(u8 setting)
> +{
> +	return CXL_INT_MSI_MSIX == (setting & CXL_EVENT_INT_MODE_MASK);
> +}
> +
>  /**
>   * struct cxl_dev_state - The driver device state
>   *
> @@ -262,6 +286,8 @@ enum cxl_opcode {
>  	CXL_MBOX_OP_RAW			= CXL_MBOX_OP_INVALID,
>  	CXL_MBOX_OP_GET_EVENT_RECORD	= 0x0100,
>  	CXL_MBOX_OP_CLEAR_EVENT_RECORD	= 0x0101,
> +	CXL_MBOX_OP_GET_EVT_INT_POLICY	= 0x0102,
> +	CXL_MBOX_OP_SET_EVT_INT_POLICY	= 0x0103,
>  	CXL_MBOX_OP_GET_FW_INFO		= 0x0200,
>  	CXL_MBOX_OP_ACTIVATE_FW		= 0x0202,
>  	CXL_MBOX_OP_GET_SUPPORTED_LOGS	= 0x0400,
> @@ -537,7 +563,11 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
>  struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
>  void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
>  void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
> +void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
> +			     enum cxl_event_log_type type);
>  void cxl_mem_get_event_records(struct cxl_dev_state *cxlds);
> +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds,
> +			     struct cxl_event_interrupt_policy *policy);
>  #ifdef CONFIG_CXL_SUSPEND
>  void cxl_mem_active_inc(void);
>  void cxl_mem_active_dec(void);
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 11e95a95195a..3c0b9199f11a 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -449,6 +449,134 @@ static void cxl_pci_alloc_irq_vectors(struct cxl_dev_state *cxlds)
>  	cxlds->msi_enabled = true;
>  }
>  
> +static irqreturn_t cxl_event_info_thread(int irq, void *id)
> +{
> +	struct cxl_dev_state *cxlds = id;
> +
> +	cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO);
> +	return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t cxl_event_info_handler(int irq, void *id)
> +{
> +	struct cxl_dev_state *cxlds = id;
> +	u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
> +
> +	if (CXLDEV_EVENT_STATUS_INFO & status)
> +		return IRQ_WAKE_THREAD;
> +	return IRQ_NONE;
> +}
> +
> +static irqreturn_t cxl_event_warn_thread(int irq, void *id)
> +{
> +	struct cxl_dev_state *cxlds = id;
> +
> +	cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN);
> +	return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t cxl_event_warn_handler(int irq, void *id)
> +{
> +	struct cxl_dev_state *cxlds = id;
> +	u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
> +
> +	if (CXLDEV_EVENT_STATUS_WARN & status)
> +		return IRQ_WAKE_THREAD;
> +	return IRQ_NONE;
> +}
> +
> +static irqreturn_t cxl_event_failure_thread(int irq, void *id)
> +{
> +	struct cxl_dev_state *cxlds = id;
> +
> +	cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL);
> +	return IRQ_HANDLED;
> +}

So I think one of the nice side effects of moving log priority handling
inside of cxl_mem_get_records_log() and looping through all log types in
priority order until all status is clear is that an INFO interrupt also
triggers a check of the FATAL status for free.

You likely do not even need to do the status read in the hardirq part of
the handler, just unconditionally wake the event handler thread. I.e. just
pass NULL for @handler to devm_request_threaded_irq() and let the
thread_fn figure it all out in priority order.

> +
> +static irqreturn_t cxl_event_failure_handler(int irq, void *id)
> +{
> +	struct cxl_dev_state *cxlds = id;
> +	u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
> +
> +	if (CXLDEV_EVENT_STATUS_FAIL & status)
> +		return IRQ_WAKE_THREAD;
> +	return IRQ_NONE;
> +}
> +
> +static irqreturn_t cxl_event_fatal_thread(int irq, void *id)
> +{
> +	struct cxl_dev_state *cxlds = id;
> +
> +	cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL);
> +	return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t cxl_event_fatal_handler(int irq, void *id)
> +{
> +	struct cxl_dev_state *cxlds = id;
> +	u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
> +
> +	if (CXLDEV_EVENT_STATUS_FATAL & status)
> +		return IRQ_WAKE_THREAD;
> +	return IRQ_NONE;
> +}
> +
> +static void cxl_event_irqsetup(struct cxl_dev_state *cxlds)
> +{
> +	struct cxl_event_interrupt_policy policy;
> +	struct device *dev = cxlds->dev;
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	u8 setting;
> +	int rc;
> +
> +	if (cxl_event_config_msgnums(cxlds, &policy))
> +		return;
> +
> +	setting = policy.info_settings;
> +	if (cxl_evt_int_is_msi(setting)) {

So pci_irq_vector() automatically handles checking if MSI is enabled and
will return a failure if either MSI is not enabled, or the message
number did not get a vector.

With that insight I would do something like this (untested):

@@ -521,7 +521,14 @@ static irqreturn_t cxl_event_fatal_handler(int irq, void *id)
        return IRQ_NONE;
 }
 
-static void cxl_event_irqsetup(struct cxl_dev_state *cxlds)
+static int cxl_evt_irq(struct pci_dev *pdev, u8 setting)
+{
+       if ((setting & CXL_EVENT_INT_MODE_MASK) != CXL_INT_MSI_MSIX)
+               return -ENXIO;
+       return pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting));
+}
+
+static int cxl_event_irqsetup(struct cxl_dev_state *cxlds)
 {
        struct cxl_event_interrupt_policy policy;
        struct device *dev = cxlds->dev;
@@ -529,18 +536,17 @@ static void cxl_event_irqsetup(struct cxl_dev_state *cxlds)
        u8 setting;
        int rc;
 
-       if (cxl_event_config_msgnums(cxlds, &policy))
-               return;
+       rc = cxl_event_config_msgnums(cxlds, &policy);
+       if (rc)
+               return rc;
 
-       setting = policy.info_settings;
-       if (cxl_evt_int_is_msi(setting)) {
-               rc = devm_request_threaded_irq(dev,
-                               pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)),
-                               cxl_event_info_handler, cxl_event_info_thread,
-                               IRQF_SHARED, NULL, cxlds);
-               if (rc)
-                       dev_err(dev, "Failed to get interrupt for %s event log\n",
-                               cxl_event_log_type_str(CXL_EVENT_TYPE_INFO));
+       rc = devm_request_threaded_irq(dev,
+                                      cxl_evt_irq(pdev, policy.info_settings),
+                                      NULL, cxl_event_info_thread, IRQF_SHARED,
+                                      NULL, cxlds);
+       if (rc) {
+               dev_err(dev, "Failed to get interrupt for INFO event log\n");
+               return rc;
        }
 
        setting = policy.warn_settings;



> +		rc = devm_request_threaded_irq(dev,
> +				pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)),
> +				cxl_event_info_handler, cxl_event_info_thread,
> +				IRQF_SHARED, NULL, cxlds);
> +		if (rc)
> +			dev_err(dev, "Failed to get interrupt for %s event log\n",
> +				cxl_event_log_type_str(CXL_EVENT_TYPE_INFO));

Per above, no need to use cxl_event_log_type_str() in these.

> +	}
> +
> +	setting = policy.warn_settings;
> +	if (cxl_evt_int_is_msi(setting)) {
> +		rc = devm_request_threaded_irq(dev,
> +				pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)),
> +				cxl_event_warn_handler, cxl_event_warn_thread,
> +				IRQF_SHARED, NULL, cxlds);
> +		if (rc)
> +			dev_err(dev, "Failed to get interrupt for %s event log\n",
> +				cxl_event_log_type_str(CXL_EVENT_TYPE_WARN));
> +	}
> +
> +	setting = policy.failure_settings;
> +	if (cxl_evt_int_is_msi(setting)) {
> +		rc = devm_request_threaded_irq(dev,
> +				pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)),
> +				cxl_event_failure_handler, cxl_event_failure_thread,
> +				IRQF_SHARED, NULL, cxlds);
> +		if (rc)
> +			dev_err(dev, "Failed to get interrupt for %s event log\n",
> +				cxl_event_log_type_str(CXL_EVENT_TYPE_FAIL));
> +	}
> +
> +	setting = policy.fatal_settings;
> +	if (cxl_evt_int_is_msi(setting)) {
> +		rc = devm_request_threaded_irq(dev,
> +				pci_irq_vector(pdev, CXL_EVENT_INT_MSGNUM(setting)),
> +				cxl_event_fatal_handler, cxl_event_fatal_thread,
> +				IRQF_SHARED, NULL, cxlds);
> +		if (rc)
> +			dev_err(dev, "Failed to get interrupt for %s event log\n",
> +				cxl_event_log_type_str(CXL_EVENT_TYPE_FATAL));
> +	}
> +}
> +
>  static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  {
>  	struct cxl_register_map map;
> @@ -516,6 +644,8 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  		return rc;
>  
>  	cxl_pci_alloc_irq_vectors(cxlds);

There should be a failure return here, or a comment explaining why this
can be skipped, especially if the device claims to support events.

> +	if (cxlds->msi_enabled)
> +		cxl_event_irqsetup(cxlds);

Per above, do this unconditionally.

>  
>  	cxlmd = devm_cxl_add_memdev(cxlds);
>  	if (IS_ERR(cxlmd))
> diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h
> index 7c1ad8062792..a8204802fcca 100644
> --- a/include/uapi/linux/cxl_mem.h
> +++ b/include/uapi/linux/cxl_mem.h
> @@ -26,6 +26,8 @@
>  	___C(GET_SUPPORTED_LOGS, "Get Supported Logs"),                   \
>  	___C(GET_EVENT_RECORD, "Get Event Record"),                       \
>  	___C(CLEAR_EVENT_RECORD, "Clear Event Record"),                   \
> +	___C(GET_EVT_INT_POLICY, "Get Event Interrupt Policy"),           \
> +	___C(SET_EVT_INT_POLICY, "Set Event Interrupt Policy"),           \
>  	___C(GET_FW_INFO, "Get FW Info"),                                 \
>  	___C(GET_PARTITION_INFO, "Get Partition Information"),            \
>  	___C(GET_LSA, "Get Label Storage Area"),                          \

Same, "at the end" comment.