lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Y5jK658xKJC3bLny@iweiny-mobl>
Date:   Tue, 13 Dec 2022 10:56:43 -0800
From:   Ira Weiny <ira.weiny@...el.com>
To:     johnny <johnny.li@...tage-tech.com>
CC:     Dan Williams <dan.j.williams@...el.com>,
        Bjorn Helgaas <bhelgaas@...gle.com>,
        Alison Schofield <alison.schofield@...el.com>,
        "Vishal Verma" <vishal.l.verma@...el.com>,
        Davidlohr Bueso <dave@...olabs.net>,
        Jonathan Cameron <Jonathan.Cameron@...wei.com>,
        Dave Jiang <dave.jiang@...el.com>,
        <linux-kernel@...r.kernel.org>, <linux-pci@...r.kernel.org>,
        <linux-acpi@...r.kernel.org>, <linux-cxl@...r.kernel.org>
Subject: Re: [PATCH V4 2/9] cxl/mem: Read, trace, and clear events on driver
 load

On Tue, Dec 13, 2022 at 02:49:02PM +0800, johnny wrote:
> On Sun, Dec 11, 2022 at 11:06:20PM -0800, ira.weiny@...el.com wrote:
> > From: Ira Weiny <ira.weiny@...el.com>
> > 

[snip]

> > +
> > +#define CXL_EVENT_RECORD_DATA_LENGTH 0x50
> > +struct cxl_event_record_raw {
> > +	struct cxl_event_record_hdr hdr;
> > +	u8 data[CXL_EVENT_RECORD_DATA_LENGTH];
> > +} __packed;
> > +
> > +/*
> > + * Get Event Records output payload
> > + * CXL rev 3.0 section 8.2.9.2.2; Table 8-50
> > + */
> > +#define CXL_GET_EVENT_FLAG_OVERFLOW		BIT(0)
> > +#define CXL_GET_EVENT_FLAG_MORE_RECORDS		BIT(1)
> I don't see any code that consumes this MORE_RECORDS flag; am I missing something?
> The device shall set this flag when a single output payload cannot fit all of the records.

I should have removed this flag and put something in the cover letter.  I left
it in for completeness, but you are correct that it is unused.

We determined back in V1 that the More Event Records bit is not useful in this
particular loop over Get Event Records.[1]

The net-net is that if the driver does not see the number of records go to 0, it
can't be sure it will get an interrupt for the next set of events.  Therefore,
it loops until it sees the number of records go to 0.

Ira

[1] https://lore.kernel.org/all/Y4blpk%2FesXJMe79Y@iweiny-desk3/

> > +struct cxl_get_event_payload {
> > +	u8 flags;
> > +	u8 reserved1;
> > +	__le16 overflow_err_count;
> > +	__le64 first_overflow_timestamp;
> > +	__le64 last_overflow_timestamp;
> > +	__le16 record_count;
> > +	u8 reserved2[10];
> > +	struct cxl_event_record_raw records[];
> > +} __packed;
> > +
> > +/*
> > + * CXL rev 3.0 section 8.2.9.2.2; Table 8-49
> > + */
> > +enum cxl_event_log_type {
> > +	CXL_EVENT_TYPE_INFO = 0x00,
> > +	CXL_EVENT_TYPE_WARN,
> > +	CXL_EVENT_TYPE_FAIL,
> > +	CXL_EVENT_TYPE_FATAL,
> > +	CXL_EVENT_TYPE_MAX
> > +};
> > +
> > +/*
> > + * Clear Event Records input payload
> > + * CXL rev 3.0 section 8.2.9.2.3; Table 8-51
> > + */
> > +#define CXL_CLEAR_EVENT_MAX_HANDLES (0xff)
> > +struct cxl_mbox_clear_event_payload {
> > +	u8 event_log;		/* enum cxl_event_log_type */
> > +	u8 clear_flags;
> > +	u8 nr_recs;
> > +	u8 reserved[3];
> > +	__le16 handle[CXL_CLEAR_EVENT_MAX_HANDLES];
> > +} __packed;
> > +#define CXL_CLEAR_EVENT_LIMIT_HANDLES(payload_size)			\
> > +	(((payload_size) -						\
> > +		(sizeof(struct cxl_mbox_clear_event_payload) -		\
> > +		 (sizeof(__le16) * CXL_CLEAR_EVENT_MAX_HANDLES))) /	\
> > +		sizeof(__le16))
> > +
> >  struct cxl_mbox_get_partition_info {
> >  	__le64 active_volatile_cap;
> >  	__le64 active_persistent_cap;
> > @@ -441,6 +524,7 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
> >  struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
> >  void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
> >  void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
> > +void cxl_mem_get_event_records(struct cxl_dev_state *cxlds, u32 status);
> >  #ifdef CONFIG_CXL_SUSPEND
> >  void cxl_mem_active_inc(void);
> >  void cxl_mem_active_dec(void);
> > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> > index 3a66aadb4df0..a2d8382bc593 100644
> > --- a/drivers/cxl/pci.c
> > +++ b/drivers/cxl/pci.c
> > @@ -417,8 +417,37 @@ static void disable_aer(void *pdev)
> >  	pci_disable_pcie_error_reporting(pdev);
> >  }
> >  
> > +static void cxl_mem_free_event_buffer(void *buf)
> > +{
> > +	kvfree(buf);
> > +}
> > +
> > +/*
> > + * There is a single buffer for reading event logs from the mailbox.  All logs
> > + * share this buffer protected by the cxlds->event_log_lock.
> > + */
> > +static int cxl_mem_alloc_event_buf(struct cxl_dev_state *cxlds)
> > +{
> > +	struct cxl_get_event_payload *buf;
> > +
> > +	dev_dbg(cxlds->dev, "Allocating event buffer size %zu\n",
> > +		cxlds->payload_size);
> > +
> > +	buf = kvmalloc(cxlds->payload_size, GFP_KERNEL);
> > +	if (!buf)
> > +		return -ENOMEM;
> > +
> > +	if (devm_add_action_or_reset(cxlds->dev, cxl_mem_free_event_buffer,
> > +				     buf))
> > +		return -ENOMEM;
> > +
> > +	cxlds->event.buf = buf;
> > +	return 0;
> > +}
> > +
> >  static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >  {
> > +	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
> >  	struct cxl_register_map map;
> >  	struct cxl_memdev *cxlmd;
> >  	struct cxl_dev_state *cxlds;
> > @@ -494,6 +523,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >  	if (IS_ERR(cxlmd))
> >  		return PTR_ERR(cxlmd);
> >  
> > +	rc = cxl_mem_alloc_event_buf(cxlds);
> > +	if (rc)
> > +		return rc;
> > +
> > +	/*
> > +	 * When BIOS maintains CXL error reporting control, it will process
> > +	 * event records.  Only one agent can do so.
> > +	 */
> > +	if (host_bridge->native_cxl_error)
> > +		cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL);
> > +
> >  	if (cxlds->regs.ras) {
> >  		pci_enable_pcie_error_reporting(pdev);
> >  		rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev);
> > -- 
> > 2.37.2
> > 
> > 
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ