lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <653c66507bf8d_244c8f294ea@dwillia2-xfh.jf.intel.com.notmuch>
Date:   Fri, 27 Oct 2023 18:39:28 -0700
From:   Dan Williams <dan.j.williams@...el.com>
To:     Dan Williams <dan.j.williams@...el.com>,
        Robert Richter <rrichter@....com>
CC:     Davidlohr Bueso <dave@...olabs.net>,
        Jonathan Cameron <jonathan.cameron@...wei.com>,
        Dave Jiang <dave.jiang@...el.com>,
        "Alison Schofield" <alison.schofield@...el.com>,
        Vishal Verma <vishal.l.verma@...el.com>,
        Ira Weiny <ira.weiny@...el.com>,
        Ben Widawsky <bwidawsk@...nel.org>,
        <linux-cxl@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
        Bjorn Helgaas <bhelgaas@...gle.com>,
        "Terry Bowman" <terry.bowman@....com>
Subject: Re: [PATCH v12 01/20] cxl/port: Fix release of RCD endpoints

Dan Williams wrote:
> Robert Richter wrote:
> > Dan,
> [..]
> > 
> > delete_endpoint() is called here, but the uport etc. is not unbound.
> > Which means this is not true:
> > 
> > 	if (parent->driver && !endpoint->dead) {
> > 		...
> > 
> > I don't remember this with my patch. The parent is there different, so
> > that could be the reason.
> > 
> > I could not yet look into more detail but wanted to let you know. Will
> > continue.
> 
> Apologies, I didn't have that regression going, I think I see the issue.
> Thanks for the heads up.

Here is the incremental fix on top of the lifetime fix:

diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 6230ddfc0be8..0fe915ec2cc2 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1217,30 +1217,39 @@ static struct device *grandparent(struct device *dev)
 	return NULL;
 }
 
+static struct device *endpoint_host(struct cxl_port *endpoint)
+{
+	struct cxl_port *port = to_cxl_port(endpoint->dev.parent);
+
+	if (is_cxl_root(port))
+		return port->uport_dev;
+	return &port->dev;
+}
+
 static void delete_endpoint(void *data)
 {
 	struct cxl_memdev *cxlmd = data;
 	struct cxl_port *endpoint = cxlmd->endpoint;
-	struct device *parent = endpoint->dev.parent;
+	struct device *host = endpoint_host(endpoint);
 
-	device_lock(parent);
-	if (parent->driver && !endpoint->dead) {
-		devm_release_action(parent, cxl_unlink_parent_dport, endpoint);
-		devm_release_action(parent, cxl_unlink_uport, endpoint);
-		devm_release_action(parent, unregister_port, endpoint);
+	device_lock(host);
+	if (host->driver && !endpoint->dead) {
+		devm_release_action(host, cxl_unlink_parent_dport, endpoint);
+		devm_release_action(host, cxl_unlink_uport, endpoint);
+		devm_release_action(host, unregister_port, endpoint);
 	}
 	cxlmd->endpoint = NULL;
-	device_unlock(parent);
+	device_unlock(host);
 	put_device(&endpoint->dev);
-	put_device(parent);
+	put_device(host);
 }
 
 int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
 {
-	struct device *parent = endpoint->dev.parent;
+	struct device *host = endpoint_host(endpoint);
 	struct device *dev = &cxlmd->dev;
 
-	get_device(parent);
+	get_device(host);
 	get_device(&endpoint->dev);
 	cxlmd->endpoint = endpoint;
 	cxlmd->depth = endpoint->depth;


---

...and here is the new regression test so I don't mess up and miss this
again:

diff --git a/cxl/memdev.c b/cxl/memdev.c
index d76a4d86a40a..81dfd4c25b25 100644
--- a/cxl/memdev.c
+++ b/cxl/memdev.c
@@ -752,6 +752,8 @@ static int memdev_action(int argc, const char **argv, struct cxl_ctx *ctx,
                        if (end[0] == 0)
                                continue;
                } else {
+                       unsigned long domain, bus, dev, func;
+
                        if (strcmp(argv[i], "all") == 0) {
                                argc = 1;
                                break;
@@ -760,6 +762,12 @@ static int memdev_action(int argc, const char **argv, struct cxl_ctx *ctx,
                                continue;
                        if (sscanf(argv[i], "%lu", &id) == 1)
                                continue;
+                       if (sscanf(argv[i], "%lx:%lx:%lx.%lx", &domain, &bus, &dev, &func))
+                               continue;
+                       if (sscanf(argv[i], "cxl_mem.%lu", &id))
+                               continue;
+                       if (sscanf(argv[i], "cxl_rcd.%lu", &id))
+                               continue;
                }
 
                log_err(&ml, "'%s' is not a valid memdev %s\n", argv[i],
diff --git a/test/cxl-topology.sh b/test/cxl-topology.sh
index 89d01a89ccb1..0320887a953b 100644
--- a/test/cxl-topology.sh
+++ b/test/cxl-topology.sh
@@ -120,6 +120,13 @@ count=$(jq "map(select(.pmem_size == $pmem_size)) | length" <<< $json)
 ((bridges == 2 && count == 8 || bridges == 3 && count == 10 ||
   bridges == 4 && count == 11)) || err "$LINENO"
 
+# check rcd endpoints disappear when disabling the memdev
+m=$($CXL list -M -b cxl_test | jq -r ".[].host" | grep rcd)
+ep=$($CXL list -E -m $m | jq -r ".[].endpoint")
+$CXL disable-memdev $m --force
+check=$($CXL list -E -e $ep | jq -r ".[].endpoint")
+[ -z "$check" ] || err "$LINENO"
+$CXL enable-memdev $m

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ