lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20260130121638.169160-1-pawel.mielimonka@fujitsu.com>
Date: Fri, 30 Jan 2026 21:16:38 +0900
From: Pawel Mielimonka <pawel.mielimonka@...itsu.com>
To: dan.j.williams@...el.com,
	alison.schofield@...el.com
Cc: Smita.KoralahalliChannabasappa@....com,
	linux-cxl@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	dave@...olabs.net,
	jonathan.cameron@...wei.com,
	dave.jiang@...el.com,
	vishal.l.verma@...el.com,
	ira.weiny@...el.com,
	lizhijian@...itsu.com,
	Pawel Mielimonka <pawel.mielimonka@...itsu.com>
Subject: [ndctl PATCH v4] cxl/cli: enforce HPA-descending teardown

When destroying CXL regions, users may observe failures such as
"set_dpa_size failed: Device or resource busy" even when targeting
valid regions. After such failures, subsequent destroy/create cycles may
become impossible without a full system reset.

The current logic does not guarantee descending HPA order across
regions, even when each region is mapped to a single endpoint decoder.
As a result, a region can only be destroyed if it is the last one in
HPA order.

Alison observed that the issue extends when a region's mappings reach
endpoint decoders under different root decoders. In such cases, the HPA
descending order must be considered across all endpoint decoders that
share any of the root decoders involved - effectively covering the
entire bus/port. Without this global ordering, destroy operations may
fail unpredictably, and following create operations can also be blocked.

This change does not alter the underlying kernel behavior or decoder
programming rules. Instead, it enforces the existing ordering
constraints at the CLI level, preventing users from issuing destroy
operations that would violate HPA continuity required by specification
(section 8.2.4.20.12).

Link to v2 - Alison's findings:
https://lore.kernel.org/linux-cxl/aTTKRCUmbNC9jIrG@aschofie-mobl2.lan/

base-commit: 4f7a1c63b3305c97013d3c46daa6c0f76feff10d

v4 updates:
 - expand the commit message to include detailed description
 - add references to prior discussion and failure scenarios
 - follow guidelines for subject and formatting
 - no functional changes compared to v3

v3 updates:
 - fix iteration to cover all endpoint decoders under a common bus/port
 - collapse series into a single patch   

v2 updates:
 - sent by mistake from wrong local branch, does not compile, should
   be ignored

v1:
https://lore.kernel.org/linux-cxl/20251125143826.282312-1-pawel.mielimonka@fujitsu.com/

Signed-off-by: Pawel Mielimonka <pawel.mielimonka@...itsu.com>
---
 cxl/region.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 125 insertions(+), 3 deletions(-)

diff --git a/cxl/region.c b/cxl/region.c
index 207cf2d..d86e45f 100644
--- a/cxl/region.c
+++ b/cxl/region.c
@@ -831,6 +831,72 @@ out:
 	return cxl_region_disable(region);
 }
 
+/*
+ * cmp_region_hpa() - Compare CXL regions by their HPA.
+ * @l: pointer to the first element (const struct cxl_region **).
+ * @r: pointer to the second element (const struct cxl_region **).
+ *
+ * Comparison function for CXL regions based on the Host Physical Address
+ * returned by cxl_region_get_resource().
+ *
+ * Return:
+ *   < 0 if the HPA of the region pointed to by @l is less than that of @r
+ *   = 0 if both regions have the same HPA (not expected)
+ *   > 0 if the HPA of the region pointed to by @l is greater than that of @r
+ */
+static int cmp_region_hpa(const void *l, const void *r)
+{
+	const struct cxl_region *const *left = l;
+	const struct cxl_region *const *right = r;
+	u64 hpa1 = cxl_region_get_resource((struct cxl_region *) *left);
+	u64 hpa2 = cxl_region_get_resource((struct cxl_region *) *right);
+
+	return (hpa1 > hpa2) - (hpa1 < hpa2);
+}
+
+/*
+ * Collect every region under the port of @root, sorted by ascending HPA.
+ * On success *@out is a heap array (NULL when empty) that the caller frees.
+ */
+static int collect_regions_sorted(struct cxl_decoder *root,
+	struct cxl_region ***out, int *out_nr)
+{
+	struct cxl_port *port = cxl_decoder_get_port(root);
+	struct cxl_region **list = NULL;
+	struct cxl_decoder *decoder;
+	struct cxl_region *region;
+	int nr = 0, alloc = 0;
+
+	cxl_decoder_foreach(port, decoder) {
+		if (!cxl_port_is_root(port))
+			continue;
+		/* A decoder without regions must not end the scan early */
+		cxl_region_foreach(decoder, region) {
+			if (nr == alloc) {
+				int new_alloc = alloc ? alloc * 2 : 8;
+				size_t new_size = (size_t)new_alloc * sizeof(*list);
+				struct cxl_region **tmp;
+
+				tmp = realloc(list, new_size);
+				if (!tmp) {
+					free(list);
+					return -ENOMEM;
+				}
+				list = tmp;
+				alloc = new_alloc;
+			}
+			list[nr++] = region;
+		}
+	}
+
+	/* list stays NULL when nothing was found; skip qsort() in that case */
+	if (nr)
+		qsort(list, nr, sizeof(*list), cmp_region_hpa);
+	*out = list;
+	*out_nr = nr;
+	return 0;
+}
+
 static int destroy_region(struct cxl_region *region)
 {
 	const char *devname = cxl_region_get_devname(region);
@@ -895,6 +961,59 @@ static int destroy_region(struct cxl_region *region)
 	return cxl_region_delete(region);
 }
 
+/* Destroy filter-matched regions on @decoder's port, highest HPA first. */
+static int destroy_multiple_regions(struct parsed_params *p,
+	struct cxl_decoder *decoder, int *count)
+{
+	struct cxl_region **regions;
+	bool gap = false;
+	int total;
+	int rc;
+
+	rc = collect_regions_sorted(decoder, &regions, &total);
+	if (rc) {
+		log_err(&rl, "failed to allocate region list: %s\n", strerror(-rc));
+		return rc;
+	}
+
+	/* The list is sorted ascending, so walk it backwards */
+	for (int idx = total; idx-- > 0; ) {
+		struct cxl_region *match = NULL;
+		int arg = 0;
+
+		while (arg < p->argc && !match)
+			match = util_cxl_region_filter(regions[idx], p->argv[arg++]);
+
+		if (!match) {
+			gap = true;
+			continue;
+		}
+
+		/*
+		 * A selected region sitting below an unselected one cannot be
+		 * removed without breaking HPA continuity, so stop here.
+		 */
+		if (gap) {
+			log_err(&rl, "failed to destroy %s: out of order %s reset\n",
+				cxl_region_get_devname(match),
+				cxl_decoder_get_devname(decoder));
+			rc = -EINVAL;
+			break;
+		}
+
+		rc = destroy_region(match);
+		if (rc) {
+			log_err(&rl, "%s: failed: %s\n",
+				cxl_region_get_devname(match), strerror(-rc));
+			break;
+		}
+		(*count)++;
+	}
+
+	free(regions);
+	return rc;
+}
+
 static int do_region_xable(struct cxl_region *region, enum region_actions action)
 {
 	switch (action) {
@@ -902,8 +1021,6 @@ static int do_region_xable(struct cxl_region *region, enum region_actions action
 		return cxl_region_enable(region);
 	case ACTION_DISABLE:
 		return disable_region(region);
-	case ACTION_DESTROY:
-		return destroy_region(region);
 	default:
 		return -EINVAL;
 	}
@@ -971,7 +1088,12 @@ static int region_action(int argc, const char **argv, struct cxl_ctx *ctx,
 			if (!util_cxl_decoder_filter(decoder,
 						     param.root_decoder))
 				continue;
-			rc = decoder_region_action(p, decoder, action, count);
+
+			if (action == ACTION_DESTROY)
+				rc = destroy_multiple_regions(p, decoder, count);
+			else
+				rc = decoder_region_action(p, decoder, action, count);
+
 			if (rc)
 				err_rc = rc;
 		}
-- 
2.47.3


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ