[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <aWWMYJSqlmLkYQBP@aschofie-mobl2.lan>
Date: Mon, 12 Jan 2026 16:05:52 -0800
From: Alison Schofield <alison.schofield@...el.com>
To: Robert Richter <rrichter@....com>, Dave Jiang <dave.jiang@...el.com>
CC: Davidlohr Bueso <dave@...olabs.net>, Jonathan Cameron
<jonathan.cameron@...wei.com>, Dave Jiang <dave.jiang@...el.com>, "Vishal
Verma" <vishal.l.verma@...el.com>, Ira Weiny <ira.weiny@...el.com>, "Dan
Williams" <dan.j.williams@...el.com>, <linux-cxl@...r.kernel.org>,
<linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v3] cxl: Check for invalid addresses returned from
translation functions on errors
On Wed, Jan 07, 2026 at 01:05:43PM +0100, Robert Richter wrote:
> Translation functions may return an invalid address in case of errors.
> If the address is not checked the further use of the invalid value
> will cause an address corruption.
>
> Consistently check for a valid address returned by translation
> functions. Use RESOURCE_SIZE_MAX to indicate an invalid address for
> type resource_size_t. Depending on the type either RESOURCE_SIZE_MAX
> or ULLONG_MAX is used to indicate an address error.
DaveJ --
Please see if you can get this in as a Fix in 6.19. One of the fixes
tags points back to 6.18 where we introduced poison by region offset,
and the other points to 6.19 where we refactored for testability.
Also, please add this user impact statement to the commit log upon applying:
Propagating an invalid address from a failed translation may cause
userspace to think it has received a valid SPA, when in fact it is
wrong. The CXL userspace API, using trace events, expects ULLONG_MAX
to indicate a translation failure. If ULLONG_MAX is not returned
immediately, subsequent calculations can transform that bad address
into a different value (!ULLONG_MAX), and an invalid SPA may be
returned to userspace. This can lead to incorrect diagnostics and
erroneous corrective actions.
Fixes: c3dd67681c70 ("cxl/region: Add inject and clear poison by region offset")
Fixes: b78b9e7b7979 ("cxl/region: Refactor address translation funcs for testing")
Reviewed-by: Alison Schofield <alison.schofield@...el.com>
>
> Reviewed-by: Dave Jiang <dave.jiang@...el.com>
> Signed-off-by: Robert Richter <rrichter@....com>
> ---
> v3:
> * updated sob-chain,
> * changed error handling flow in test/cxl_translate.c (Alison),
>
> v2:
> * separated from this patch series (Alison):
> [PATCH v8 00/13] cxl: ACPI PRM Address Translation Support and AMD Zen5 enablement
> * improved error handling logic and early return on error in
> region_offset_to_dpa_result() (Dave),
> * use RESOURCE_SIZE_MAX to indicate an invalid address for
> resource_size_t types (Alison, kernel test robot),
> * improved patch description (Alison),
> * added line wrap for code >80 chars.
> ---
>
> Signed-off-by: Robert Richter <rrichter@....com>
> ---
> drivers/cxl/core/hdm.c | 2 +-
> drivers/cxl/core/region.c | 34 ++++++++++++++++++++------
> tools/testing/cxl/test/cxl_translate.c | 30 ++++++++++++++---------
> 3 files changed, 45 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 1c5d2022c87a..031672e92b0b 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -530,7 +530,7 @@ resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled)
>
> resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled)
> {
> - resource_size_t base = -1;
> + resource_size_t base = RESOURCE_SIZE_MAX;
>
> lockdep_assert_held(&cxl_rwsem.dpa);
> if (cxled->dpa_res)
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index fc36a5413d3f..5bd1213737fa 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -3118,7 +3118,7 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
> struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
> struct cxl_region_params *p = &cxlr->params;
> struct cxl_endpoint_decoder *cxled = NULL;
> - u64 dpa_offset, hpa_offset, hpa;
> + u64 base, dpa_offset, hpa_offset, hpa;
> u16 eig = 0;
> u8 eiw = 0;
> int pos;
> @@ -3136,8 +3136,14 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
> ways_to_eiw(p->interleave_ways, &eiw);
> granularity_to_eig(p->interleave_granularity, &eig);
>
> - dpa_offset = dpa - cxl_dpa_resource_start(cxled);
> + base = cxl_dpa_resource_start(cxled);
> + if (base == RESOURCE_SIZE_MAX)
> + return ULLONG_MAX;
> +
> + dpa_offset = dpa - base;
> hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, eiw, eig);
> + if (hpa_offset == ULLONG_MAX)
> + return ULLONG_MAX;
>
> /* Apply the hpa_offset to the region base address */
> hpa = hpa_offset + p->res->start + p->cache_size;
> @@ -3146,6 +3152,9 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
> if (cxlrd->ops.hpa_to_spa)
> hpa = cxlrd->ops.hpa_to_spa(cxlrd, hpa);
>
> + if (hpa == ULLONG_MAX)
> + return ULLONG_MAX;
> +
> if (!cxl_resource_contains_addr(p->res, hpa)) {
> dev_dbg(&cxlr->dev,
> "Addr trans fail: hpa 0x%llx not in region\n", hpa);
> @@ -3170,7 +3179,8 @@ static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset,
> struct cxl_region_params *p = &cxlr->params;
> struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
> struct cxl_endpoint_decoder *cxled;
> - u64 hpa, hpa_offset, dpa_offset;
> + u64 hpa_offset = offset;
> + u64 dpa, dpa_offset;
> u16 eig = 0;
> u8 eiw = 0;
> int pos;
> @@ -3187,10 +3197,13 @@ static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset,
> * CXL HPA is assumed to equal SPA.
> */
> if (cxlrd->ops.spa_to_hpa) {
> - hpa = cxlrd->ops.spa_to_hpa(cxlrd, p->res->start + offset);
> - hpa_offset = hpa - p->res->start;
> - } else {
> - hpa_offset = offset;
> + hpa_offset = cxlrd->ops.spa_to_hpa(cxlrd, p->res->start + offset);
> + if (hpa_offset == ULLONG_MAX) {
> + dev_dbg(&cxlr->dev, "HPA not found for %pr offset %#llx\n",
> + p->res, offset);
> + return -ENXIO;
> + }
> + hpa_offset -= p->res->start;
> }
>
> pos = cxl_calculate_position(hpa_offset, eiw, eig);
> @@ -3207,8 +3220,13 @@ static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset,
> cxled = p->targets[i];
> if (cxled->pos != pos)
> continue;
> +
> + dpa = cxl_dpa_resource_start(cxled);
> + if (dpa != RESOURCE_SIZE_MAX)
> + dpa += dpa_offset;
> +
> result->cxlmd = cxled_to_memdev(cxled);
> - result->dpa = cxl_dpa_resource_start(cxled) + dpa_offset;
> + result->dpa = dpa;
>
> return 0;
> }
> diff --git a/tools/testing/cxl/test/cxl_translate.c b/tools/testing/cxl/test/cxl_translate.c
> index 2200ae21795c..2d9b75b30dbc 100644
> --- a/tools/testing/cxl/test/cxl_translate.c
> +++ b/tools/testing/cxl/test/cxl_translate.c
> @@ -68,6 +68,8 @@ static u64 to_hpa(u64 dpa_offset, int pos, u8 r_eiw, u16 r_eig, u8 hb_ways,
>
> /* Calculate base HPA offset from DPA and position */
> hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, r_eiw, r_eig);
> + if (hpa_offset == ULLONG_MAX)
> + return ULLONG_MAX;
>
> if (math == XOR_MATH) {
> cximsd->nr_maps = hbiw_to_nr_maps[hb_ways];
> @@ -258,19 +260,23 @@ static int test_random_params(void)
> pos = get_random_u32() % ways;
> dpa = get_random_u64() >> 12;
>
> + reverse_dpa = ULLONG_MAX;
> + reverse_pos = -1;
> +
> hpa = cxl_calculate_hpa_offset(dpa, pos, eiw, eig);
> - reverse_dpa = cxl_calculate_dpa_offset(hpa, eiw, eig);
> - reverse_pos = cxl_calculate_position(hpa, eiw, eig);
> -
> - if (reverse_dpa != dpa || reverse_pos != pos) {
> - pr_err("test random iter %d FAIL hpa=%llu, dpa=%llu reverse_dpa=%llu, pos=%d reverse_pos=%d eiw=%u eig=%u\n",
> - i, hpa, dpa, reverse_dpa, pos, reverse_pos, eiw,
> - eig);
> -
> - if (failures++ > 10) {
> - pr_err("test random too many failures, stop\n");
> - break;
> - }
> + if (hpa != ULLONG_MAX) {
> + reverse_dpa = cxl_calculate_dpa_offset(hpa, eiw, eig);
> + reverse_pos = cxl_calculate_position(hpa, eiw, eig);
> + if (reverse_dpa == dpa && reverse_pos == pos)
> + continue;
> + }
> +
> + pr_err("test random iter %d FAIL hpa=%llu, dpa=%llu reverse_dpa=%llu, pos=%d reverse_pos=%d eiw=%u eig=%u\n",
> + i, hpa, dpa, reverse_dpa, pos, reverse_pos, eiw, eig);
> +
> + if (failures++ > 10) {
> + pr_err("test random too many failures, stop\n");
> + break;
> }
> }
> pr_info("..... test random: PASS %d FAIL %d\n", i - failures, failures);
>
> base-commit: 88c72bab77aaf389beccf762e112828253ca0564
> --
> 2.47.3
>
Powered by blists - more mailing lists