lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250715165607.000036c4@huawei.com>
Date: Tue, 15 Jul 2025 16:56:07 +0100
From: Jonathan Cameron <Jonathan.Cameron@...wei.com>
To: Dan Williams <dan.j.williams@...el.com>
CC: <linux-cxl@...r.kernel.org>, <linux-kernel@...r.kernel.org>, Davidlohr
 Bueso <dave@...olabs.net>, Dave Jiang <dave.jiang@...el.com>, Alison
 Schofield <alison.schofield@...el.com>, Vishal Verma
	<vishal.l.verma@...el.com>, "Ira Weiny" <ira.weiny@...el.com>, "Peter
 Zijlstra (Intel)" <peterz@...radead.org>
Subject: Re: [PATCH v3 7/8] cxl/region: Consolidate
 cxl_decoder_kill_region() and cxl_region_detach()

On Fri, 11 Jul 2025 16:49:31 -0700
Dan Williams <dan.j.williams@...el.com> wrote:

> Both detach_target() and cxld_unregister() want to tear down a cxl_region
> when an endpoint decoder is either detached or destroyed.
> 
> When a region is to be destroyed cxl_region_detach() releases
> cxl_region_rwsem unbinds the cxl_region driver and re-acquires the rwsem.
> 
> This "reverse" locking pattern is difficult to reason about, not amenable
> to scope-based cleanup, and the minor differences in the calling context of
> detach_target() and cxld_unregister() currently results in the
> cxl_decoder_kill_region() wrapper.
> 
> Introduce cxl_decoder_detach() to wrap a core __cxl_decoder_detach() that
> serves both cases. I.e. either detaching a known position in a region
> (interruptible), or detaching an endpoint decoder if it is found to be a
> member of a region (uninterruptible).
> 
> Cc: Davidlohr Bueso <dave@...olabs.net>
> Cc: Jonathan Cameron <jonathan.cameron@...wei.com>
> Cc: Dave Jiang <dave.jiang@...el.com>
> Cc: Alison Schofield <alison.schofield@...el.com>
> Cc: Vishal Verma <vishal.l.verma@...el.com>
> Cc: Ira Weiny <ira.weiny@...el.com>
> Acked-by: "Peter Zijlstra (Intel)" <peterz@...radead.org>
> Signed-off-by: Dan Williams <dan.j.williams@...el.com>
One query inline about what I think is a change in when a reference count is
held on the region device.  I'm struggling to reason about whether that change
would have always been safe or if there is another change here that makes
it fine now?

(or whether I'm just misreading the change).

Jonathan



> ---
>  drivers/cxl/core/core.h   |  15 +++++-
>  drivers/cxl/core/port.c   |   9 ++--
>  drivers/cxl/core/region.c | 103 ++++++++++++++++++++++----------------
>  3 files changed, 75 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 29b61828a847..2be37084409f 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -12,6 +12,11 @@ extern const struct device_type cxl_pmu_type;
>  
>  extern struct attribute_group cxl_base_attribute_group;
>  
> +enum cxl_detach_mode {
> +	DETACH_ONLY,
> +	DETACH_INVALIDATE,
> +};
> +
>  #ifdef CONFIG_CXL_REGION
>  extern struct device_attribute dev_attr_create_pmem_region;
>  extern struct device_attribute dev_attr_create_ram_region;
> @@ -20,7 +25,11 @@ extern struct device_attribute dev_attr_region;
>  extern const struct device_type cxl_pmem_region_type;
>  extern const struct device_type cxl_dax_region_type;
>  extern const struct device_type cxl_region_type;
> -void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
> +
> +int cxl_decoder_detach(struct cxl_region *cxlr,
> +		       struct cxl_endpoint_decoder *cxled, int pos,
> +		       enum cxl_detach_mode mode);
> +
>  #define CXL_REGION_ATTR(x) (&dev_attr_##x.attr)
>  #define CXL_REGION_TYPE(x) (&cxl_region_type)
>  #define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
> @@ -48,7 +57,9 @@ static inline int cxl_get_poison_by_endpoint(struct cxl_port *port)
>  {
>  	return 0;
>  }
> -static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
> +static inline int cxl_decoder_detach(struct cxl_region *cxlr,
> +				     struct cxl_endpoint_decoder *cxled,
> +				     int pos, enum cxl_detach_mode mode)
>  {
>  }
>  static inline int cxl_region_init(void)
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index eb46c6764d20..087a20a9ee1c 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -2001,12 +2001,9 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL");
>  
>  static void cxld_unregister(void *dev)
>  {
> -	struct cxl_endpoint_decoder *cxled;
> -
> -	if (is_endpoint_decoder(dev)) {
> -		cxled = to_cxl_endpoint_decoder(dev);
> -		cxl_decoder_kill_region(cxled);
> -	}
> +	if (is_endpoint_decoder(dev))
> +		cxl_decoder_detach(NULL, to_cxl_endpoint_decoder(dev), -1,
> +				   DETACH_INVALIDATE);
>  
>  	device_unregister(dev);
>  }
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 2a97fa9a394f..4314aaed8ad8 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2135,27 +2135,43 @@ static int cxl_region_attach(struct cxl_region *cxlr,
>  	return 0;
>  }
>  
> -static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
> +static struct cxl_region *
> +__cxl_decoder_detach(struct cxl_region *cxlr,
> +		     struct cxl_endpoint_decoder *cxled, int pos,
> +		     enum cxl_detach_mode mode)
>  {
> -	struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
> -	struct cxl_region *cxlr = cxled->cxld.region;
>  	struct cxl_region_params *p;
> -	int rc = 0;
>  
>  	lockdep_assert_held_write(&cxl_region_rwsem);
>  
> -	if (!cxlr)
> -		return 0;
> +	if (!cxled) {
> +		p = &cxlr->params;
>  
> -	p = &cxlr->params;
> -	get_device(&cxlr->dev);

This is a fairly nasty patch to unwind and fully understand but
I'm nervous that in the existing we have a get_device(&cxlr->dev)
before potential cxl_region_decode_reset(cxlr, ...)
and now we don't.  I'm not sure if that is a real problem though,
it just makes me nervous.


> +		if (pos >= p->interleave_ways) {
> +			dev_dbg(&cxlr->dev, "position %d out of range %d\n",
> +				pos, p->interleave_ways);
> +			return ERR_PTR(-ENXIO);
> +		}
> +
> +		if (!p->targets[pos])
> +			return NULL;
> +		cxled = p->targets[pos];
> +	} else {
> +		cxlr = cxled->cxld.region;
> +		if (!cxlr)
> +			return NULL;
> +		p = &cxlr->params;
> +	}
> +
> +	if (mode == DETACH_INVALIDATE)
> +		cxled->part = -1;
>  
>  	if (p->state > CXL_CONFIG_ACTIVE) {
>  		cxl_region_decode_reset(cxlr, p->interleave_ways);
>  		p->state = CXL_CONFIG_ACTIVE;
>  	}
>  
> -	for (iter = ep_port; !is_cxl_root(iter);
> +	for (struct cxl_port *iter = cxled_to_port(cxled); !is_cxl_root(iter);
>  	     iter = to_cxl_port(iter->dev.parent))
>  		cxl_port_detach_region(iter, cxlr, cxled);
>  
> @@ -2166,7 +2182,7 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
>  		dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
>  			      dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>  			      cxled->pos);
> -		goto out;
> +		return NULL;
>  	}
>  
>  	if (p->state == CXL_CONFIG_ACTIVE) {
> @@ -2180,21 +2196,42 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
>  		.end = -1,
>  	};
>  
> -	/* notify the region driver that one of its targets has departed */
> -	up_write(&cxl_region_rwsem);
> -	device_release_driver(&cxlr->dev);
> -	down_write(&cxl_region_rwsem);
> -out:
> -	put_device(&cxlr->dev);
> -	return rc;
> +	get_device(&cxlr->dev);
> +	return cxlr;
>  }
>  
> -void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
> +/*
> + * Cleanup a decoder's interest in a region. There are 2 cases to
> + * handle, removing an unknown @cxled from a known position in a region
> + * (detach_target()) or removing a known @cxled from an unknown @cxlr
> + * (cxld_unregister())
> + *
> + * When the detachment finds a region release the region driver.
> + */
> +int cxl_decoder_detach(struct cxl_region *cxlr,
> +		       struct cxl_endpoint_decoder *cxled, int pos,
> +		       enum cxl_detach_mode mode)
>  {
> -	down_write(&cxl_region_rwsem);
> -	cxled->part = -1;
> -	cxl_region_detach(cxled);
> +	struct cxl_region *detach;
> +
> +	/* when the decoder is being destroyed lock unconditionally */
> +	if (mode == DETACH_INVALIDATE)
> +		down_write(&cxl_region_rwsem);
> +	else {
> +		int rc = down_write_killable(&cxl_region_rwsem);
> +
> +		if (rc)
> +			return rc;
> +	}
> +
> +	detach = __cxl_decoder_detach(cxlr, cxled, pos, mode);
>  	up_write(&cxl_region_rwsem);
> +
> +	if (detach) {
> +		device_release_driver(&detach->dev);
> +		put_device(&detach->dev);
> +	}
> +	return 0;
>  }
>  
>  static int attach_target(struct cxl_region *cxlr,
> @@ -2225,29 +2262,7 @@ static int attach_target(struct cxl_region *cxlr,
>  
>  static int detach_target(struct cxl_region *cxlr, int pos)
>  {
> -	struct cxl_region_params *p = &cxlr->params;
> -	int rc;
> -
> -	rc = down_write_killable(&cxl_region_rwsem);
> -	if (rc)
> -		return rc;
> -
> -	if (pos >= p->interleave_ways) {
> -		dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
> -			p->interleave_ways);
> -		rc = -ENXIO;
> -		goto out;
> -	}
> -
> -	if (!p->targets[pos]) {
> -		rc = 0;
> -		goto out;
> -	}
> -
> -	rc = cxl_region_detach(p->targets[pos]);
> -out:
> -	up_write(&cxl_region_rwsem);
> -	return rc;
> +	return cxl_decoder_detach(cxlr, NULL, pos, DETACH_ONLY);
>  }
>  
>  static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ