lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <67ec269dc51da_1d47294c8@dwillia2-xfh.jf.intel.com.notmuch>
Date: Tue, 1 Apr 2025 10:47:09 -0700
From: Dan Williams <dan.j.williams@...el.com>
To: David Hildenbrand <david@...hat.com>, Gregory Price <gourry@...rry.net>,
	<linux-cxl@...r.kernel.org>
CC: <nvdimm@...ts.linux.dev>, <linux-kernel@...r.kernel.org>,
	<kernel-team@...a.com>, <dan.j.williams@...el.com>,
	<vishal.l.verma@...el.com>, <dave.jiang@...el.com>
Subject: Re: [PATCH] DAX: warn when kmem regions are truncated for memory
 block alignment.

David Hildenbrand wrote:
> On 21.03.25 19:07, Gregory Price wrote:
> > Device capacity intended for use as system ram should be aligned to the
> > architecture-defined memory block size or that capacity will be silently
> > truncated and capacity stranded.
> > 
> > As hotplug dax memory becomes more prevalent, the memory block size
> > alignment becomes more important for platform and device vendors to
> > pay attention to - so this truncation should not be silent.
> > 
> > This issue is particularly relevant for CXL Dynamic Capacity devices,
> > whose capacity may arrive in spec-aligned but block-misaligned chunks.
> > 
> > Example:
> >   [...] kmem dax0.0: dax region truncated 2684354560 bytes - alignment
> >   [...] kmem dax1.0: dax region truncated 1610612736 bytes - alignment
> > 
> > Signed-off-by: Gregory Price <gourry@...rry.net>
> > ---
> >   drivers/dax/kmem.c | 18 ++++++++++++++----
> >   1 file changed, 14 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
> > index e97d47f42ee2..15b6807b703d 100644
> > --- a/drivers/dax/kmem.c
> > +++ b/drivers/dax/kmem.c
> > @@ -28,7 +28,8 @@ static const char *kmem_name;
> >   /* Set if any memory will remain added when the driver will be unloaded. */
> >   static bool any_hotremove_failed;
> >   
> > -static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
> > +static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r,
> > +			  unsigned long *truncated)
> >   {
> >   	struct dev_dax_range *dax_range = &dev_dax->ranges[i];
> >   	struct range *range = &dax_range->range;
> > @@ -41,6 +42,9 @@ static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
> >   		r->end = range->end;
> >   		return -ENOSPC;
> >   	}
> > +
> > +	if (truncated && (r->start != range->start || r->end != range->end))
> > +		*truncated = (r->start - range->start) + (range->end - r->end);
> >   	return 0;
> >   }
> >   
> > @@ -75,6 +79,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
> >   	mhp_t mhp_flags;
> >   	int numa_node;
> >   	int adist = MEMTIER_DEFAULT_DAX_ADISTANCE;
> > +	unsigned long ttl_trunc = 0;
> >   
> >   	/*
> >   	 * Ensure good NUMA information for the persistent memory.
> > @@ -97,7 +102,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
> >   	for (i = 0; i < dev_dax->nr_range; i++) {
> >   		struct range range;
> >   
> > -		rc = dax_kmem_range(dev_dax, i, &range);
> > +		rc = dax_kmem_range(dev_dax, i, &range, NULL);
> >   		if (rc) {
> >   			dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
> >   					i, range.start, range.end);
> > @@ -130,8 +135,9 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
> >   	for (i = 0; i < dev_dax->nr_range; i++) {
> >   		struct resource *res;
> >   		struct range range;
> > +		unsigned long truncated = 0;
> >   
> > -		rc = dax_kmem_range(dev_dax, i, &range);
> > +		rc = dax_kmem_range(dev_dax, i, &range, &truncated);
> >   		if (rc)
> >   			continue;
> >   
> > @@ -180,8 +186,12 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
> >   				continue;
> >   			goto err_request_mem;
> >   		}
> > +
> > +		ttl_trunc += truncated;
> >   		mapped++;
> >   	}
> > +	if (ttl_trunc)
> > +		dev_warn(dev, "dax region truncated %ld bytes - alignment\n", ttl_trunc);
> >   
> >   	dev_set_drvdata(dev, data);
> >   
> > @@ -216,7 +226,7 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
> >   		struct range range;
> >   		int rc;
> >   
> > -		rc = dax_kmem_range(dev_dax, i, &range);
> > +		rc = dax_kmem_range(dev_dax, i, &range, NULL);
> >   		if (rc)
> >   			continue;
> >   
> 
> Can't that be done a bit simpler?
> 
> diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
> index e97d47f42ee2e..23a68ff809cdf 100644
> --- a/drivers/dax/kmem.c
> +++ b/drivers/dax/kmem.c
> @@ -67,8 +67,8 @@ static void kmem_put_memory_types(void)
>   
>   static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
>   {
> +       unsigned long total_len = 0, orig_len = 0;
>          struct device *dev = &dev_dax->dev;
> -       unsigned long total_len = 0;
>          struct dax_kmem_data *data;
>          struct memory_dev_type *mtype;
>          int i, rc, mapped = 0;
> @@ -97,6 +97,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
>          for (i = 0; i < dev_dax->nr_range; i++) {
>                  struct range range;
>   
> +               orig_len += range_len(&dev_dax->ranges[i].range);
>                  rc = dax_kmem_range(dev_dax, i, &range);
>                  if (rc) {
>                          dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
> @@ -109,6 +110,9 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
>          if (!total_len) {
>                  dev_warn(dev, "rejecting DAX region without any memory after alignment\n");
>                  return -EINVAL;
> +       } else if (total_len != orig_len) {
> +               dev_warn(dev, "DAX region truncated by %lu bytes due to alignment\n",
> +                        orig_len - total_len);

This looks good. I agree with it being a warning, because the user has
lost usable capacity, and maybe this eventually pressures platform BIOS
to avoid causing Linux warnings.

In terms of making that loss easier for people to report / understand,
how about using string_get_size() to convert raw bytes to power-of-10 and
power-of-2 values? I.e.

"DAX region truncated by X.XX GiB (Y.YY GB) due to alignment."

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ