lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2d547eae6f031943101d7fb10b815bc128995125.camel@web.de>
Date: Thu, 27 Mar 2025 01:58:14 +0100
From: Bert Karwatzki <spasswolf@....de>
To: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: Christian König <christian.koenig@....com>, Balbir
 Singh <balbirs@...dia.com>, Ingo Molnar <mingo@...nel.org>, Kees Cook
 <kees@...nel.org>,  Bjorn Helgaas <bhelgaas@...gle.com>, Peter Zijlstra
 <peterz@...radead.org>, Andy Lutomirski <luto@...nel.org>,  Alex Deucher
 <alexander.deucher@....com>, linux-kernel@...r.kernel.org,
 amd-gfx@...ts.freedesktop.org, 	spasswolf@....de
Subject: Re: commit 7ffb791423c7 breaks steam game

Am Mittwoch, dem 26.03.2025 um 15:58 -0700 schrieb Linus Torvalds:
> On Wed, 26 Mar 2025 at 15:00, Bert Karwatzki <spasswolf@....de> wrote:
> >
> > As Balbir Singh found out this memory comes from amdkfd
> > (kgd2kfd_init_zone_device()) with CONFIG_HSA_AMD_SVM=y. The memory gets placed
> > by devm_request_free_mem_region() which places the memory at the end of the
> > physical address space (DIRECT_MAP_PHYSMEM_END). DIRECT_MAP_PHYSMEM_END changes
> > when using nokaslr and so the memory shifts.
>
> So I just want to say that having followed the thread as a spectator,
> big kudos to everybody involved in this thing. Particularly to you,
> Bart, for all your debugging and testing, and to Balbir for following
> up and figuring it out.
>
> Because this was a strange one.
>
> >  One can work around this by removing the GFR_DESCENDING flag from
> > devm_request_free_mem_region() so the memory gets placed right after the other
> > resources:
>
> I worry that there might be other machines where that completely breaks things.
>
> There are various historical reasons why we look for addresses in high
> regions, ie on machines where there are various hidden IO regions that
> aren't enumerated by e280 and aren't found by our usual PCI BAR
> discovery because they are special hidden ones.
>
> So then users of [devm_]request_free_mem_region() might end up getting
> allocated a region that has some magic system resource in it.
>
> And no, this shouldn't happen on any normal machine, but it has
> definitely been a thing in the past.
>
> So I'm very happy that you guys figured out what ended up happening,
> but I'm not convinced that the devm_request_free_mem_region()
> workaround is tenable.
>
> So I think it needs to be more targeted to the HSA_AMD_SVM case than
> touch the devm_request_free_mem_region() logic for everybody.
>
>            Linus

This patch adds another function devm_request_free_mem_region_from_end()
with an additional argument which allows to choose the end address from
which to place the resource.
The problem here is this uses dma_get_mask(adev->dev) as end address which
uses the dma mask for the discrete GPU while it should use the dma mask for
the built-in GPU (In my case both are equal (44bits), but I'm not sure if
this is always the case)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index d05d199b5e44..e1942fef3637 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -1042,7 +1042,8 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
 		pgmap->range.end = adev->gmc.aper_base + adev->gmc.aper_size -
1;
 		pgmap->type = MEMORY_DEVICE_COHERENT;
 	} else {
-		res = devm_request_free_mem_region(adev->dev, &iomem_resource,
size);
+		res = devm_request_free_mem_region_from_end(adev->dev,
+				&iomem_resource, size, dma_get_mask(adev-
>dev));
 		if (IS_ERR(res))
 			return PTR_ERR(res);
 		pgmap->range.start = res->start;
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5385349f0b8a..a9a765721ab4 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -407,6 +407,9 @@ walk_iomem_res_desc(unsigned long desc, unsigned long flags,
u64 start, u64 end,

 struct resource *devm_request_free_mem_region(struct device *dev,
 		struct resource *base, unsigned long size);
+struct resource *devm_request_free_mem_region_from_end(struct device *dev,
+		struct resource *base, unsigned long size,
+		resource_size_t seek_end);
 struct resource *request_free_mem_region(struct resource *base,
 		unsigned long size, const char *name);
 struct resource *alloc_free_mem_region(struct resource *base,
diff --git a/kernel/resource.c b/kernel/resource.c
index 12004452d999..82f40407c02d 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1875,12 +1875,14 @@ EXPORT_SYMBOL(resource_list_free);
 #endif

 static resource_size_t gfr_start(struct resource *base, resource_size_t size,
-				 resource_size_t align, unsigned long flags)
+				 resource_size_t align, resource_size_t
seek_end,
+				 unsigned long flags)
 {
 	if (flags & GFR_DESCENDING) {
 		resource_size_t end;

 		end = min_t(resource_size_t, base->end,
DIRECT_MAP_PHYSMEM_END);
+		end = min_t(resource_size_t, end, seek_end);
 		return end - size + 1;
 	}

@@ -1920,8 +1922,8 @@ static void remove_free_mem_region(void *_res)
 static struct resource *
 get_free_mem_region(struct device *dev, struct resource *base,
 		    resource_size_t size, const unsigned long align,
-		    const char *name, const unsigned long desc,
-		    const unsigned long flags)
+		    resource_size_t seek_end, const char *name,
+		    const unsigned long desc, const unsigned long flags)
 {
 	resource_size_t addr;
 	struct resource *res;
@@ -1946,7 +1948,7 @@ get_free_mem_region(struct device *dev, struct resource
*base,
 	}

 	write_lock(&resource_lock);
-	for (addr = gfr_start(base, size, align, flags);
+	for (addr = gfr_start(base, size, align, seek_end, flags);
 	     gfr_continue(base, addr, align, flags);
 	     addr = gfr_next(addr, align, flags)) {
 		if (__region_intersects(base, addr, size, 0, IORES_DESC_NONE)
!=
@@ -2021,17 +2023,30 @@ struct resource *devm_request_free_mem_region(struct
device *dev,
 	unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;

 	return get_free_mem_region(dev, base, size, GFR_DEFAULT_ALIGN,
-				   dev_name(dev),
+				   DIRECT_MAP_PHYSMEM_END, dev_name(dev),
 				   IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
 }
 EXPORT_SYMBOL_GPL(devm_request_free_mem_region);

+struct resource *devm_request_free_mem_region_from_end(struct device *dev,
+		struct resource *base, unsigned long size,
+		resource_size_t seek_end)
+{
+	unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;
+
+	return get_free_mem_region(dev, base, size, GFR_DEFAULT_ALIGN,
+				   seek_end, dev_name(dev),
+				   IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
+}
+EXPORT_SYMBOL_GPL(devm_request_free_mem_region_from_end);
+
 struct resource *request_free_mem_region(struct resource *base,
 		unsigned long size, const char *name)
 {
 	unsigned long flags = GFR_DESCENDING | GFR_REQUEST_REGION;

-	return get_free_mem_region(NULL, base, size, GFR_DEFAULT_ALIGN, name,
+	return get_free_mem_region(NULL, base, size, GFR_DEFAULT_ALIGN,
+				   DIRECT_MAP_PHYSMEM_END, name,
 				   IORES_DESC_DEVICE_PRIVATE_MEMORY, flags);
 }
 EXPORT_SYMBOL_GPL(request_free_mem_region);
@@ -2055,8 +2070,8 @@ struct resource *alloc_free_mem_region(struct resource
*base,
 	/* Default of ascending direction and insert resource */
 	unsigned long flags = 0;

-	return get_free_mem_region(NULL, base, size, align, name,
-				   IORES_DESC_NONE, flags);
+	return get_free_mem_region(NULL, base, size, align,
DIRECT_MAP_PHYSMEM_END,
+				   name, IORES_DESC_NONE, flags);
 }
 EXPORT_SYMBOL_GPL(alloc_free_mem_region);
 #endif /* CONFIG_GET_FREE_REGION */



Bert Karwatzki

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ