lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <d2017c29-11fb-44a5-bd0f-4204329bbefb@app.fastmail.com>
Date: Thu, 16 Oct 2025 15:58:36 +0200
From: "Pierre Barre" <pierre@...re.sh>
To: "Christian Schoenebeck" <linux_oss@...debyte.com>,
 asmadeus <asmadeus@...ewreck.org>
Cc: "Matthew Wilcox (Oracle)" <willy@...radead.org>, v9fs@...ts.linux.dev,
 ericvh@...nel.org, lucho@...kov.net, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3] 9p: Use kvmalloc for message buffers on supported transports

While developing a 9P server (https://github.com/Barre/ZeroFS) and
testing it under high load, I was running into allocation failures.
The failures occur even with plenty of free memory available because
kmalloc requires contiguous physical memory.

This results in errors like:
ls: page allocation failure: order:7, mode:0x40c40(GFP_NOFS|__GFP_COMP)

This patch introduces a transport capability flag (supports_vmalloc)
that indicates whether a transport can work with vmalloc'd buffers
(non-physically contiguous memory). Transports requiring DMA should
leave this flag as false.

The fd-based transports (tcp, unix, fd) set this flag to true, and
p9_fcall_init will use kvmalloc instead of kmalloc for these
transports. This allows the allocator to fall back to vmalloc when
contiguous physical memory is not available.

Additionally, if kmem_cache_alloc fails, the code falls back to
kvmalloc for transports that support it.

Signed-off-by: Pierre Barre <pierre@...re.sh>
---
Changes in v3:
- Added explicit .supports_vmalloc = false to DMA-based transports
  (suggested by Christian Schoenebeck)

 include/net/9p/transport.h |  4 ++++
 net/9p/client.c            | 11 +++++++++--
 net/9p/trans_fd.c          |  3 +++
 net/9p/trans_rdma.c        |  1 +
 net/9p/trans_usbg.c        |  1 +
 net/9p/trans_virtio.c      |  1 +
 net/9p/trans_xen.c         |  1 +
 7 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 766ec07c9599..f0981515148d 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -24,6 +24,9 @@
  *                   we're less flexible when choosing the response message
  *                   size in this case
  * @def: set if this transport should be considered the default
+ * @supports_vmalloc: set if this transport can work with vmalloc'd buffers
+ *                    (non-physically contiguous memory). Transports requiring
+ *                    DMA should leave this as false.
  * @create: member function to create a new connection on this transport
  * @close: member function to discard a connection on this transport
  * @request: member function to issue a request to the transport
@@ -44,6 +47,7 @@ struct p9_trans_module {
 	int maxsize;		/* max message size of transport */
 	bool pooled_rbuffers;
 	int def;		/* this transport should be default */
+	bool supports_vmalloc;	/* can work with vmalloc'd buffers */
 	struct module *owner;
 	int (*create)(struct p9_client *client,
 		      const char *devname, char *args);
diff --git a/net/9p/client.c b/net/9p/client.c
index 5c1ca57ccd28..2a4884c880c1 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -229,8 +229,15 @@ static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
 	if (likely(c->fcall_cache) && alloc_msize == c->msize) {
 		fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
 		fc->cache = c->fcall_cache;
+		if (!fc->sdata && c->trans_mod->supports_vmalloc) {
+			fc->sdata = kvmalloc(alloc_msize, GFP_NOFS);
+			fc->cache = NULL;
+		}
 	} else {
-		fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
+		if (c->trans_mod->supports_vmalloc)
+			fc->sdata = kvmalloc(alloc_msize, GFP_NOFS);
+		else
+			fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
 		fc->cache = NULL;
 	}
 	if (!fc->sdata)
@@ -252,7 +259,7 @@ void p9_fcall_fini(struct p9_fcall *fc)
 	if (fc->cache)
 		kmem_cache_free(fc->cache, fc->sdata);
 	else
-		kfree(fc->sdata);
+		kvfree(fc->sdata);
 }
 EXPORT_SYMBOL(p9_fcall_fini);
 
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index a516745f732f..e7334033eba5 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1101,6 +1101,7 @@ static struct p9_trans_module p9_tcp_trans = {
 	.maxsize = MAX_SOCK_BUF,
 	.pooled_rbuffers = false,
 	.def = 0,
+	.supports_vmalloc = true,
 	.create = p9_fd_create_tcp,
 	.close = p9_fd_close,
 	.request = p9_fd_request,
@@ -1115,6 +1116,7 @@ static struct p9_trans_module p9_unix_trans = {
 	.name = "unix",
 	.maxsize = MAX_SOCK_BUF,
 	.def = 0,
+	.supports_vmalloc = true,
 	.create = p9_fd_create_unix,
 	.close = p9_fd_close,
 	.request = p9_fd_request,
@@ -1129,6 +1131,7 @@ static struct p9_trans_module p9_fd_trans = {
 	.name = "fd",
 	.maxsize = MAX_SOCK_BUF,
 	.def = 0,
+	.supports_vmalloc = true,
 	.create = p9_fd_create,
 	.close = p9_fd_close,
 	.request = p9_fd_request,
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index b84748baf9cb..6c5ad232c194 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -749,6 +749,7 @@ static struct p9_trans_module p9_rdma_trans = {
 	.maxsize = P9_RDMA_MAXSIZE,
 	.pooled_rbuffers = true,
 	.def = 0,
+	.supports_vmalloc = false,
 	.owner = THIS_MODULE,
 	.create = rdma_create_trans,
 	.close = rdma_close,
diff --git a/net/9p/trans_usbg.c b/net/9p/trans_usbg.c
index 468f7e8f0277..2542ef099233 100644
--- a/net/9p/trans_usbg.c
+++ b/net/9p/trans_usbg.c
@@ -514,6 +514,7 @@ static struct p9_trans_module p9_usbg_trans = {
 	.close = p9_usbg_close,
 	.request = p9_usbg_request,
 	.cancel = p9_usbg_cancel,
+	.supports_vmalloc = false,
 	.owner = THIS_MODULE,
 };
 
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 0b8086f58ad5..12806207f4f0 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -803,6 +803,7 @@ static struct p9_trans_module p9_virtio_trans = {
 	.maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
 	.pooled_rbuffers = false,
 	.def = 1,
+	.supports_vmalloc = false,
 	.owner = THIS_MODULE,
 };
 
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index b9ff69c7522a..4b1cec0ab829 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -258,6 +258,7 @@ static struct p9_trans_module p9_xen_trans = {
 	.maxsize = 1 << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT - 2),
 	.pooled_rbuffers = false,
 	.def = 1,
+	.supports_vmalloc = false,
 	.create = p9_xen_create,
 	.close = p9_xen_close,
 	.request = p9_xen_request,
-- 
2.39.5 (Apple Git-154)


On Thu, Oct 16, 2025, at 15:06, Christian Schoenebeck wrote:
> On Thursday, October 16, 2025 9:01:56 AM CEST Pierre Barre wrote:
>> While developing a 9P server (https://github.com/Barre/ZeroFS) and
>> testing it under high-load, I was running into allocation failures.
>> The failures occur even with plenty of free memory available because
>> kmalloc requires contiguous physical memory.
>> 
>> This results in errors like:
>> ls: page allocation failure: order:7, mode:0x40c40(GFP_NOFS|__GFP_COMP)
>> 
>> This patch introduces a transport capability flag (supports_vmalloc)
>> that indicates whether a transport can work with vmalloc'd buffers
>> (non-physically contiguous memory). Transports requiring DMA should
>> leave this flag as false.
>> 
>> The fd-based transports (tcp, unix, fd) set this flag to true, and
>> p9_fcall_init will use kvmalloc instead of kmalloc for these
>> transports. This allows the allocator to fall back to vmalloc when
>> contiguous physical memory is not available.
>> 
>> Additionally, if kmem_cache_alloc fails, the code falls back to
>> kvmalloc for transports that support it.
>> 
>> Signed-off-by: Pierre Barre <pierre@...re.sh>
>> ---
>> 
>>  include/net/9p/transport.h |  4 ++++
>>  net/9p/client.c            | 11 +++++++++--
>>  net/9p/trans_fd.c          |  3 +++
>>  3 files changed, 16 insertions(+), 2 deletions(-)
>> 
>> diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
>> index 766ec07c9599..f0981515148d 100644
>> --- a/include/net/9p/transport.h
>> +++ b/include/net/9p/transport.h
>> @@ -24,6 +24,9 @@
>>   *                   we're less flexible when choosing the response message
>>   *                   size in this case
>>   * @def: set if this transport should be considered the default
>> + * @supports_vmalloc: set if this transport can work with vmalloc'd buffers
>> + *                    (non-physically contiguous memory). Transports requiring
>> + *                    DMA should leave this as false.
>>   * @create: member function to create a new connection on this transport
>>   * @close: member function to discard a connection on this transport
>>   * @request: member function to issue a request to the transport
>> @@ -44,6 +47,7 @@ struct p9_trans_module {
>>  	int maxsize;		/* max message size of transport */
>>  	bool pooled_rbuffers;
>>  	int def;		/* this transport should be default */
>> +	bool supports_vmalloc;	/* can work with vmalloc'd buffers */
>>  	struct module *owner;
>>  	int (*create)(struct p9_client *client,
>>  		      const char *devname, char *args);
>> diff --git a/net/9p/client.c b/net/9p/client.c
>> index 5c1ca57ccd28..2a4884c880c1 100644
>> --- a/net/9p/client.c
>> +++ b/net/9p/client.c
>> @@ -229,8 +229,15 @@ static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
>>  	if (likely(c->fcall_cache) && alloc_msize == c->msize) {
>>  		fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
>>  		fc->cache = c->fcall_cache;
>> +		if (!fc->sdata && c->trans_mod->supports_vmalloc) {
>> +			fc->sdata = kvmalloc(alloc_msize, GFP_NOFS);
>> +			fc->cache = NULL;
>> +		}
>>  	} else {
>> -		fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
>> +		if (c->trans_mod->supports_vmalloc)
>> +			fc->sdata = kvmalloc(alloc_msize, GFP_NOFS);
>> +		else
>> +			fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
>>  		fc->cache = NULL;
>>  	}
>>  	if (!fc->sdata)
>> @@ -252,7 +259,7 @@ void p9_fcall_fini(struct p9_fcall *fc)
>>  	if (fc->cache)
>>  		kmem_cache_free(fc->cache, fc->sdata);
>>  	else
>> -		kfree(fc->sdata);
>> +		kvfree(fc->sdata);
>>  }
>>  EXPORT_SYMBOL(p9_fcall_fini);
>> 
>> diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
>> index a516745f732f..e7334033eba5 100644
>> --- a/net/9p/trans_fd.c
>> +++ b/net/9p/trans_fd.c
>> @@ -1101,6 +1101,7 @@ static struct p9_trans_module p9_tcp_trans = {
>>  	.maxsize = MAX_SOCK_BUF,
>>  	.pooled_rbuffers = false,
>>  	.def = 0,
>> +	.supports_vmalloc = true,
>>  	.create = p9_fd_create_tcp,
>>  	.close = p9_fd_close,
>>  	.request = p9_fd_request,
>> @@ -1115,6 +1116,7 @@ static struct p9_trans_module p9_unix_trans = {
>>  	.name = "unix",
>>  	.maxsize = MAX_SOCK_BUF,
>>  	.def = 0,
>> +	.supports_vmalloc = true,
>>  	.create = p9_fd_create_unix,
>>  	.close = p9_fd_close,
>>  	.request = p9_fd_request,
>> @@ -1129,6 +1131,7 @@ static struct p9_trans_module p9_fd_trans = {
>>  	.name = "fd",
>>  	.maxsize = MAX_SOCK_BUF,
>>  	.def = 0,
>> +	.supports_vmalloc = true,
>>  	.create = p9_fd_create,
>>  	.close = p9_fd_close,
>>  	.request = p9_fd_request,
>
> Just for clarity I would add .supports_vmalloc = false to the other 
> transports. Except for that:
>
> Reviewed-by: Christian Schoenebeck <linux_oss@...debyte.com>
>
> Thanks!
>
> /Christian

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ