lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220704030557.fm7xecylcq4z4zkr@moria.home.lan>
Date:   Sun, 3 Jul 2022 23:05:57 -0400
From:   Kent Overstreet <kent.overstreet@...il.com>
To:     Dominique Martinet <asmadeus@...ewreck.org>
Cc:     linux-kernel@...r.kernel.org, v9fs-developer@...ts.sourceforge.net,
        Eric Van Hensbergen <ericvh@...il.com>,
        Latchesar Ionkov <lucho@...kov.net>
Subject: Re: [PATCH 3/3] 9p: Add mempools for RPCs

On Mon, Jul 04, 2022 at 11:22:42AM +0900, Dominique Martinet wrote:
> Thanks for the patches!
> 
> first two patches look good, couple of comments below for this one
> 
> Kent Overstreet wrote on Sun, Jul 03, 2022 at 09:42:43PM -0400:
> > Signed-off-by: Kent Overstreet <kent.overstreet@...il.com>
> > Cc: Eric Van Hensbergen <ericvh@...il.com>
> > Cc: Latchesar Ionkov <lucho@...kov.net>
> > Cc: Dominique Martinet <asmadeus@...ewreck.org>
> > ---
> >  include/net/9p/9p.h     |  6 ++++-
> >  include/net/9p/client.h |  5 +++-
> >  net/9p/client.c         | 58 ++++++++++++++++++++++++++++++-----------
> >  net/9p/trans_rdma.c     |  2 +-
> >  4 files changed, 53 insertions(+), 18 deletions(-)
> > 
> > diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
> > index 24a509f559..c0d59b53c1 100644
> > --- a/include/net/9p/9p.h
> > +++ b/include/net/9p/9p.h
> > @@ -539,12 +539,16 @@ struct p9_rstatfs {
> >  struct p9_fcall {
> >  	u32 size;
> >  	u8 id;
> > +	enum p9_fcall_alloc {
> > +		P9_FCALL_KMEM_CACHE,
> > +		P9_FCALL_KMALLOC,
> > +		P9_FCALL_MEMPOOL,
> > +	} allocated;
> >  	u16 tag;
> >  
> >  	size_t offset;
> >  	size_t capacity;
> >  
> > -	struct kmem_cache *cache;
> >  	u8 *sdata;
> >  };
> >  
> > diff --git a/include/net/9p/client.h b/include/net/9p/client.h
> > index cb78e0e333..6517ca36bf 100644
> > --- a/include/net/9p/client.h
> > +++ b/include/net/9p/client.h
> > @@ -9,6 +9,7 @@
> >  #ifndef NET_9P_CLIENT_H
> >  #define NET_9P_CLIENT_H
> >  
> > +#include <linux/mempool.h>
> >  #include <linux/utsname.h>
> >  #include <linux/idr.h>
> >  
> > @@ -106,6 +107,7 @@ struct p9_client {
> >  	enum p9_trans_status status;
> >  	void *trans;
> >  	struct kmem_cache *fcall_cache;
> > +	mempool_t pools[2];
> >  
> >  	union {
> >  		struct {
> > @@ -222,7 +224,8 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
> >  				kgid_t gid, struct p9_qid *qid);
> >  int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
> >  int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
> > -void p9_fcall_fini(struct p9_fcall *fc);
> > +void p9_fcall_fini(struct p9_client *c, struct p9_fcall *fc,
> > +		   int fc_idx);
> >  struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag);
> >  
> >  static inline void p9_req_get(struct p9_req_t *r)
> > diff --git a/net/9p/client.c b/net/9p/client.c
> > index a36a40137c..82061c49cb 100644
> > --- a/net/9p/client.c
> > +++ b/net/9p/client.c
> > @@ -219,22 +219,34 @@ static int parse_opts(char *opts, struct p9_client *clnt)
> >  }
> >  
> >  static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
> > -			 int alloc_msize)
> > +			 int fc_idx, unsigned alloc_msize)
> >  {
> > +	WARN(alloc_msize > c->msize, "alloc_mize %u client msize %u",
> > +	     alloc_msize, c->msize);
> > +
> >  	if (likely(c->fcall_cache) && alloc_msize == c->msize) {
> >  		fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
> > -		fc->cache = c->fcall_cache;
> > +		fc->allocated = P9_FCALL_KMEM_CACHE;
> >  	} else {
> >  		fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> > -		fc->cache = NULL;
> > +		fc->allocated = P9_FCALL_KMALLOC;
> >  	}
> > -	if (!fc->sdata)
> > +
> > +	if (!fc->sdata >> alloc_msize > c->msize)
> >  		return -ENOMEM;
> 
> probably meant && instead of >> ?

Thanks, good catch

> I'd also move this alloc_msize > c->msize check just below the warn to
> keep it early if you want to keep it, but if we want to warn here it
> really should be in p9_tag_alloc that alreadys cuts the user argument
> short with a `min(c->msize, max_size)`
> 
> We shouldn't have any user calling with more at this point (the
> user-provided size comes from p9_client_prepare_req arguments and it's
> either msize or header size constants); and it probably makes sense to
> check and error out rather than cap it.

If that's the case I think we should just switch the warning to a BUG_ON() - I
just wasn't sure from reading the code if that was really guarded against.

> hm, so you try with the kmalloc/kmem_cache first and only fallback to
> mempool if that failed?
> 
> What's the point of keeping the kmem cache in this case, instead of
> routing all size-appropriate requests to the mempool?
> (honest question)

Actually none, and I should've made it a kmem_cache mempool, not a kmalloc pool.


> 
> > +	}
> > +
> >  	fc->capacity = alloc_msize;
> >  	return 0;
> >  }
> >  
> > -void p9_fcall_fini(struct p9_fcall *fc)
> > +void p9_fcall_fini(struct p9_client *c, struct p9_fcall *fc,
> > +		   int fc_idx)
> >  {
> >  	/* sdata can be NULL for interrupted requests in trans_rdma,
> >  	 * and kmem_cache_free does not do NULL-check for us
> > @@ -242,10 +254,17 @@ void p9_fcall_fini(struct p9_fcall *fc)
> >  	if (unlikely(!fc->sdata))
> >  		return;
> >  
> > -	if (fc->cache)
> > -		kmem_cache_free(fc->cache, fc->sdata);
> > -	else
> > +	switch (fc->allocated) {
> > +	case P9_FCALL_KMEM_CACHE:
> > +		kmem_cache_free(c->fcall_cache, fc->sdata);
> > +		break;
> > +	case P9_FCALL_KMALLOC:
> >  		kfree(fc->sdata);
> > +		break;
> > +	case P9_FCALL_MEMPOOL:
> > +		mempool_free(fc->sdata, &c->pools[fc_idx]);
> > +		break;
> > +	}
> >  }
> >  EXPORT_SYMBOL(p9_fcall_fini);
> >  
> > @@ -270,9 +289,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
> >  	if (!req)
> >  		return ERR_PTR(-ENOMEM);
> >  
> > -	if (p9_fcall_init(c, &req->tc, alloc_msize))
> > +	if (p9_fcall_init(c, &req->tc, 0, alloc_msize))
> >  		goto free_req;
> > -	if (p9_fcall_init(c, &req->rc, alloc_msize))
> > +	if (p9_fcall_init(c, &req->rc, 1, alloc_msize))
> 
> given the two rc/tc buffers are of same size I don't see the point of
> using two caches either, you could just double the min number of
> elements to the same effect?

You can't double allocate from the same mempool, that will deadlock if multiple
threads need the last element at the same time - I should've left a comment for
that.

Here's an updated patch - also up in git at
https://evilpiepirate.org/git/bcachefs.git/log/?h=9p_mempool
-- >8 --
Subject: [PATCH] 9p: Add mempools for RPCs

Signed-off-by: Kent Overstreet <kent.overstreet@...il.com>
Cc: Eric Van Hensbergen <ericvh@...il.com>
Cc: Latchesar Ionkov <lucho@...kov.net>
Cc: Dominique Martinet <asmadeus@...ewreck.org>
---
 include/net/9p/9p.h     |  2 +-
 include/net/9p/client.h | 12 ++++++-
 net/9p/client.c         | 70 ++++++++++++++++++++++++-----------------
 net/9p/trans_rdma.c     |  2 +-
 4 files changed, 54 insertions(+), 32 deletions(-)

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 24a509f559..0b20ee6854 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -539,12 +539,12 @@ struct p9_rstatfs {
 struct p9_fcall {
 	u32 size;
 	u8 id;
+	bool used_mempool;
 	u16 tag;
 
 	size_t offset;
 	size_t capacity;
 
-	struct kmem_cache *cache;
 	u8 *sdata;
 };
 
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index cb78e0e333..832dcc866a 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -9,6 +9,7 @@
 #ifndef NET_9P_CLIENT_H
 #define NET_9P_CLIENT_H
 
+#include <linux/mempool.h>
 #include <linux/utsname.h>
 #include <linux/idr.h>
 
@@ -107,6 +108,14 @@ struct p9_client {
 	void *trans;
 	struct kmem_cache *fcall_cache;
 
+	/*
+	 * We need two identical mempools because it's not safe to allocate
+	 * multiple elements from the same pool (without freeing the first);
+	 * that will deadlock if multiple threads need the last element at the
+	 * same time.
+	 */
+	mempool_t pools[2];
+
 	union {
 		struct {
 			int rfd;
@@ -222,7 +231,8 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
 				kgid_t gid, struct p9_qid *qid);
 int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
 int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
-void p9_fcall_fini(struct p9_fcall *fc);
+void p9_fcall_fini(struct p9_client *c, struct p9_fcall *fc,
+		   int fc_idx);
 struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag);
 
 static inline void p9_req_get(struct p9_req_t *r)
diff --git a/net/9p/client.c b/net/9p/client.c
index a36a40137c..e14074d031 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -218,23 +218,29 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 	return ret;
 }
 
-static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
-			 int alloc_msize)
+static void p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
+			  int fc_idx, unsigned alloc_msize)
 {
-	if (likely(c->fcall_cache) && alloc_msize == c->msize) {
-		fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
-		fc->cache = c->fcall_cache;
-	} else {
-		fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
-		fc->cache = NULL;
-	}
-	if (!fc->sdata)
-		return -ENOMEM;
+	gfp_t gfp = GFP_NOFS|__GFP_NOWARN;
+
+	BUG_ON(alloc_msize > c->msize);
+
+	fc->sdata = NULL;
+	fc->used_mempool = false;
 	fc->capacity = alloc_msize;
-	return 0;
+
+	if (alloc_msize < c->msize)
+		fc->sdata = kmalloc(alloc_msize, gfp);
+
+	if (!fc->sdata) {
+		fc->sdata = mempool_alloc(&c->pools[fc_idx], gfp);
+		fc->used_mempool = true;
+		fc->capacity = c->msize;
+	}
 }
 
-void p9_fcall_fini(struct p9_fcall *fc)
+void p9_fcall_fini(struct p9_client *c, struct p9_fcall *fc,
+		   int fc_idx)
 {
 	/* sdata can be NULL for interrupted requests in trans_rdma,
 	 * and kmem_cache_free does not do NULL-check for us
@@ -242,8 +248,8 @@ void p9_fcall_fini(struct p9_fcall *fc)
 	if (unlikely(!fc->sdata))
 		return;
 
-	if (fc->cache)
-		kmem_cache_free(fc->cache, fc->sdata);
+	if (fc->used_mempool)
+		mempool_free(fc->sdata, &c->pools[fc_idx]);
 	else
 		kfree(fc->sdata);
 }
@@ -270,10 +276,8 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
 	if (!req)
 		return ERR_PTR(-ENOMEM);
 
-	if (p9_fcall_init(c, &req->tc, alloc_msize))
-		goto free_req;
-	if (p9_fcall_init(c, &req->rc, alloc_msize))
-		goto free;
+	p9_fcall_init(c, &req->tc, 0, alloc_msize);
+	p9_fcall_init(c, &req->rc, 1, alloc_msize);
 
 	p9pdu_reset(&req->tc);
 	p9pdu_reset(&req->rc);
@@ -310,9 +314,8 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
 	return req;
 
 free:
-	p9_fcall_fini(&req->tc);
-	p9_fcall_fini(&req->rc);
-free_req:
+	p9_fcall_fini(c, &req->tc, 0);
+	p9_fcall_fini(c, &req->rc, 1);
 	kmem_cache_free(p9_req_cache, req);
 	return ERR_PTR(-ENOMEM);
 }
@@ -373,8 +376,8 @@ static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
 int p9_req_put(struct p9_client *c, struct p9_req_t *r)
 {
 	if (refcount_dec_and_test(&r->refcount)) {
-		p9_fcall_fini(&r->tc);
-		p9_fcall_fini(&r->rc);
+		p9_fcall_fini(c, &r->tc, 0);
+		p9_fcall_fini(c, &r->rc, 1);
 		kmem_cache_free(p9_req_cache, r);
 		return 1;
 	}
@@ -999,7 +1002,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 	char *client_id;
 
 	err = 0;
-	clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
+	clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
 	if (!clnt)
 		return ERR_PTR(-ENOMEM);
 
@@ -1050,10 +1053,6 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 		goto close_trans;
 	}
 
-	err = p9_client_version(clnt);
-	if (err)
-		goto close_trans;
-
 	/* P9_HDRSZ + 4 is the smallest packet header we can have that is
 	 * followed by data accessed from userspace by read
 	 */
@@ -1063,6 +1062,15 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 					   clnt->msize - (P9_HDRSZ + 4),
 					   NULL);
 
+	err =   mempool_init_slab_pool(&clnt->pools[0], 4, clnt->fcall_cache) ?:
+		mempool_init_slab_pool(&clnt->pools[1], 4, clnt->fcall_cache);
+	if (err)
+		goto close_trans;
+
+	err = p9_client_version(clnt);
+	if (err)
+		goto close_trans;
+
 	return clnt;
 
 close_trans:
@@ -1070,6 +1078,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 put_trans:
 	v9fs_put_trans(clnt->trans_mod);
 free_client:
+	mempool_exit(&clnt->pools[1]);
+	mempool_exit(&clnt->pools[0]);
 	kfree(clnt);
 	return ERR_PTR(err);
 }
@@ -1094,6 +1104,8 @@ void p9_client_destroy(struct p9_client *clnt)
 
 	p9_tag_cleanup(clnt);
 
+	mempool_exit(&clnt->pools[1]);
+	mempool_exit(&clnt->pools[0]);
 	kmem_cache_destroy(clnt->fcall_cache);
 	kfree(clnt);
 }
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index d817d37452..99d878d70d 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -431,7 +431,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
 		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
 			/* Got one! */
-			p9_fcall_fini(&req->rc);
+			p9_fcall_fini(client, &req->rc, 1);
 			req->rc.sdata = NULL;
 			goto dont_need_post_recv;
 		} else {
-- 
2.36.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ