Message ID | 1532943263-24378-2-git-send-email-asmadeus@codewreck.org (mailing list archive)
State      | New, archived
Series     | [1/2] net/9p: embed fcall in req to round down buffer allocs
Hi Dominique,

Could you help paste some test results from before and after the patch is
applied? And I have a little suggestion in the comments below.

On 2018/7/30 17:34, Dominique Martinet wrote:
> From: Dominique Martinet <dominique.martinet@cea.fr>
>
> Having a specific cache for the fcall allocations helps speed up
> allocations a bit, especially in case of non-"round" msizes.
>
> The caches will automatically be merged if there are multiple caches
> of items with the same size so we do not need to try to share a cache
> between different clients of the same size.
>
> Since the msize is negotiated with the server, only allocate the cache
> after that negotiation has happened - previous allocations or
> allocations of different sizes (e.g. zero-copy fcall) are made with
> kmalloc directly.
>
> Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
> ---
>  include/net/9p/client.h |  2 ++
>  net/9p/client.c         | 40 ++++++++++++++++++++++++++++++++--------
>  net/9p/trans_rdma.c     |  2 +-
>  3 files changed, 35 insertions(+), 9 deletions(-)
>
> diff --git a/include/net/9p/client.h b/include/net/9p/client.h
> index 4b4ac1362ad5..8d9bc7402a42 100644
> --- a/include/net/9p/client.h
> +++ b/include/net/9p/client.h
> @@ -123,6 +123,7 @@ struct p9_client {
>          struct p9_trans_module *trans_mod;
>          enum p9_trans_status status;
>          void *trans;
> +        struct kmem_cache *fcall_cache;
>
>          union {
>                  struct {
> @@ -230,6 +231,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
>                          kgid_t gid, struct p9_qid *);
>  int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
>  int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
> +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc);
>  struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
>  void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
>
> diff --git a/net/9p/client.c b/net/9p/client.c
> index ba99a94a12c9..215e3b1ed7b4 100644
> --- a/net/9p/client.c
> +++ b/net/9p/client.c
> @@ -231,15 +231,34 @@ static int parse_opts(char *opts, struct p9_client *clnt)
>          return ret;
>  }
>
> -static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
> +static int p9_fcall_alloc(struct p9_client *c, struct p9_fcall *fc,
> +                          int alloc_msize)
>  {
> -        fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> +        if (c->fcall_cache && alloc_msize == c->msize)
> +                fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
> +        else
> +                fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
>          if (!fc->sdata)
>                  return -ENOMEM;
>          fc->capacity = alloc_msize;
>          return 0;
>  }
>
> +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc)
> +{
> +        /* sdata can be NULL for interrupted requests in trans_rdma,
> +         * and kmem_cache_free does not do NULL-check for us
> +         */
> +        if (unlikely(!fc->sdata))
> +                return;
> +
> +        if (c->fcall_cache && fc->capacity == c->msize)
> +                kmem_cache_free(c->fcall_cache, fc->sdata);
> +        else
> +                kfree(fc->sdata);
> +}
> +EXPORT_SYMBOL(p9_fcall_free);
> +
>  static struct kmem_cache *p9_req_cache;
>
>  /**
> @@ -261,9 +280,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
>          if (!req)
>                  return NULL;
>
> -        if (p9_fcall_alloc(&req->tc, alloc_msize))
> +        if (p9_fcall_alloc(c, &req->tc, alloc_msize))
>                  goto free;
> -        if (p9_fcall_alloc(&req->rc, alloc_msize))
> +        if (p9_fcall_alloc(c, &req->rc, alloc_msize))
>                  goto free;
>
>          p9pdu_reset(&req->tc);
> @@ -288,8 +307,8 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
>          return req;
>
>  free:
> -        kfree(req->tc.sdata);
> -        kfree(req->rc.sdata);
> +        p9_fcall_free(c, &req->tc);
> +        p9_fcall_free(c, &req->rc);
>          kmem_cache_free(p9_req_cache, req);
>          return ERR_PTR(-ENOMEM);
>  }
> @@ -333,8 +352,8 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
>          spin_lock_irqsave(&c->lock, flags);
>          idr_remove(&c->reqs, tag);
>          spin_unlock_irqrestore(&c->lock, flags);
> -        kfree(r->tc.sdata);
> -        kfree(r->rc.sdata);
> +        p9_fcall_free(c, &r->tc);
> +        p9_fcall_free(c, &r->rc);
>          kmem_cache_free(p9_req_cache, r);
>  }
>
> @@ -944,6 +963,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
>
>          clnt->trans_mod = NULL;
>          clnt->trans = NULL;
> +        clnt->fcall_cache = NULL;
>
>          client_id = utsname()->nodename;
>          memcpy(clnt->name, client_id, strlen(client_id) + 1);
> @@ -980,6 +1000,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
>          if (err)
>                  goto close_trans;
>
> +        clnt->fcall_cache = kmem_cache_create("9p-fcall-cache", clnt->msize,
> +                                              0, 0, NULL);
> +
>          return clnt;
>
> close_trans:
> @@ -1011,6 +1034,7 @@ void p9_client_destroy(struct p9_client *clnt)
>
>          p9_tag_cleanup(clnt);
>
> +        kmem_cache_destroy(clnt->fcall_cache);

We could set fcall_cache to NULL here to guard against use-after-free.

>          kfree(clnt);
>  }
>  EXPORT_SYMBOL(p9_client_destroy);
> diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
> index c5cac97df7f7..5e43f0a00b3a 100644
> --- a/net/9p/trans_rdma.c
> +++ b/net/9p/trans_rdma.c
> @@ -445,7 +445,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
>          if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
>                  if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
>                          /* Got one! */
> -                        kfree(req->rc.sdata);
> +                        p9_fcall_free(client, &req->rc);
>                          req->rc.sdata = NULL;
>                          goto dont_need_post_recv;
>                  } else {
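[Editorial note: condensed from the hunks above, the lifecycle of the new cache is easier to see in one place. The sketch below is not part of the patch — the sketch_create()/sketch_destroy() names are invented and error handling is trimmed — it only restates what the scattered hunks do.]

#include <linux/err.h>
#include <linux/slab.h>
#include <net/9p/client.h>

static struct p9_client *sketch_create(void)
{
        struct p9_client *clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);

        if (!clnt)
                return ERR_PTR(-ENOMEM);
        clnt->fcall_cache = NULL;       /* nothing until msize is known */

        /* ... transport setup, then p9_client_version() negotiates
         * clnt->msize; that first RPC is allocated with plain kmalloc()
         * since fcall_cache is still NULL ... */

        clnt->fcall_cache = kmem_cache_create("9p-fcall-cache",
                                              clnt->msize, 0, 0, NULL);
        /* a NULL result is tolerated: p9_fcall_alloc() falls back to
         * kmalloc() whenever the cache is absent */
        return clnt;
}

static void sketch_destroy(struct p9_client *clnt)
{
        /* all requests must already be freed; kmem_cache_destroy()
         * accepts NULL, so this is safe even if creation failed */
        kmem_cache_destroy(clnt->fcall_cache);
        kfree(clnt);
}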
piaojun wrote on Tue, Jul 31, 2018:
> Could you help paste some test results from before and after the patch is applied?

The only performance tests I did were sent to the list a couple of mails
earlier; you can find them here:
http://lkml.kernel.org/r/20180730093101.GA7894@nautica

In particular, here are the results of the small-write benchmark just
before and after this patch, without KASAN (these are the same numbers as
in the link; hardware/setup is described there):
 - no alloc (4.18-rc7 request cache):  65.4k req/s
 - non-power-of-two alloc, no patch:   61.6k req/s
 - power-of-two alloc, no patch:       62.2k req/s
 - non-power-of-two alloc, with patch: 64.7k req/s
 - power-of-two alloc, with patch:     65.1k req/s

I'm rather happy with the result; I didn't expect that using a dedicated
cache would bring this much back, but it's certainly worth it.

> > @@ -1011,6 +1034,7 @@ void p9_client_destroy(struct p9_client *clnt)
> >
> >          p9_tag_cleanup(clnt);
> >
> > +        kmem_cache_destroy(clnt->fcall_cache);
>
> We could set fcall_cache to NULL here to guard against use-after-free.
>
> >          kfree(clnt);

Hmm, I understand where this comes from, but I'm not sure I agree.
If someone tries to access the client while or after it is freed, things
are going to break anyway; I'd rather let things break as obviously as
possible than try to cover it up.
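[Editorial note: the gap between the power-of-two and non-power-of-two rows presumably comes from kmalloc() serving large requests out of power-of-two size classes, so a non-round allocation size wastes part of every buffer, while a dedicated kmem_cache allocates objects of exactly the requested size. A standalone userspace illustration of that rounding follows; the msize values are made up, not taken from the benchmark.]

#include <stdio.h>

/* Rough model of kmalloc's large-object buckets (next power of two).
 * The real allocator also has a few small non-power-of-two classes;
 * this only illustrates the rounding effect.
 */
static size_t kmalloc_bucket(size_t size)
{
        size_t bucket = 32;

        while (bucket < size)
                bucket <<= 1;
        return bucket;
}

int main(void)
{
        size_t msizes[] = { 8192, 65536, 655360 }; /* illustrative */

        for (int i = 0; i < 3; i++) {
                size_t b = kmalloc_bucket(msizes[i]);

                printf("msize %zu -> bucket %zu (%zu bytes wasted)\n",
                       msizes[i], b, b - msizes[i]);
        }
        return 0;
}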
On 2018/7/31 9:35, Dominique Martinet wrote:
> piaojun wrote on Tue, Jul 31, 2018:
>> Could you help paste some test results from before and after the patch is applied?
>
> The only performance tests I did were sent to the list a couple of mails
> earlier; you can find them here:
> http://lkml.kernel.org/r/20180730093101.GA7894@nautica
>
> In particular, here are the results of the small-write benchmark just
> before and after this patch, without KASAN (these are the same numbers as
> in the link; hardware/setup is described there):
>  - no alloc (4.18-rc7 request cache):  65.4k req/s
>  - non-power-of-two alloc, no patch:   61.6k req/s
>  - power-of-two alloc, no patch:       62.2k req/s
>  - non-power-of-two alloc, with patch: 64.7k req/s
>  - power-of-two alloc, with patch:     65.1k req/s
>
> I'm rather happy with the result; I didn't expect that using a dedicated
> cache would bring this much back, but it's certainly worth it.

That looks like a clear improvement.

>>> @@ -1011,6 +1034,7 @@ void p9_client_destroy(struct p9_client *clnt)
>>>
>>>          p9_tag_cleanup(clnt);
>>>
>>> +        kmem_cache_destroy(clnt->fcall_cache);
>>
>> We could set fcall_cache to NULL here to guard against use-after-free.
>>
>>>          kfree(clnt);
>
> Hmm, I understand where this comes from, but I'm not sure I agree.
> If someone tries to access the client while or after it is freed, things
> are going to break anyway; I'd rather let things break as obviously as
> possible than try to cover it up.

Setting it to NULL is not a big deal either way; let's hear others'
opinions.
On Mon, Jul 30, 2018 at 11:34:23AM +0200, Dominique Martinet wrote:
> -static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
> +static int p9_fcall_alloc(struct p9_client *c, struct p9_fcall *fc,
> +                          int alloc_msize)
>  {
> -        fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> +        if (c->fcall_cache && alloc_msize == c->msize)
> +                fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
> +        else
> +                fc->sdata = kmalloc(alloc_msize, GFP_NOFS);

Could you simplify this by initialising c->msize to 0, so that this can
simply be:

> +        if (alloc_msize == c->msize)

...

> +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc)
> +{
> +        /* sdata can be NULL for interrupted requests in trans_rdma,
> +         * and kmem_cache_free does not do NULL-check for us
> +         */
> +        if (unlikely(!fc->sdata))
> +                return;
> +
> +        if (c->fcall_cache && fc->capacity == c->msize)
> +                kmem_cache_free(c->fcall_cache, fc->sdata);
> +        else
> +                kfree(fc->sdata);
> +}

Is it possible for fcall_cache to be allocated before fcall_free is
called? I'm concerned we might do this:

        allocate message A
        allocate message B
        receive response A
        allocate fcall_cache
        receive response B

and then we'd call kmem_cache_free() for something allocated by
kmalloc(), which works with slab and slub, but doesn't work with slob
(alas).

> @@ -980,6 +1000,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
>          if (err)
>                  goto close_trans;
>
> +        clnt->fcall_cache = kmem_cache_create("9p-fcall-cache", clnt->msize,
> +                                              0, 0, NULL);
> +

If we have slab merging turned off, or we have two mounts from servers
with different msizes, we'll end up with two slabs called 9p-fcall-cache.
I'm OK with that, but are you?
Matthew Wilcox wrote on Mon, Jul 30, 2018:
> On Mon, Jul 30, 2018 at 11:34:23AM +0200, Dominique Martinet wrote:
> > -static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
> > +static int p9_fcall_alloc(struct p9_client *c, struct p9_fcall *fc,
> > +                          int alloc_msize)
> >  {
> > -        fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> > +        if (c->fcall_cache && alloc_msize == c->msize)
> > +                fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
> > +        else
> > +                fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
>
> Could you simplify this by initialising c->msize to 0, so that this can
> simply be:
>
> > +        if (alloc_msize == c->msize)
> ...

Hmm, this is rather tricky with the current flow of things;
p9_client_version() has multiple uses for that msize field.

Basically what happens is:
 - init the client struct, clip msize to the mount option or
   transport-specific maximum
 - p9_client_version() uses the current c->msize to send a suggested
   value to the server
 - p9_client_rpc() uses the current c->msize to allocate that first rpc;
   this is pretty much hard-coded and would be quite intrusive to make
   an exception for
 - p9_client_version() looks at the msize the server suggested and clips
   c->msize if the reply's value is smaller

I kind of agree it would be nice to remove a check that is only needed
at startup, but I don't see how to do this easily with the current code.
Making p9_client_version() take an extra argument would be easy, but
we'd need to hardcode in p9_client_rpc() that "if the message type is
TVERSION then use [page size or whatever] for the allocation", and that
kind of kills the point...
The alternative is having p9_client_rpc() take the actual size as an
argument itself, but that again is pretty intrusive, even if it could be
done mechanically...

I'll think about this some more.

> > +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc)
> > +{
> > +        /* sdata can be NULL for interrupted requests in trans_rdma,
> > +         * and kmem_cache_free does not do NULL-check for us
> > +         */
> > +        if (unlikely(!fc->sdata))
> > +                return;
> > +
> > +        if (c->fcall_cache && fc->capacity == c->msize)
> > +                kmem_cache_free(c->fcall_cache, fc->sdata);
> > +        else
> > +                kfree(fc->sdata);
> > +}
>
> Is it possible for fcall_cache to be allocated before fcall_free is
> called? I'm concerned we might do this:
>
>         allocate message A
>         allocate message B
>         receive response A
>         allocate fcall_cache
>         receive response B
>
> and then we'd call kmem_cache_free() for something allocated by
> kmalloc(), which works with slab and slub, but doesn't work with slob
> (alas).

Bleh, I checked that this would work for slab and didn't really check
the others...
This cannot happen right now, because we only return the client struct
from p9_client_create() after the first message is done (and, right now,
freed). But once we start adding refcounting to requests it would become
possible to free the very first response after fcall_cache has been
allocated, with a "bad" server like syzkaller uses sending the version
reply before the request came in.

I can't see any work-around for this other than storing how the fcall
was allocated in the struct itself, though... I guess I might as well do
that now, unless you have a better idea.

> > @@ -980,6 +1000,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
> >          if (err)
> >                  goto close_trans;
> >
> > +        clnt->fcall_cache = kmem_cache_create("9p-fcall-cache", clnt->msize,
> > +                                              0, 0, NULL);
> > +
>
> If we have slab merging turned off, or we have two mounts from servers
> with different msizes, we'll end up with two slabs called 9p-fcall-cache.
> I'm OK with that, but are you?

Yeah, the reason I didn't make it global like p9_req_cache is precisely
to get two separate caches when the msizes are different.
I actually considered adding the msize to the name with snprintf or
something, but someone looking at it through slabinfo or similar will
have the sizes anyway, so I don't think this would bring anything. Do
you know of any tools that would choke on multiple caches with the same
name?

I'm not sure about slab merging being disabled, though; from the little
I understand, I do not see why anyone would do that except for debugging,
and I'm fine with that. Please let me know if I'm missing something,
though!

Thanks for the review,
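[Editorial note: for concreteness, the "store how the fcall was allocated" idea above could look like the following sketch. This is an assumption about what a v2 might do, not the posted patch: the cache member and the p9_fcall_init()/p9_fcall_fini() names are invented here. Because the free path keys off state recorded at allocation time, the kmalloc-then-kmem_cache_free mixup Matthew describes can no longer happen, whichever slab backend is in use.]

/* Sketch (assumed v2 shape, not the posted patch): tag each fcall with
 * the cache it came from so that freeing no longer depends on the
 * client's *current* state.
 */
struct p9_fcall {
        u32 size;
        u8 id;
        u16 tag;
        size_t offset;
        size_t capacity;
        struct kmem_cache *cache;  /* assumed member; NULL == kmalloc()ed */
        u8 *sdata;
};

static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
                         int alloc_msize)
{
        if (c->fcall_cache && alloc_msize == c->msize) {
                fc->cache = c->fcall_cache;
                fc->sdata = kmem_cache_alloc(fc->cache, GFP_NOFS);
        } else {
                fc->cache = NULL;
                fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
        }
        if (!fc->sdata)
                return -ENOMEM;
        fc->capacity = alloc_msize;
        return 0;
}

void p9_fcall_fini(struct p9_fcall *fc)
{
        /* sdata can be NULL for interrupted requests in trans_rdma */
        if (!fc->sdata)
                return;

        if (fc->cache)  /* recorded at allocation time */
                kmem_cache_free(fc->cache, fc->sdata);
        else
                kfree(fc->sdata);
}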
On Mon, 30 Jul 2018 11:34:23 +0200
Dominique Martinet <asmadeus@codewreck.org> wrote:

> From: Dominique Martinet <dominique.martinet@cea.fr>
>
> Having a specific cache for the fcall allocations helps speed up
> allocations a bit, especially in case of non-"round" msizes.
>
> The caches will automatically be merged if there are multiple caches
> of items with the same size so we do not need to try to share a cache
> between different clients of the same size.
>
> Since the msize is negotiated with the server, only allocate the cache
> after that negotiation has happened - previous allocations or
> allocations of different sizes (e.g. zero-copy fcall) are made with
> kmalloc directly.
>
> Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
> ---

The patch looks good to me. It would need to be rebased once you have
fixed the potential kfree() of stale data in patch 1, either with an
extra goto label in p9_tag_alloc() or by turning p9_fcall_alloc() into
p9_fcall_alloc_sdata(); both solutions are equivalent.

Just one suggestion, see below.

>  include/net/9p/client.h |  2 ++
>  net/9p/client.c         | 40 ++++++++++++++++++++++++++++++++--------
>  net/9p/trans_rdma.c     |  2 +-
>  3 files changed, 35 insertions(+), 9 deletions(-)
>
> diff --git a/include/net/9p/client.h b/include/net/9p/client.h
> index 4b4ac1362ad5..8d9bc7402a42 100644
> --- a/include/net/9p/client.h
> +++ b/include/net/9p/client.h
> @@ -123,6 +123,7 @@ struct p9_client {
>          struct p9_trans_module *trans_mod;
>          enum p9_trans_status status;
>          void *trans;
> +        struct kmem_cache *fcall_cache;
>
>          union {
>                  struct {
> @@ -230,6 +231,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
>                          kgid_t gid, struct p9_qid *);
>  int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
>  int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
> +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc);
>  struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
>  void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
>
> diff --git a/net/9p/client.c b/net/9p/client.c
> index ba99a94a12c9..215e3b1ed7b4 100644
> --- a/net/9p/client.c
> +++ b/net/9p/client.c
> @@ -231,15 +231,34 @@ static int parse_opts(char *opts, struct p9_client *clnt)
>          return ret;
>  }
>
> -static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
> +static int p9_fcall_alloc(struct p9_client *c, struct p9_fcall *fc,
> +                          int alloc_msize)
> {
> -        fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> +        if (c->fcall_cache && alloc_msize == c->msize)

This is a presumably hot path for any request but the initial TVERSION;
you probably want likely() here...

> +                fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
> +        else
> +                fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
>          if (!fc->sdata)
>                  return -ENOMEM;
>          fc->capacity = alloc_msize;
>          return 0;
>  }
>
> +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc)
> +{
> +        /* sdata can be NULL for interrupted requests in trans_rdma,
> +         * and kmem_cache_free does not do NULL-check for us
> +         */
> +        if (unlikely(!fc->sdata))
> +                return;
> +
> +        if (c->fcall_cache && fc->capacity == c->msize)

... and here as well.

> +                kmem_cache_free(c->fcall_cache, fc->sdata);
> +        else
> +                kfree(fc->sdata);
> +}
> +EXPORT_SYMBOL(p9_fcall_free);
> +
>  static struct kmem_cache *p9_req_cache;
>
>  /**
> @@ -261,9 +280,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
>          if (!req)
>                  return NULL;
>
> -        if (p9_fcall_alloc(&req->tc, alloc_msize))
> +        if (p9_fcall_alloc(c, &req->tc, alloc_msize))
>                  goto free;
> -        if (p9_fcall_alloc(&req->rc, alloc_msize))
> +        if (p9_fcall_alloc(c, &req->rc, alloc_msize))
>                  goto free;
>
>          p9pdu_reset(&req->tc);
> @@ -288,8 +307,8 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
>          return req;
>
>  free:
> -        kfree(req->tc.sdata);
> -        kfree(req->rc.sdata);
> +        p9_fcall_free(c, &req->tc);
> +        p9_fcall_free(c, &req->rc);
>          kmem_cache_free(p9_req_cache, req);
>          return ERR_PTR(-ENOMEM);
>  }
> @@ -333,8 +352,8 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
>          spin_lock_irqsave(&c->lock, flags);
>          idr_remove(&c->reqs, tag);
>          spin_unlock_irqrestore(&c->lock, flags);
> -        kfree(r->tc.sdata);
> -        kfree(r->rc.sdata);
> +        p9_fcall_free(c, &r->tc);
> +        p9_fcall_free(c, &r->rc);
>          kmem_cache_free(p9_req_cache, r);
>  }
>
> @@ -944,6 +963,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
>
>          clnt->trans_mod = NULL;
>          clnt->trans = NULL;
> +        clnt->fcall_cache = NULL;
>
>          client_id = utsname()->nodename;
>          memcpy(clnt->name, client_id, strlen(client_id) + 1);
> @@ -980,6 +1000,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
>          if (err)
>                  goto close_trans;
>
> +        clnt->fcall_cache = kmem_cache_create("9p-fcall-cache", clnt->msize,
> +                                              0, 0, NULL);
> +
>          return clnt;
>
> close_trans:
> @@ -1011,6 +1034,7 @@ void p9_client_destroy(struct p9_client *clnt)
>
>          p9_tag_cleanup(clnt);
>
> +        kmem_cache_destroy(clnt->fcall_cache);
>          kfree(clnt);
>  }
>  EXPORT_SYMBOL(p9_client_destroy);
> diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
> index c5cac97df7f7..5e43f0a00b3a 100644
> --- a/net/9p/trans_rdma.c
> +++ b/net/9p/trans_rdma.c
> @@ -445,7 +445,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
>          if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
>                  if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
>                          /* Got one! */
> -                        kfree(req->rc.sdata);
> +                        p9_fcall_free(client, &req->rc);
>                          req->rc.sdata = NULL;
>                          goto dont_need_post_recv;
>                  } else {
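[Editorial note: the first rebase option Greg mentions for patch 1 could look something like the sketch below; the free_tc/free_req label names are hypothetical. The point is that the error path only frees what was actually allocated, instead of kfree()ing an sdata pointer that was never initialized.]

        /* Hypothetical unwind in p9_tag_alloc(); label names invented */
        if (p9_fcall_alloc(c, &req->tc, alloc_msize))
                goto free_req;
        if (p9_fcall_alloc(c, &req->rc, alloc_msize))
                goto free_tc;

        /* ... rest of p9_tag_alloc() ... */
        return req;

free_tc:
        p9_fcall_free(c, &req->tc);
free_req:
        kmem_cache_free(p9_req_cache, req);
        return ERR_PTR(-ENOMEM);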
Greg Kurz wrote on Wed, Aug 01, 2018:
> > diff --git a/net/9p/client.c b/net/9p/client.c
> > index ba99a94a12c9..215e3b1ed7b4 100644
> > --- a/net/9p/client.c
> > +++ b/net/9p/client.c
> > @@ -231,15 +231,34 @@ static int parse_opts(char *opts, struct p9_client *clnt)
> >          return ret;
> >  }
> >
> > -static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
> > +static int p9_fcall_alloc(struct p9_client *c, struct p9_fcall *fc,
> > +                          int alloc_msize)
> >  {
> > -        fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> > +        if (c->fcall_cache && alloc_msize == c->msize)
>
> This is a presumably hot path for any request but the initial TVERSION;
> you probably want likely() here...

c->fcall_cache is indeed very likely, but alloc_msize == c->msize not so
much, as zero-copy requests will be quite common for virtio and will be
4k in size.

Although with that cache I'm starting to wonder if we should always use
it... Speed-wise, if there is no memory pressure the cache is likely
going to be faster. If there is pressure and the items are reclaimed,
though, that will bring a heavier slow-down, as the allocator will need
to find bigger memory regions again. I'm not sure which path we should
favor, to be honest; I'll keep these separate for now.

For the first part of the check, Matthew suggested tricking msize into a
different value so the check fails for the initial TVERSION call, but
even after thinking about it a bit I don't really see how to do that
cleanly. I can at least make -that- likely()...

> > +                fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
> > +        else
> > +                fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
> >          if (!fc->sdata)
> >                  return -ENOMEM;
> >          fc->capacity = alloc_msize;
> >          return 0;
> >  }
> >
> > +void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc)
> > +{
> > +        /* sdata can be NULL for interrupted requests in trans_rdma,
> > +         * and kmem_cache_free does not do NULL-check for us
> > +         */
> > +        if (unlikely(!fc->sdata))
> > +                return;
> > +
> > +        if (c->fcall_cache && fc->capacity == c->msize)
>
> ... and here as well.

For this one I'll unfortunately need to store in the fc how it was
allocated, as slob does not allow kmem_cache_free() on a buffer that was
allocated with kmalloc(), and in anticipation of requests being
refcounted, in a hostile world the initial TVERSION req could be freed
after fcall_cache has been created :/
That's a bit of a burden, but at least it will reduce the checks to one
here.

> > +                kmem_cache_free(c->fcall_cache, fc->sdata);
> > +        else
> > +                kfree(fc->sdata);
> > +}
> > +EXPORT_SYMBOL(p9_fcall_free);
> > +
> >  static struct kmem_cache *p9_req_cache;
> >
> >  /**

Anyway, I've had as many comments as I could hope for; thanks everyone
for the quick review. I'll send a v2 of both patches tomorrow.
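[Editorial note: combining both decisions from this reply, the v2 fast path would presumably end up shaped like this — an assumption about the upcoming v2, building on the tagged-allocation sketch earlier in the thread, not posted code.]

        /* the cache existing is near-certain once the mount is up, but
         * alloc_msize == c->msize is not: zero-copy requests commonly
         * allocate 4k header buffers instead */
        if (likely(c->fcall_cache) && alloc_msize == c->msize) {
                fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
                fc->cache = c->fcall_cache;
        } else {
                fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
                fc->cache = NULL;
        }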
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 4b4ac1362ad5..8d9bc7402a42 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -123,6 +123,7 @@ struct p9_client {
         struct p9_trans_module *trans_mod;
         enum p9_trans_status status;
         void *trans;
+        struct kmem_cache *fcall_cache;

         union {
                 struct {
@@ -230,6 +231,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
                         kgid_t gid, struct p9_qid *);
 int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
 int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
+void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc);
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);

diff --git a/net/9p/client.c b/net/9p/client.c
index ba99a94a12c9..215e3b1ed7b4 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -231,15 +231,34 @@ static int parse_opts(char *opts, struct p9_client *clnt)
         return ret;
 }

-static int p9_fcall_alloc(struct p9_fcall *fc, int alloc_msize)
+static int p9_fcall_alloc(struct p9_client *c, struct p9_fcall *fc,
+                          int alloc_msize)
 {
-        fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
+        if (c->fcall_cache && alloc_msize == c->msize)
+                fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS);
+        else
+                fc->sdata = kmalloc(alloc_msize, GFP_NOFS);
         if (!fc->sdata)
                 return -ENOMEM;
         fc->capacity = alloc_msize;
         return 0;
 }

+void p9_fcall_free(struct p9_client *c, struct p9_fcall *fc)
+{
+        /* sdata can be NULL for interrupted requests in trans_rdma,
+         * and kmem_cache_free does not do NULL-check for us
+         */
+        if (unlikely(!fc->sdata))
+                return;
+
+        if (c->fcall_cache && fc->capacity == c->msize)
+                kmem_cache_free(c->fcall_cache, fc->sdata);
+        else
+                kfree(fc->sdata);
+}
+EXPORT_SYMBOL(p9_fcall_free);
+
 static struct kmem_cache *p9_req_cache;

 /**
@@ -261,9 +280,9 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
         if (!req)
                 return NULL;

-        if (p9_fcall_alloc(&req->tc, alloc_msize))
+        if (p9_fcall_alloc(c, &req->tc, alloc_msize))
                 goto free;
-        if (p9_fcall_alloc(&req->rc, alloc_msize))
+        if (p9_fcall_alloc(c, &req->rc, alloc_msize))
                 goto free;

         p9pdu_reset(&req->tc);
@@ -288,8 +307,8 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
         return req;

 free:
-        kfree(req->tc.sdata);
-        kfree(req->rc.sdata);
+        p9_fcall_free(c, &req->tc);
+        p9_fcall_free(c, &req->rc);
         kmem_cache_free(p9_req_cache, req);
         return ERR_PTR(-ENOMEM);
 }
@@ -333,8 +352,8 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
         spin_lock_irqsave(&c->lock, flags);
         idr_remove(&c->reqs, tag);
         spin_unlock_irqrestore(&c->lock, flags);
-        kfree(r->tc.sdata);
-        kfree(r->rc.sdata);
+        p9_fcall_free(c, &r->tc);
+        p9_fcall_free(c, &r->rc);
         kmem_cache_free(p9_req_cache, r);
 }

@@ -944,6 +963,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)

         clnt->trans_mod = NULL;
         clnt->trans = NULL;
+        clnt->fcall_cache = NULL;

         client_id = utsname()->nodename;
         memcpy(clnt->name, client_id, strlen(client_id) + 1);
@@ -980,6 +1000,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
         if (err)
                 goto close_trans;

+        clnt->fcall_cache = kmem_cache_create("9p-fcall-cache", clnt->msize,
+                                              0, 0, NULL);
+
         return clnt;

 close_trans:
@@ -1011,6 +1034,7 @@ void p9_client_destroy(struct p9_client *clnt)

         p9_tag_cleanup(clnt);

+        kmem_cache_destroy(clnt->fcall_cache);
         kfree(clnt);
 }
 EXPORT_SYMBOL(p9_client_destroy);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index c5cac97df7f7..5e43f0a00b3a 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -445,7 +445,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
         if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
                 if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
                         /* Got one! */
-                        kfree(req->rc.sdata);
+                        p9_fcall_free(client, &req->rc);
                         req->rc.sdata = NULL;
                         goto dont_need_post_recv;
                 } else {