Message ID | 20240829010424.83693-15-snitzer@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | nfs/nfsd: add support for LOCALIO | expand |
On Wed, 2024-08-28 at 21:04 -0400, Mike Snitzer wrote: > fs/nfs_common/nfslocalio.c provides interfaces that enable an NFS > client to generate a nonce (single-use UUID) and associated > short-lived nfs_uuid_t struct, register it with nfs_common for > subsequent lookup and verification by the NFS server and if matched > the NFS server populates members in the nfs_uuid_t struct. > > nfs_common's nfs_uuids list is the basis for localio enablement, as > such it has members that point to nfsd memory for direct use by the > client (e.g. 'net' is the server's network namespace, through it the > client can access nn->nfsd_serv with proper rcu read access). > > Signed-off-by: Mike Snitzer <snitzer@kernel.org> > --- > fs/nfs_common/Makefile | 3 ++ > fs/nfs_common/nfslocalio.c | 74 ++++++++++++++++++++++++++++++++++++++ > include/linux/nfslocalio.h | 31 ++++++++++++++++ > 3 files changed, 108 insertions(+) > create mode 100644 fs/nfs_common/nfslocalio.c > create mode 100644 include/linux/nfslocalio.h > > diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile > index e58b01bb8dda..a5e54809701e 100644 > --- a/fs/nfs_common/Makefile > +++ b/fs/nfs_common/Makefile > @@ -6,6 +6,9 @@ > obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o > nfs_acl-objs := nfsacl.o > > +obj-$(CONFIG_NFS_COMMON_LOCALIO_SUPPORT) += nfs_localio.o > +nfs_localio-objs := nfslocalio.o > + > obj-$(CONFIG_GRACE_PERIOD) += grace.o > obj-$(CONFIG_NFS_V4_2_SSC_HELPER) += nfs_ssc.o > > diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c > new file mode 100644 > index 000000000000..1a35a4a6dbe0 > --- /dev/null > +++ b/fs/nfs_common/nfslocalio.c > @@ -0,0 +1,74 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> > + */ > + > +#include <linux/module.h> > +#include <linux/rculist.h> > +#include <linux/nfslocalio.h> > +#include <net/netns/generic.h> > + > +MODULE_LICENSE("GPL"); > +MODULE_DESCRIPTION("NFS localio protocol bypass support"); > + > +DEFINE_MUTEX(nfs_uuid_mutex); Why a mutex here? AFAICT, you're just using this to protect the list. A spinlock would probably be more efficient. > + > +/* > + * Global list of nfs_uuid_t instances, add/remove > + * is protected by nfs_uuid_mutex. > + * Reads are protected by RCU read lock (see below). > + */ > +LIST_HEAD(nfs_uuids); > + > +void nfs_uuid_begin(nfs_uuid_t *nfs_uuid) > +{ > + nfs_uuid->net = NULL; > + nfs_uuid->dom = NULL; > + uuid_gen(&nfs_uuid->uuid); > + > + mutex_lock(&nfs_uuid_mutex); > + list_add_tail_rcu(&nfs_uuid->list, &nfs_uuids); > + mutex_unlock(&nfs_uuid_mutex); > +} > +EXPORT_SYMBOL_GPL(nfs_uuid_begin); > + > +void nfs_uuid_end(nfs_uuid_t *nfs_uuid) > +{ > + mutex_lock(&nfs_uuid_mutex); > + list_del_rcu(&nfs_uuid->list); > + mutex_unlock(&nfs_uuid_mutex); > +} > +EXPORT_SYMBOL_GPL(nfs_uuid_end); > + > +/* Must be called with RCU read lock held. */ > +static nfs_uuid_t * nfs_uuid_lookup(const uuid_t *uuid) > +{ > + nfs_uuid_t *nfs_uuid; > + > + list_for_each_entry_rcu(nfs_uuid, &nfs_uuids, list) > + if (uuid_equal(&nfs_uuid->uuid, uuid)) > + return nfs_uuid; > + > + return NULL; > +} > + > +bool nfs_uuid_is_local(const uuid_t *uuid, struct net *net, struct auth_domain *dom) > +{ > + bool is_local = false; > + nfs_uuid_t *nfs_uuid; > + > + rcu_read_lock(); > + nfs_uuid = nfs_uuid_lookup(uuid); > + if (nfs_uuid) { > + nfs_uuid->net = maybe_get_net(net); > + if (nfs_uuid->net) { > + is_local = true; > + kref_get(&dom->ref); > + nfs_uuid->dom = dom; > + } > + } > + rcu_read_unlock(); > + > + return is_local; > +} > +EXPORT_SYMBOL_GPL(nfs_uuid_is_local); > diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h > new file mode 100644 > index 000000000000..9735ae8d3e5e > --- /dev/null > +++ b/include/linux/nfslocalio.h > @@ -0,0 +1,31 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> > + */ > +#ifndef __LINUX_NFSLOCALIO_H > +#define __LINUX_NFSLOCALIO_H > + > +#include <linux/list.h> > +#include <linux/uuid.h> > +#include <linux/sunrpc/svcauth.h> > +#include <linux/nfs.h> > +#include <net/net_namespace.h> > + > +/* > + * Useful to allow a client to negotiate if localio > + * possible with its server. > + * > + * See Documentation/filesystems/nfs/localio.rst for more detail. > + */ > +typedef struct { > + uuid_t uuid; > + struct list_head list; > + struct net *net; /* nfsd's network namespace */ > + struct auth_domain *dom; /* auth_domain for localio */ > +} nfs_uuid_t; > + > +void nfs_uuid_begin(nfs_uuid_t *); > +void nfs_uuid_end(nfs_uuid_t *); > +bool nfs_uuid_is_local(const uuid_t *, struct net *, struct auth_domain *); > + > +#endif /* __LINUX_NFSLOCALIO_H */
On Thu, Aug 29, 2024 at 12:07:06PM -0400, Jeff Layton wrote: > On Wed, 2024-08-28 at 21:04 -0400, Mike Snitzer wrote: > > fs/nfs_common/nfslocalio.c provides interfaces that enable an NFS > > client to generate a nonce (single-use UUID) and associated > > short-lived nfs_uuid_t struct, register it with nfs_common for > > subsequent lookup and verification by the NFS server and if matched > > the NFS server populates members in the nfs_uuid_t struct. > > > > nfs_common's nfs_uuids list is the basis for localio enablement, as > > such it has members that point to nfsd memory for direct use by the > > client (e.g. 'net' is the server's network namespace, through it the > > client can access nn->nfsd_serv with proper rcu read access). > > > > Signed-off-by: Mike Snitzer <snitzer@kernel.org> > > --- > > fs/nfs_common/Makefile | 3 ++ > > fs/nfs_common/nfslocalio.c | 74 ++++++++++++++++++++++++++++++++++++++ > > include/linux/nfslocalio.h | 31 ++++++++++++++++ > > 3 files changed, 108 insertions(+) > > create mode 100644 fs/nfs_common/nfslocalio.c > > create mode 100644 include/linux/nfslocalio.h > > > > diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile > > index e58b01bb8dda..a5e54809701e 100644 > > --- a/fs/nfs_common/Makefile > > +++ b/fs/nfs_common/Makefile > > @@ -6,6 +6,9 @@ > > obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o > > nfs_acl-objs := nfsacl.o > > > > +obj-$(CONFIG_NFS_COMMON_LOCALIO_SUPPORT) += nfs_localio.o > > +nfs_localio-objs := nfslocalio.o > > + > > obj-$(CONFIG_GRACE_PERIOD) += grace.o > > obj-$(CONFIG_NFS_V4_2_SSC_HELPER) += nfs_ssc.o > > > > diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c > > new file mode 100644 > > index 000000000000..1a35a4a6dbe0 > > --- /dev/null > > +++ b/fs/nfs_common/nfslocalio.c > > @@ -0,0 +1,74 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +/* > > + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> > > + */ > > + > > +#include <linux/module.h> > > +#include <linux/rculist.h> > > +#include <linux/nfslocalio.h> > > +#include <net/netns/generic.h> > > + > > +MODULE_LICENSE("GPL"); > > +MODULE_DESCRIPTION("NFS localio protocol bypass support"); > > + > > +DEFINE_MUTEX(nfs_uuid_mutex); > > Why a mutex here? AFAICT, you're just using this to protect the list. A > spinlock would probably be more efficient. Yeah, will do, I meant to revisit (when Neil suggested the same for the lock that is added in 15/25). Thanks. > > + > > +/* > > + * Global list of nfs_uuid_t instances, add/remove > > + * is protected by nfs_uuid_mutex. > > + * Reads are protected by RCU read lock (see below). > > + */ > > +LIST_HEAD(nfs_uuids); > > + > > +void nfs_uuid_begin(nfs_uuid_t *nfs_uuid) > > +{ > > + nfs_uuid->net = NULL; > > + nfs_uuid->dom = NULL; > > + uuid_gen(&nfs_uuid->uuid); > > + > > + mutex_lock(&nfs_uuid_mutex); > > + list_add_tail_rcu(&nfs_uuid->list, &nfs_uuids); > > + mutex_unlock(&nfs_uuid_mutex); > > +} > > +EXPORT_SYMBOL_GPL(nfs_uuid_begin); > > + > > +void nfs_uuid_end(nfs_uuid_t *nfs_uuid) > > +{ > > + mutex_lock(&nfs_uuid_mutex); > > + list_del_rcu(&nfs_uuid->list); > > + mutex_unlock(&nfs_uuid_mutex); > > +} > > +EXPORT_SYMBOL_GPL(nfs_uuid_end); > > + > > +/* Must be called with RCU read lock held. */ > > +static nfs_uuid_t * nfs_uuid_lookup(const uuid_t *uuid) > > +{ > > + nfs_uuid_t *nfs_uuid; > > + > > + list_for_each_entry_rcu(nfs_uuid, &nfs_uuids, list) > > + if (uuid_equal(&nfs_uuid->uuid, uuid)) > > + return nfs_uuid; > > + > > + return NULL; > > +} > > + > > +bool nfs_uuid_is_local(const uuid_t *uuid, struct net *net, struct auth_domain *dom) > > +{ > > + bool is_local = false; > > + nfs_uuid_t *nfs_uuid; > > + > > + rcu_read_lock(); > > + nfs_uuid = nfs_uuid_lookup(uuid); > > + if (nfs_uuid) { > > + nfs_uuid->net = maybe_get_net(net); > > + if (nfs_uuid->net) { > > + is_local = true; > > + kref_get(&dom->ref); > > + nfs_uuid->dom = dom; > > + } > > + } > > + rcu_read_unlock(); > > + > > + return is_local; > > +} > > +EXPORT_SYMBOL_GPL(nfs_uuid_is_local); > > diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h > > new file mode 100644 > > index 000000000000..9735ae8d3e5e > > --- /dev/null > > +++ b/include/linux/nfslocalio.h > > @@ -0,0 +1,31 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +/* > > + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> > > + */ > > +#ifndef __LINUX_NFSLOCALIO_H > > +#define __LINUX_NFSLOCALIO_H > > + > > +#include <linux/list.h> > > +#include <linux/uuid.h> > > +#include <linux/sunrpc/svcauth.h> > > +#include <linux/nfs.h> > > +#include <net/net_namespace.h> > > + > > +/* > > + * Useful to allow a client to negotiate if localio > > + * possible with its server. > > + * > > + * See Documentation/filesystems/nfs/localio.rst for more detail. > > + */ > > +typedef struct { > > + uuid_t uuid; > > + struct list_head list; > > + struct net *net; /* nfsd's network namespace */ > > + struct auth_domain *dom; /* auth_domain for localio */ > > +} nfs_uuid_t; > > + > > +void nfs_uuid_begin(nfs_uuid_t *); > > +void nfs_uuid_end(nfs_uuid_t *); > > +bool nfs_uuid_is_local(const uuid_t *, struct net *, struct auth_domain *); > > + > > +#endif /* __LINUX_NFSLOCALIO_H */ > > -- > Jeff Layton <jlayton@kernel.org>
On Thu, 29 Aug 2024, Mike Snitzer wrote: > + > +bool nfs_uuid_is_local(const uuid_t *uuid, struct net *net, struct auth_domain *dom) > +{ > + bool is_local = false; > + nfs_uuid_t *nfs_uuid; > + > + rcu_read_lock(); > + nfs_uuid = nfs_uuid_lookup(uuid); > + if (nfs_uuid) { > + nfs_uuid->net = maybe_get_net(net); I know I said it looked wrong to be getting a ref for the domain but not the net - and it did. But that doesn't mean the fix was to get a ref for the net and to hold it indefinitely. This ref is now held until the client happens to notice that localio doesn't work any more (because nfsd_serv_try_get() fails). So the shutdown of a net namespace will be delayed indefinitely if the NFS filesystem isn't being actively used. I would prefer that there were a way for the net namespace to reach back into the client and disconnect itself. Probably this would be a linked-list in struct nfsd_net which linked list_heads in struct nfs_client. This list would need to be protected by a spinlock - probably global in nfs_common so client could remove itself and server could remove all clients after clearing their net pointers. It is probably best if I explain all of what I am thinking as a patch. Stay tuned. NeilBrown
On Fri, Aug 30, 2024 at 09:39:10AM +1000, NeilBrown wrote: > On Thu, 29 Aug 2024, Mike Snitzer wrote: > > > + > > +bool nfs_uuid_is_local(const uuid_t *uuid, struct net *net, struct auth_domain *dom) > > +{ > > + bool is_local = false; > > + nfs_uuid_t *nfs_uuid; > > + > > + rcu_read_lock(); > > + nfs_uuid = nfs_uuid_lookup(uuid); > > + if (nfs_uuid) { > > + nfs_uuid->net = maybe_get_net(net); > > I know I said it looked wrong to be getting a ref for the domain but not > the net - and it did. But that doesn't mean the fix was to get a ref > for the net and to hold it indefinitely. > > This ref is now held until the client happens to notice that localio > doesn't work any more (because nfsd_serv_try_get() fails). So the > shutdown of a net namespace will be delayed indefinitely if the NFS > filesystem isn't being actively used. > > I would prefer that there were a way for the net namespace to reach back > into the client and disconnect itself. Probably this would be a > linked-list in struct nfsd_net which linked list_heads in struct > nfs_client. This list would need to be protected by a spinlock - > probably global in nfs_common so client could remove itself and server > could remove all clients after clearing their net pointers. > > It is probably best if I explain all of what I am thinking as a patch. > > Stay tuned. OK, a mechanism to have the net namespace disconnect itself sounds neat. Or alternatively we could do what I was doing: /* Not running in nfsd context, must safely get reference on nfsd_serv */ cl_nfssvc_net = maybe_get_net(cl_nfssvc_net); if (!cl_nfssvc_net) return -ENXIO; nn = net_generic(cl_nfssvc_net, nfsd_net_id); /* The server may already be shutting down, disallow new localio */ if (unlikely(!nfsd_serv_try_get(nn))) { But only if maybe_get_net() will always fail safely... I feel like we talked about the relative safety of maybe_get_net() before (but I'm coming up short searching my email): static inline struct net *maybe_get_net(struct net *net) { /* Used when we know struct net exists but we * aren't guaranteed a previous reference count * exists. If the reference count is zero this * function fails and returns NULL. */ if (!refcount_inc_not_zero(&net->ns.count)) net = NULL; return net; } So you have doubts the struct net will always still exist because I didn't take a reference? (from fs/nfsd/localio.c): static __be32 localio_proc_uuid_is_local(struct svc_rqst *rqstp) { struct localio_uuidarg *argp = rqstp->rq_argp; (void) nfs_uuid_is_local(&argp->uuid, SVC_NET(rqstp), rqstp->rq_client); return rpc_success; } I think that's a fair concern (despite it working fine in practice with destructive container testing, I cannot say there won't ever be a use-after-free bug). So all said: consider me staying tuned ;) Thanks
diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile index e58b01bb8dda..a5e54809701e 100644 --- a/fs/nfs_common/Makefile +++ b/fs/nfs_common/Makefile @@ -6,6 +6,9 @@ obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o nfs_acl-objs := nfsacl.o +obj-$(CONFIG_NFS_COMMON_LOCALIO_SUPPORT) += nfs_localio.o +nfs_localio-objs := nfslocalio.o + obj-$(CONFIG_GRACE_PERIOD) += grace.o obj-$(CONFIG_NFS_V4_2_SSC_HELPER) += nfs_ssc.o diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c new file mode 100644 index 000000000000..1a35a4a6dbe0 --- /dev/null +++ b/fs/nfs_common/nfslocalio.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> + */ + +#include <linux/module.h> +#include <linux/rculist.h> +#include <linux/nfslocalio.h> +#include <net/netns/generic.h> + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("NFS localio protocol bypass support"); + +DEFINE_MUTEX(nfs_uuid_mutex); + +/* + * Global list of nfs_uuid_t instances, add/remove + * is protected by nfs_uuid_mutex. + * Reads are protected by RCU read lock (see below). + */ +LIST_HEAD(nfs_uuids); + +void nfs_uuid_begin(nfs_uuid_t *nfs_uuid) +{ + nfs_uuid->net = NULL; + nfs_uuid->dom = NULL; + uuid_gen(&nfs_uuid->uuid); + + mutex_lock(&nfs_uuid_mutex); + list_add_tail_rcu(&nfs_uuid->list, &nfs_uuids); + mutex_unlock(&nfs_uuid_mutex); +} +EXPORT_SYMBOL_GPL(nfs_uuid_begin); + +void nfs_uuid_end(nfs_uuid_t *nfs_uuid) +{ + mutex_lock(&nfs_uuid_mutex); + list_del_rcu(&nfs_uuid->list); + mutex_unlock(&nfs_uuid_mutex); +} +EXPORT_SYMBOL_GPL(nfs_uuid_end); + +/* Must be called with RCU read lock held. */ +static nfs_uuid_t * nfs_uuid_lookup(const uuid_t *uuid) +{ + nfs_uuid_t *nfs_uuid; + + list_for_each_entry_rcu(nfs_uuid, &nfs_uuids, list) + if (uuid_equal(&nfs_uuid->uuid, uuid)) + return nfs_uuid; + + return NULL; +} + +bool nfs_uuid_is_local(const uuid_t *uuid, struct net *net, struct auth_domain *dom) +{ + bool is_local = false; + nfs_uuid_t *nfs_uuid; + + rcu_read_lock(); + nfs_uuid = nfs_uuid_lookup(uuid); + if (nfs_uuid) { + nfs_uuid->net = maybe_get_net(net); + if (nfs_uuid->net) { + is_local = true; + kref_get(&dom->ref); + nfs_uuid->dom = dom; + } + } + rcu_read_unlock(); + + return is_local; +} +EXPORT_SYMBOL_GPL(nfs_uuid_is_local); diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h new file mode 100644 index 000000000000..9735ae8d3e5e --- /dev/null +++ b/include/linux/nfslocalio.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> + */ +#ifndef __LINUX_NFSLOCALIO_H +#define __LINUX_NFSLOCALIO_H + +#include <linux/list.h> +#include <linux/uuid.h> +#include <linux/sunrpc/svcauth.h> +#include <linux/nfs.h> +#include <net/net_namespace.h> + +/* + * Useful to allow a client to negotiate if localio + * possible with its server. + * + * See Documentation/filesystems/nfs/localio.rst for more detail. + */ +typedef struct { + uuid_t uuid; + struct list_head list; + struct net *net; /* nfsd's network namespace */ + struct auth_domain *dom; /* auth_domain for localio */ +} nfs_uuid_t; + +void nfs_uuid_begin(nfs_uuid_t *); +void nfs_uuid_end(nfs_uuid_t *); +bool nfs_uuid_is_local(const uuid_t *, struct net *, struct auth_domain *); + +#endif /* __LINUX_NFSLOCALIO_H */
fs/nfs_common/nfslocalio.c provides interfaces that enable an NFS client to generate a nonce (single-use UUID) and associated short-lived nfs_uuid_t struct, register it with nfs_common for subsequent lookup and verification by the NFS server and if matched the NFS server populates members in the nfs_uuid_t struct. nfs_common's nfs_uuids list is the basis for localio enablement, as such it has members that point to nfsd memory for direct use by the client (e.g. 'net' is the server's network namespace, through it the client can access nn->nfsd_serv with proper rcu read access). Signed-off-by: Mike Snitzer <snitzer@kernel.org> --- fs/nfs_common/Makefile | 3 ++ fs/nfs_common/nfslocalio.c | 74 ++++++++++++++++++++++++++++++++++++++ include/linux/nfslocalio.h | 31 ++++++++++++++++ 3 files changed, 108 insertions(+) create mode 100644 fs/nfs_common/nfslocalio.c create mode 100644 include/linux/nfslocalio.h