Message ID | 20241108234002.16392-11-snitzer@kernel.org (mailing list archive)
---|---
State | New
Series | nfs/nfsd: fixes and improvements for LOCALIO
On Sat, 09 Nov 2024, Mike Snitzer wrote: > Remove cl_localio_lock from 'struct nfs_client' in favor of adding a > lock to the nfs_uuid_t struct (which is embedded in each nfs_client). > > Push nfs_local_{enable,disable} implementation down to nfs_common. > Those methods now call nfs_localio_{enable,disable}_client. > > This allows implementing nfs_localio_invalidate_clients in terms of > nfs_localio_disable_client. > > Signed-off-by: Mike Snitzer <snitzer@kernel.org> > --- > fs/nfs/client.c | 1 - > fs/nfs/localio.c | 18 ++++++------ > fs/nfs_common/nfslocalio.c | 57 ++++++++++++++++++++++++++------------ > include/linux/nfs_fs_sb.h | 1 - > include/linux/nfslocalio.h | 8 +++++- > 5 files changed, 55 insertions(+), 30 deletions(-) > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c > index 03ecc7765615..124232054807 100644 > --- a/fs/nfs/client.c > +++ b/fs/nfs/client.c > @@ -182,7 +182,6 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) > seqlock_init(&clp->cl_boot_lock); > ktime_get_real_ts64(&clp->cl_nfssvc_boot); > nfs_uuid_init(&clp->cl_uuid); > - spin_lock_init(&clp->cl_localio_lock); > #endif /* CONFIG_NFS_LOCALIO */ > > clp->cl_principal = "*"; > diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c > index cab2a8819259..4c75ffc5efa2 100644 > --- a/fs/nfs/localio.c > +++ b/fs/nfs/localio.c > @@ -125,10 +125,8 @@ const struct rpc_program nfslocalio_program = { > */ > static void nfs_local_enable(struct nfs_client *clp) > { > - spin_lock(&clp->cl_localio_lock); > - set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags); > trace_nfs_local_enable(clp); > - spin_unlock(&clp->cl_localio_lock); > + nfs_localio_enable_client(clp); > } Why do we need this function? The one caller could call nfs_localio_enable_client() directly instead. The tracepoint could be placed in that one caller. > > /* > @@ -136,12 +134,8 @@ static void nfs_local_enable(struct nfs_client *clp) > */ > void nfs_local_disable(struct nfs_client *clp) > { > - spin_lock(&clp->cl_localio_lock); > - if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) { > - trace_nfs_local_disable(clp); > - nfs_localio_disable_client(&clp->cl_uuid); > - } > - spin_unlock(&clp->cl_localio_lock); > + trace_nfs_local_disable(clp); > + nfs_localio_disable_client(clp); > } Ditto. Though there are more callers so the tracepoint solution isn't quite so obvious. > > /* > @@ -183,8 +177,12 @@ static bool nfs_server_uuid_is_local(struct nfs_client *clp) > rpc_shutdown_client(rpcclient_localio); > > /* Server is only local if it initialized required struct members */ > - if (status || !clp->cl_uuid.net || !clp->cl_uuid.dom) > + rcu_read_lock(); > + if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom) { > + rcu_read_unlock(); > return false; > + } > + rcu_read_unlock(); What value does RCU provide here? I don't think this change is needed. rcu_access_pointer does not require rcu_read_lock(). > > return true; > } > diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c > index 904439e4bb85..cf2f47ea4f8d 100644 > --- a/fs/nfs_common/nfslocalio.c > +++ b/fs/nfs_common/nfslocalio.c > @@ -7,6 +7,9 @@ > #include <linux/module.h> > #include <linux/list.h> > #include <linux/nfslocalio.h> > +#include <linux/nfs3.h> > +#include <linux/nfs4.h> > +#include <linux/nfs_fs_sb.h> I don't feel good about adding this nfs client knowledge in to nfs_common. 
> #include <net/netns/generic.h> > > MODULE_LICENSE("GPL"); > @@ -25,6 +28,7 @@ void nfs_uuid_init(nfs_uuid_t *nfs_uuid) > nfs_uuid->net = NULL; > nfs_uuid->dom = NULL; > INIT_LIST_HEAD(&nfs_uuid->list); > + spin_lock_init(&nfs_uuid->lock); > } > EXPORT_SYMBOL_GPL(nfs_uuid_init); > > @@ -94,12 +98,23 @@ void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list, > } > EXPORT_SYMBOL_GPL(nfs_uuid_is_local); > > +void nfs_localio_enable_client(struct nfs_client *clp) > +{ > + nfs_uuid_t *nfs_uuid = &clp->cl_uuid; > + > + spin_lock(&nfs_uuid->lock); > + set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags); > + spin_unlock(&nfs_uuid->lock); > +} > +EXPORT_SYMBOL_GPL(nfs_localio_enable_client); And I don't feel good about nfs_local accessing nfs_client directly. It only uses it for NFS_CS_LOCAL_IO. Can we ditch that flag and instead so something like testing nfs_uuid.net ?? > + > static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid) > { > - if (nfs_uuid->net) { > - module_put(nfsd_mod); > - nfs_uuid->net = NULL; > - } > + if (!nfs_uuid->net) > + return; > + module_put(nfsd_mod); > + rcu_assign_pointer(nfs_uuid->net, NULL); > + I much prefer RCU_INIT_POINTER for assigning NULL as there is no need for ordering here. > if (nfs_uuid->dom) { > auth_domain_put(nfs_uuid->dom); > nfs_uuid->dom = NULL; > @@ -107,27 +122,35 @@ static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid) > list_del_init(&nfs_uuid->list); > } > > -void nfs_localio_invalidate_clients(struct list_head *list) > +void nfs_localio_disable_client(struct nfs_client *clp) > +{ > + nfs_uuid_t *nfs_uuid = &clp->cl_uuid; > + > + spin_lock(&nfs_uuid->lock); > + if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) { > + spin_lock(&nfs_uuid_lock); > + nfs_uuid_put_locked(nfs_uuid); > + spin_unlock(&nfs_uuid_lock); > + } > + spin_unlock(&nfs_uuid->lock); > +} > +EXPORT_SYMBOL_GPL(nfs_localio_disable_client); > + > +void nfs_localio_invalidate_clients(struct list_head *cl_uuid_list) > { > nfs_uuid_t *nfs_uuid, *tmp; > > spin_lock(&nfs_uuid_lock); > - list_for_each_entry_safe(nfs_uuid, tmp, list, list) > - nfs_uuid_put_locked(nfs_uuid); > + list_for_each_entry_safe(nfs_uuid, tmp, cl_uuid_list, list) { > + struct nfs_client *clp = > + container_of(nfs_uuid, struct nfs_client, cl_uuid); > + > + nfs_localio_disable_client(clp); > + } > spin_unlock(&nfs_uuid_lock); > } > EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients); > > -void nfs_localio_disable_client(nfs_uuid_t *nfs_uuid) > -{ > - if (nfs_uuid->net) { > - spin_lock(&nfs_uuid_lock); > - nfs_uuid_put_locked(nfs_uuid); > - spin_unlock(&nfs_uuid_lock); > - } > -} > -EXPORT_SYMBOL_GPL(nfs_localio_disable_client); > - > struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, > struct rpc_clnt *rpc_clnt, const struct cred *cred, > const struct nfs_fh *nfs_fh, const fmode_t fmode) > diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h > index b804346a9741..239d86ef166c 100644 > --- a/include/linux/nfs_fs_sb.h > +++ b/include/linux/nfs_fs_sb.h > @@ -132,7 +132,6 @@ struct nfs_client { > struct timespec64 cl_nfssvc_boot; > seqlock_t cl_boot_lock; > nfs_uuid_t cl_uuid; > - spinlock_t cl_localio_lock; > #endif /* CONFIG_NFS_LOCALIO */ > }; > > diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h > index a05d1043f2b0..4d5583873f41 100644 > --- a/include/linux/nfslocalio.h > +++ b/include/linux/nfslocalio.h > @@ -6,6 +6,7 @@ > #ifndef __LINUX_NFSLOCALIO_H > #define __LINUX_NFSLOCALIO_H > > + > /* nfsd_file structure is purposely kept opaque to NFS client */ > struct nfsd_file; > > 
@@ -19,6 +20,8 @@ struct nfsd_file; > #include <linux/nfs.h> > #include <net/net_namespace.h> > > +struct nfs_client; > + > /* > * Useful to allow a client to negotiate if localio > * possible with its server. > @@ -27,6 +30,8 @@ struct nfsd_file; > */ > typedef struct { > uuid_t uuid; > + /* sadly this struct is just over a cacheline, avoid bouncing */ > + spinlock_t ____cacheline_aligned lock; > struct list_head list; > struct net __rcu *net; /* nfsd's network namespace */ > struct auth_domain *dom; /* auth_domain for localio */ > @@ -38,7 +43,8 @@ void nfs_uuid_end(nfs_uuid_t *); > void nfs_uuid_is_local(const uuid_t *, struct list_head *, > struct net *, struct auth_domain *, struct module *); > > -void nfs_localio_disable_client(nfs_uuid_t *nfs_uuid); > +void nfs_localio_enable_client(struct nfs_client *clp); > +void nfs_localio_disable_client(struct nfs_client *clp); > void nfs_localio_invalidate_clients(struct list_head *list); > > /* localio needs to map filehandle -> struct nfsd_file */ > -- > 2.44.0 > > I think this is a good refactoring to do, but I don't like some of the details, or some of the RCU code. NeilBrown
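For readers following the RCU point above, here is a minimal sketch of what nfs_uuid_put_locked() could look like with the reviewer's RCU_INIT_POINTER() suggestion applied (illustrative only; the posted patch itself uses rcu_assign_pointer()):

	static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid)
	{
		/* Caller holds nfs_uuid_lock. */
		if (!rcu_access_pointer(nfs_uuid->net))
			return;
		module_put(nfsd_mod);
		/*
		 * Storing NULL publishes nothing a reader could follow, so no
		 * memory ordering is required: RCU_INIT_POINTER() is enough
		 * and documents that intent.
		 */
		RCU_INIT_POINTER(nfs_uuid->net, NULL);

		if (nfs_uuid->dom) {
			auth_domain_put(nfs_uuid->dom);
			nfs_uuid->dom = NULL;
		}
		list_del_init(&nfs_uuid->list);
	}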
On Mon, Nov 11, 2024 at 12:55:24PM +1100, NeilBrown wrote: > On Sat, 09 Nov 2024, Mike Snitzer wrote: > > Remove cl_localio_lock from 'struct nfs_client' in favor of adding a > > lock to the nfs_uuid_t struct (which is embedded in each nfs_client). > > > > Push nfs_local_{enable,disable} implementation down to nfs_common. > > Those methods now call nfs_localio_{enable,disable}_client. > > > > This allows implementing nfs_localio_invalidate_clients in terms of > > nfs_localio_disable_client. > > > > Signed-off-by: Mike Snitzer <snitzer@kernel.org> > > --- > > fs/nfs/client.c | 1 - > > fs/nfs/localio.c | 18 ++++++------ > > fs/nfs_common/nfslocalio.c | 57 ++++++++++++++++++++++++++------------ > > include/linux/nfs_fs_sb.h | 1 - > > include/linux/nfslocalio.h | 8 +++++- > > 5 files changed, 55 insertions(+), 30 deletions(-) > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c > > index 03ecc7765615..124232054807 100644 > > --- a/fs/nfs/client.c > > +++ b/fs/nfs/client.c > > @@ -182,7 +182,6 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) > > seqlock_init(&clp->cl_boot_lock); > > ktime_get_real_ts64(&clp->cl_nfssvc_boot); > > nfs_uuid_init(&clp->cl_uuid); > > - spin_lock_init(&clp->cl_localio_lock); > > #endif /* CONFIG_NFS_LOCALIO */ > > > > clp->cl_principal = "*"; > > diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c > > index cab2a8819259..4c75ffc5efa2 100644 > > --- a/fs/nfs/localio.c > > +++ b/fs/nfs/localio.c > > @@ -125,10 +125,8 @@ const struct rpc_program nfslocalio_program = { > > */ > > static void nfs_local_enable(struct nfs_client *clp) > > { > > - spin_lock(&clp->cl_localio_lock); > > - set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags); > > trace_nfs_local_enable(clp); > > - spin_unlock(&clp->cl_localio_lock); > > + nfs_localio_enable_client(clp); > > } > > Why do we need this function? The one caller could call > nfs_localio_enable_client() directly instead. The tracepoint could be > placed in that one caller. Yeah, I saw that too but felt it useful to differentiate between calls that occur during NFS client initialization and those that happen as a side-effect of callers from other contexts (in later patch this manifests as nfs_localio_disable_client vs nfs_local_disable). Hence my adding secondary tracepoints for nfs_common (see "[PATCH 17/19] nfs_common: add nfs_localio trace events). But sure, we can just eliminate nfs_local_{enable,disable} and the corresponding tracepoints (which will have moved down to nfs_common). > > /* > > @@ -136,12 +134,8 @@ static void nfs_local_enable(struct nfs_client *clp) > > */ > > void nfs_local_disable(struct nfs_client *clp) > > { > > - spin_lock(&clp->cl_localio_lock); > > - if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) { > > - trace_nfs_local_disable(clp); > > - nfs_localio_disable_client(&clp->cl_uuid); > > - } > > - spin_unlock(&clp->cl_localio_lock); > > + trace_nfs_local_disable(clp); > > + nfs_localio_disable_client(clp); > > } > > Ditto. Though there are more callers so the tracepoint solution isn't > quite so obvious. Right... as I just explained: that's why I preserved nfs_local_disable (and the tracepoint). 
> > /* > > @@ -183,8 +177,12 @@ static bool nfs_server_uuid_is_local(struct nfs_client *clp) > > rpc_shutdown_client(rpcclient_localio); > > > > /* Server is only local if it initialized required struct members */ > > - if (status || !clp->cl_uuid.net || !clp->cl_uuid.dom) > > + rcu_read_lock(); > > + if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom) { > > + rcu_read_unlock(); > > return false; > > + } > > + rcu_read_unlock(); > > What value does RCU provide here? I don't think this change is needed. > rcu_access_pointer does not require rcu_read_lock(). OK, not sure why I though RCU read-side needed for rcu_access_pointer()... > > return true; > > } > > diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c > > index 904439e4bb85..cf2f47ea4f8d 100644 > > --- a/fs/nfs_common/nfslocalio.c > > +++ b/fs/nfs_common/nfslocalio.c > > @@ -7,6 +7,9 @@ > > #include <linux/module.h> > > #include <linux/list.h> > > #include <linux/nfslocalio.h> > > +#include <linux/nfs3.h> > > +#include <linux/nfs4.h> > > +#include <linux/nfs_fs_sb.h> > > I don't feel good about adding this nfs client knowledge in to nfs_common. I hear you.. I was "OK with it". > > #include <net/netns/generic.h> > > > > MODULE_LICENSE("GPL"); > > @@ -25,6 +28,7 @@ void nfs_uuid_init(nfs_uuid_t *nfs_uuid) > > nfs_uuid->net = NULL; > > nfs_uuid->dom = NULL; > > INIT_LIST_HEAD(&nfs_uuid->list); > > + spin_lock_init(&nfs_uuid->lock); > > } > > EXPORT_SYMBOL_GPL(nfs_uuid_init); > > > > @@ -94,12 +98,23 @@ void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list, > > } > > EXPORT_SYMBOL_GPL(nfs_uuid_is_local); > > > > +void nfs_localio_enable_client(struct nfs_client *clp) > > +{ > > + nfs_uuid_t *nfs_uuid = &clp->cl_uuid; > > + > > + spin_lock(&nfs_uuid->lock); > > + set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags); > > + spin_unlock(&nfs_uuid->lock); > > +} > > +EXPORT_SYMBOL_GPL(nfs_localio_enable_client); > > And I don't feel good about nfs_local accessing nfs_client directly. > It only uses it for NFS_CS_LOCAL_IO. Can we ditch that flag and instead > so something like testing nfs_uuid.net ?? That'd probably be OK for the equivalent of NFS_CS_LOCAL_IO but the last patch in this series ("nfs: probe for LOCALIO when v3 client reconnects to server") adds NFS_CS_LOCAL_IO_CAPABLE to provide a hint that the client and server successfully established themselves local via LOCALIO protocol. This is needed so that NFSv3 (stateless) has a hint that reestablishing LOCALIO needed if/when the client loses connectivity to the server (because it was shutdown and restarted). Could introduce flags local to nfs_uuid_t structure as a means of nfs_common/nfslocalio.c not needing to know internals of nfs_client at all -- conversely: probably best to not bloat nfs_uuid_t (and nfs_client) further, so should just ensure nfs_client treated as an opaque pointer in nfs_common by introducing accessors? > > + > > static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid) > > { > > - if (nfs_uuid->net) { > > - module_put(nfsd_mod); > > - nfs_uuid->net = NULL; > > - } > > + if (!nfs_uuid->net) > > + return; > > + module_put(nfsd_mod); > > + rcu_assign_pointer(nfs_uuid->net, NULL); > > + > > I much prefer RCU_INIT_POINTER for assigning NULL as there is no need > for ordering here. OK. 
> > if (nfs_uuid->dom) { > > auth_domain_put(nfs_uuid->dom); > > nfs_uuid->dom = NULL; > > @@ -107,27 +122,35 @@ static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid) > > list_del_init(&nfs_uuid->list); > > } > > > > -void nfs_localio_invalidate_clients(struct list_head *list) > > +void nfs_localio_disable_client(struct nfs_client *clp) > > +{ > > + nfs_uuid_t *nfs_uuid = &clp->cl_uuid; > > + > > + spin_lock(&nfs_uuid->lock); > > + if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) { > > + spin_lock(&nfs_uuid_lock); > > + nfs_uuid_put_locked(nfs_uuid); > > + spin_unlock(&nfs_uuid_lock); > > + } > > + spin_unlock(&nfs_uuid->lock); > > +} > > +EXPORT_SYMBOL_GPL(nfs_localio_disable_client); > > + > > +void nfs_localio_invalidate_clients(struct list_head *cl_uuid_list) > > { > > nfs_uuid_t *nfs_uuid, *tmp; > > > > spin_lock(&nfs_uuid_lock); > > - list_for_each_entry_safe(nfs_uuid, tmp, list, list) > > - nfs_uuid_put_locked(nfs_uuid); > > + list_for_each_entry_safe(nfs_uuid, tmp, cl_uuid_list, list) { > > + struct nfs_client *clp = > > + container_of(nfs_uuid, struct nfs_client, cl_uuid); > > + > > + nfs_localio_disable_client(clp); > > + } > > spin_unlock(&nfs_uuid_lock); > > } > > EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients); > > > > -void nfs_localio_disable_client(nfs_uuid_t *nfs_uuid) > > -{ > > - if (nfs_uuid->net) { > > - spin_lock(&nfs_uuid_lock); > > - nfs_uuid_put_locked(nfs_uuid); > > - spin_unlock(&nfs_uuid_lock); > > - } > > -} > > -EXPORT_SYMBOL_GPL(nfs_localio_disable_client); > > - > > struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, > > struct rpc_clnt *rpc_clnt, const struct cred *cred, > > const struct nfs_fh *nfs_fh, const fmode_t fmode) > > diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h > > index b804346a9741..239d86ef166c 100644 > > --- a/include/linux/nfs_fs_sb.h > > +++ b/include/linux/nfs_fs_sb.h > > @@ -132,7 +132,6 @@ struct nfs_client { > > struct timespec64 cl_nfssvc_boot; > > seqlock_t cl_boot_lock; > > nfs_uuid_t cl_uuid; > > - spinlock_t cl_localio_lock; > > #endif /* CONFIG_NFS_LOCALIO */ > > }; > > > > diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h > > index a05d1043f2b0..4d5583873f41 100644 > > --- a/include/linux/nfslocalio.h > > +++ b/include/linux/nfslocalio.h > > @@ -6,6 +6,7 @@ > > #ifndef __LINUX_NFSLOCALIO_H > > #define __LINUX_NFSLOCALIO_H > > > > + > > /* nfsd_file structure is purposely kept opaque to NFS client */ > > struct nfsd_file; > > > > @@ -19,6 +20,8 @@ struct nfsd_file; > > #include <linux/nfs.h> > > #include <net/net_namespace.h> > > > > +struct nfs_client; > > + > > /* > > * Useful to allow a client to negotiate if localio > > * possible with its server. 
> > @@ -27,6 +30,8 @@ struct nfsd_file; > > */ > > typedef struct { > > uuid_t uuid; > > + /* sadly this struct is just over a cacheline, avoid bouncing */ > > + spinlock_t ____cacheline_aligned lock; > > struct list_head list; > > struct net __rcu *net; /* nfsd's network namespace */ > > struct auth_domain *dom; /* auth_domain for localio */ > > @@ -38,7 +43,8 @@ void nfs_uuid_end(nfs_uuid_t *); > > void nfs_uuid_is_local(const uuid_t *, struct list_head *, > > struct net *, struct auth_domain *, struct module *); > > > > -void nfs_localio_disable_client(nfs_uuid_t *nfs_uuid); > > +void nfs_localio_enable_client(struct nfs_client *clp); > > +void nfs_localio_disable_client(struct nfs_client *clp); > > void nfs_localio_invalidate_clients(struct list_head *list); > > > > /* localio needs to map filehandle -> struct nfsd_file */ > > -- > > 2.44.0 > > > > > > I think this is a good refactoring to do, but I don't like some of the > details, or some of the RCU code. Sure, I'll clean it up further. Thanks for your review. Mike
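One hypothetical shape for the "flags local to nfs_uuid_t" alternative Mike floats above, so that nfs_common never dereferences struct nfs_client at all. The local_io_enabled field and the nfs_uuid_t-based signatures are assumptions for illustration, not code from the series:

	/* nfs_common would only ever see nfs_uuid_t, keeping nfs_client opaque. */
	typedef struct {
		uuid_t uuid;
		spinlock_t lock;
		bool local_io_enabled;		/* hypothetical replacement for NFS_CS_LOCAL_IO */
		struct list_head list;
		struct net __rcu *net;		/* nfsd's network namespace */
		struct auth_domain *dom;	/* auth_domain for localio */
	} nfs_uuid_t;

	void nfs_localio_enable_client(nfs_uuid_t *nfs_uuid)
	{
		spin_lock(&nfs_uuid->lock);
		nfs_uuid->local_io_enabled = true;
		spin_unlock(&nfs_uuid->lock);
	}

	void nfs_localio_disable_client(nfs_uuid_t *nfs_uuid)
	{
		spin_lock(&nfs_uuid->lock);
		if (nfs_uuid->local_io_enabled) {
			nfs_uuid->local_io_enabled = false;
			spin_lock(&nfs_uuid_lock);
			nfs_uuid_put_locked(nfs_uuid);
			spin_unlock(&nfs_uuid_lock);
		}
		spin_unlock(&nfs_uuid->lock);
	}

The NFS client side would then test the nfs_uuid_t state (or, as Neil suggests, simply whether nfs_uuid.net is set) instead of a bit in cl_flags.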
On Tue, 12 Nov 2024, Mike Snitzer wrote: > On Mon, Nov 11, 2024 at 12:55:24PM +1100, NeilBrown wrote: > > On Sat, 09 Nov 2024, Mike Snitzer wrote: > > > Remove cl_localio_lock from 'struct nfs_client' in favor of adding a > > > lock to the nfs_uuid_t struct (which is embedded in each nfs_client). > > > > > > Push nfs_local_{enable,disable} implementation down to nfs_common. > > > Those methods now call nfs_localio_{enable,disable}_client. > > > > > > This allows implementing nfs_localio_invalidate_clients in terms of > > > nfs_localio_disable_client. > > > > > > Signed-off-by: Mike Snitzer <snitzer@kernel.org> > > > --- > > > fs/nfs/client.c | 1 - > > > fs/nfs/localio.c | 18 ++++++------ > > > fs/nfs_common/nfslocalio.c | 57 ++++++++++++++++++++++++++------------ > > > include/linux/nfs_fs_sb.h | 1 - > > > include/linux/nfslocalio.h | 8 +++++- > > > 5 files changed, 55 insertions(+), 30 deletions(-) > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c > > > index 03ecc7765615..124232054807 100644 > > > --- a/fs/nfs/client.c > > > +++ b/fs/nfs/client.c > > > @@ -182,7 +182,6 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) > > > seqlock_init(&clp->cl_boot_lock); > > > ktime_get_real_ts64(&clp->cl_nfssvc_boot); > > > nfs_uuid_init(&clp->cl_uuid); > > > - spin_lock_init(&clp->cl_localio_lock); > > > #endif /* CONFIG_NFS_LOCALIO */ > > > > > > clp->cl_principal = "*"; > > > diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c > > > index cab2a8819259..4c75ffc5efa2 100644 > > > --- a/fs/nfs/localio.c > > > +++ b/fs/nfs/localio.c > > > @@ -125,10 +125,8 @@ const struct rpc_program nfslocalio_program = { > > > */ > > > static void nfs_local_enable(struct nfs_client *clp) > > > { > > > - spin_lock(&clp->cl_localio_lock); > > > - set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags); > > > trace_nfs_local_enable(clp); > > > - spin_unlock(&clp->cl_localio_lock); > > > + nfs_localio_enable_client(clp); > > > } > > > > Why do we need this function? The one caller could call > > nfs_localio_enable_client() directly instead. The tracepoint could be > > placed in that one caller. > > Yeah, I saw that too but felt it useful to differentiate between calls > that occur during NFS client initialization and those that happen as a > side-effect of callers from other contexts (in later patch this > manifests as nfs_localio_disable_client vs nfs_local_disable). > > Hence my adding secondary tracepoints for nfs_common (see "[PATCH > 17/19] nfs_common: add nfs_localio trace events). > > But sure, we can just eliminate nfs_local_{enable,disable} and the > corresponding tracepoints (which will have moved down to nfs_common). I don't feel strongly about this. If you think these is value in these wrapper functions then I won't argue. As a general rule I don't like multiple interfaces that do (much) the same thing as keeping track of them increases the mental load. > > > > /* > > > @@ -136,12 +134,8 @@ static void nfs_local_enable(struct nfs_client *clp) > > > */ > > > void nfs_local_disable(struct nfs_client *clp) > > > { > > > - spin_lock(&clp->cl_localio_lock); > > > - if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) { > > > - trace_nfs_local_disable(clp); > > > - nfs_localio_disable_client(&clp->cl_uuid); > > > - } > > > - spin_unlock(&clp->cl_localio_lock); > > > + trace_nfs_local_disable(clp); > > > + nfs_localio_disable_client(clp); > > > } > > > > Ditto. Though there are more callers so the tracepoint solution isn't > > quite so obvious. > > Right... 
as I just explained: that's why I preserved nfs_local_disable > (and the tracepoint). > > > > > /* > > > @@ -183,8 +177,12 @@ static bool nfs_server_uuid_is_local(struct nfs_client *clp) > > > rpc_shutdown_client(rpcclient_localio); > > > > > > /* Server is only local if it initialized required struct members */ > > > - if (status || !clp->cl_uuid.net || !clp->cl_uuid.dom) > > > + rcu_read_lock(); > > > + if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom) { > > > + rcu_read_unlock(); > > > return false; > > > + } > > > + rcu_read_unlock(); > > > > What value does RCU provide here? I don't think this change is needed. > > rcu_access_pointer does not require rcu_read_lock(). > > OK, not sure why I though RCU read-side needed for rcu_access_pointer()... > > > > return true; > > > } > > > diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c > > > index 904439e4bb85..cf2f47ea4f8d 100644 > > > --- a/fs/nfs_common/nfslocalio.c > > > +++ b/fs/nfs_common/nfslocalio.c > > > @@ -7,6 +7,9 @@ > > > #include <linux/module.h> > > > #include <linux/list.h> > > > #include <linux/nfslocalio.h> > > > +#include <linux/nfs3.h> > > > +#include <linux/nfs4.h> > > > +#include <linux/nfs_fs_sb.h> > > > > I don't feel good about adding this nfs client knowledge in to nfs_common. > > I hear you.. I was "OK with it". > > > > #include <net/netns/generic.h> > > > > > > MODULE_LICENSE("GPL"); > > > @@ -25,6 +28,7 @@ void nfs_uuid_init(nfs_uuid_t *nfs_uuid) > > > nfs_uuid->net = NULL; > > > nfs_uuid->dom = NULL; > > > INIT_LIST_HEAD(&nfs_uuid->list); > > > + spin_lock_init(&nfs_uuid->lock); > > > } > > > EXPORT_SYMBOL_GPL(nfs_uuid_init); > > > > > > @@ -94,12 +98,23 @@ void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list, > > > } > > > EXPORT_SYMBOL_GPL(nfs_uuid_is_local); > > > > > > +void nfs_localio_enable_client(struct nfs_client *clp) > > > +{ > > > + nfs_uuid_t *nfs_uuid = &clp->cl_uuid; > > > + > > > + spin_lock(&nfs_uuid->lock); > > > + set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags); > > > + spin_unlock(&nfs_uuid->lock); > > > +} > > > +EXPORT_SYMBOL_GPL(nfs_localio_enable_client); > > > > And I don't feel good about nfs_local accessing nfs_client directly. > > It only uses it for NFS_CS_LOCAL_IO. Can we ditch that flag and instead > > so something like testing nfs_uuid.net ?? > > That'd probably be OK for the equivalent of NFS_CS_LOCAL_IO but the last > patch in this series ("nfs: probe for LOCALIO when v3 client > reconnects to server") adds NFS_CS_LOCAL_IO_CAPABLE to provide a hint > that the client and server successfully established themselves local > via LOCALIO protocol. This is needed so that NFSv3 (stateless) has a > hint that reestablishing LOCALIO needed if/when the client loses > connectivity to the server (because it was shutdown and restarted). I don't like NFS_CS_LOCAL_IO_CAPABLE. A use case that I imagine (and a customer does something like this) is an HA cluster where the NFS server can move from one node to another. All the node access the filesystem, most over NFS. If a server-migration happens (e.g. the current server node failed) then the new server node would suddenly become LOCALIO-capable even though it wasn't at mount-time. I would like it to be able to detect this and start doing localio. So I don't want NFS_CS_LOCAL_IO_CAPABLE. I think tracking when the network connection is re-established is sufficient. Thanks, NeilBrown
On Tue, Nov 12, 2024 at 07:35:04AM +1100, NeilBrown wrote: > On Tue, 12 Nov 2024, Mike Snitzer wrote: > > On Mon, Nov 11, 2024 at 12:55:24PM +1100, NeilBrown wrote: > > > On Sat, 09 Nov 2024, Mike Snitzer wrote: > > > > Remove cl_localio_lock from 'struct nfs_client' in favor of adding a > > > > lock to the nfs_uuid_t struct (which is embedded in each nfs_client). > > > > > > > > Push nfs_local_{enable,disable} implementation down to nfs_common. > > > > Those methods now call nfs_localio_{enable,disable}_client. > > > > > > > > This allows implementing nfs_localio_invalidate_clients in terms of > > > > nfs_localio_disable_client. > > > > > > > > Signed-off-by: Mike Snitzer <snitzer@kernel.org> > > > > --- > > > > fs/nfs/client.c | 1 - > > > > fs/nfs/localio.c | 18 ++++++------ > > > > fs/nfs_common/nfslocalio.c | 57 ++++++++++++++++++++++++++------------ > > > > include/linux/nfs_fs_sb.h | 1 - > > > > include/linux/nfslocalio.h | 8 +++++- > > > > 5 files changed, 55 insertions(+), 30 deletions(-) > > > > > > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c > > > > index 03ecc7765615..124232054807 100644 > > > > --- a/fs/nfs/client.c > > > > +++ b/fs/nfs/client.c > > > > @@ -182,7 +182,6 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) > > > > seqlock_init(&clp->cl_boot_lock); > > > > ktime_get_real_ts64(&clp->cl_nfssvc_boot); > > > > nfs_uuid_init(&clp->cl_uuid); > > > > - spin_lock_init(&clp->cl_localio_lock); > > > > #endif /* CONFIG_NFS_LOCALIO */ > > > > > > > > clp->cl_principal = "*"; > > > > diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c > > > > index cab2a8819259..4c75ffc5efa2 100644 > > > > --- a/fs/nfs/localio.c > > > > +++ b/fs/nfs/localio.c > > > > @@ -125,10 +125,8 @@ const struct rpc_program nfslocalio_program = { > > > > */ > > > > static void nfs_local_enable(struct nfs_client *clp) > > > > { > > > > - spin_lock(&clp->cl_localio_lock); > > > > - set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags); > > > > trace_nfs_local_enable(clp); > > > > - spin_unlock(&clp->cl_localio_lock); > > > > + nfs_localio_enable_client(clp); > > > > } > > > > > > Why do we need this function? The one caller could call > > > nfs_localio_enable_client() directly instead. The tracepoint could be > > > placed in that one caller. > > > > Yeah, I saw that too but felt it useful to differentiate between calls > > that occur during NFS client initialization and those that happen as a > > side-effect of callers from other contexts (in later patch this > > manifests as nfs_localio_disable_client vs nfs_local_disable). > > > > Hence my adding secondary tracepoints for nfs_common (see "[PATCH > > 17/19] nfs_common: add nfs_localio trace events). > > > > But sure, we can just eliminate nfs_local_{enable,disable} and the > > corresponding tracepoints (which will have moved down to nfs_common). > > I don't feel strongly about this. If you think these is value in these > wrapper functions then I won't argue. As a general rule I don't like > multiple interfaces that do (much) the same thing as keeping track of > them increases the mental load. 
> > > > > > > /* > > > > @@ -136,12 +134,8 @@ static void nfs_local_enable(struct nfs_client *clp) > > > > */ > > > > void nfs_local_disable(struct nfs_client *clp) > > > > { > > > > - spin_lock(&clp->cl_localio_lock); > > > > - if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) { > > > > - trace_nfs_local_disable(clp); > > > > - nfs_localio_disable_client(&clp->cl_uuid); > > > > - } > > > > - spin_unlock(&clp->cl_localio_lock); > > > > + trace_nfs_local_disable(clp); > > > > + nfs_localio_disable_client(clp); > > > > } > > > > > > Ditto. Though there are more callers so the tracepoint solution isn't > > > quite so obvious. > > > > Right... as I just explained: that's why I preserved nfs_local_disable > > (and the tracepoint). > > > > > > > > /* > > > > @@ -183,8 +177,12 @@ static bool nfs_server_uuid_is_local(struct nfs_client *clp) > > > > rpc_shutdown_client(rpcclient_localio); > > > > > > > > /* Server is only local if it initialized required struct members */ > > > > - if (status || !clp->cl_uuid.net || !clp->cl_uuid.dom) > > > > + rcu_read_lock(); > > > > + if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom) { > > > > + rcu_read_unlock(); > > > > return false; > > > > + } > > > > + rcu_read_unlock(); > > > > > > What value does RCU provide here? I don't think this change is needed. > > > rcu_access_pointer does not require rcu_read_lock(). > > > > OK, not sure why I though RCU read-side needed for rcu_access_pointer()... > > > > > > return true; > > > > } > > > > diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c > > > > index 904439e4bb85..cf2f47ea4f8d 100644 > > > > --- a/fs/nfs_common/nfslocalio.c > > > > +++ b/fs/nfs_common/nfslocalio.c > > > > @@ -7,6 +7,9 @@ > > > > #include <linux/module.h> > > > > #include <linux/list.h> > > > > #include <linux/nfslocalio.h> > > > > +#include <linux/nfs3.h> > > > > +#include <linux/nfs4.h> > > > > +#include <linux/nfs_fs_sb.h> > > > > > > I don't feel good about adding this nfs client knowledge in to nfs_common. > > > > I hear you.. I was "OK with it". > > > > > > #include <net/netns/generic.h> > > > > > > > > MODULE_LICENSE("GPL"); > > > > @@ -25,6 +28,7 @@ void nfs_uuid_init(nfs_uuid_t *nfs_uuid) > > > > nfs_uuid->net = NULL; > > > > nfs_uuid->dom = NULL; > > > > INIT_LIST_HEAD(&nfs_uuid->list); > > > > + spin_lock_init(&nfs_uuid->lock); > > > > } > > > > EXPORT_SYMBOL_GPL(nfs_uuid_init); > > > > > > > > @@ -94,12 +98,23 @@ void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list, > > > > } > > > > EXPORT_SYMBOL_GPL(nfs_uuid_is_local); > > > > > > > > +void nfs_localio_enable_client(struct nfs_client *clp) > > > > +{ > > > > + nfs_uuid_t *nfs_uuid = &clp->cl_uuid; > > > > + > > > > + spin_lock(&nfs_uuid->lock); > > > > + set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags); > > > > + spin_unlock(&nfs_uuid->lock); > > > > +} > > > > +EXPORT_SYMBOL_GPL(nfs_localio_enable_client); > > > > > > And I don't feel good about nfs_local accessing nfs_client directly. > > > It only uses it for NFS_CS_LOCAL_IO. Can we ditch that flag and instead > > > so something like testing nfs_uuid.net ?? > > > > That'd probably be OK for the equivalent of NFS_CS_LOCAL_IO but the last > > patch in this series ("nfs: probe for LOCALIO when v3 client > > reconnects to server") adds NFS_CS_LOCAL_IO_CAPABLE to provide a hint > > that the client and server successfully established themselves local > > via LOCALIO protocol. 
This is needed so that NFSv3 (stateless) has a > > hint that reestablishing LOCALIO needed if/when the client loses > > connectivity to the server (because it was shutdown and restarted). > > I don't like NFS_CS_LOCAL_IO_CAPABLE. > A use case that I imagine (and a customer does something like this) is an > HA cluster where the NFS server can move from one node to another. All > the node access the filesystem, most over NFS. If a server-migration > happens (e.g. the current server node failed) then the new server node > would suddenly become LOCALIO-capable even though it wasn't at > mount-time. I would like it to be able to detect this and start doing > localio. Server migration while retaining the client being local to the new server? So client migrates too? If the client migrates then it will negotiate with server using LOCALIO protocol. Anyway, this HA hypothetical feels contrived. It is fine that you dislike NFS_CS_LOCAL_IO_CAPABLE but I don't understand what you'd like as an alternative. Or why the simplicity in my approach lacking. > So I don't want NFS_CS_LOCAL_IO_CAPABLE. I think tracking when the > network connection is re-established is sufficient. Eh, that type of tracking doesn't really buy me anything if I've lost context (that LOCALIO was previously established and should be re-established). NFS v3 is stateless, hence my hooking off read and write paths to trigger nfs_local_probe_async(). Unlike NFS v4, with its grace, more care is needed to avoid needless calls to nfs_local_probe_async(). Your previous email about just tracking network connection change was an optimization for avoiding repeat (pointless) probes. We still need to know to do the probe to begin with. Are you saying you want to backfill the equivalent of grace (or pseudo-grace) to NFSv3? My approach works. Not following what you are saying will be better. Thanks, Mike
On Tue, 12 Nov 2024, Mike Snitzer wrote: > On Tue, Nov 12, 2024 at 07:35:04AM +1100, NeilBrown wrote: > > > > I don't like NFS_CS_LOCAL_IO_CAPABLE. > > A use case that I imagine (and a customer does something like this) is an > > HA cluster where the NFS server can move from one node to another. All > > the node access the filesystem, most over NFS. If a server-migration > > happens (e.g. the current server node failed) then the new server node > > would suddenly become LOCALIO-capable even though it wasn't at > > mount-time. I would like it to be able to detect this and start doing > > localio. > > Server migration while retaining the client being local to the new > server? So client migrates too? No. Client doesn't migrate. Server migrates and appears on the same host as the client. The client can suddenly get better performance. It should benefit from that. > > If the client migrates then it will negotiate with server using > LOCALIO protocol. > > Anyway, this HA hypothetical feels contrived. It is fine that you > dislike NFS_CS_LOCAL_IO_CAPABLE but I don't understand what you'd like > as an alternative. Or why the simplicity in my approach lacking. We have customers with exactly this HA config. This is why I put work into make sure loop-back NFS (client and server on same node) works cleanly without memory allocation deadlocks. https://lwn.net/Articles/595652/ Getting localio in that config would be even better. Your approach assumes that if LOCALIO isn't detected at mount time, it will never been available. I think that is a flawed assumption. > > > So I don't want NFS_CS_LOCAL_IO_CAPABLE. I think tracking when the > > network connection is re-established is sufficient. > > Eh, that type of tracking doesn't really buy me anything if I've lost > context (that LOCALIO was previously established and should be > re-established). > > NFS v3 is stateless, hence my hooking off read and write paths to > trigger nfs_local_probe_async(). Unlike NFS v4, with its grace, more > care is needed to avoid needless calls to nfs_local_probe_async(). I think it makes perfect sense to trigger the probe on a successful read/write with some rate limiting to avoid sending a LOCALIO probe on EVERY read/write. Your rate-limiting for NFSv3 is: - never probe if the mount-time probe was not successful - otherwise probe once every 256 IO requests. I think the first is too restrictive, and the second is unnecessarily frequent. I propose: - probe once each time the client reconnects with the server This will result in many fewer probes in practice, but any successful probe will happen at nearly the earliest possible moment. > > Your previous email about just tracking network connection change was > an optimization for avoiding repeat (pointless) probes. We still > need to know to do the probe to begin with. Are you saying you want > to backfill the equivalent of grace (or pseudo-grace) to NFSv3? You don't "know to do the probe" at mount time. You simply always do it. Similarly whenever localio isn't active it is always appropriate to probe - with rate limiting. And NFSv3 already has a grace period - in the NLM/STAT protocols. We could use STAT to detect when the server has restarted and so it is worth probing again. But STAT is not as reliable as we might like and it would be more complexity with no real gain. I would be happy to use exactly the same mechanism for both v3 and v4: send a probe after IO on a new connection. But your solution for v4 is simple and elegant so I'm not at all against it. 
>
> My approach works. Not following what you are saying will be better.

- server-migration can benefit from localio on the new host
- many fewer probes
- probes are much more timely.

NeilBrown

>
> Thanks,
> Mike
>
On Tue, Nov 12, 2024 at 10:23:19AM +1100, NeilBrown wrote: > On Tue, 12 Nov 2024, Mike Snitzer wrote: > > On Tue, Nov 12, 2024 at 07:35:04AM +1100, NeilBrown wrote: > > > > > > I don't like NFS_CS_LOCAL_IO_CAPABLE. > > > A use case that I imagine (and a customer does something like this) is an > > > HA cluster where the NFS server can move from one node to another. All > > > the node access the filesystem, most over NFS. If a server-migration > > > happens (e.g. the current server node failed) then the new server node > > > would suddenly become LOCALIO-capable even though it wasn't at > > > mount-time. I would like it to be able to detect this and start doing > > > localio. > > > > Server migration while retaining the client being local to the new > > server? So client migrates too? > > No. Client doesn't migrate. Server migrates and appears on the same > host as the client. The client can suddenly get better performance. It > should benefit from that. > > > > > If the client migrates then it will negotiate with server using > > LOCALIO protocol. > > > > Anyway, this HA hypothetical feels contrived. It is fine that you > > dislike NFS_CS_LOCAL_IO_CAPABLE but I don't understand what you'd like > > as an alternative. Or why the simplicity in my approach lacking. > > We have customers with exactly this HA config. This is why I put work > into make sure loop-back NFS (client and server on same node) works > cleanly without memory allocation deadlocks. > https://lwn.net/Articles/595652/ > Getting localio in that config would be even better. > > Your approach assumes that if LOCALIO isn't detected at mount time, it > will never been available. I think that is a flawed assumption. That's fair, I agree your HA scenario is valid. It was terse as initially presented but I understand now, thanks. > > > So I don't want NFS_CS_LOCAL_IO_CAPABLE. I think tracking when the > > > network connection is re-established is sufficient. > > > > Eh, that type of tracking doesn't really buy me anything if I've lost > > context (that LOCALIO was previously established and should be > > re-established). > > > > NFS v3 is stateless, hence my hooking off read and write paths to > > trigger nfs_local_probe_async(). Unlike NFS v4, with its grace, more > > care is needed to avoid needless calls to nfs_local_probe_async(). > > I think it makes perfect sense to trigger the probe on a successful > read/write with some rate limiting to avoid sending a LOCALIO probe on > EVERY read/write. Your rate-limiting for NFSv3 is: > - never probe if the mount-time probe was not successful > - otherwise probe once every 256 IO requests. > > I think the first is too restrictive, and the second is unnecessarily > frequent. > I propose: > - probe once each time the client reconnects with the server > > This will result in many fewer probes in practice, but any successful > probe will happen at nearly the earliest possible moment. I'm all for what you're proposing (its what I wanted from the start). In practice I just don't quite grok the client reconnect awareness implementation you're saying is at our finger tips. > > Your previous email about just tracking network connection change was > > an optimization for avoiding repeat (pointless) probes. We still > > need to know to do the probe to begin with. Are you saying you want > > to backfill the equivalent of grace (or pseudo-grace) to NFSv3? > > You don't "know to do the probe" at mount time. You simply always do > it. 
Similarly whenever localio isn't active it is always appropriate to > probe - with rate limiting. > > And NFSv3 already has a grace period - in the NLM/STAT protocols. We > could use STAT to detect when the server has restarted and so it is worth > probing again. But STAT is not as reliable as we might like and it > would be more complexity with no real gain. If you have a specific idea for the mechanism we need to create to detect the v3 client reconnects to the server please let me know. Reusing or augmenting an existing thing is fine by me. > I would be happy to use exactly the same mechanism for both v3 and v4: > send a probe after IO on a new connection. But your solution for v4 is > simple and elegant so I'm not at all against it. > > > > > My approach works. Not following what you are saying will be better. > > - server-migration can benefit from localio on the new host > - many fewer probes > - probes are much more timely. Ha, you misunderstood me: I know the benefits.. what eludes me is the construction of the point to probe (reliable v3 client reconnect awareness). ;) Mike
On Tue, 12 Nov 2024, Mike Snitzer wrote: > On Tue, Nov 12, 2024 at 10:23:19AM +1100, NeilBrown wrote: > > On Tue, 12 Nov 2024, Mike Snitzer wrote: > > > On Tue, Nov 12, 2024 at 07:35:04AM +1100, NeilBrown wrote: > > > > > > > > I don't like NFS_CS_LOCAL_IO_CAPABLE. > > > > A use case that I imagine (and a customer does something like this) is an > > > > HA cluster where the NFS server can move from one node to another. All > > > > the node access the filesystem, most over NFS. If a server-migration > > > > happens (e.g. the current server node failed) then the new server node > > > > would suddenly become LOCALIO-capable even though it wasn't at > > > > mount-time. I would like it to be able to detect this and start doing > > > > localio. > > > > > > Server migration while retaining the client being local to the new > > > server? So client migrates too? > > > > No. Client doesn't migrate. Server migrates and appears on the same > > host as the client. The client can suddenly get better performance. It > > should benefit from that. > > > > > > > > If the client migrates then it will negotiate with server using > > > LOCALIO protocol. > > > > > > Anyway, this HA hypothetical feels contrived. It is fine that you > > > dislike NFS_CS_LOCAL_IO_CAPABLE but I don't understand what you'd like > > > as an alternative. Or why the simplicity in my approach lacking. > > > > We have customers with exactly this HA config. This is why I put work > > into make sure loop-back NFS (client and server on same node) works > > cleanly without memory allocation deadlocks. > > https://lwn.net/Articles/595652/ > > Getting localio in that config would be even better. > > > > Your approach assumes that if LOCALIO isn't detected at mount time, it > > will never been available. I think that is a flawed assumption. > > That's fair, I agree your HA scenario is valid. It was terse as > initially presented but I understand now, thanks. > > > > > So I don't want NFS_CS_LOCAL_IO_CAPABLE. I think tracking when the > > > > network connection is re-established is sufficient. > > > > > > Eh, that type of tracking doesn't really buy me anything if I've lost > > > context (that LOCALIO was previously established and should be > > > re-established). > > > > > > NFS v3 is stateless, hence my hooking off read and write paths to > > > trigger nfs_local_probe_async(). Unlike NFS v4, with its grace, more > > > care is needed to avoid needless calls to nfs_local_probe_async(). > > > > I think it makes perfect sense to trigger the probe on a successful > > read/write with some rate limiting to avoid sending a LOCALIO probe on > > EVERY read/write. Your rate-limiting for NFSv3 is: > > - never probe if the mount-time probe was not successful > > - otherwise probe once every 256 IO requests. > > > > I think the first is too restrictive, and the second is unnecessarily > > frequent. > > I propose: > > - probe once each time the client reconnects with the server > > > > This will result in many fewer probes in practice, but any successful > > probe will happen at nearly the earliest possible moment. > > I'm all for what you're proposing (its what I wanted from the start). > In practice I just don't quite grok the client reconnect awareness > implementation you're saying is at our finger tips. > > > > Your previous email about just tracking network connection change was > > > an optimization for avoiding repeat (pointless) probes. We still > > > need to know to do the probe to begin with. 
> > > Are you saying you want
> > > to backfill the equivalent of grace (or pseudo-grace) to NFSv3?
> >
> > You don't "know to do the probe" at mount time. You simply always do
> > it. Similarly whenever localio isn't active it is always appropriate to
> > probe - with rate limiting.
> >
> > And NFSv3 already has a grace period - in the NLM/STAT protocols. We
> > could use STAT to detect when the server has restarted and so it is worth
> > probing again. But STAT is not as reliable as we might like and it
> > would be more complexity with no real gain.
>
> If you have a specific idea for the mechanism we need to create to
> detect the v3 client reconnects to the server please let me know.
> Reusing or augmenting an existing thing is fine by me.

nfs3_local_probe(struct nfs_server *server)
{
	struct nfs_client *clp = server->nfs_client;
	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;

	if (nfs_uuid->connect_cookie != clp->cl_rpcclient->cl_xprt->connect_cookie)
		nfs_local_probe_async()
}

static void nfs_local_probe_async_work(struct work_struct *work)
{
	struct nfs_client *clp = container_of(work, struct nfs_client,
					      cl_local_probe_work);

	clp->cl_uuid.connect_cookie =
		clp->cl_rpcclient->cl_xprt->connect_cookie;
	nfs_local_probe(clp);
}

Or maybe assign connect_cookie (which we have to add to uuid) inside
nfs_local_probe().

Though you need rcu_dereference_pointer() to access cl_xprt and
rcu_read_lock() protection around that.
(cl_xprt can change when the NFS client follows a "location" reported by
the server to handle migration or mirroring. Conceivably we should check
for either cl_xprt changing or cl_xprt->connect_cookie changing, but if
that were an issue it would be easier to initialise ->connect_cookie to
a random number)

Note that you don't need local_probe_mutex. A given work_struct
(cl_local_probe_work) can only be running once. If you try to
queue_work() it while it is running, queue_work() will do nothing.

You'll want to only INIT_WORK() once - not on every
nfs_local_probe_async() call.

NeilBrown
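Pulling Neil's sketch and his follow-up caveats together, a fleshed-out (but still hypothetical and untested) version might look roughly like the following. A connect_cookie field in nfs_uuid_t, a cl_local_probe_work work_struct in nfs_client, and nfs_local_probe_async() are assumed from the discussion, not taken from the posted series:

	static void nfs_local_probe_async_work(struct work_struct *work)
	{
		struct nfs_client *clp = container_of(work, struct nfs_client,
						      cl_local_probe_work);
		struct rpc_xprt *xprt;

		/* Remember which transport incarnation this probe ran against. */
		rcu_read_lock();
		xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt);
		clp->cl_uuid.connect_cookie = xprt->connect_cookie;
		rcu_read_unlock();

		nfs_local_probe(clp);
	}

	void nfs_local_probe_async(struct nfs_client *clp)
	{
		/*
		 * cl_local_probe_work is INIT_WORK()ed once at client init;
		 * queue_work() does nothing if the work is already pending,
		 * so no extra mutex is needed around the probe.
		 */
		queue_work(nfsiod_workqueue, &clp->cl_local_probe_work);
	}

	static void nfs3_local_probe(struct nfs_server *server)
	{
		struct nfs_client *clp = server->nfs_client;
		unsigned int cookie;

		rcu_read_lock();
		cookie = rcu_dereference(clp->cl_rpcclient->cl_xprt)->connect_cookie;
		rcu_read_unlock();

		/* Probe only after the transport has (re)connected. */
		if (clp->cl_uuid.connect_cookie != cookie)
			nfs_local_probe_async(clp);
	}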
On Tue, Nov 12, 2024 at 11:49:30AM +1100, NeilBrown wrote: > On Tue, 12 Nov 2024, Mike Snitzer wrote: > > On Tue, Nov 12, 2024 at 10:23:19AM +1100, NeilBrown wrote: > > > On Tue, 12 Nov 2024, Mike Snitzer wrote: > > > > On Tue, Nov 12, 2024 at 07:35:04AM +1100, NeilBrown wrote: > > > > > > > > > > I don't like NFS_CS_LOCAL_IO_CAPABLE. > > > > > A use case that I imagine (and a customer does something like this) is an > > > > > HA cluster where the NFS server can move from one node to another. All > > > > > the node access the filesystem, most over NFS. If a server-migration > > > > > happens (e.g. the current server node failed) then the new server node > > > > > would suddenly become LOCALIO-capable even though it wasn't at > > > > > mount-time. I would like it to be able to detect this and start doing > > > > > localio. > > > > > > > > Server migration while retaining the client being local to the new > > > > server? So client migrates too? > > > > > > No. Client doesn't migrate. Server migrates and appears on the same > > > host as the client. The client can suddenly get better performance. It > > > should benefit from that. > > > > > > > > > > > If the client migrates then it will negotiate with server using > > > > LOCALIO protocol. > > > > > > > > Anyway, this HA hypothetical feels contrived. It is fine that you > > > > dislike NFS_CS_LOCAL_IO_CAPABLE but I don't understand what you'd like > > > > as an alternative. Or why the simplicity in my approach lacking. > > > > > > We have customers with exactly this HA config. This is why I put work > > > into make sure loop-back NFS (client and server on same node) works > > > cleanly without memory allocation deadlocks. > > > https://lwn.net/Articles/595652/ > > > Getting localio in that config would be even better. > > > > > > Your approach assumes that if LOCALIO isn't detected at mount time, it > > > will never been available. I think that is a flawed assumption. > > > > That's fair, I agree your HA scenario is valid. It was terse as > > initially presented but I understand now, thanks. > > > > > > > So I don't want NFS_CS_LOCAL_IO_CAPABLE. I think tracking when the > > > > > network connection is re-established is sufficient. > > > > > > > > Eh, that type of tracking doesn't really buy me anything if I've lost > > > > context (that LOCALIO was previously established and should be > > > > re-established). > > > > > > > > NFS v3 is stateless, hence my hooking off read and write paths to > > > > trigger nfs_local_probe_async(). Unlike NFS v4, with its grace, more > > > > care is needed to avoid needless calls to nfs_local_probe_async(). > > > > > > I think it makes perfect sense to trigger the probe on a successful > > > read/write with some rate limiting to avoid sending a LOCALIO probe on > > > EVERY read/write. Your rate-limiting for NFSv3 is: > > > - never probe if the mount-time probe was not successful > > > - otherwise probe once every 256 IO requests. > > > > > > I think the first is too restrictive, and the second is unnecessarily > > > frequent. > > > I propose: > > > - probe once each time the client reconnects with the server > > > > > > This will result in many fewer probes in practice, but any successful > > > probe will happen at nearly the earliest possible moment. > > > > I'm all for what you're proposing (its what I wanted from the start). > > In practice I just don't quite grok the client reconnect awareness > > implementation you're saying is at our finger tips. 
> > > > > > Your previous email about just tracking network connection change was > > > > an optimization for avoiding repeat (pointless) probes. We still > > > > need to know to do the probe to begin with. Are you saying you want > > > > to backfill the equivalent of grace (or pseudo-grace) to NFSv3? > > > > > > You don't "know to do the probe" at mount time. You simply always do > > > it. Similarly whenever localio isn't active it is always appropriate to > > > probe - with rate limiting. > > > > > > And NFSv3 already has a grace period - in the NLM/STAT protocols. We > > > could use STAT to detect when the server has restarted and so it is worth > > > probing again. But STAT is not as reliable as we might like and it > > > would be more complexity with no real gain. > > > > If you have a specific idea for the mechanism we need to create to > > detect the v3 client reconnects to the server please let me know. > > Reusing or augmenting an existing thing is fine by me. > > nfs3_local_probe(struct nfs_server *server) > { > struct nfs_client *clp = server->nfs_client; > nfs_uuid_t *nfs_uuid = &clp->cl_uuid; > > if (nfs_uuid->connect_cookie != clp->cl_rpcclient->cl_xprt->connect_cookie) > nfs_local_probe_async() > } > > static void nfs_local_probe_async_work(struct work_struct *work) > { > struct nfs_client *clp = container_of(work, struct nfs_client, > cl_local_probe_work); > clp->cl_uuid.connect_cookie = > clp->cl_rpcclient->cl_xprt->connect_cookie; > nfs_local_probe(clp); > } > > Or maybe assign connect_cookie (which we have to add to uuid) inside > nfs_local_probe(). The problem with per-connection checks is that a change in export security policy could disable LOCALIO rather persistently. The only way to recover, if checking is done only when a connection is established, is to remount or force a disconnect. > Though you need rcu_dereference_pointer() to access cl_xprt and > rcu_read_lock() protection around that. > (cl_xprt can change when the NFS client follows a "location" reported by > the server to handle migration or mirroring. Conceivably we should > check for either cl_xprt changing or cl_xprt->connect_cookie changing, > but if that were an issue it would be easier to initialise > ->connect_cookie to a random number) > > Note that you don't need local_probe_mutex. A given work_struct > (cl_local_probe_work) can only be running once. If you try to > queue_work() it while it is running, queue_work() will do nothing. > > You'll want to only INIT_WORK() once - not on every > nfs_local_probe_async() call. > > NeilBrown
On Wed, 13 Nov 2024, Chuck Lever wrote: > On Tue, Nov 12, 2024 at 11:49:30AM +1100, NeilBrown wrote: > > > > > > If you have a specific idea for the mechanism we need to create to > > > detect the v3 client reconnects to the server please let me know. > > > Reusing or augmenting an existing thing is fine by me. > > > > nfs3_local_probe(struct nfs_server *server) > > { > > struct nfs_client *clp = server->nfs_client; > > nfs_uuid_t *nfs_uuid = &clp->cl_uuid; > > > > if (nfs_uuid->connect_cookie != clp->cl_rpcclient->cl_xprt->connect_cookie) > > nfs_local_probe_async() > > } > > > > static void nfs_local_probe_async_work(struct work_struct *work) > > { > > struct nfs_client *clp = container_of(work, struct nfs_client, > > cl_local_probe_work); > > clp->cl_uuid.connect_cookie = > > clp->cl_rpcclient->cl_xprt->connect_cookie; > > nfs_local_probe(clp); > > } > > > > Or maybe assign connect_cookie (which we have to add to uuid) inside > > nfs_local_probe(). > > The problem with per-connection checks is that a change in export > security policy could disable LOCALIO rather persistently. The only > way to recover, if checking is done only when a connection is > established, is to remount or force a disconnect. > What export security policy specifically? Do you mean changing from sec=sys to to sec=krb5i for example? This would (hopefully) disable localio. Then changing the export back to sec=sys would mean that localio would be possible again. I wonder how the client copes with this. Does it work on a live mount without remount? If so it would certainly make sense for the current security setting to be cached in nfs_uidd and for a probe to be attempted whenever that changed to sec=sys. Thanks, NeilBrown
> On Nov 12, 2024, at 6:13 PM, NeilBrown <neilb@suse.de> wrote: > > On Wed, 13 Nov 2024, Chuck Lever wrote: >> On Tue, Nov 12, 2024 at 11:49:30AM +1100, NeilBrown wrote: >>>> >>>> If you have a specific idea for the mechanism we need to create to >>>> detect the v3 client reconnects to the server please let me know. >>>> Reusing or augmenting an existing thing is fine by me. >>> >>> nfs3_local_probe(struct nfs_server *server) >>> { >>> struct nfs_client *clp = server->nfs_client; >>> nfs_uuid_t *nfs_uuid = &clp->cl_uuid; >>> >>> if (nfs_uuid->connect_cookie != clp->cl_rpcclient->cl_xprt->connect_cookie) >>> nfs_local_probe_async() >>> } >>> >>> static void nfs_local_probe_async_work(struct work_struct *work) >>> { >>> struct nfs_client *clp = container_of(work, struct nfs_client, >>> cl_local_probe_work); >>> clp->cl_uuid.connect_cookie = >>> clp->cl_rpcclient->cl_xprt->connect_cookie; >>> nfs_local_probe(clp); >>> } >>> >>> Or maybe assign connect_cookie (which we have to add to uuid) inside >>> nfs_local_probe(). >> >> The problem with per-connection checks is that a change in export >> security policy could disable LOCALIO rather persistently. The only >> way to recover, if checking is done only when a connection is >> established, is to remount or force a disconnect. >> > What export security policy specifically? > Do you mean changing from sec=sys to to sec=krb5i for example? Another example might be altering the IP address list on the export. Suppose the client is accidentally blocked by this policy, the administrator realizes it, and changes it again to restore access. The client does not disconnect in this case, AFAIK. > This > would (hopefully) disable localio. Then changing the export back to > sec=sys would mean that localio would be possible again. I wonder how > the client copes with this. Does it work on a live mount without > remount? If so it would certainly make sense for the current security > setting to be cached in nfs_uidd and for a probe to be attempted > whenever that changed to sec=sys. > > Thanks, > NeilBrown -- Chuck Lever
On Wed, 13 Nov 2024, Chuck Lever III wrote:
> 
> 
> > On Nov 12, 2024, at 6:13 PM, NeilBrown <neilb@suse.de> wrote:
> > 
> > On Wed, 13 Nov 2024, Chuck Lever wrote:
> >> On Tue, Nov 12, 2024 at 11:49:30AM +1100, NeilBrown wrote:
> >>>> 
> >>>> If you have a specific idea for the mechanism we need to create to
> >>>> detect the v3 client reconnects to the server please let me know.
> >>>> Reusing or augmenting an existing thing is fine by me.
> >>> 
> >>> nfs3_local_probe(struct nfs_server *server)
> >>> {
> >>> 	struct nfs_client *clp = server->nfs_client;
> >>> 	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
> >>> 
> >>> 	if (nfs_uuid->connect_cookie != clp->cl_rpcclient->cl_xprt->connect_cookie)
> >>> 		nfs_local_probe_async()
> >>> }
> >>> 
> >>> static void nfs_local_probe_async_work(struct work_struct *work)
> >>> {
> >>> 	struct nfs_client *clp = container_of(work, struct nfs_client,
> >>> 					      cl_local_probe_work);
> >>> 	clp->cl_uuid.connect_cookie =
> >>> 		clp->cl_rpcclient->cl_xprt->connect_cookie;
> >>> 	nfs_local_probe(clp);
> >>> }
> >>> 
> >>> Or maybe assign connect_cookie (which we have to add to uuid) inside
> >>> nfs_local_probe().
> >> 
> >> The problem with per-connection checks is that a change in export
> >> security policy could disable LOCALIO rather persistently. The only
> >> way to recover, if checking is done only when a connection is
> >> established, is to remount or force a disconnect.
> >> 
> > What export security policy specifically?
> > Do you mean changing from sec=sys to sec=krb5i for example?
> 
> Another example might be altering the IP address list on
> the export. Suppose the client is accidentally blocked
> by this policy, the administrator realizes it, and changes
> it again to restore access.
> 
> The client does not disconnect in this case, AFAIK.

Yes, that is a simpler case...
How would the localio path get disabled when this happens?  I suspect
->nfsd_open_local_fh would (should?) fail.  It, or nfs_open_local_fh()
which calls it, could reset uuid->connect_cookie to an impossible value
so as to force a probe after the next successful IO.  That would be an
important part of the protocol.

Thanks,
NeilBrown
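Building on the same assumed connect_cookie field, a sketch of that
"poison the cookie" fallback; the sentinel value, the helper name and the
exact call site are illustrative only, not from the posted series:

/*
 * Sketch only: to be called when the localio open path fails (e.g. when
 * ->nfsd_open_local_fh returns an error), so that the next
 * nfs3_local_probe() sees a cookie mismatch and schedules a re-probe.
 */
#define NFS_LOCALIO_COOKIE_NONE		(~0U)	/* assumed never reported by a real xprt */

static void nfs_localio_force_reprobe(struct nfs_client *clp)
{
	clp->cl_uuid.connect_cookie = NFS_LOCALIO_COOKIE_NONE;
	/* fall back to normal RPC until a later probe succeeds */
	nfs_local_disable(clp);
}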
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 03ecc7765615..124232054807 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -182,7 +182,6 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
 	seqlock_init(&clp->cl_boot_lock);
 	ktime_get_real_ts64(&clp->cl_nfssvc_boot);
 	nfs_uuid_init(&clp->cl_uuid);
-	spin_lock_init(&clp->cl_localio_lock);
 #endif /* CONFIG_NFS_LOCALIO */
 
 	clp->cl_principal = "*";
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index cab2a8819259..4c75ffc5efa2 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -125,10 +125,8 @@ const struct rpc_program nfslocalio_program = {
  */
 static void nfs_local_enable(struct nfs_client *clp)
 {
-	spin_lock(&clp->cl_localio_lock);
-	set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
 	trace_nfs_local_enable(clp);
-	spin_unlock(&clp->cl_localio_lock);
+	nfs_localio_enable_client(clp);
 }
 
 /*
@@ -136,12 +134,8 @@ static void nfs_local_enable(struct nfs_client *clp)
  */
 void nfs_local_disable(struct nfs_client *clp)
 {
-	spin_lock(&clp->cl_localio_lock);
-	if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) {
-		trace_nfs_local_disable(clp);
-		nfs_localio_disable_client(&clp->cl_uuid);
-	}
-	spin_unlock(&clp->cl_localio_lock);
+	trace_nfs_local_disable(clp);
+	nfs_localio_disable_client(clp);
 }
 
 /*
@@ -183,8 +177,12 @@ static bool nfs_server_uuid_is_local(struct nfs_client *clp)
 	rpc_shutdown_client(rpcclient_localio);
 
 	/* Server is only local if it initialized required struct members */
-	if (status || !clp->cl_uuid.net || !clp->cl_uuid.dom)
+	rcu_read_lock();
+	if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom) {
+		rcu_read_unlock();
 		return false;
+	}
+	rcu_read_unlock();
 
 	return true;
 }
diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
index 904439e4bb85..cf2f47ea4f8d 100644
--- a/fs/nfs_common/nfslocalio.c
+++ b/fs/nfs_common/nfslocalio.c
@@ -7,6 +7,9 @@
 #include <linux/module.h>
 #include <linux/list.h>
 #include <linux/nfslocalio.h>
+#include <linux/nfs3.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs_sb.h>
 #include <net/netns/generic.h>
 
 MODULE_LICENSE("GPL");
@@ -25,6 +28,7 @@ void nfs_uuid_init(nfs_uuid_t *nfs_uuid)
 	nfs_uuid->net = NULL;
 	nfs_uuid->dom = NULL;
 	INIT_LIST_HEAD(&nfs_uuid->list);
+	spin_lock_init(&nfs_uuid->lock);
 }
 EXPORT_SYMBOL_GPL(nfs_uuid_init);
 
@@ -94,12 +98,23 @@ void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list,
 }
 EXPORT_SYMBOL_GPL(nfs_uuid_is_local);
 
+void nfs_localio_enable_client(struct nfs_client *clp)
+{
+	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
+
+	spin_lock(&nfs_uuid->lock);
+	set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
+	spin_unlock(&nfs_uuid->lock);
+}
+EXPORT_SYMBOL_GPL(nfs_localio_enable_client);
+
 static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid)
 {
-	if (nfs_uuid->net) {
-		module_put(nfsd_mod);
-		nfs_uuid->net = NULL;
-	}
+	if (!nfs_uuid->net)
+		return;
+	module_put(nfsd_mod);
+	rcu_assign_pointer(nfs_uuid->net, NULL);
+
 	if (nfs_uuid->dom) {
 		auth_domain_put(nfs_uuid->dom);
 		nfs_uuid->dom = NULL;
@@ -107,27 +122,35 @@ static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid)
 	list_del_init(&nfs_uuid->list);
 }
 
-void nfs_localio_invalidate_clients(struct list_head *list)
+void nfs_localio_disable_client(struct nfs_client *clp)
+{
+	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
+
+	spin_lock(&nfs_uuid->lock);
+	if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) {
+		spin_lock(&nfs_uuid_lock);
+		nfs_uuid_put_locked(nfs_uuid);
+		spin_unlock(&nfs_uuid_lock);
+	}
+	spin_unlock(&nfs_uuid->lock);
+}
+EXPORT_SYMBOL_GPL(nfs_localio_disable_client);
+
+void nfs_localio_invalidate_clients(struct list_head *cl_uuid_list)
 {
 	nfs_uuid_t *nfs_uuid, *tmp;
 
 	spin_lock(&nfs_uuid_lock);
-	list_for_each_entry_safe(nfs_uuid, tmp, list, list)
-		nfs_uuid_put_locked(nfs_uuid);
+	list_for_each_entry_safe(nfs_uuid, tmp, cl_uuid_list, list) {
+		struct nfs_client *clp =
+			container_of(nfs_uuid, struct nfs_client, cl_uuid);
+
+		nfs_localio_disable_client(clp);
+	}
 	spin_unlock(&nfs_uuid_lock);
 }
 EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients);
 
-void nfs_localio_disable_client(nfs_uuid_t *nfs_uuid)
-{
-	if (nfs_uuid->net) {
-		spin_lock(&nfs_uuid_lock);
-		nfs_uuid_put_locked(nfs_uuid);
-		spin_unlock(&nfs_uuid_lock);
-	}
-}
-EXPORT_SYMBOL_GPL(nfs_localio_disable_client);
-
 struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
 		   struct rpc_clnt *rpc_clnt, const struct cred *cred,
 		   const struct nfs_fh *nfs_fh, const fmode_t fmode)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b804346a9741..239d86ef166c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -132,7 +132,6 @@ struct nfs_client {
 	struct timespec64	cl_nfssvc_boot;
 	seqlock_t		cl_boot_lock;
 	nfs_uuid_t		cl_uuid;
-	spinlock_t		cl_localio_lock;
 #endif /* CONFIG_NFS_LOCALIO */
 };
 
diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
index a05d1043f2b0..4d5583873f41 100644
--- a/include/linux/nfslocalio.h
+++ b/include/linux/nfslocalio.h
@@ -6,6 +6,7 @@
 #ifndef __LINUX_NFSLOCALIO_H
 #define __LINUX_NFSLOCALIO_H
 
+
 /* nfsd_file structure is purposely kept opaque to NFS client */
 struct nfsd_file;
 
@@ -19,6 +20,8 @@ struct nfsd_file;
 #include <linux/nfs.h>
 #include <net/net_namespace.h>
 
+struct nfs_client;
+
 /*
  * Useful to allow a client to negotiate if localio
  * possible with its server.
@@ -27,6 +30,8 @@ struct nfsd_file;
  */
 typedef struct {
 	uuid_t uuid;
+	/* sadly this struct is just over a cacheline, avoid bouncing */
+	spinlock_t ____cacheline_aligned lock;
 	struct list_head list;
 	struct net __rcu *net; /* nfsd's network namespace */
 	struct auth_domain *dom; /* auth_domain for localio */
@@ -38,7 +43,8 @@ void nfs_uuid_end(nfs_uuid_t *);
 void nfs_uuid_is_local(const uuid_t *, struct list_head *,
 		       struct net *, struct auth_domain *, struct module *);
 
-void nfs_localio_disable_client(nfs_uuid_t *nfs_uuid);
+void nfs_localio_enable_client(struct nfs_client *clp);
+void nfs_localio_disable_client(struct nfs_client *clp);
 void nfs_localio_invalidate_clients(struct list_head *list);
 
 /* localio needs to map filehandle -> struct nfsd_file */
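For reference, a rough illustration of how a client-side caller would use
the reworked helpers above; the function and its server_is_local argument
are made up, only the two nfs_localio_*_client() calls come from the patch:

/* Illustrative caller only, not part of the patch. */
static void example_update_localio_state(struct nfs_client *clp,
					 bool server_is_local)
{
	if (server_is_local) {
		/* takes clp->cl_uuid.lock and sets NFS_CS_LOCAL_IO */
		nfs_localio_enable_client(clp);
	} else {
		/*
		 * takes clp->cl_uuid.lock, clears NFS_CS_LOCAL_IO and, under
		 * the global nfs_uuid_lock, drops the nfsd module reference
		 * and unlinks the uuid from nfsd's list
		 */
		nfs_localio_disable_client(clp);
	}
}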
Remove cl_localio_lock from 'struct nfs_client' in favor of adding a
lock to the nfs_uuid_t struct (which is embedded in each nfs_client).

Push nfs_local_{enable,disable} implementation down to nfs_common.
Those methods now call nfs_localio_{enable,disable}_client.

This allows implementing nfs_localio_invalidate_clients in terms of
nfs_localio_disable_client.

Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
 fs/nfs/client.c            |  1 -
 fs/nfs/localio.c           | 18 ++++++------
 fs/nfs_common/nfslocalio.c | 57 ++++++++++++++++++++++++++------------
 include/linux/nfs_fs_sb.h  |  1 -
 include/linux/nfslocalio.h |  8 +++++-
 5 files changed, 55 insertions(+), 30 deletions(-)