
[for-6.13,10/19] nfs_common: move localio_lock to new lock member of nfs_uuid_t

Message ID 20241108234002.16392-11-snitzer@kernel.org (mailing list archive)
State New
Series nfs/nfsd: fixes and improvements for LOCALIO

Commit Message

Mike Snitzer Nov. 8, 2024, 11:39 p.m. UTC
Remove cl_localio_lock from 'struct nfs_client' in favor of adding a
lock to the nfs_uuid_t struct (which is embedded in each nfs_client).

Push nfs_local_{enable,disable} implementation down to nfs_common.
Those methods now call nfs_localio_{enable,disable}_client.

This allows implementing nfs_localio_invalidate_clients in terms of
nfs_localio_disable_client.

Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
 fs/nfs/client.c            |  1 -
 fs/nfs/localio.c           | 18 ++++++------
 fs/nfs_common/nfslocalio.c | 57 ++++++++++++++++++++++++++------------
 include/linux/nfs_fs_sb.h  |  1 -
 include/linux/nfslocalio.h |  8 +++++-
 5 files changed, 55 insertions(+), 30 deletions(-)

Comments

NeilBrown Nov. 11, 2024, 1:55 a.m. UTC | #1
On Sat, 09 Nov 2024, Mike Snitzer wrote:
> Remove cl_localio_lock from 'struct nfs_client' in favor of adding a
> lock to the nfs_uuid_t struct (which is embedded in each nfs_client).
> 
> Push nfs_local_{enable,disable} implementation down to nfs_common.
> Those methods now call nfs_localio_{enable,disable}_client.
> 
> This allows implementing nfs_localio_invalidate_clients in terms of
> nfs_localio_disable_client.
> 
> Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> ---
>  fs/nfs/client.c            |  1 -
>  fs/nfs/localio.c           | 18 ++++++------
>  fs/nfs_common/nfslocalio.c | 57 ++++++++++++++++++++++++++------------
>  include/linux/nfs_fs_sb.h  |  1 -
>  include/linux/nfslocalio.h |  8 +++++-
>  5 files changed, 55 insertions(+), 30 deletions(-)
> 
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index 03ecc7765615..124232054807 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -182,7 +182,6 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
>  	seqlock_init(&clp->cl_boot_lock);
>  	ktime_get_real_ts64(&clp->cl_nfssvc_boot);
>  	nfs_uuid_init(&clp->cl_uuid);
> -	spin_lock_init(&clp->cl_localio_lock);
>  #endif /* CONFIG_NFS_LOCALIO */
>  
>  	clp->cl_principal = "*";
> diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
> index cab2a8819259..4c75ffc5efa2 100644
> --- a/fs/nfs/localio.c
> +++ b/fs/nfs/localio.c
> @@ -125,10 +125,8 @@ const struct rpc_program nfslocalio_program = {
>   */
>  static void nfs_local_enable(struct nfs_client *clp)
>  {
> -	spin_lock(&clp->cl_localio_lock);
> -	set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
>  	trace_nfs_local_enable(clp);
> -	spin_unlock(&clp->cl_localio_lock);
> +	nfs_localio_enable_client(clp);
>  }

Why do we need this function?  The one caller could call
nfs_localio_enable_client() directly instead.  The tracepoint could be
placed in that one caller.
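
E.g. a sketch, assuming nfs_local_probe() is the lone caller:

	/* in nfs_local_probe(), assuming it is the only caller */
	if (nfs_server_uuid_is_local(clp)) {
		trace_nfs_local_enable(clp);
		nfs_localio_enable_client(clp);
	}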

>  
>  /*
> @@ -136,12 +134,8 @@ static void nfs_local_enable(struct nfs_client *clp)
>   */
>  void nfs_local_disable(struct nfs_client *clp)
>  {
> -	spin_lock(&clp->cl_localio_lock);
> -	if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) {
> -		trace_nfs_local_disable(clp);
> -		nfs_localio_disable_client(&clp->cl_uuid);
> -	}
> -	spin_unlock(&clp->cl_localio_lock);
> +	trace_nfs_local_disable(clp);
> +	nfs_localio_disable_client(clp);
>  }

Ditto.  Though there are more callers so the tracepoint solution isn't
quite so obvious.

>  
>  /*
> @@ -183,8 +177,12 @@ static bool nfs_server_uuid_is_local(struct nfs_client *clp)
>  	rpc_shutdown_client(rpcclient_localio);
>  
>  	/* Server is only local if it initialized required struct members */
> -	if (status || !clp->cl_uuid.net || !clp->cl_uuid.dom)
> +	rcu_read_lock();
> +	if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom) {
> +		rcu_read_unlock();
>  		return false;
> +	}
> +	rcu_read_unlock();

What value does RCU provide here?  I don't think this change is needed.
rcu_access_pointer does not require rcu_read_lock().
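
In other words the test can stay a single statement, e.g. (sketch):

	/* rcu_access_pointer() only reads the pointer value; no rcu_read_lock() needed */
	if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom)
		return false;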

>  
>  	return true;
>  }
> diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
> index 904439e4bb85..cf2f47ea4f8d 100644
> --- a/fs/nfs_common/nfslocalio.c
> +++ b/fs/nfs_common/nfslocalio.c
> @@ -7,6 +7,9 @@
>  #include <linux/module.h>
>  #include <linux/list.h>
>  #include <linux/nfslocalio.h>
> +#include <linux/nfs3.h>
> +#include <linux/nfs4.h>
> +#include <linux/nfs_fs_sb.h>

I don't feel good about adding this nfs client knowledge into nfs_common.

>  #include <net/netns/generic.h>
>  
>  MODULE_LICENSE("GPL");
> @@ -25,6 +28,7 @@ void nfs_uuid_init(nfs_uuid_t *nfs_uuid)
>  	nfs_uuid->net = NULL;
>  	nfs_uuid->dom = NULL;
>  	INIT_LIST_HEAD(&nfs_uuid->list);
> +	spin_lock_init(&nfs_uuid->lock);
>  }
>  EXPORT_SYMBOL_GPL(nfs_uuid_init);
>  
> @@ -94,12 +98,23 @@ void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list,
>  }
>  EXPORT_SYMBOL_GPL(nfs_uuid_is_local);
>  
> +void nfs_localio_enable_client(struct nfs_client *clp)
> +{
> +	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
> +
> +	spin_lock(&nfs_uuid->lock);
> +	set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
> +	spin_unlock(&nfs_uuid->lock);
> +}
> +EXPORT_SYMBOL_GPL(nfs_localio_enable_client);

And I don't feel good about nfs_local accessing nfs_client directly.
It only uses it for NFS_CS_LOCAL_IO.  Can we ditch that flag and instead
do something like testing nfs_uuid.net?
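
E.g. a sketch of what that could look like (helper name is only illustrative):

/* sketch: "localio enabled" == nfsd's net namespace is recorded in the uuid */
static inline bool nfs_localio_enabled(nfs_uuid_t *nfs_uuid)
{
	return rcu_access_pointer(nfs_uuid->net) != NULL;
}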

> +
>  static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid)
>  {
> -	if (nfs_uuid->net) {
> -		module_put(nfsd_mod);
> -		nfs_uuid->net = NULL;
> -	}
> +	if (!nfs_uuid->net)
> +		return;
> +	module_put(nfsd_mod);
> +	rcu_assign_pointer(nfs_uuid->net, NULL);
> +

I much prefer RCU_INIT_POINTER for assigning NULL as there is no need
for ordering here.
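
i.e. a sketch of the same hunk:

	if (!nfs_uuid->net)
		return;
	module_put(nfsd_mod);
	/* a NULL store needs no ordering, so RCU_INIT_POINTER() is enough */
	RCU_INIT_POINTER(nfs_uuid->net, NULL);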

>  	if (nfs_uuid->dom) {
>  		auth_domain_put(nfs_uuid->dom);
>  		nfs_uuid->dom = NULL;
> @@ -107,27 +122,35 @@ static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid)
>  	list_del_init(&nfs_uuid->list);
>  }
>  
> -void nfs_localio_invalidate_clients(struct list_head *list)
> +void nfs_localio_disable_client(struct nfs_client *clp)
> +{
> +	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
> +
> +	spin_lock(&nfs_uuid->lock);
> +	if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) {
> +		spin_lock(&nfs_uuid_lock);
> +		nfs_uuid_put_locked(nfs_uuid);
> +		spin_unlock(&nfs_uuid_lock);
> +	}
> +	spin_unlock(&nfs_uuid->lock);
> +}
> +EXPORT_SYMBOL_GPL(nfs_localio_disable_client);
> +
> +void nfs_localio_invalidate_clients(struct list_head *cl_uuid_list)
>  {
>  	nfs_uuid_t *nfs_uuid, *tmp;
>  
>  	spin_lock(&nfs_uuid_lock);
> -	list_for_each_entry_safe(nfs_uuid, tmp, list, list)
> -		nfs_uuid_put_locked(nfs_uuid);
> +	list_for_each_entry_safe(nfs_uuid, tmp, cl_uuid_list, list) {
> +		struct nfs_client *clp =
> +			container_of(nfs_uuid, struct nfs_client, cl_uuid);
> +
> +		nfs_localio_disable_client(clp);
> +	}
>  	spin_unlock(&nfs_uuid_lock);
>  }
>  EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients);
>  
> -void nfs_localio_disable_client(nfs_uuid_t *nfs_uuid)
> -{
> -	if (nfs_uuid->net) {
> -		spin_lock(&nfs_uuid_lock);
> -		nfs_uuid_put_locked(nfs_uuid);
> -		spin_unlock(&nfs_uuid_lock);
> -	}
> -}
> -EXPORT_SYMBOL_GPL(nfs_localio_disable_client);
> -
>  struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
>  		   struct rpc_clnt *rpc_clnt, const struct cred *cred,
>  		   const struct nfs_fh *nfs_fh, const fmode_t fmode)
> diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> index b804346a9741..239d86ef166c 100644
> --- a/include/linux/nfs_fs_sb.h
> +++ b/include/linux/nfs_fs_sb.h
> @@ -132,7 +132,6 @@ struct nfs_client {
>  	struct timespec64	cl_nfssvc_boot;
>  	seqlock_t		cl_boot_lock;
>  	nfs_uuid_t		cl_uuid;
> -	spinlock_t		cl_localio_lock;
>  #endif /* CONFIG_NFS_LOCALIO */
>  };
>  
> diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
> index a05d1043f2b0..4d5583873f41 100644
> --- a/include/linux/nfslocalio.h
> +++ b/include/linux/nfslocalio.h
> @@ -6,6 +6,7 @@
>  #ifndef __LINUX_NFSLOCALIO_H
>  #define __LINUX_NFSLOCALIO_H
>  
> +
>  /* nfsd_file structure is purposely kept opaque to NFS client */
>  struct nfsd_file;
>  
> @@ -19,6 +20,8 @@ struct nfsd_file;
>  #include <linux/nfs.h>
>  #include <net/net_namespace.h>
>  
> +struct nfs_client;
> +
>  /*
>   * Useful to allow a client to negotiate if localio
>   * possible with its server.
> @@ -27,6 +30,8 @@ struct nfsd_file;
>   */
>  typedef struct {
>  	uuid_t uuid;
> +	/* sadly this struct is just over a cacheline, avoid bouncing */
> +	spinlock_t ____cacheline_aligned lock;
>  	struct list_head list;
>  	struct net __rcu *net; /* nfsd's network namespace */
>  	struct auth_domain *dom; /* auth_domain for localio */
> @@ -38,7 +43,8 @@ void nfs_uuid_end(nfs_uuid_t *);
>  void nfs_uuid_is_local(const uuid_t *, struct list_head *,
>  		       struct net *, struct auth_domain *, struct module *);
>  
> -void nfs_localio_disable_client(nfs_uuid_t *nfs_uuid);
> +void nfs_localio_enable_client(struct nfs_client *clp);
> +void nfs_localio_disable_client(struct nfs_client *clp);
>  void nfs_localio_invalidate_clients(struct list_head *list);
>  
>  /* localio needs to map filehandle -> struct nfsd_file */
> -- 
> 2.44.0
> 
> 

I think this is a good refactoring to do, but I don't like some of the
details, or some of the RCU code.

NeilBrown
Mike Snitzer Nov. 11, 2024, 3:33 p.m. UTC | #2
On Mon, Nov 11, 2024 at 12:55:24PM +1100, NeilBrown wrote:
> On Sat, 09 Nov 2024, Mike Snitzer wrote:
> > Remove cl_localio_lock from 'struct nfs_client' in favor of adding a
> > lock to the nfs_uuid_t struct (which is embedded in each nfs_client).
> > 
> > Push nfs_local_{enable,disable} implementation down to nfs_common.
> > Those methods now call nfs_localio_{enable,disable}_client.
> > 
> > This allows implementing nfs_localio_invalidate_clients in terms of
> > nfs_localio_disable_client.
> > 
> > Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> > ---
> >  fs/nfs/client.c            |  1 -
> >  fs/nfs/localio.c           | 18 ++++++------
> >  fs/nfs_common/nfslocalio.c | 57 ++++++++++++++++++++++++++------------
> >  include/linux/nfs_fs_sb.h  |  1 -
> >  include/linux/nfslocalio.h |  8 +++++-
> >  5 files changed, 55 insertions(+), 30 deletions(-)
> > 
> > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > index 03ecc7765615..124232054807 100644
> > --- a/fs/nfs/client.c
> > +++ b/fs/nfs/client.c
> > @@ -182,7 +182,6 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
> >  	seqlock_init(&clp->cl_boot_lock);
> >  	ktime_get_real_ts64(&clp->cl_nfssvc_boot);
> >  	nfs_uuid_init(&clp->cl_uuid);
> > -	spin_lock_init(&clp->cl_localio_lock);
> >  #endif /* CONFIG_NFS_LOCALIO */
> >  
> >  	clp->cl_principal = "*";
> > diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
> > index cab2a8819259..4c75ffc5efa2 100644
> > --- a/fs/nfs/localio.c
> > +++ b/fs/nfs/localio.c
> > @@ -125,10 +125,8 @@ const struct rpc_program nfslocalio_program = {
> >   */
> >  static void nfs_local_enable(struct nfs_client *clp)
> >  {
> > -	spin_lock(&clp->cl_localio_lock);
> > -	set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
> >  	trace_nfs_local_enable(clp);
> > -	spin_unlock(&clp->cl_localio_lock);
> > +	nfs_localio_enable_client(clp);
> >  }
> 
> Why do we need this function?  The one caller could call
> nfs_localio_enable_client() directly instead.  The tracepoint could be
> placed in that one caller.

Yeah, I saw that too but felt it useful to differentiate between calls
that occur during NFS client initialization and those that happen as a
side-effect of callers from other contexts (in a later patch this
manifests as nfs_localio_disable_client vs nfs_local_disable).

Hence my adding secondary tracepoints for nfs_common (see "[PATCH
17/19] nfs_common: add nfs_localio trace events").

But sure, we can just eliminate nfs_local_{enable,disable} and the
corresponding tracepoints (which will have moved down to nfs_common).

> >  /*
> > @@ -136,12 +134,8 @@ static void nfs_local_enable(struct nfs_client *clp)
> >   */
> >  void nfs_local_disable(struct nfs_client *clp)
> >  {
> > -	spin_lock(&clp->cl_localio_lock);
> > -	if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) {
> > -		trace_nfs_local_disable(clp);
> > -		nfs_localio_disable_client(&clp->cl_uuid);
> > -	}
> > -	spin_unlock(&clp->cl_localio_lock);
> > +	trace_nfs_local_disable(clp);
> > +	nfs_localio_disable_client(clp);
> >  }
> 
> Ditto.  Though there are more callers so the tracepoint solution isn't
> quite so obvious.

Right... as I just explained: that's why I preserved nfs_local_disable
(and the tracepoint).


> >  /*
> > @@ -183,8 +177,12 @@ static bool nfs_server_uuid_is_local(struct nfs_client *clp)
> >  	rpc_shutdown_client(rpcclient_localio);
> >  
> >  	/* Server is only local if it initialized required struct members */
> > -	if (status || !clp->cl_uuid.net || !clp->cl_uuid.dom)
> > +	rcu_read_lock();
> > +	if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom) {
> > +		rcu_read_unlock();
> >  		return false;
> > +	}
> > +	rcu_read_unlock();
> 
> What value does RCU provide here?  I don't think this change is needed.
> rcu_access_pointer does not require rcu_read_lock().

OK, not sure why I thought RCU read-side protection was needed for rcu_access_pointer()...

> >  	return true;
> >  }
> > diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
> > index 904439e4bb85..cf2f47ea4f8d 100644
> > --- a/fs/nfs_common/nfslocalio.c
> > +++ b/fs/nfs_common/nfslocalio.c
> > @@ -7,6 +7,9 @@
> >  #include <linux/module.h>
> >  #include <linux/list.h>
> >  #include <linux/nfslocalio.h>
> > +#include <linux/nfs3.h>
> > +#include <linux/nfs4.h>
> > +#include <linux/nfs_fs_sb.h>
> 
> I don't feel good about adding this nfs client knowledge in to nfs_common.

I hear you.. I was "OK with it".

> >  #include <net/netns/generic.h>
> >  
> >  MODULE_LICENSE("GPL");
> > @@ -25,6 +28,7 @@ void nfs_uuid_init(nfs_uuid_t *nfs_uuid)
> >  	nfs_uuid->net = NULL;
> >  	nfs_uuid->dom = NULL;
> >  	INIT_LIST_HEAD(&nfs_uuid->list);
> > +	spin_lock_init(&nfs_uuid->lock);
> >  }
> >  EXPORT_SYMBOL_GPL(nfs_uuid_init);
> >  
> > @@ -94,12 +98,23 @@ void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list,
> >  }
> >  EXPORT_SYMBOL_GPL(nfs_uuid_is_local);
> >  
> > +void nfs_localio_enable_client(struct nfs_client *clp)
> > +{
> > +	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
> > +
> > +	spin_lock(&nfs_uuid->lock);
> > +	set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
> > +	spin_unlock(&nfs_uuid->lock);
> > +}
> > +EXPORT_SYMBOL_GPL(nfs_localio_enable_client);
> 
> And I don't feel good about nfs_local accessing nfs_client directly.
> It only uses it for NFS_CS_LOCAL_IO.  Can we ditch that flag and instead
> so something like testing nfs_uuid.net ??

That'd probably be OK for the equivalent of NFS_CS_LOCAL_IO but the last
patch in this series ("nfs: probe for LOCALIO when v3 client
reconnects to server") adds NFS_CS_LOCAL_IO_CAPABLE to provide a hint
that the client and server successfully established themselves as local
via the LOCALIO protocol.  This is needed so that NFSv3 (stateless) has a
hint that reestablishing LOCALIO is needed if/when the client loses
connectivity to the server (because it was shut down and restarted).

We could introduce flags local to the nfs_uuid_t structure so that
nfs_common/nfslocalio.c wouldn't need to know the internals of nfs_client
at all -- conversely, it's probably best not to bloat nfs_uuid_t (and
nfs_client) further, so should we just ensure nfs_client is treated as an
opaque pointer in nfs_common by introducing accessors?
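
E.g. a rough sketch (names made up) of accessors the client side could
provide, so that nfs_common only ever sees an opaque pointer:

/* sketch only: provided by fs/nfs (e.g. registered with nfs_common at
 * init time) so nfs_common never looks inside struct nfs_client */
static void nfs_client_set_localio(struct nfs_client *clp)
{
	set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
}

static bool nfs_client_test_and_clear_localio(struct nfs_client *clp)
{
	return test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
}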

> > +
> >  static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid)
> >  {
> > -	if (nfs_uuid->net) {
> > -		module_put(nfsd_mod);
> > -		nfs_uuid->net = NULL;
> > -	}
> > +	if (!nfs_uuid->net)
> > +		return;
> > +	module_put(nfsd_mod);
> > +	rcu_assign_pointer(nfs_uuid->net, NULL);
> > +
> 
> I much prefer RCU_INIT_POINTER for assigning NULL as there is no need
> for ordering here.

OK.

> >  	if (nfs_uuid->dom) {
> >  		auth_domain_put(nfs_uuid->dom);
> >  		nfs_uuid->dom = NULL;
> > @@ -107,27 +122,35 @@ static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid)
> >  	list_del_init(&nfs_uuid->list);
> >  }
> >  
> > -void nfs_localio_invalidate_clients(struct list_head *list)
> > +void nfs_localio_disable_client(struct nfs_client *clp)
> > +{
> > +	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
> > +
> > +	spin_lock(&nfs_uuid->lock);
> > +	if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) {
> > +		spin_lock(&nfs_uuid_lock);
> > +		nfs_uuid_put_locked(nfs_uuid);
> > +		spin_unlock(&nfs_uuid_lock);
> > +	}
> > +	spin_unlock(&nfs_uuid->lock);
> > +}
> > +EXPORT_SYMBOL_GPL(nfs_localio_disable_client);
> > +
> > +void nfs_localio_invalidate_clients(struct list_head *cl_uuid_list)
> >  {
> >  	nfs_uuid_t *nfs_uuid, *tmp;
> >  
> >  	spin_lock(&nfs_uuid_lock);
> > -	list_for_each_entry_safe(nfs_uuid, tmp, list, list)
> > -		nfs_uuid_put_locked(nfs_uuid);
> > +	list_for_each_entry_safe(nfs_uuid, tmp, cl_uuid_list, list) {
> > +		struct nfs_client *clp =
> > +			container_of(nfs_uuid, struct nfs_client, cl_uuid);
> > +
> > +		nfs_localio_disable_client(clp);
> > +	}
> >  	spin_unlock(&nfs_uuid_lock);
> >  }
> >  EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients);
> >  
> > -void nfs_localio_disable_client(nfs_uuid_t *nfs_uuid)
> > -{
> > -	if (nfs_uuid->net) {
> > -		spin_lock(&nfs_uuid_lock);
> > -		nfs_uuid_put_locked(nfs_uuid);
> > -		spin_unlock(&nfs_uuid_lock);
> > -	}
> > -}
> > -EXPORT_SYMBOL_GPL(nfs_localio_disable_client);
> > -
> >  struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
> >  		   struct rpc_clnt *rpc_clnt, const struct cred *cred,
> >  		   const struct nfs_fh *nfs_fh, const fmode_t fmode)
> > diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
> > index b804346a9741..239d86ef166c 100644
> > --- a/include/linux/nfs_fs_sb.h
> > +++ b/include/linux/nfs_fs_sb.h
> > @@ -132,7 +132,6 @@ struct nfs_client {
> >  	struct timespec64	cl_nfssvc_boot;
> >  	seqlock_t		cl_boot_lock;
> >  	nfs_uuid_t		cl_uuid;
> > -	spinlock_t		cl_localio_lock;
> >  #endif /* CONFIG_NFS_LOCALIO */
> >  };
> >  
> > diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
> > index a05d1043f2b0..4d5583873f41 100644
> > --- a/include/linux/nfslocalio.h
> > +++ b/include/linux/nfslocalio.h
> > @@ -6,6 +6,7 @@
> >  #ifndef __LINUX_NFSLOCALIO_H
> >  #define __LINUX_NFSLOCALIO_H
> >  
> > +
> >  /* nfsd_file structure is purposely kept opaque to NFS client */
> >  struct nfsd_file;
> >  
> > @@ -19,6 +20,8 @@ struct nfsd_file;
> >  #include <linux/nfs.h>
> >  #include <net/net_namespace.h>
> >  
> > +struct nfs_client;
> > +
> >  /*
> >   * Useful to allow a client to negotiate if localio
> >   * possible with its server.
> > @@ -27,6 +30,8 @@ struct nfsd_file;
> >   */
> >  typedef struct {
> >  	uuid_t uuid;
> > +	/* sadly this struct is just over a cacheline, avoid bouncing */
> > +	spinlock_t ____cacheline_aligned lock;
> >  	struct list_head list;
> >  	struct net __rcu *net; /* nfsd's network namespace */
> >  	struct auth_domain *dom; /* auth_domain for localio */
> > @@ -38,7 +43,8 @@ void nfs_uuid_end(nfs_uuid_t *);
> >  void nfs_uuid_is_local(const uuid_t *, struct list_head *,
> >  		       struct net *, struct auth_domain *, struct module *);
> >  
> > -void nfs_localio_disable_client(nfs_uuid_t *nfs_uuid);
> > +void nfs_localio_enable_client(struct nfs_client *clp);
> > +void nfs_localio_disable_client(struct nfs_client *clp);
> >  void nfs_localio_invalidate_clients(struct list_head *list);
> >  
> >  /* localio needs to map filehandle -> struct nfsd_file */
> > -- 
> > 2.44.0
> > 
> > 
> 
> I think this is a good refactoring to do, but I don't like some of the
> details, or some of the RCU code.

Sure, I'll clean it up further.

Thanks for your review.

Mike
NeilBrown Nov. 11, 2024, 8:35 p.m. UTC | #3
On Tue, 12 Nov 2024, Mike Snitzer wrote:
> On Mon, Nov 11, 2024 at 12:55:24PM +1100, NeilBrown wrote:
> > On Sat, 09 Nov 2024, Mike Snitzer wrote:
> > > Remove cl_localio_lock from 'struct nfs_client' in favor of adding a
> > > lock to the nfs_uuid_t struct (which is embedded in each nfs_client).
> > > 
> > > Push nfs_local_{enable,disable} implementation down to nfs_common.
> > > Those methods now call nfs_localio_{enable,disable}_client.
> > > 
> > > This allows implementing nfs_localio_invalidate_clients in terms of
> > > nfs_localio_disable_client.
> > > 
> > > Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> > > ---
> > >  fs/nfs/client.c            |  1 -
> > >  fs/nfs/localio.c           | 18 ++++++------
> > >  fs/nfs_common/nfslocalio.c | 57 ++++++++++++++++++++++++++------------
> > >  include/linux/nfs_fs_sb.h  |  1 -
> > >  include/linux/nfslocalio.h |  8 +++++-
> > >  5 files changed, 55 insertions(+), 30 deletions(-)
> > > 
> > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > index 03ecc7765615..124232054807 100644
> > > --- a/fs/nfs/client.c
> > > +++ b/fs/nfs/client.c
> > > @@ -182,7 +182,6 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
> > >  	seqlock_init(&clp->cl_boot_lock);
> > >  	ktime_get_real_ts64(&clp->cl_nfssvc_boot);
> > >  	nfs_uuid_init(&clp->cl_uuid);
> > > -	spin_lock_init(&clp->cl_localio_lock);
> > >  #endif /* CONFIG_NFS_LOCALIO */
> > >  
> > >  	clp->cl_principal = "*";
> > > diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
> > > index cab2a8819259..4c75ffc5efa2 100644
> > > --- a/fs/nfs/localio.c
> > > +++ b/fs/nfs/localio.c
> > > @@ -125,10 +125,8 @@ const struct rpc_program nfslocalio_program = {
> > >   */
> > >  static void nfs_local_enable(struct nfs_client *clp)
> > >  {
> > > -	spin_lock(&clp->cl_localio_lock);
> > > -	set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
> > >  	trace_nfs_local_enable(clp);
> > > -	spin_unlock(&clp->cl_localio_lock);
> > > +	nfs_localio_enable_client(clp);
> > >  }
> > 
> > Why do we need this function?  The one caller could call
> > nfs_localio_enable_client() directly instead.  The tracepoint could be
> > placed in that one caller.
> 
> Yeah, I saw that too but felt it useful to differentiate between calls
> that occur during NFS client initialization and those that happen as a
> side-effect of callers from other contexts (in later patch this
> manifests as nfs_localio_disable_client vs nfs_local_disable).
> 
> Hence my adding secondary tracepoints for nfs_common (see "[PATCH
> 17/19] nfs_common: add nfs_localio trace events).
> 
> But sure, we can just eliminate nfs_local_{enable,disable} and the
> corresponding tracepoints (which will have moved down to nfs_common).

I don't feel strongly about this.  If you think there is value in these
wrapper functions then I won't argue.  As a general rule I don't like
multiple interfaces that do (much) the same thing as keeping track of
them increases the mental load.

> 
> > >  /*
> > > @@ -136,12 +134,8 @@ static void nfs_local_enable(struct nfs_client *clp)
> > >   */
> > >  void nfs_local_disable(struct nfs_client *clp)
> > >  {
> > > -	spin_lock(&clp->cl_localio_lock);
> > > -	if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) {
> > > -		trace_nfs_local_disable(clp);
> > > -		nfs_localio_disable_client(&clp->cl_uuid);
> > > -	}
> > > -	spin_unlock(&clp->cl_localio_lock);
> > > +	trace_nfs_local_disable(clp);
> > > +	nfs_localio_disable_client(clp);
> > >  }
> > 
> > Ditto.  Though there are more callers so the tracepoint solution isn't
> > quite so obvious.
> 
> Right... as I just explained: that's why I preserved nfs_local_disable
> (and the tracepoint).
> 
> 
> > >  /*
> > > @@ -183,8 +177,12 @@ static bool nfs_server_uuid_is_local(struct nfs_client *clp)
> > >  	rpc_shutdown_client(rpcclient_localio);
> > >  
> > >  	/* Server is only local if it initialized required struct members */
> > > -	if (status || !clp->cl_uuid.net || !clp->cl_uuid.dom)
> > > +	rcu_read_lock();
> > > +	if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom) {
> > > +		rcu_read_unlock();
> > >  		return false;
> > > +	}
> > > +	rcu_read_unlock();
> > 
> > What value does RCU provide here?  I don't think this change is needed.
> > rcu_access_pointer does not require rcu_read_lock().
> 
> OK, not sure why I though RCU read-side needed for rcu_access_pointer()...
> 
> > >  	return true;
> > >  }
> > > diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
> > > index 904439e4bb85..cf2f47ea4f8d 100644
> > > --- a/fs/nfs_common/nfslocalio.c
> > > +++ b/fs/nfs_common/nfslocalio.c
> > > @@ -7,6 +7,9 @@
> > >  #include <linux/module.h>
> > >  #include <linux/list.h>
> > >  #include <linux/nfslocalio.h>
> > > +#include <linux/nfs3.h>
> > > +#include <linux/nfs4.h>
> > > +#include <linux/nfs_fs_sb.h>
> > 
> > I don't feel good about adding this nfs client knowledge in to nfs_common.
> 
> I hear you.. I was "OK with it".
> 
> > >  #include <net/netns/generic.h>
> > >  
> > >  MODULE_LICENSE("GPL");
> > > @@ -25,6 +28,7 @@ void nfs_uuid_init(nfs_uuid_t *nfs_uuid)
> > >  	nfs_uuid->net = NULL;
> > >  	nfs_uuid->dom = NULL;
> > >  	INIT_LIST_HEAD(&nfs_uuid->list);
> > > +	spin_lock_init(&nfs_uuid->lock);
> > >  }
> > >  EXPORT_SYMBOL_GPL(nfs_uuid_init);
> > >  
> > > @@ -94,12 +98,23 @@ void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list,
> > >  }
> > >  EXPORT_SYMBOL_GPL(nfs_uuid_is_local);
> > >  
> > > +void nfs_localio_enable_client(struct nfs_client *clp)
> > > +{
> > > +	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
> > > +
> > > +	spin_lock(&nfs_uuid->lock);
> > > +	set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
> > > +	spin_unlock(&nfs_uuid->lock);
> > > +}
> > > +EXPORT_SYMBOL_GPL(nfs_localio_enable_client);
> > 
> > And I don't feel good about nfs_local accessing nfs_client directly.
> > It only uses it for NFS_CS_LOCAL_IO.  Can we ditch that flag and instead
> > so something like testing nfs_uuid.net ??
> 
> That'd probably be OK for the equivalent of NFS_CS_LOCAL_IO but the last
> patch in this series ("nfs: probe for LOCALIO when v3 client
> reconnects to server") adds NFS_CS_LOCAL_IO_CAPABLE to provide a hint
> that the client and server successfully established themselves local
> via LOCALIO protocol.  This is needed so that NFSv3 (stateless) has a
> hint that reestablishing LOCALIO needed if/when the client loses
> connectivity to the server (because it was shutdown and restarted).

I don't like NFS_CS_LOCAL_IO_CAPABLE.
A use case that I imagine (and a customer does something like this) is an
HA cluster where the NFS server can move from one node to another.  All
the node access the filesystem, most over NFS.  If a server-migration
happens (e.g.  the current server node failed) then the new server node
would suddenly become LOCALIO-capable even though it wasn't at
mount-time.  I would like it to be able to detect this and start doing
localio.

So I don't want NFS_CS_LOCAL_IO_CAPABLE.  I think tracking when the
network connection is re-established is sufficient.

Thanks,
NeilBrown
Mike Snitzer Nov. 11, 2024, 10:27 p.m. UTC | #4
On Tue, Nov 12, 2024 at 07:35:04AM +1100, NeilBrown wrote:
> On Tue, 12 Nov 2024, Mike Snitzer wrote:
> > On Mon, Nov 11, 2024 at 12:55:24PM +1100, NeilBrown wrote:
> > > On Sat, 09 Nov 2024, Mike Snitzer wrote:
> > > > Remove cl_localio_lock from 'struct nfs_client' in favor of adding a
> > > > lock to the nfs_uuid_t struct (which is embedded in each nfs_client).
> > > > 
> > > > Push nfs_local_{enable,disable} implementation down to nfs_common.
> > > > Those methods now call nfs_localio_{enable,disable}_client.
> > > > 
> > > > This allows implementing nfs_localio_invalidate_clients in terms of
> > > > nfs_localio_disable_client.
> > > > 
> > > > Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> > > > ---
> > > >  fs/nfs/client.c            |  1 -
> > > >  fs/nfs/localio.c           | 18 ++++++------
> > > >  fs/nfs_common/nfslocalio.c | 57 ++++++++++++++++++++++++++------------
> > > >  include/linux/nfs_fs_sb.h  |  1 -
> > > >  include/linux/nfslocalio.h |  8 +++++-
> > > >  5 files changed, 55 insertions(+), 30 deletions(-)
> > > > 
> > > > diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> > > > index 03ecc7765615..124232054807 100644
> > > > --- a/fs/nfs/client.c
> > > > +++ b/fs/nfs/client.c
> > > > @@ -182,7 +182,6 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
> > > >  	seqlock_init(&clp->cl_boot_lock);
> > > >  	ktime_get_real_ts64(&clp->cl_nfssvc_boot);
> > > >  	nfs_uuid_init(&clp->cl_uuid);
> > > > -	spin_lock_init(&clp->cl_localio_lock);
> > > >  #endif /* CONFIG_NFS_LOCALIO */
> > > >  
> > > >  	clp->cl_principal = "*";
> > > > diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
> > > > index cab2a8819259..4c75ffc5efa2 100644
> > > > --- a/fs/nfs/localio.c
> > > > +++ b/fs/nfs/localio.c
> > > > @@ -125,10 +125,8 @@ const struct rpc_program nfslocalio_program = {
> > > >   */
> > > >  static void nfs_local_enable(struct nfs_client *clp)
> > > >  {
> > > > -	spin_lock(&clp->cl_localio_lock);
> > > > -	set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
> > > >  	trace_nfs_local_enable(clp);
> > > > -	spin_unlock(&clp->cl_localio_lock);
> > > > +	nfs_localio_enable_client(clp);
> > > >  }
> > > 
> > > Why do we need this function?  The one caller could call
> > > nfs_localio_enable_client() directly instead.  The tracepoint could be
> > > placed in that one caller.
> > 
> > Yeah, I saw that too but felt it useful to differentiate between calls
> > that occur during NFS client initialization and those that happen as a
> > side-effect of callers from other contexts (in later patch this
> > manifests as nfs_localio_disable_client vs nfs_local_disable).
> > 
> > Hence my adding secondary tracepoints for nfs_common (see "[PATCH
> > 17/19] nfs_common: add nfs_localio trace events).
> > 
> > But sure, we can just eliminate nfs_local_{enable,disable} and the
> > corresponding tracepoints (which will have moved down to nfs_common).
> 
> I don't feel strongly about this.  If you think these is value in these
> wrapper functions then I won't argue.  As a general rule I don't like
> multiple interfaces that do (much) the same thing as keeping track of
> them increases the mental load.
> 
> > 
> > > >  /*
> > > > @@ -136,12 +134,8 @@ static void nfs_local_enable(struct nfs_client *clp)
> > > >   */
> > > >  void nfs_local_disable(struct nfs_client *clp)
> > > >  {
> > > > -	spin_lock(&clp->cl_localio_lock);
> > > > -	if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) {
> > > > -		trace_nfs_local_disable(clp);
> > > > -		nfs_localio_disable_client(&clp->cl_uuid);
> > > > -	}
> > > > -	spin_unlock(&clp->cl_localio_lock);
> > > > +	trace_nfs_local_disable(clp);
> > > > +	nfs_localio_disable_client(clp);
> > > >  }
> > > 
> > > Ditto.  Though there are more callers so the tracepoint solution isn't
> > > quite so obvious.
> > 
> > Right... as I just explained: that's why I preserved nfs_local_disable
> > (and the tracepoint).
> > 
> > 
> > > >  /*
> > > > @@ -183,8 +177,12 @@ static bool nfs_server_uuid_is_local(struct nfs_client *clp)
> > > >  	rpc_shutdown_client(rpcclient_localio);
> > > >  
> > > >  	/* Server is only local if it initialized required struct members */
> > > > -	if (status || !clp->cl_uuid.net || !clp->cl_uuid.dom)
> > > > +	rcu_read_lock();
> > > > +	if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom) {
> > > > +		rcu_read_unlock();
> > > >  		return false;
> > > > +	}
> > > > +	rcu_read_unlock();
> > > 
> > > What value does RCU provide here?  I don't think this change is needed.
> > > rcu_access_pointer does not require rcu_read_lock().
> > 
> > OK, not sure why I though RCU read-side needed for rcu_access_pointer()...
> > 
> > > >  	return true;
> > > >  }
> > > > diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
> > > > index 904439e4bb85..cf2f47ea4f8d 100644
> > > > --- a/fs/nfs_common/nfslocalio.c
> > > > +++ b/fs/nfs_common/nfslocalio.c
> > > > @@ -7,6 +7,9 @@
> > > >  #include <linux/module.h>
> > > >  #include <linux/list.h>
> > > >  #include <linux/nfslocalio.h>
> > > > +#include <linux/nfs3.h>
> > > > +#include <linux/nfs4.h>
> > > > +#include <linux/nfs_fs_sb.h>
> > > 
> > > I don't feel good about adding this nfs client knowledge in to nfs_common.
> > 
> > I hear you.. I was "OK with it".
> > 
> > > >  #include <net/netns/generic.h>
> > > >  
> > > >  MODULE_LICENSE("GPL");
> > > > @@ -25,6 +28,7 @@ void nfs_uuid_init(nfs_uuid_t *nfs_uuid)
> > > >  	nfs_uuid->net = NULL;
> > > >  	nfs_uuid->dom = NULL;
> > > >  	INIT_LIST_HEAD(&nfs_uuid->list);
> > > > +	spin_lock_init(&nfs_uuid->lock);
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(nfs_uuid_init);
> > > >  
> > > > @@ -94,12 +98,23 @@ void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list,
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(nfs_uuid_is_local);
> > > >  
> > > > +void nfs_localio_enable_client(struct nfs_client *clp)
> > > > +{
> > > > +	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
> > > > +
> > > > +	spin_lock(&nfs_uuid->lock);
> > > > +	set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
> > > > +	spin_unlock(&nfs_uuid->lock);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(nfs_localio_enable_client);
> > > 
> > > And I don't feel good about nfs_local accessing nfs_client directly.
> > > It only uses it for NFS_CS_LOCAL_IO.  Can we ditch that flag and instead
> > > so something like testing nfs_uuid.net ??
> > 
> > That'd probably be OK for the equivalent of NFS_CS_LOCAL_IO but the last
> > patch in this series ("nfs: probe for LOCALIO when v3 client
> > reconnects to server") adds NFS_CS_LOCAL_IO_CAPABLE to provide a hint
> > that the client and server successfully established themselves local
> > via LOCALIO protocol.  This is needed so that NFSv3 (stateless) has a
> > hint that reestablishing LOCALIO needed if/when the client loses
> > connectivity to the server (because it was shutdown and restarted).
> 
> I don't like NFS_CS_LOCAL_IO_CAPABLE.
> A use case that I imagine (and a customer does something like this) is an
> HA cluster where the NFS server can move from one node to another.  All
> the node access the filesystem, most over NFS.  If a server-migration
> happens (e.g.  the current server node failed) then the new server node
> would suddenly become LOCALIO-capable even though it wasn't at
> mount-time.  I would like it to be able to detect this and start doing
> localio.

Server migration while retaining the client being local to the new
server?  So client migrates too?

If the client migrates then it will negotiate with server using
LOCALIO protocol.

Anyway, this HA hypothetical feels contrived.  It is fine that you
dislike NFS_CS_LOCAL_IO_CAPABLE but I don't understand what you'd like
as an alternative.  Or why the simplicity of my approach is lacking.

> So I don't want NFS_CS_LOCAL_IO_CAPABLE.  I think tracking when the
> network connection is re-established is sufficient.

Eh, that type of tracking doesn't really buy me anything if I've lost
context (that LOCALIO was previously established and should be
re-established).

NFS v3 is stateless, hence my hooking off read and write paths to
trigger nfs_local_probe_async().  Unlike NFS v4, with its grace, more
care is needed to avoid needless calls to nfs_local_probe_async().

Your previous email about just tracking network connection change was
an optimization for avoiding repeat (pointless) probes.  We still
need to know to do the probe to begin with.  Are you saying you want
to backfill the equivalent of grace (or pseudo-grace) to NFSv3?

My approach works.  I'm not following why what you are saying will be better.

Thanks,
Mike
NeilBrown Nov. 11, 2024, 11:23 p.m. UTC | #5
On Tue, 12 Nov 2024, Mike Snitzer wrote:
> On Tue, Nov 12, 2024 at 07:35:04AM +1100, NeilBrown wrote:
> > 
> > I don't like NFS_CS_LOCAL_IO_CAPABLE.
> > A use case that I imagine (and a customer does something like this) is an
> > HA cluster where the NFS server can move from one node to another.  All
> > the node access the filesystem, most over NFS.  If a server-migration
> > happens (e.g.  the current server node failed) then the new server node
> > would suddenly become LOCALIO-capable even though it wasn't at
> > mount-time.  I would like it to be able to detect this and start doing
> > localio.
> 
> Server migration while retaining the client being local to the new
> server?  So client migrates too?

No.  Client doesn't migrate.  Server migrates and appears on the same
host as the client.  The client can suddenly get better performance.  It
should benefit from that.

> 
> If the client migrates then it will negotiate with server using
> LOCALIO protocol.
> 
> Anyway, this HA hypothetical feels contrived.  It is fine that you
> dislike NFS_CS_LOCAL_IO_CAPABLE but I don't understand what you'd like
> as an alternative.  Or why the simplicity in my approach lacking.

We have customers with exactly this HA config.  This is why I put work
into making sure loop-back NFS (client and server on same node) works
cleanly without memory allocation deadlocks.
  https://lwn.net/Articles/595652/
Getting localio in that config would be even better.

Your approach assumes that if LOCALIO isn't detected at mount time, it
will never be available.  I think that is a flawed assumption.

> 
> > So I don't want NFS_CS_LOCAL_IO_CAPABLE.  I think tracking when the
> > network connection is re-established is sufficient.
> 
> Eh, that type of tracking doesn't really buy me anything if I've lost
> context (that LOCALIO was previously established and should be
> re-established).
> 
> NFS v3 is stateless, hence my hooking off read and write paths to
> trigger nfs_local_probe_async().  Unlike NFS v4, with its grace, more
> care is needed to avoid needless calls to nfs_local_probe_async().

I think it makes perfect sense to trigger the probe on a successful
read/write with some rate limiting to avoid sending a LOCALIO probe on
EVERY read/write.  Your rate-limiting for NFSv3 is:
   - never probe if the mount-time probe was not successful
   - otherwise probe once every 256 IO requests.

I think the first is too restrictive, and the second is unnecessarily
frequent.
I propose:
   - probe once each time the client reconnects with the server

This will result in many fewer probes in practice, but any successful
probe will happen at nearly the earliest possible moment.

> 
> Your previous email about just tracking network connection change was
> an optimization for avoiding repeat (pointless) probes.  We still
> need to know to do the probe to begin with.  Are you saying you want
> to backfill the equivalent of grace (or pseudo-grace) to NFSv3?

You don't "know to do the probe" at mount time.  You simply always do
it.  Similarly whenever localio isn't active it is always appropriate to
probe - with rate limiting.

And NFSv3 already has a grace period - in the NLM/STAT protocols.  We
could use STAT to detect when the server has restarted and so it is worth
probing again.  But STAT is not as reliable as we might like and it
would be more complexity with no real gain.

I would be happy to use exactly the same mechanism for both v3 and v4:
send a probe after IO on a new connection.  But your solution for v4 is
simple and elegant so I'm not at all against it.

> 
> My approach works.  Not following what you are saying will be better.

 - server-migration can benefit from localio on the new host
 - many fewer probes
 - probes are much more timely.

NeilBrown


> 
> Thanks,
> Mike
>
Mike Snitzer Nov. 12, 2024, 12:16 a.m. UTC | #6
On Tue, Nov 12, 2024 at 10:23:19AM +1100, NeilBrown wrote:
> On Tue, 12 Nov 2024, Mike Snitzer wrote:
> > On Tue, Nov 12, 2024 at 07:35:04AM +1100, NeilBrown wrote:
> > > 
> > > I don't like NFS_CS_LOCAL_IO_CAPABLE.
> > > A use case that I imagine (and a customer does something like this) is an
> > > HA cluster where the NFS server can move from one node to another.  All
> > > the node access the filesystem, most over NFS.  If a server-migration
> > > happens (e.g.  the current server node failed) then the new server node
> > > would suddenly become LOCALIO-capable even though it wasn't at
> > > mount-time.  I would like it to be able to detect this and start doing
> > > localio.
> > 
> > Server migration while retaining the client being local to the new
> > server?  So client migrates too?
> 
> No.  Client doesn't migrate.  Server migrates and appears on the same
> host as the client.  The client can suddenly get better performance.  It
> should benefit from that.
> 
> > 
> > If the client migrates then it will negotiate with server using
> > LOCALIO protocol.
> > 
> > Anyway, this HA hypothetical feels contrived.  It is fine that you
> > dislike NFS_CS_LOCAL_IO_CAPABLE but I don't understand what you'd like
> > as an alternative.  Or why the simplicity in my approach lacking.
> 
> We have customers with exactly this HA config.  This is why I put work
> into make sure loop-back NFS (client and server on same node) works
> cleanly without memory allocation deadlocks.
>   https://lwn.net/Articles/595652/
> Getting localio in that config would be even better.
> 
> Your approach assumes that if LOCALIO isn't detected at mount time, it
> will never been available.  I think that is a flawed assumption.

That's fair, I agree your HA scenario is valid.  It was terse as
initially presented but I understand now, thanks.

> > > So I don't want NFS_CS_LOCAL_IO_CAPABLE.  I think tracking when the
> > > network connection is re-established is sufficient.
> > 
> > Eh, that type of tracking doesn't really buy me anything if I've lost
> > context (that LOCALIO was previously established and should be
> > re-established).
> > 
> > NFS v3 is stateless, hence my hooking off read and write paths to
> > trigger nfs_local_probe_async().  Unlike NFS v4, with its grace, more
> > care is needed to avoid needless calls to nfs_local_probe_async().
> 
> I think it makes perfect sense to trigger the probe on a successful
> read/write with some rate limiting to avoid sending a LOCALIO probe on
> EVERY read/write.  Your rate-limiting for NFSv3 is:
>    - never probe if the mount-time probe was not successful
>    - otherwise probe once every 256 IO requests.
> 
> I think the first is too restrictive, and the second is unnecessarily
> frequent.
> I propose:
>    - probe once each time the client reconnects with the server
> 
> This will result in many fewer probes in practice, but any successful
> probe will happen at nearly the earliest possible moment.

I'm all for what you're proposing (it's what I wanted from the start).
In practice I just don't quite grok the client reconnect awareness
implementation you're saying is at our fingertips.

> > Your previous email about just tracking network connection change was
> > an optimization for avoiding repeat (pointless) probes.  We still
> > need to know to do the probe to begin with.  Are you saying you want
> > to backfill the equivalent of grace (or pseudo-grace) to NFSv3?
> 
> You don't "know to do the probe" at mount time.  You simply always do
> it.  Similarly whenever localio isn't active it is always appropriate to
> probe - with rate limiting.
> 
> And NFSv3 already has a grace period - in the NLM/STAT protocols.  We
> could use STAT to detect when the server has restarted and so it is worth
> probing again.  But STAT is not as reliable as we might like and it
> would be more complexity with no real gain.

If you have a specific idea for the mechanism we need to create to
detect when the v3 client reconnects to the server, please let me know.
Reusing or augmenting an existing thing is fine by me.
 
> I would be happy to use exactly the same mechanism for both v3 and v4:
> send a probe after IO on a new connection.  But your solution for v4 is
> simple and elegant so I'm not at all against it.
> 
> > 
> > My approach works.  Not following what you are saying will be better.
> 
>  - server-migration can benefit from localio on the new host
>  - many fewer probes
>  - probes are much more timely.

Ha, you misunderstood me: I know the benefits... what eludes me is the
construction of the point to probe (reliable v3 client reconnect
awareness). ;)

Mike
NeilBrown Nov. 12, 2024, 12:49 a.m. UTC | #7
On Tue, 12 Nov 2024, Mike Snitzer wrote:
> On Tue, Nov 12, 2024 at 10:23:19AM +1100, NeilBrown wrote:
> > On Tue, 12 Nov 2024, Mike Snitzer wrote:
> > > On Tue, Nov 12, 2024 at 07:35:04AM +1100, NeilBrown wrote:
> > > > 
> > > > I don't like NFS_CS_LOCAL_IO_CAPABLE.
> > > > A use case that I imagine (and a customer does something like this) is an
> > > > HA cluster where the NFS server can move from one node to another.  All
> > > > the node access the filesystem, most over NFS.  If a server-migration
> > > > happens (e.g.  the current server node failed) then the new server node
> > > > would suddenly become LOCALIO-capable even though it wasn't at
> > > > mount-time.  I would like it to be able to detect this and start doing
> > > > localio.
> > > 
> > > Server migration while retaining the client being local to the new
> > > server?  So client migrates too?
> > 
> > No.  Client doesn't migrate.  Server migrates and appears on the same
> > host as the client.  The client can suddenly get better performance.  It
> > should benefit from that.
> > 
> > > 
> > > If the client migrates then it will negotiate with server using
> > > LOCALIO protocol.
> > > 
> > > Anyway, this HA hypothetical feels contrived.  It is fine that you
> > > dislike NFS_CS_LOCAL_IO_CAPABLE but I don't understand what you'd like
> > > as an alternative.  Or why the simplicity in my approach lacking.
> > 
> > We have customers with exactly this HA config.  This is why I put work
> > into make sure loop-back NFS (client and server on same node) works
> > cleanly without memory allocation deadlocks.
> >   https://lwn.net/Articles/595652/
> > Getting localio in that config would be even better.
> > 
> > Your approach assumes that if LOCALIO isn't detected at mount time, it
> > will never been available.  I think that is a flawed assumption.
> 
> That's fair, I agree your HA scenario is valid.  It was terse as
> initially presented but I understand now, thanks.
> 
> > > > So I don't want NFS_CS_LOCAL_IO_CAPABLE.  I think tracking when the
> > > > network connection is re-established is sufficient.
> > > 
> > > Eh, that type of tracking doesn't really buy me anything if I've lost
> > > context (that LOCALIO was previously established and should be
> > > re-established).
> > > 
> > > NFS v3 is stateless, hence my hooking off read and write paths to
> > > trigger nfs_local_probe_async().  Unlike NFS v4, with its grace, more
> > > care is needed to avoid needless calls to nfs_local_probe_async().
> > 
> > I think it makes perfect sense to trigger the probe on a successful
> > read/write with some rate limiting to avoid sending a LOCALIO probe on
> > EVERY read/write.  Your rate-limiting for NFSv3 is:
> >    - never probe if the mount-time probe was not successful
> >    - otherwise probe once every 256 IO requests.
> > 
> > I think the first is too restrictive, and the second is unnecessarily
> > frequent.
> > I propose:
> >    - probe once each time the client reconnects with the server
> > 
> > This will result in many fewer probes in practice, but any successful
> > probe will happen at nearly the earliest possible moment.
> 
> I'm all for what you're proposing (its what I wanted from the start).
> In practice I just don't quite grok the client reconnect awareness
> implementation you're saying is at our finger tips.
> 
> > > Your previous email about just tracking network connection change was
> > > an optimization for avoiding repeat (pointless) probes.  We still
> > > need to know to do the probe to begin with.  Are you saying you want
> > > to backfill the equivalent of grace (or pseudo-grace) to NFSv3?
> > 
> > You don't "know to do the probe" at mount time.  You simply always do
> > it.  Similarly whenever localio isn't active it is always appropriate to
> > probe - with rate limiting.
> > 
> > And NFSv3 already has a grace period - in the NLM/STAT protocols.  We
> > could use STAT to detect when the server has restarted and so it is worth
> > probing again.  But STAT is not as reliable as we might like and it
> > would be more complexity with no real gain.
> 
> If you have a specific idea for the mechanism we need to create to
> detect the v3 client reconnects to the server please let me know.
> Reusing or augmenting an existing thing is fine by me.

static void nfs3_local_probe(struct nfs_server *server)
{
  struct nfs_client *clp = server->nfs_client;
  nfs_uuid_t *nfs_uuid = &clp->cl_uuid;

  if (nfs_uuid->connect_cookie != clp->cl_rpcclient->cl_xprt->connect_cookie)
       nfs_local_probe_async(clp);
}

static void nfs_local_probe_async_work(struct work_struct *work)
{
  struct nfs_client *clp = container_of(work, struct nfs_client,
                              cl_local_probe_work);
  clp->cl_uuid.connect_cookie =
     clp->cl_rpcclient->cl_xprt->connect_cookie;
  nfs_local_probe(clp);
}

Or maybe assign connect_cookie (which we have to add to uuid) inside
nfs_local_probe(). 

Though you need rcu_dereference() to access cl_xprt and
rcu_read_lock() protection around that.
(cl_xprt can change when the NFS client follows a "location" reported by
 the server to handle migration or mirroring.  Conceivably we should
 check for either cl_xprt changing or cl_xprt->connect_cookie changing,
 but if that were an issue it would be easier to initialise
 ->connect_cookie to a random number)
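
E.g. a sketch, assuming a connect_cookie field is added to nfs_uuid_t as above:

static bool nfs3_need_local_probe(struct nfs_client *clp)
{
  struct rpc_xprt *xprt;
  bool changed;

  rcu_read_lock();
  xprt = rcu_dereference(clp->cl_rpcclient->cl_xprt);
  changed = clp->cl_uuid.connect_cookie != xprt->connect_cookie;
  rcu_read_unlock();
  return changed;
}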
 
Note that you don't need local_probe_mutex.  A given work_struct
(cl_local_probe_work) can only be running once.  If you try to
queue_work() it while it is already pending, queue_work() will do nothing.

You'll want to only INIT_WORK() once - not on every
nfs_local_probe_async() call.
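
i.e. roughly (workqueue choice and field names are only illustrative):

  /* once, when the nfs_client is set up */
  INIT_WORK(&clp->cl_local_probe_work, nfs_local_probe_async_work);

  /* nfs_local_probe_async(): queue_work() is a no-op if the work is
   * already pending, so no local_probe_mutex is needed */
  queue_work(nfsiod_workqueue, &clp->cl_local_probe_work);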

NeilBrown
Chuck Lever Nov. 12, 2024, 2:36 p.m. UTC | #8
On Tue, Nov 12, 2024 at 11:49:30AM +1100, NeilBrown wrote:
> On Tue, 12 Nov 2024, Mike Snitzer wrote:
> > On Tue, Nov 12, 2024 at 10:23:19AM +1100, NeilBrown wrote:
> > > On Tue, 12 Nov 2024, Mike Snitzer wrote:
> > > > On Tue, Nov 12, 2024 at 07:35:04AM +1100, NeilBrown wrote:
> > > > > 
> > > > > I don't like NFS_CS_LOCAL_IO_CAPABLE.
> > > > > A use case that I imagine (and a customer does something like this) is an
> > > > > HA cluster where the NFS server can move from one node to another.  All
> > > > > the node access the filesystem, most over NFS.  If a server-migration
> > > > > happens (e.g.  the current server node failed) then the new server node
> > > > > would suddenly become LOCALIO-capable even though it wasn't at
> > > > > mount-time.  I would like it to be able to detect this and start doing
> > > > > localio.
> > > > 
> > > > Server migration while retaining the client being local to the new
> > > > server?  So client migrates too?
> > > 
> > > No.  Client doesn't migrate.  Server migrates and appears on the same
> > > host as the client.  The client can suddenly get better performance.  It
> > > should benefit from that.
> > > 
> > > > 
> > > > If the client migrates then it will negotiate with server using
> > > > LOCALIO protocol.
> > > > 
> > > > Anyway, this HA hypothetical feels contrived.  It is fine that you
> > > > dislike NFS_CS_LOCAL_IO_CAPABLE but I don't understand what you'd like
> > > > as an alternative.  Or why the simplicity in my approach lacking.
> > > 
> > > We have customers with exactly this HA config.  This is why I put work
> > > into make sure loop-back NFS (client and server on same node) works
> > > cleanly without memory allocation deadlocks.
> > >   https://lwn.net/Articles/595652/
> > > Getting localio in that config would be even better.
> > > 
> > > Your approach assumes that if LOCALIO isn't detected at mount time, it
> > > will never been available.  I think that is a flawed assumption.
> > 
> > That's fair, I agree your HA scenario is valid.  It was terse as
> > initially presented but I understand now, thanks.
> > 
> > > > > So I don't want NFS_CS_LOCAL_IO_CAPABLE.  I think tracking when the
> > > > > network connection is re-established is sufficient.
> > > > 
> > > > Eh, that type of tracking doesn't really buy me anything if I've lost
> > > > context (that LOCALIO was previously established and should be
> > > > re-established).
> > > > 
> > > > NFS v3 is stateless, hence my hooking off read and write paths to
> > > > trigger nfs_local_probe_async().  Unlike NFS v4, with its grace, more
> > > > care is needed to avoid needless calls to nfs_local_probe_async().
> > > 
> > > I think it makes perfect sense to trigger the probe on a successful
> > > read/write with some rate limiting to avoid sending a LOCALIO probe on
> > > EVERY read/write.  Your rate-limiting for NFSv3 is:
> > >    - never probe if the mount-time probe was not successful
> > >    - otherwise probe once every 256 IO requests.
> > > 
> > > I think the first is too restrictive, and the second is unnecessarily
> > > frequent.
> > > I propose:
> > >    - probe once each time the client reconnects with the server
> > > 
> > > This will result in many fewer probes in practice, but any successful
> > > probe will happen at nearly the earliest possible moment.
> > 
> > I'm all for what you're proposing (its what I wanted from the start).
> > In practice I just don't quite grok the client reconnect awareness
> > implementation you're saying is at our finger tips.
> > 
> > > > Your previous email about just tracking network connection change was
> > > > an optimization for avoiding repeat (pointless) probes.  We still
> > > > need to know to do the probe to begin with.  Are you saying you want
> > > > to backfill the equivalent of grace (or pseudo-grace) to NFSv3?
> > > 
> > > You don't "know to do the probe" at mount time.  You simply always do
> > > it.  Similarly whenever localio isn't active it is always appropriate to
> > > probe - with rate limiting.
> > > 
> > > And NFSv3 already has a grace period - in the NLM/STAT protocols.  We
> > > could use STAT to detect when the server has restarted and so it is worth
> > > probing again.  But STAT is not as reliable as we might like and it
> > > would be more complexity with no real gain.
> > 
> > If you have a specific idea for the mechanism we need to create to
> > detect the v3 client reconnects to the server please let me know.
> > Reusing or augmenting an existing thing is fine by me.
> 
> nfs3_local_probe(struct nfs_server *server)
> {
>   struct nfs_client *clp = server->nfs_client;
>   nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
> 
>   if (nfs_uuid->connect_cookie != clp->cl_rpcclient->cl_xprt->connect_cookie)
>        nfs_local_probe_async()
> }
> 
> static void nfs_local_probe_async_work(struct work_struct *work)
> {
>   struct nfs_client *clp = container_of(work, struct nfs_client,
>                               cl_local_probe_work);
>   clp->cl_uuid.connect_cookie =
>      clp->cl_rpcclient->cl_xprt->connect_cookie;
>   nfs_local_probe(clp);
> }
> 
> Or maybe assign connect_cookie (which we have to add to uuid) inside
> nfs_local_probe(). 

The problem with per-connection checks is that a change in export
security policy could disable LOCALIO rather persistently. The only
way to recover, if checking is done only when a connection is
established, is to remount or force a disconnect.


> Though you need rcu_dereference_pointer() to access cl_xprt and
> rcu_read_lock() protection around that.
> (cl_xprt can change when the NFS client follows a "location" reported by
>  the server to handle migration or mirroring.  Conceivably we should
>  check for either cl_xprt changing or cl_xprt->connect_cookie changing,
>  but if that were an issue it would be easier to initialise
>  ->connect_cookie to a random number)
>  
> Note that you don't need local_probe_mutex.  A given work_struct
> (cl_local_probe_work) can only be running once.  If you try to
> queue_work() it while it is running, queue_work() will do nothing.
> 
> You'll want to only INIT_WORK() once - not on every
> nfs_local_probe_async() call.
> 
> NeilBrown
NeilBrown Nov. 12, 2024, 11:13 p.m. UTC | #9
On Wed, 13 Nov 2024, Chuck Lever wrote:
> On Tue, Nov 12, 2024 at 11:49:30AM +1100, NeilBrown wrote:
> > > 
> > > If you have a specific idea for the mechanism we need to create to
> > > detect the v3 client reconnects to the server please let me know.
> > > Reusing or augmenting an existing thing is fine by me.
> > 
> > nfs3_local_probe(struct nfs_server *server)
> > {
> >   struct nfs_client *clp = server->nfs_client;
> >   nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
> > 
> >   if (nfs_uuid->connect_cookie != clp->cl_rpcclient->cl_xprt->connect_cookie)
> >        nfs_local_probe_async()
> > }
> > 
> > static void nfs_local_probe_async_work(struct work_struct *work)
> > {
> >   struct nfs_client *clp = container_of(work, struct nfs_client,
> >                               cl_local_probe_work);
> >   clp->cl_uuid.connect_cookie =
> >      clp->cl_rpcclient->cl_xprt->connect_cookie;
> >   nfs_local_probe(clp);
> > }
> > 
> > Or maybe assign connect_cookie (which we have to add to uuid) inside
> > nfs_local_probe(). 
> 
> The problem with per-connection checks is that a change in export
> security policy could disable LOCALIO rather persistently. The only
> way to recover, if checking is done only when a connection is
> established, is to remount or force a disconnect.
> 
What export security policy specifically?
> Do you mean changing from sec=sys to sec=krb5i for example?  This
would (hopefully) disable localio.  Then changing the export back to
sec=sys would mean that localio would be possible again.  I wonder how
the client copes with this.  Does it work on a live mount without
remount?  If so it would certainly make sense for the current security
> setting to be cached in nfs_uuid and for a probe to be attempted
whenever that changed to sec=sys.
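> 
> A minimal sketch of that, assuming nfs_uuid_t also caches the last-seen
> flavor in a (hypothetical) sec_flavor member and that the async probe
> helper discussed earlier in this thread exists:
> 
> static void nfs_localio_sec_changed(struct nfs_client *clp,
> 				    rpc_authflavor_t flavor)
> {
> 	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
> 
> 	if (nfs_uuid->sec_flavor == flavor)
> 		return;
> 	nfs_uuid->sec_flavor = flavor;
> 
> 	/* LOCALIO only makes sense for sec=sys, so only re-probe then */
> 	if (flavor == RPC_AUTH_UNIX)
> 		nfs_local_probe_async(clp);
> }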

Thanks,
NeilBrown
Chuck Lever Nov. 13, 2024, 12:07 a.m. UTC | #10
> On Nov 12, 2024, at 6:13 PM, NeilBrown <neilb@suse.de> wrote:
> 
> On Wed, 13 Nov 2024, Chuck Lever wrote:
>> On Tue, Nov 12, 2024 at 11:49:30AM +1100, NeilBrown wrote:
>>>> 
>>>> If you have a specific idea for the mechanism we need to create to
>>>> detect the v3 client reconnects to the server please let me know.
>>>> Reusing or augmenting an existing thing is fine by me.
>>> 
>>> nfs3_local_probe(struct nfs_server *server)
>>> {
>>>  struct nfs_client *clp = server->nfs_client;
>>>  nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
>>> 
>>>  if (nfs_uuid->connect_cookie != clp->cl_rpcclient->cl_xprt->connect_cookie)
>>>       nfs_local_probe_async()
>>> }
>>> 
>>> static void nfs_local_probe_async_work(struct work_struct *work)
>>> {
>>>  struct nfs_client *clp = container_of(work, struct nfs_client,
>>>                              cl_local_probe_work);
>>>  clp->cl_uuid.connect_cookie =
>>>     clp->cl_rpcclient->cl_xprt->connect_cookie;
>>>  nfs_local_probe(clp);
>>> }
>>> 
>>> Or maybe assign connect_cookie (which we have to add to uuid) inside
>>> nfs_local_probe().
>> 
>> The problem with per-connection checks is that a change in export
>> security policy could disable LOCALIO rather persistently. The only
>> way to recover, if checking is done only when a connection is
>> established, is to remount or force a disconnect.
>> 
> What export security policy specifically?
> Do you mean changing from sec=sys to sec=krb5i for example?

Another example might be altering the IP address list on
the export. Suppose the client is accidentally blocked
by this policy, the administrator realizes it, and changes
it again to restore access.

The client does not disconnect in this case, AFAIK.


> This
> would (hopefully) disable localio.  Then changing the export back to
> sec=sys would mean that localio would be possible again.  I wonder how
> the client copes with this.  Does it work on a live mount without
> remount?  If so it would certainly make sense for the current security
> setting to be cached in nfs_uidd and for a probe to be attempted
> whenever that changed to sec=sys.
> 
> Thanks,
> NeilBrown

--
Chuck Lever
NeilBrown Nov. 13, 2024, 12:32 a.m. UTC | #11
On Wed, 13 Nov 2024, Chuck Lever III wrote:
> 
> 
> > On Nov 12, 2024, at 6:13 PM, NeilBrown <neilb@suse.de> wrote:
> > 
> > On Wed, 13 Nov 2024, Chuck Lever wrote:
> >> On Tue, Nov 12, 2024 at 11:49:30AM +1100, NeilBrown wrote:
> >>>> 
> >>>> If you have a specific idea for the mechanism we need to create to
> >>>> detect the v3 client reconnects to the server please let me know.
> >>>> Reusing or augmenting an existing thing is fine by me.
> >>> 
> >>> nfs3_local_probe(struct nfs_server *server)
> >>> {
> >>>  struct nfs_client *clp = server->nfs_client;
> >>>  nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
> >>> 
> >>>  if (nfs_uuid->connect_cookie != clp->cl_rpcclient->cl_xprt->connect_cookie)
> >>>       nfs_local_probe_async()
> >>> }
> >>> 
> >>> static void nfs_local_probe_async_work(struct work_struct *work)
> >>> {
> >>>  struct nfs_client *clp = container_of(work, struct nfs_client,
> >>>                              cl_local_probe_work);
> >>>  clp->cl_uuid.connect_cookie =
> >>>     clp->cl_rpcclient->cl_xprt->connect_cookie;
> >>>  nfs_local_probe(clp);
> >>> }
> >>> 
> >>> Or maybe assign connect_cookie (which we have to add to uuid) inside
> >>> nfs_local_probe().
> >> 
> >> The problem with per-connection checks is that a change in export
> >> security policy could disable LOCALIO rather persistently. The only
> >> way to recover, if checking is done only when a connection is
> >> established, is to remount or force a disconnect.
> >> 
> > What export security policy specifically?
> > Do you mean changing from sec=sys to sec=krb5i for example?
> 
> Another example might be altering the IP address list on
> the export. Suppose the client is accidentally blocked
> by this policy, the administrator realizes it, and changes
> it again to restore access.
> 
> The client does not disconnect in this case, AFAIK.

Yes, that is a simpler case...

How would the localio path get disabled when this happens?
I suspect ->nfsd_open_local_fh would (should?) fail.
It, or nfs_open_local_fh() which calls it, could reset
uuid->connect_cookie to an impossible value so as to force a
probe after the next successful IO.  That would be an important part of
the protocol.
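
In sketch form, again assuming the connect_cookie member discussed above
(and treating an ERR_PTR return from nfs_open_local_fh() as the failure
case purely for illustration):

static struct nfsd_file *nfs_local_open_fh_checked(nfs_uuid_t *uuid,
		struct rpc_clnt *rpc_clnt, const struct cred *cred,
		const struct nfs_fh *nfs_fh, const fmode_t fmode)
{
	struct nfsd_file *nf;

	nf = nfs_open_local_fh(uuid, rpc_clnt, cred, nfs_fh, fmode);
	if (IS_ERR(nf))
		/*
		 * The local open was refused (e.g. export policy changed).
		 * Poison the cached cookie so the per-connection check
		 * mismatches and a fresh probe is attempted after the next
		 * successful I/O.  ~0U stands in for "impossible value".
		 */
		uuid->connect_cookie = ~0U;
	return nf;
}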

Thanks,
NeilBrown
diff mbox series

Patch

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 03ecc7765615..124232054807 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -182,7 +182,6 @@  struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
 	seqlock_init(&clp->cl_boot_lock);
 	ktime_get_real_ts64(&clp->cl_nfssvc_boot);
 	nfs_uuid_init(&clp->cl_uuid);
-	spin_lock_init(&clp->cl_localio_lock);
 #endif /* CONFIG_NFS_LOCALIO */
 
 	clp->cl_principal = "*";
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index cab2a8819259..4c75ffc5efa2 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -125,10 +125,8 @@  const struct rpc_program nfslocalio_program = {
  */
 static void nfs_local_enable(struct nfs_client *clp)
 {
-	spin_lock(&clp->cl_localio_lock);
-	set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
 	trace_nfs_local_enable(clp);
-	spin_unlock(&clp->cl_localio_lock);
+	nfs_localio_enable_client(clp);
 }
 
 /*
@@ -136,12 +134,8 @@  static void nfs_local_enable(struct nfs_client *clp)
  */
 void nfs_local_disable(struct nfs_client *clp)
 {
-	spin_lock(&clp->cl_localio_lock);
-	if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) {
-		trace_nfs_local_disable(clp);
-		nfs_localio_disable_client(&clp->cl_uuid);
-	}
-	spin_unlock(&clp->cl_localio_lock);
+	trace_nfs_local_disable(clp);
+	nfs_localio_disable_client(clp);
 }
 
 /*
@@ -183,8 +177,12 @@  static bool nfs_server_uuid_is_local(struct nfs_client *clp)
 	rpc_shutdown_client(rpcclient_localio);
 
 	/* Server is only local if it initialized required struct members */
-	if (status || !clp->cl_uuid.net || !clp->cl_uuid.dom)
+	rcu_read_lock();
+	if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom) {
+		rcu_read_unlock();
 		return false;
+	}
+	rcu_read_unlock();
 
 	return true;
 }
diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
index 904439e4bb85..cf2f47ea4f8d 100644
--- a/fs/nfs_common/nfslocalio.c
+++ b/fs/nfs_common/nfslocalio.c
@@ -7,6 +7,9 @@ 
 #include <linux/module.h>
 #include <linux/list.h>
 #include <linux/nfslocalio.h>
+#include <linux/nfs3.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs_sb.h>
 #include <net/netns/generic.h>
 
 MODULE_LICENSE("GPL");
@@ -25,6 +28,7 @@  void nfs_uuid_init(nfs_uuid_t *nfs_uuid)
 	nfs_uuid->net = NULL;
 	nfs_uuid->dom = NULL;
 	INIT_LIST_HEAD(&nfs_uuid->list);
+	spin_lock_init(&nfs_uuid->lock);
 }
 EXPORT_SYMBOL_GPL(nfs_uuid_init);
 
@@ -94,12 +98,23 @@  void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list,
 }
 EXPORT_SYMBOL_GPL(nfs_uuid_is_local);
 
+void nfs_localio_enable_client(struct nfs_client *clp)
+{
+	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
+
+	spin_lock(&nfs_uuid->lock);
+	set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
+	spin_unlock(&nfs_uuid->lock);
+}
+EXPORT_SYMBOL_GPL(nfs_localio_enable_client);
+
 static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid)
 {
-	if (nfs_uuid->net) {
-		module_put(nfsd_mod);
-		nfs_uuid->net = NULL;
-	}
+	if (!nfs_uuid->net)
+		return;
+	module_put(nfsd_mod);
+	rcu_assign_pointer(nfs_uuid->net, NULL);
+
 	if (nfs_uuid->dom) {
 		auth_domain_put(nfs_uuid->dom);
 		nfs_uuid->dom = NULL;
@@ -107,27 +122,35 @@  static void nfs_uuid_put_locked(nfs_uuid_t *nfs_uuid)
 	list_del_init(&nfs_uuid->list);
 }
 
-void nfs_localio_invalidate_clients(struct list_head *list)
+void nfs_localio_disable_client(struct nfs_client *clp)
+{
+	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
+
+	spin_lock(&nfs_uuid->lock);
+	if (test_and_clear_bit(NFS_CS_LOCAL_IO, &clp->cl_flags)) {
+		spin_lock(&nfs_uuid_lock);
+		nfs_uuid_put_locked(nfs_uuid);
+		spin_unlock(&nfs_uuid_lock);
+	}
+	spin_unlock(&nfs_uuid->lock);
+}
+EXPORT_SYMBOL_GPL(nfs_localio_disable_client);
+
+void nfs_localio_invalidate_clients(struct list_head *cl_uuid_list)
 {
 	nfs_uuid_t *nfs_uuid, *tmp;
 
 	spin_lock(&nfs_uuid_lock);
-	list_for_each_entry_safe(nfs_uuid, tmp, list, list)
-		nfs_uuid_put_locked(nfs_uuid);
+	list_for_each_entry_safe(nfs_uuid, tmp, cl_uuid_list, list) {
+		struct nfs_client *clp =
+			container_of(nfs_uuid, struct nfs_client, cl_uuid);
+
+		nfs_localio_disable_client(clp);
+	}
 	spin_unlock(&nfs_uuid_lock);
 }
 EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients);
 
-void nfs_localio_disable_client(nfs_uuid_t *nfs_uuid)
-{
-	if (nfs_uuid->net) {
-		spin_lock(&nfs_uuid_lock);
-		nfs_uuid_put_locked(nfs_uuid);
-		spin_unlock(&nfs_uuid_lock);
-	}
-}
-EXPORT_SYMBOL_GPL(nfs_localio_disable_client);
-
 struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
 		   struct rpc_clnt *rpc_clnt, const struct cred *cred,
 		   const struct nfs_fh *nfs_fh, const fmode_t fmode)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b804346a9741..239d86ef166c 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -132,7 +132,6 @@  struct nfs_client {
 	struct timespec64	cl_nfssvc_boot;
 	seqlock_t		cl_boot_lock;
 	nfs_uuid_t		cl_uuid;
-	spinlock_t		cl_localio_lock;
 #endif /* CONFIG_NFS_LOCALIO */
 };
 
diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
index a05d1043f2b0..4d5583873f41 100644
--- a/include/linux/nfslocalio.h
+++ b/include/linux/nfslocalio.h
@@ -6,6 +6,7 @@ 
 #ifndef __LINUX_NFSLOCALIO_H
 #define __LINUX_NFSLOCALIO_H
 
+
 /* nfsd_file structure is purposely kept opaque to NFS client */
 struct nfsd_file;
 
@@ -19,6 +20,8 @@  struct nfsd_file;
 #include <linux/nfs.h>
 #include <net/net_namespace.h>
 
+struct nfs_client;
+
 /*
  * Useful to allow a client to negotiate if localio
  * possible with its server.
@@ -27,6 +30,8 @@  struct nfsd_file;
  */
 typedef struct {
 	uuid_t uuid;
+	/* sadly this struct is just over a cacheline, avoid bouncing */
+	spinlock_t ____cacheline_aligned lock;
 	struct list_head list;
 	struct net __rcu *net; /* nfsd's network namespace */
 	struct auth_domain *dom; /* auth_domain for localio */
@@ -38,7 +43,8 @@  void nfs_uuid_end(nfs_uuid_t *);
 void nfs_uuid_is_local(const uuid_t *, struct list_head *,
 		       struct net *, struct auth_domain *, struct module *);
 
-void nfs_localio_disable_client(nfs_uuid_t *nfs_uuid);
+void nfs_localio_enable_client(struct nfs_client *clp);
+void nfs_localio_disable_client(struct nfs_client *clp);
 void nfs_localio_invalidate_clients(struct list_head *list);
 
 /* localio needs to map filehandle -> struct nfsd_file */