diff mbox series

[for-6.13,v2,15/15] nfs: probe for LOCALIO when v3 client reconnects to server

Message ID 20241114035952.13889-16-snitzer@kernel.org (mailing list archive)
State New
Headers show
Series nfs/nfsd: fixes and improvements for LOCALIO | expand

Commit Message

Mike Snitzer Nov. 14, 2024, 3:59 a.m. UTC
Re-enabling NFSv3 LOCALIO is made more complex (than NFSv4) because v3
is stateless.  As such, the hueristic used to identify a LOCALIO probe
point is more adhoc by nature: if/when NFSv3 client IO begins to
complete again in terms of normal RPC-based NFSv3 server IO, attempt
nfs_local_probe_async().

Care is taken to throttle the frequency of nfs_local_probe_async(),
otherwise there could be a flood of repeat calls to
nfs_local_probe_async().

This approach works for most use-cases but it doesn't handle the
possibility of using HA to migrate an NFS server local to the same
host as an NFS client that is actively connected to the migrated NFS
server. NFSv3 LOCALIO won't handle this case given it relies on the
client having been flagged with NFS_CS_LOCAL_IO when the client was
created on the host (e.g. mount time).

Alternatve approaches have been discussed but there isn't clear
consensus yet.

Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
 fs/nfs/internal.h          |  5 +++++
 fs/nfs/localio.c           | 18 +++++++++++++++++-
 fs/nfs/nfs3proc.c          | 32 +++++++++++++++++++++++++++++---
 fs/nfs_common/nfslocalio.c |  1 +
 include/linux/nfslocalio.h |  3 ++-
 5 files changed, 54 insertions(+), 5 deletions(-)

Comments

NeilBrown Nov. 15, 2024, 3:55 a.m. UTC | #1
On Thu, 14 Nov 2024, Mike Snitzer wrote:
> Re-enabling NFSv3 LOCALIO is made more complex (than NFSv4) because v3
> is stateless.  As such, the hueristic used to identify a LOCALIO probe
> point is more adhoc by nature: if/when NFSv3 client IO begins to
> complete again in terms of normal RPC-based NFSv3 server IO, attempt
> nfs_local_probe_async().
> 
> Care is taken to throttle the frequency of nfs_local_probe_async(),
> otherwise there could be a flood of repeat calls to
> nfs_local_probe_async().
> 
> This approach works for most use-cases but it doesn't handle the
> possibility of using HA to migrate an NFS server local to the same
> host as an NFS client that is actively connected to the migrated NFS
> server. NFSv3 LOCALIO won't handle this case given it relies on the
> client having been flagged with NFS_CS_LOCAL_IO when the client was
> created on the host (e.g. mount time).

You could simply remove the NFS_CS_LOCAL_IO flag from the patch
completely, and always probe every 512 IO requests.  That would not be
much worse, and would be substantially better.

Then we would be well placed to remove "struct nfs_client" from
nfs_common.

NeilBrown


> 
> Alternatve approaches have been discussed but there isn't clear
> consensus yet.
> 
> Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> ---
>  fs/nfs/internal.h          |  5 +++++
>  fs/nfs/localio.c           | 18 +++++++++++++++++-
>  fs/nfs/nfs3proc.c          | 32 +++++++++++++++++++++++++++++---
>  fs/nfs_common/nfslocalio.c |  1 +
>  include/linux/nfslocalio.h |  3 ++-
>  5 files changed, 54 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index ad9c56bc977bf..14f4d871b6a30 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -470,6 +470,7 @@ extern int nfs_local_commit(struct nfsd_file *,
>  			    struct nfs_commit_data *,
>  			    const struct rpc_call_ops *, int);
>  extern bool nfs_server_is_local(const struct nfs_client *clp);
> +extern bool nfs_server_was_local(const struct nfs_client *clp);
>  
>  #else /* CONFIG_NFS_LOCALIO */
>  static inline void nfs_local_probe(struct nfs_client *clp) {}
> @@ -498,6 +499,10 @@ static inline bool nfs_server_is_local(const struct nfs_client *clp)
>  {
>  	return false;
>  }
> +static inline bool nfs_server_was_local(const struct nfs_client *clp)
> +{
> +	return false;
> +}
>  #endif /* CONFIG_NFS_LOCALIO */
>  
>  /* super.c */
> diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
> index 1eee5aac28843..d5152aa46813c 100644
> --- a/fs/nfs/localio.c
> +++ b/fs/nfs/localio.c
> @@ -65,6 +65,17 @@ bool nfs_server_is_local(const struct nfs_client *clp)
>  }
>  EXPORT_SYMBOL_GPL(nfs_server_is_local);
>  
> +static inline bool nfs_client_was_local(const struct nfs_client *clp)
> +{
> +	return !!test_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
> +}
> +
> +bool nfs_server_was_local(const struct nfs_client *clp)
> +{
> +	return nfs_client_was_local(clp) && localio_enabled;
> +}
> +EXPORT_SYMBOL_GPL(nfs_server_was_local);
> +
>  /*
>   * UUID_IS_LOCAL XDR functions
>   */
> @@ -187,8 +198,13 @@ void nfs_local_probe(struct nfs_client *clp)
>  
>  	if (!nfs_uuid_begin(&clp->cl_uuid))
>  		return;
> -	if (nfs_server_uuid_is_local(clp))
> +	if (nfs_server_uuid_is_local(clp)) {
>  		nfs_localio_enable_client(clp);
> +		/* Set hint that client and server are LOCALIO capable */
> +		spin_lock(&clp->cl_uuid.lock);
> +		set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
> +		spin_unlock(&clp->cl_uuid.lock);
> +	}
>  	nfs_uuid_end(&clp->cl_uuid);
>  }
>  EXPORT_SYMBOL_GPL(nfs_local_probe);
> diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
> index 1566163c6d85b..6ed2e4466002d 100644
> --- a/fs/nfs/nfs3proc.c
> +++ b/fs/nfs/nfs3proc.c
> @@ -844,6 +844,27 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
>  	return status;
>  }
>  
> +static void nfs3_local_probe(struct nfs_server *server)
> +{
> +#if IS_ENABLED(CONFIG_NFS_LOCALIO)
> +	struct nfs_client *clp = server->nfs_client;
> +	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
> +
> +	if (likely(!nfs_server_was_local(clp)))
> +		return;
> +	/*
> +	 * Try re-enabling LOCALIO if it was previously enabled, but
> +	 * was disabled due to server restart, and IO has successfully
> +	 * completed in terms of normal RPC.
> +	 */
> +	/* Arbitrary throttle to reduce nfs_local_probe_async() frequency */
> +	if ((nfs_uuid->local_probe_count++ & 511) == 0) {
> +		if (unlikely(!nfs_server_is_local(clp) && nfs_server_was_local(clp)))
> +			nfs_local_probe_async(clp);
> +	}
> +#endif
> +}
> +
>  static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
>  {
>  	struct inode *inode = hdr->inode;
> @@ -855,8 +876,11 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
>  	if (nfs3_async_handle_jukebox(task, inode))
>  		return -EAGAIN;
>  
> -	if (task->tk_status >= 0 && !server->read_hdrsize)
> -		cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
> +	if (task->tk_status >= 0) {
> +		if (!server->read_hdrsize)
> +			cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
> +		nfs3_local_probe(server);
> +	}
>  
>  	nfs_invalidate_atime(inode);
>  	nfs_refresh_inode(inode, &hdr->fattr);
> @@ -886,8 +910,10 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
>  
>  	if (nfs3_async_handle_jukebox(task, inode))
>  		return -EAGAIN;
> -	if (task->tk_status >= 0)
> +	if (task->tk_status >= 0) {
>  		nfs_writeback_update_inode(hdr);
> +		nfs3_local_probe(NFS_SERVER(inode));
> +	}
>  	return 0;
>  }
>  
> diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
> index 0a26c0ca99c21..41c0077ead721 100644
> --- a/fs/nfs_common/nfslocalio.c
> +++ b/fs/nfs_common/nfslocalio.c
> @@ -43,6 +43,7 @@ void nfs_uuid_init(nfs_uuid_t *nfs_uuid)
>  	INIT_LIST_HEAD(&nfs_uuid->list);
>  	INIT_LIST_HEAD(&nfs_uuid->files);
>  	spin_lock_init(&nfs_uuid->lock);
> +	nfs_uuid->local_probe_count = 0;
>  }
>  EXPORT_SYMBOL_GPL(nfs_uuid_init);
>  
> diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
> index c68a529230c14..e05f1cf3cf476 100644
> --- a/include/linux/nfslocalio.h
> +++ b/include/linux/nfslocalio.h
> @@ -27,7 +27,8 @@ struct nfs_file_localio;
>   */
>  typedef struct {
>  	uuid_t uuid;
> -	/* sadly this struct is just over a cacheline, avoid bouncing */
> +	unsigned local_probe_count;
> +	/* sadly this struct is over a cacheline, avoid bouncing */
>  	spinlock_t ____cacheline_aligned lock;
>  	struct list_head list;
>  	spinlock_t *list_lock; /* nn->local_clients_lock */
> -- 
> 2.44.0
> 
>
diff mbox series

Patch

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ad9c56bc977bf..14f4d871b6a30 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -470,6 +470,7 @@  extern int nfs_local_commit(struct nfsd_file *,
 			    struct nfs_commit_data *,
 			    const struct rpc_call_ops *, int);
 extern bool nfs_server_is_local(const struct nfs_client *clp);
+extern bool nfs_server_was_local(const struct nfs_client *clp);
 
 #else /* CONFIG_NFS_LOCALIO */
 static inline void nfs_local_probe(struct nfs_client *clp) {}
@@ -498,6 +499,10 @@  static inline bool nfs_server_is_local(const struct nfs_client *clp)
 {
 	return false;
 }
+static inline bool nfs_server_was_local(const struct nfs_client *clp)
+{
+	return false;
+}
 #endif /* CONFIG_NFS_LOCALIO */
 
 /* super.c */
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index 1eee5aac28843..d5152aa46813c 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -65,6 +65,17 @@  bool nfs_server_is_local(const struct nfs_client *clp)
 }
 EXPORT_SYMBOL_GPL(nfs_server_is_local);
 
+static inline bool nfs_client_was_local(const struct nfs_client *clp)
+{
+	return !!test_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
+}
+
+bool nfs_server_was_local(const struct nfs_client *clp)
+{
+	return nfs_client_was_local(clp) && localio_enabled;
+}
+EXPORT_SYMBOL_GPL(nfs_server_was_local);
+
 /*
  * UUID_IS_LOCAL XDR functions
  */
@@ -187,8 +198,13 @@  void nfs_local_probe(struct nfs_client *clp)
 
 	if (!nfs_uuid_begin(&clp->cl_uuid))
 		return;
-	if (nfs_server_uuid_is_local(clp))
+	if (nfs_server_uuid_is_local(clp)) {
 		nfs_localio_enable_client(clp);
+		/* Set hint that client and server are LOCALIO capable */
+		spin_lock(&clp->cl_uuid.lock);
+		set_bit(NFS_CS_LOCAL_IO, &clp->cl_flags);
+		spin_unlock(&clp->cl_uuid.lock);
+	}
 	nfs_uuid_end(&clp->cl_uuid);
 }
 EXPORT_SYMBOL_GPL(nfs_local_probe);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 1566163c6d85b..6ed2e4466002d 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -844,6 +844,27 @@  nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 	return status;
 }
 
+static void nfs3_local_probe(struct nfs_server *server)
+{
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+	struct nfs_client *clp = server->nfs_client;
+	nfs_uuid_t *nfs_uuid = &clp->cl_uuid;
+
+	if (likely(!nfs_server_was_local(clp)))
+		return;
+	/*
+	 * Try re-enabling LOCALIO if it was previously enabled, but
+	 * was disabled due to server restart, and IO has successfully
+	 * completed in terms of normal RPC.
+	 */
+	/* Arbitrary throttle to reduce nfs_local_probe_async() frequency */
+	if ((nfs_uuid->local_probe_count++ & 511) == 0) {
+		if (unlikely(!nfs_server_is_local(clp) && nfs_server_was_local(clp)))
+			nfs_local_probe_async(clp);
+	}
+#endif
+}
+
 static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
 	struct inode *inode = hdr->inode;
@@ -855,8 +876,11 @@  static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 	if (nfs3_async_handle_jukebox(task, inode))
 		return -EAGAIN;
 
-	if (task->tk_status >= 0 && !server->read_hdrsize)
-		cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+	if (task->tk_status >= 0) {
+		if (!server->read_hdrsize)
+			cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+		nfs3_local_probe(server);
+	}
 
 	nfs_invalidate_atime(inode);
 	nfs_refresh_inode(inode, &hdr->fattr);
@@ -886,8 +910,10 @@  static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 
 	if (nfs3_async_handle_jukebox(task, inode))
 		return -EAGAIN;
-	if (task->tk_status >= 0)
+	if (task->tk_status >= 0) {
 		nfs_writeback_update_inode(hdr);
+		nfs3_local_probe(NFS_SERVER(inode));
+	}
 	return 0;
 }
 
diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
index 0a26c0ca99c21..41c0077ead721 100644
--- a/fs/nfs_common/nfslocalio.c
+++ b/fs/nfs_common/nfslocalio.c
@@ -43,6 +43,7 @@  void nfs_uuid_init(nfs_uuid_t *nfs_uuid)
 	INIT_LIST_HEAD(&nfs_uuid->list);
 	INIT_LIST_HEAD(&nfs_uuid->files);
 	spin_lock_init(&nfs_uuid->lock);
+	nfs_uuid->local_probe_count = 0;
 }
 EXPORT_SYMBOL_GPL(nfs_uuid_init);
 
diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
index c68a529230c14..e05f1cf3cf476 100644
--- a/include/linux/nfslocalio.h
+++ b/include/linux/nfslocalio.h
@@ -27,7 +27,8 @@  struct nfs_file_localio;
  */
 typedef struct {
 	uuid_t uuid;
-	/* sadly this struct is just over a cacheline, avoid bouncing */
+	unsigned local_probe_count;
+	/* sadly this struct is over a cacheline, avoid bouncing */
 	spinlock_t ____cacheline_aligned lock;
 	struct list_head list;
 	spinlock_t *list_lock; /* nn->local_clients_lock */