diff mbox series

[for-6.13,02/19] nfs_common: must not hold RCU while calling nfsd_file_put_local

Message ID 20241108234002.16392-3-snitzer@kernel.org (mailing list archive)
State New
Headers show
Series nfs/nfsd: fixes and improvements for LOCALIO | expand

Commit Message

Mike Snitzer Nov. 8, 2024, 11:39 p.m. UTC
Move holding the RCU from nfs_to_nfsd_file_put_local to
nfs_to_nfsd_net_put.  It is the call to nfs_to->nfsd_serv_put that
requires the RCU anyway (the puts for nfsd_file and netns were
combined to avoid an extra indirect reference but that
micro-optimization isn't possible now).

This fixes xfstests generic/013 and it triggering:

"Voluntary context switch within RCU read-side critical section!"

[  143.545738] Call Trace:
[  143.546206]  <TASK>
[  143.546625]  ? show_regs+0x6d/0x80
[  143.547267]  ? __warn+0x91/0x140
[  143.547951]  ? rcu_note_context_switch+0x496/0x5d0
[  143.548856]  ? report_bug+0x193/0x1a0
[  143.549557]  ? handle_bug+0x63/0xa0
[  143.550214]  ? exc_invalid_op+0x1d/0x80
[  143.550938]  ? asm_exc_invalid_op+0x1f/0x30
[  143.551736]  ? rcu_note_context_switch+0x496/0x5d0
[  143.552634]  ? wakeup_preempt+0x62/0x70
[  143.553358]  __schedule+0xaa/0x1380
[  143.554025]  ? _raw_spin_unlock_irqrestore+0x12/0x40
[  143.554958]  ? try_to_wake_up+0x1fe/0x6b0
[  143.555715]  ? wake_up_process+0x19/0x20
[  143.556452]  schedule+0x2e/0x120
[  143.557066]  schedule_preempt_disabled+0x19/0x30
[  143.557933]  rwsem_down_read_slowpath+0x24d/0x4a0
[  143.558818]  ? xfs_efi_item_format+0x50/0xc0 [xfs]
[  143.559894]  down_read+0x4e/0xb0
[  143.560519]  xlog_cil_commit+0x1b2/0xbc0 [xfs]
[  143.561460]  ? _raw_spin_unlock+0x12/0x30
[  143.562212]  ? xfs_inode_item_precommit+0xc7/0x220 [xfs]
[  143.563309]  ? xfs_trans_run_precommits+0x69/0xd0 [xfs]
[  143.564394]  __xfs_trans_commit+0xb5/0x330 [xfs]
[  143.565367]  xfs_trans_roll+0x48/0xc0 [xfs]
[  143.566262]  xfs_defer_trans_roll+0x57/0x100 [xfs]
[  143.567278]  xfs_defer_finish_noroll+0x27a/0x490 [xfs]
[  143.568342]  xfs_defer_finish+0x1a/0x80 [xfs]
[  143.569267]  xfs_bunmapi_range+0x4d/0xb0 [xfs]
[  143.570208]  xfs_itruncate_extents_flags+0x13d/0x230 [xfs]
[  143.571353]  xfs_free_eofblocks+0x12e/0x190 [xfs]
[  143.572359]  xfs_file_release+0x12d/0x140 [xfs]
[  143.573324]  __fput+0xe8/0x2d0
[  143.573922]  __fput_sync+0x1d/0x30
[  143.574574]  nfsd_filp_close+0x33/0x60 [nfsd]
[  143.575430]  nfsd_file_free+0x96/0x150 [nfsd]
[  143.576274]  nfsd_file_put+0xf7/0x1a0 [nfsd]
[  143.577104]  nfsd_file_put_local+0x18/0x30 [nfsd]
[  143.578070]  nfs_close_local_fh+0x101/0x110 [nfs_localio]
[  143.579079]  __put_nfs_open_context+0xc9/0x180 [nfs]
[  143.580031]  nfs_file_clear_open_context+0x4a/0x60 [nfs]
[  143.581038]  nfs_file_release+0x3e/0x60 [nfs]
[  143.581879]  __fput+0xe8/0x2d0
[  143.582464]  __fput_sync+0x1d/0x30
[  143.583108]  __x64_sys_close+0x41/0x80
[  143.583823]  x64_sys_call+0x189a/0x20d0
[  143.584552]  do_syscall_64+0x64/0x170
[  143.585240]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[  143.586185] RIP: 0033:0x7f3c5153efd7

Signed-off-by: Mike Snitzer <snitzer@kernel.org>
---
 fs/nfs_common/nfslocalio.c |  8 +++-----
 fs/nfsd/filecache.c        | 14 +++++++-------
 fs/nfsd/filecache.h        |  2 +-
 include/linux/nfslocalio.h | 18 +++++++++++++++---
 4 files changed, 26 insertions(+), 16 deletions(-)

Comments

NeilBrown Nov. 11, 2024, 1:01 a.m. UTC | #1
On Sat, 09 Nov 2024, Mike Snitzer wrote:
> Move holding the RCU from nfs_to_nfsd_file_put_local to
> nfs_to_nfsd_net_put.  It is the call to nfs_to->nfsd_serv_put that
> requires the RCU anyway (the puts for nfsd_file and netns were
> combined to avoid an extra indirect reference but that
> micro-optimization isn't possible now).
> 
> This fixes xfstests generic/013 and it triggering:
> 
> "Voluntary context switch within RCU read-side critical section!"

I'm surprised it got that far.  For me, the might_sleep() at the top of
nfsd_file_put() always warns that there is a problem here.

The fix is good though.

Reviewed-by: NeilBrown <neilb@suse.de>

NeilBrown


> 
> [  143.545738] Call Trace:
> [  143.546206]  <TASK>
> [  143.546625]  ? show_regs+0x6d/0x80
> [  143.547267]  ? __warn+0x91/0x140
> [  143.547951]  ? rcu_note_context_switch+0x496/0x5d0
> [  143.548856]  ? report_bug+0x193/0x1a0
> [  143.549557]  ? handle_bug+0x63/0xa0
> [  143.550214]  ? exc_invalid_op+0x1d/0x80
> [  143.550938]  ? asm_exc_invalid_op+0x1f/0x30
> [  143.551736]  ? rcu_note_context_switch+0x496/0x5d0
> [  143.552634]  ? wakeup_preempt+0x62/0x70
> [  143.553358]  __schedule+0xaa/0x1380
> [  143.554025]  ? _raw_spin_unlock_irqrestore+0x12/0x40
> [  143.554958]  ? try_to_wake_up+0x1fe/0x6b0
> [  143.555715]  ? wake_up_process+0x19/0x20
> [  143.556452]  schedule+0x2e/0x120
> [  143.557066]  schedule_preempt_disabled+0x19/0x30
> [  143.557933]  rwsem_down_read_slowpath+0x24d/0x4a0
> [  143.558818]  ? xfs_efi_item_format+0x50/0xc0 [xfs]
> [  143.559894]  down_read+0x4e/0xb0
> [  143.560519]  xlog_cil_commit+0x1b2/0xbc0 [xfs]
> [  143.561460]  ? _raw_spin_unlock+0x12/0x30
> [  143.562212]  ? xfs_inode_item_precommit+0xc7/0x220 [xfs]
> [  143.563309]  ? xfs_trans_run_precommits+0x69/0xd0 [xfs]
> [  143.564394]  __xfs_trans_commit+0xb5/0x330 [xfs]
> [  143.565367]  xfs_trans_roll+0x48/0xc0 [xfs]
> [  143.566262]  xfs_defer_trans_roll+0x57/0x100 [xfs]
> [  143.567278]  xfs_defer_finish_noroll+0x27a/0x490 [xfs]
> [  143.568342]  xfs_defer_finish+0x1a/0x80 [xfs]
> [  143.569267]  xfs_bunmapi_range+0x4d/0xb0 [xfs]
> [  143.570208]  xfs_itruncate_extents_flags+0x13d/0x230 [xfs]
> [  143.571353]  xfs_free_eofblocks+0x12e/0x190 [xfs]
> [  143.572359]  xfs_file_release+0x12d/0x140 [xfs]
> [  143.573324]  __fput+0xe8/0x2d0
> [  143.573922]  __fput_sync+0x1d/0x30
> [  143.574574]  nfsd_filp_close+0x33/0x60 [nfsd]
> [  143.575430]  nfsd_file_free+0x96/0x150 [nfsd]
> [  143.576274]  nfsd_file_put+0xf7/0x1a0 [nfsd]
> [  143.577104]  nfsd_file_put_local+0x18/0x30 [nfsd]
> [  143.578070]  nfs_close_local_fh+0x101/0x110 [nfs_localio]
> [  143.579079]  __put_nfs_open_context+0xc9/0x180 [nfs]
> [  143.580031]  nfs_file_clear_open_context+0x4a/0x60 [nfs]
> [  143.581038]  nfs_file_release+0x3e/0x60 [nfs]
> [  143.581879]  __fput+0xe8/0x2d0
> [  143.582464]  __fput_sync+0x1d/0x30
> [  143.583108]  __x64_sys_close+0x41/0x80
> [  143.583823]  x64_sys_call+0x189a/0x20d0
> [  143.584552]  do_syscall_64+0x64/0x170
> [  143.585240]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [  143.586185] RIP: 0033:0x7f3c5153efd7
> 
> Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> ---
>  fs/nfs_common/nfslocalio.c |  8 +++-----
>  fs/nfsd/filecache.c        | 14 +++++++-------
>  fs/nfsd/filecache.h        |  2 +-
>  include/linux/nfslocalio.h | 18 +++++++++++++++---
>  4 files changed, 26 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
> index 09404d142d1a..a74ec08f6c96 100644
> --- a/fs/nfs_common/nfslocalio.c
> +++ b/fs/nfs_common/nfslocalio.c
> @@ -155,11 +155,9 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
>  	/* We have an implied reference to net thanks to nfsd_serv_try_get */
>  	localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt,
>  					     cred, nfs_fh, fmode);
> -	if (IS_ERR(localio)) {
> -		rcu_read_lock();
> -		nfs_to->nfsd_serv_put(net);
> -		rcu_read_unlock();
> -	}
> +	if (IS_ERR(localio))
> +		nfs_to_nfsd_net_put(net);
> +
>  	return localio;
>  }
>  EXPORT_SYMBOL_GPL(nfs_open_local_fh);
> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> index c16671135d17..9a62b4da89bb 100644
> --- a/fs/nfsd/filecache.c
> +++ b/fs/nfsd/filecache.c
> @@ -391,19 +391,19 @@ nfsd_file_put(struct nfsd_file *nf)
>  }
>  
>  /**
> - * nfsd_file_put_local - put the reference to nfsd_file and local nfsd_serv
> - * @nf: nfsd_file of which to put the references
> + * nfsd_file_put_local - put nfsd_file reference and arm nfsd_serv_put in caller
> + * @nf: nfsd_file of which to put the reference
>   *
> - * First put the reference of the nfsd_file and then put the
> - * reference to the associated nn->nfsd_serv.
> + * First save the associated net to return to caller, then put
> + * the reference of the nfsd_file.
>   */
> -void
> -nfsd_file_put_local(struct nfsd_file *nf) __must_hold(rcu)
> +struct net *
> +nfsd_file_put_local(struct nfsd_file *nf)
>  {
>  	struct net *net = nf->nf_net;
>  
>  	nfsd_file_put(nf);
> -	nfsd_serv_put(net);
> +	return net;
>  }
>  
>  /**
> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
> index cadf3c2689c4..d5db6b34ba30 100644
> --- a/fs/nfsd/filecache.h
> +++ b/fs/nfsd/filecache.h
> @@ -55,7 +55,7 @@ void nfsd_file_cache_shutdown(void);
>  int nfsd_file_cache_start_net(struct net *net);
>  void nfsd_file_cache_shutdown_net(struct net *net);
>  void nfsd_file_put(struct nfsd_file *nf);
> -void nfsd_file_put_local(struct nfsd_file *nf);
> +struct net *nfsd_file_put_local(struct nfsd_file *nf);
>  struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
>  struct file *nfsd_file_file(struct nfsd_file *nf);
>  void nfsd_file_close_inode_sync(struct inode *inode);
> diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
> index 3982fea79919..9202f4b24343 100644
> --- a/include/linux/nfslocalio.h
> +++ b/include/linux/nfslocalio.h
> @@ -55,7 +55,7 @@ struct nfsd_localio_operations {
>  						const struct cred *,
>  						const struct nfs_fh *,
>  						const fmode_t);
> -	void (*nfsd_file_put_local)(struct nfsd_file *);
> +	struct net *(*nfsd_file_put_local)(struct nfsd_file *);
>  	struct file *(*nfsd_file_file)(struct nfsd_file *);
>  } ____cacheline_aligned;
>  
> @@ -66,7 +66,7 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *,
>  		   struct rpc_clnt *, const struct cred *,
>  		   const struct nfs_fh *, const fmode_t);
>  
> -static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
> +static inline void nfs_to_nfsd_net_put(struct net *net)
>  {
>  	/*
>  	 * Once reference to nfsd_serv is dropped, NFSD could be
> @@ -74,10 +74,22 @@ static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
>  	 * by always taking RCU.
>  	 */
>  	rcu_read_lock();
> -	nfs_to->nfsd_file_put_local(localio);
> +	nfs_to->nfsd_serv_put(net);
>  	rcu_read_unlock();
>  }
>  
> +static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
> +{
> +	/*
> +	 * Must not hold RCU otherwise nfsd_file_put() can easily trigger:
> +	 * "Voluntary context switch within RCU read-side critical section!"
> +	 * by scheduling deep in underlying filesystem (e.g. XFS).
> +	 */
> +	struct net *net = nfs_to->nfsd_file_put_local(localio);
> +
> +	nfs_to_nfsd_net_put(net);
> +}
> +
>  #else   /* CONFIG_NFS_LOCALIO */
>  static inline void nfsd_localio_ops_init(void)
>  {
> -- 
> 2.44.0
> 
>
Jeff Layton Nov. 13, 2024, 2:58 p.m. UTC | #2
On Fri, 2024-11-08 at 18:39 -0500, Mike Snitzer wrote:
> Move holding the RCU from nfs_to_nfsd_file_put_local to
> nfs_to_nfsd_net_put.  It is the call to nfs_to->nfsd_serv_put that
> requires the RCU anyway (the puts for nfsd_file and netns were
> combined to avoid an extra indirect reference but that
> micro-optimization isn't possible now).
> 
> This fixes xfstests generic/013 and it triggering:
> 
> "Voluntary context switch within RCU read-side critical section!"
> 
> [  143.545738] Call Trace:
> [  143.546206]  <TASK>
> [  143.546625]  ? show_regs+0x6d/0x80
> [  143.547267]  ? __warn+0x91/0x140
> [  143.547951]  ? rcu_note_context_switch+0x496/0x5d0
> [  143.548856]  ? report_bug+0x193/0x1a0
> [  143.549557]  ? handle_bug+0x63/0xa0
> [  143.550214]  ? exc_invalid_op+0x1d/0x80
> [  143.550938]  ? asm_exc_invalid_op+0x1f/0x30
> [  143.551736]  ? rcu_note_context_switch+0x496/0x5d0
> [  143.552634]  ? wakeup_preempt+0x62/0x70
> [  143.553358]  __schedule+0xaa/0x1380
> [  143.554025]  ? _raw_spin_unlock_irqrestore+0x12/0x40
> [  143.554958]  ? try_to_wake_up+0x1fe/0x6b0
> [  143.555715]  ? wake_up_process+0x19/0x20
> [  143.556452]  schedule+0x2e/0x120
> [  143.557066]  schedule_preempt_disabled+0x19/0x30
> [  143.557933]  rwsem_down_read_slowpath+0x24d/0x4a0
> [  143.558818]  ? xfs_efi_item_format+0x50/0xc0 [xfs]
> [  143.559894]  down_read+0x4e/0xb0
> [  143.560519]  xlog_cil_commit+0x1b2/0xbc0 [xfs]
> [  143.561460]  ? _raw_spin_unlock+0x12/0x30
> [  143.562212]  ? xfs_inode_item_precommit+0xc7/0x220 [xfs]
> [  143.563309]  ? xfs_trans_run_precommits+0x69/0xd0 [xfs]
> [  143.564394]  __xfs_trans_commit+0xb5/0x330 [xfs]
> [  143.565367]  xfs_trans_roll+0x48/0xc0 [xfs]
> [  143.566262]  xfs_defer_trans_roll+0x57/0x100 [xfs]
> [  143.567278]  xfs_defer_finish_noroll+0x27a/0x490 [xfs]
> [  143.568342]  xfs_defer_finish+0x1a/0x80 [xfs]
> [  143.569267]  xfs_bunmapi_range+0x4d/0xb0 [xfs]
> [  143.570208]  xfs_itruncate_extents_flags+0x13d/0x230 [xfs]
> [  143.571353]  xfs_free_eofblocks+0x12e/0x190 [xfs]
> [  143.572359]  xfs_file_release+0x12d/0x140 [xfs]
> [  143.573324]  __fput+0xe8/0x2d0
> [  143.573922]  __fput_sync+0x1d/0x30
> [  143.574574]  nfsd_filp_close+0x33/0x60 [nfsd]
> [  143.575430]  nfsd_file_free+0x96/0x150 [nfsd]
> [  143.576274]  nfsd_file_put+0xf7/0x1a0 [nfsd]
> [  143.577104]  nfsd_file_put_local+0x18/0x30 [nfsd]
> [  143.578070]  nfs_close_local_fh+0x101/0x110 [nfs_localio]
> [  143.579079]  __put_nfs_open_context+0xc9/0x180 [nfs]
> [  143.580031]  nfs_file_clear_open_context+0x4a/0x60 [nfs]
> [  143.581038]  nfs_file_release+0x3e/0x60 [nfs]
> [  143.581879]  __fput+0xe8/0x2d0
> [  143.582464]  __fput_sync+0x1d/0x30
> [  143.583108]  __x64_sys_close+0x41/0x80
> [  143.583823]  x64_sys_call+0x189a/0x20d0
> [  143.584552]  do_syscall_64+0x64/0x170
> [  143.585240]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
> [  143.586185] RIP: 0033:0x7f3c5153efd7
> 
> Signed-off-by: Mike Snitzer <snitzer@kernel.org>
> ---
>  fs/nfs_common/nfslocalio.c |  8 +++-----
>  fs/nfsd/filecache.c        | 14 +++++++-------
>  fs/nfsd/filecache.h        |  2 +-
>  include/linux/nfslocalio.h | 18 +++++++++++++++---
>  4 files changed, 26 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
> index 09404d142d1a..a74ec08f6c96 100644
> --- a/fs/nfs_common/nfslocalio.c
> +++ b/fs/nfs_common/nfslocalio.c
> @@ -155,11 +155,9 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
>  	/* We have an implied reference to net thanks to nfsd_serv_try_get */
>  	localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt,
>  					     cred, nfs_fh, fmode);
> -	if (IS_ERR(localio)) {
> -		rcu_read_lock();
> -		nfs_to->nfsd_serv_put(net);
> -		rcu_read_unlock();
> -	}
> +	if (IS_ERR(localio))
> +		nfs_to_nfsd_net_put(net);
> +
>  	return localio;
>  }
>  EXPORT_SYMBOL_GPL(nfs_open_local_fh);
> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> index c16671135d17..9a62b4da89bb 100644
> --- a/fs/nfsd/filecache.c
> +++ b/fs/nfsd/filecache.c
> @@ -391,19 +391,19 @@ nfsd_file_put(struct nfsd_file *nf)
>  }
>  
>  /**
> - * nfsd_file_put_local - put the reference to nfsd_file and local nfsd_serv
> - * @nf: nfsd_file of which to put the references
> + * nfsd_file_put_local - put nfsd_file reference and arm nfsd_serv_put in caller
> + * @nf: nfsd_file of which to put the reference
>   *
> - * First put the reference of the nfsd_file and then put the
> - * reference to the associated nn->nfsd_serv.
> + * First save the associated net to return to caller, then put
> + * the reference of the nfsd_file.
>   */
> -void
> -nfsd_file_put_local(struct nfsd_file *nf) __must_hold(rcu)
> +struct net *
> +nfsd_file_put_local(struct nfsd_file *nf)
>  {
>  	struct net *net = nf->nf_net;
>  
>  	nfsd_file_put(nf);
> -	nfsd_serv_put(net);
> +	return net;
>  }
>  
>  /**
> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
> index cadf3c2689c4..d5db6b34ba30 100644
> --- a/fs/nfsd/filecache.h
> +++ b/fs/nfsd/filecache.h
> @@ -55,7 +55,7 @@ void nfsd_file_cache_shutdown(void);
>  int nfsd_file_cache_start_net(struct net *net);
>  void nfsd_file_cache_shutdown_net(struct net *net);
>  void nfsd_file_put(struct nfsd_file *nf);
> -void nfsd_file_put_local(struct nfsd_file *nf);
> +struct net *nfsd_file_put_local(struct nfsd_file *nf);
>  struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
>  struct file *nfsd_file_file(struct nfsd_file *nf);
>  void nfsd_file_close_inode_sync(struct inode *inode);
> diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
> index 3982fea79919..9202f4b24343 100644
> --- a/include/linux/nfslocalio.h
> +++ b/include/linux/nfslocalio.h
> @@ -55,7 +55,7 @@ struct nfsd_localio_operations {
>  						const struct cred *,
>  						const struct nfs_fh *,
>  						const fmode_t);
> -	void (*nfsd_file_put_local)(struct nfsd_file *);
> +	struct net *(*nfsd_file_put_local)(struct nfsd_file *);
>  	struct file *(*nfsd_file_file)(struct nfsd_file *);
>  } ____cacheline_aligned;
>  
> @@ -66,7 +66,7 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *,
>  		   struct rpc_clnt *, const struct cred *,
>  		   const struct nfs_fh *, const fmode_t);
>  
> -static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
> +static inline void nfs_to_nfsd_net_put(struct net *net)
>  {
>  	/*
>  	 * Once reference to nfsd_serv is dropped, NFSD could be
> @@ -74,10 +74,22 @@ static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
>  	 * by always taking RCU.
>  	 */
>  	rcu_read_lock();
> -	nfs_to->nfsd_file_put_local(localio);
> +	nfs_to->nfsd_serv_put(net);
>  	rcu_read_unlock();
>  }
>  
> +static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
> +{
> +	/*
> +	 * Must not hold RCU otherwise nfsd_file_put() can easily trigger:
> +	 * "Voluntary context switch within RCU read-side critical section!"
> +	 * by scheduling deep in underlying filesystem (e.g. XFS).
> +	 */
> +	struct net *net = nfs_to->nfsd_file_put_local(localio);
> +
> +	nfs_to_nfsd_net_put(net);
> +}
> +
>  #else   /* CONFIG_NFS_LOCALIO */
>  static inline void nfsd_localio_ops_init(void)
>  {

I think this probably needs to go into v6.12 (or very early into v6.13
and backported). It should also probably get:

    Fixes: 65f2a5c36635 ("nfs_common: fix race in NFS calls to nfsd_file_put_local() and nfsd_serv_put()")

You can also add:

    Reviewed-by: Jeff Layton <jlayton@kernel.org>
Mike Snitzer Nov. 13, 2024, 4:51 p.m. UTC | #3
On Wed, Nov 13, 2024 at 09:58:28AM -0500, Jeff Layton wrote:
> 
> I think this probably needs to go into v6.12 (or very early into v6.13
> and backported). It should also probably get:
> 
>     Fixes: 65f2a5c36635 ("nfs_common: fix race in NFS calls to nfsd_file_put_local() and nfsd_serv_put()")
> 
> You can also add:
> 
>     Reviewed-by: Jeff Layton <jlayton@kernel.org>

OK, I've added these.  I'm about to send out v2 of this series that
includes the other tags and suggested changes others have provided.

Thanks,
Mike
diff mbox series

Patch

diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
index 09404d142d1a..a74ec08f6c96 100644
--- a/fs/nfs_common/nfslocalio.c
+++ b/fs/nfs_common/nfslocalio.c
@@ -155,11 +155,9 @@  struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
 	/* We have an implied reference to net thanks to nfsd_serv_try_get */
 	localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt,
 					     cred, nfs_fh, fmode);
-	if (IS_ERR(localio)) {
-		rcu_read_lock();
-		nfs_to->nfsd_serv_put(net);
-		rcu_read_unlock();
-	}
+	if (IS_ERR(localio))
+		nfs_to_nfsd_net_put(net);
+
 	return localio;
 }
 EXPORT_SYMBOL_GPL(nfs_open_local_fh);
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index c16671135d17..9a62b4da89bb 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -391,19 +391,19 @@  nfsd_file_put(struct nfsd_file *nf)
 }
 
 /**
- * nfsd_file_put_local - put the reference to nfsd_file and local nfsd_serv
- * @nf: nfsd_file of which to put the references
+ * nfsd_file_put_local - put nfsd_file reference and arm nfsd_serv_put in caller
+ * @nf: nfsd_file of which to put the reference
  *
- * First put the reference of the nfsd_file and then put the
- * reference to the associated nn->nfsd_serv.
+ * First save the associated net to return to caller, then put
+ * the reference of the nfsd_file.
  */
-void
-nfsd_file_put_local(struct nfsd_file *nf) __must_hold(rcu)
+struct net *
+nfsd_file_put_local(struct nfsd_file *nf)
 {
 	struct net *net = nf->nf_net;
 
 	nfsd_file_put(nf);
-	nfsd_serv_put(net);
+	return net;
 }
 
 /**
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index cadf3c2689c4..d5db6b34ba30 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -55,7 +55,7 @@  void nfsd_file_cache_shutdown(void);
 int nfsd_file_cache_start_net(struct net *net);
 void nfsd_file_cache_shutdown_net(struct net *net);
 void nfsd_file_put(struct nfsd_file *nf);
-void nfsd_file_put_local(struct nfsd_file *nf);
+struct net *nfsd_file_put_local(struct nfsd_file *nf);
 struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
 struct file *nfsd_file_file(struct nfsd_file *nf);
 void nfsd_file_close_inode_sync(struct inode *inode);
diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
index 3982fea79919..9202f4b24343 100644
--- a/include/linux/nfslocalio.h
+++ b/include/linux/nfslocalio.h
@@ -55,7 +55,7 @@  struct nfsd_localio_operations {
 						const struct cred *,
 						const struct nfs_fh *,
 						const fmode_t);
-	void (*nfsd_file_put_local)(struct nfsd_file *);
+	struct net *(*nfsd_file_put_local)(struct nfsd_file *);
 	struct file *(*nfsd_file_file)(struct nfsd_file *);
 } ____cacheline_aligned;
 
@@ -66,7 +66,7 @@  struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *,
 		   struct rpc_clnt *, const struct cred *,
 		   const struct nfs_fh *, const fmode_t);
 
-static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
+static inline void nfs_to_nfsd_net_put(struct net *net)
 {
 	/*
 	 * Once reference to nfsd_serv is dropped, NFSD could be
@@ -74,10 +74,22 @@  static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
 	 * by always taking RCU.
 	 */
 	rcu_read_lock();
-	nfs_to->nfsd_file_put_local(localio);
+	nfs_to->nfsd_serv_put(net);
 	rcu_read_unlock();
 }
 
+static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
+{
+	/*
+	 * Must not hold RCU otherwise nfsd_file_put() can easily trigger:
+	 * "Voluntary context switch within RCU read-side critical section!"
+	 * by scheduling deep in underlying filesystem (e.g. XFS).
+	 */
+	struct net *net = nfs_to->nfsd_file_put_local(localio);
+
+	nfs_to_nfsd_net_put(net);
+}
+
 #else   /* CONFIG_NFS_LOCALIO */
 static inline void nfsd_localio_ops_init(void)
 {