
[v3] nfsd: fix nfs4_openowner leak when concurrent nfsd4_open occur

Message ID 20241105110314.2122967-1-yangerkun@huaweicloud.com (mailing list archive)
State New
Series [v3] nfsd: fix nfs4_openowner leak when concurrent nfsd4_open occur

Commit Message

yangerkun Nov. 5, 2024, 11:03 a.m. UTC
From: Yang Erkun <yangerkun@huawei.com>

A forced umount (umount -f) will attempt to kill all rpc_tasks, even though
the umount operation may ultimately fail if some files remain open.
Consequently, if a process is opening a file at the same time, it can
potentially send two OPEN rpc_tasks to the NFS server.

                   NFS CLIENT
thread1                             thread2
open("file")
...
nfs4_do_open
 _nfs4_do_open
  _nfs4_open_and_get_state
   _nfs4_proc_open
    nfs4_run_open_task
     /* rpc_task1 */
     rpc_run_task
     rpc_wait_for_completion_task

                                    umount -f
                                    nfs_umount_begin
                                     rpc_killall_tasks
                                      rpc_signal_task
     rpc_task1 been wakeup
     and return -512
 _nfs4_do_open // while loop
    ...
    nfs4_run_open_task
     /* rpc_task2 */
     rpc_run_task
     rpc_wait_for_completion_task
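
To make the effect concrete, here is a minimal userspace C sketch of the
retry behaviour shown above. It is not the real fs/nfs client code:
run_open_task() is a hypothetical stand-in for nfs4_run_open_task(), and the
do/while loop stands in for the retry in _nfs4_do_open(). It only
illustrates why a signalled OPEN (-ERESTARTSYS, i.e. -512) is followed by a
second OPEN for the same file.

/*
 * Simplified model of the client-side retry shown in the diagram above.
 * NOT the real fs/nfs code; run_open_task() is a hypothetical stand-in
 * for nfs4_run_open_task().
 */
#include <stdio.h>

#define ERESTARTSYS 512

/* First call: the rpc_task is signalled by "umount -f" and fails.
 * Second call: the retried rpc_task completes normally. */
static int run_open_task(int *signalled)
{
	if (*signalled) {
		*signalled = 0;
		return -ERESTARTSYS;
	}
	return 0;
}

int main(void)
{
	int signalled = 1;	/* rpc_killall_tasks() fired while OPEN was in flight */
	int status, opens_sent = 0;

	do {			/* models the retry loop in _nfs4_do_open() */
		opens_sent++;
		status = run_open_task(&signalled);
	} while (status == -ERESTARTSYS);

	/* two OPEN RPCs reached the server for a single open("file") */
	printf("OPEN rpc_tasks sent: %d\n", opens_sent);
	return 0;
}

Running it prints "OPEN rpc_tasks sent: 2", matching the two rpc_tasks in
the diagram.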

While processing an open request, nfsd will first attempt to find or
allocate an nfs4_openowner. If it finds an nfs4_openowner that is not
marked as NFS4_OO_CONFIRMED, this nfs4_openowner will be released. Since
two rpc_tasks from the client can attempt to open the same file
simultaneously, and because two nfsd threads can run concurrently, this
situation can lead to a large number of memory leaks. Additionally, when
we echo 0 to /proc/fs/nfsd/threads, a warning will be triggered.

                    NFS SERVER
nfsd1                  nfsd2       echo 0 > /proc/fs/nfsd/threads

nfsd4_open
 nfsd4_process_open1
  find_or_alloc_open_stateowner
   // alloc oo1, stateid1
                       nfsd4_open
                        nfsd4_process_open1
                        find_or_alloc_open_stateowner
                        // find oo1, without NFS4_OO_CONFIRMED
                         release_openowner
                          unhash_openowner_locked
                          list_del_init(&oo->oo_perclient)
                          // cannot find this oo
                          // from client, LEAK!!!
                         alloc_stateowner // alloc oo2

 nfsd4_process_open2
  init_open_stateid
  // associate oo1
  // with stateid1, stateid1 LEAK!!!
  nfs4_get_vfs_file
  // alloc nfsd_file1 and nfsd_file_mark1
  // all LEAK!!!

                         nfsd4_process_open2
                         ...

                                    write_threads
                                     ...
                                     nfsd_destroy_serv
                                      nfsd_shutdown_net
                                       nfs4_state_shutdown_net
                                        nfs4_state_destroy_net
                                         destroy_client
                                          __destroy_client
                                          // won't find oo1!!!
                                     nfsd_shutdown_generic
                                      nfsd_file_cache_shutdown
                                       kmem_cache_destroy
                                       for nfsd_file_slab
                                       and nfsd_file_mark_slab
                                       // bark since nfsd_file1
                                       // and nfsd_file_mark1
                                       // still alive

=======================================================================
BUG nfsd_file (Not tainted): Objects remaining in nfsd_file on
__kmem_cache_shutdown()
-----------------------------------------------------------------------

Slab 0xffd4000004438a80 objects=34 used=1 fp=0xff11000110e2ad28
flags=0x17ffffc0000240(workingset|head|node=0|zone=2|lastcpupid=0x1fffff)
CPU: 4 UID: 0 PID: 757 Comm: sh Not tainted 6.12.0-rc6+ #19
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
1.16.1-2.fc37 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x53/0x70
 slab_err+0xb0/0xf0
 __kmem_cache_shutdown+0x15c/0x310
 kmem_cache_destroy+0x66/0x160
 nfsd_file_cache_shutdown+0xac/0x210 [nfsd]
 nfsd_destroy_serv+0x251/0x2a0 [nfsd]
 nfsd_svc+0x125/0x1e0 [nfsd]
 write_threads+0x16a/0x2a0 [nfsd]
 nfsctl_transaction_write+0x74/0xa0 [nfsd]
 vfs_write+0x1ae/0x6d0
 ksys_write+0xc1/0x160
 do_syscall_64+0x5f/0x170
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

Disabling lock debugging due to kernel taint
Object 0xff11000110e2ac38 @offset=3128
Allocated in nfsd_file_do_acquire+0x20f/0xa30 [nfsd] age=1635 cpu=3
pid=800
 nfsd_file_do_acquire+0x20f/0xa30 [nfsd]
 nfsd_file_acquire_opened+0x5f/0x90 [nfsd]
 nfs4_get_vfs_file+0x4c9/0x570 [nfsd]
 nfsd4_process_open2+0x713/0x1070 [nfsd]
 nfsd4_open+0x74b/0x8b0 [nfsd]
 nfsd4_proc_compound+0x70b/0xc20 [nfsd]
 nfsd_dispatch+0x1b4/0x3a0 [nfsd]
 svc_process_common+0x5b8/0xc50 [sunrpc]
 svc_process+0x2ab/0x3b0 [sunrpc]
 svc_handle_xprt+0x681/0xa20 [sunrpc]
 nfsd+0x183/0x220 [nfsd]
 kthread+0x199/0x1e0
 ret_from_fork+0x31/0x60
 ret_from_fork_asm+0x1a/0x30
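
To make the lifetime problem concrete, here is a small self-contained C
model of the leak. The structures and helpers below (struct openowner,
alloc_and_hash(), unhash(), destroy_client()) are hypothetical
simplifications of the nfsd paths named in the diagram above, not the
actual fs/nfsd implementation; refcounting and locking are omitted.

/*
 * Simplified model of the server-side leak described above.
 * The types and helpers are hypothetical stand-ins, not fs/nfsd code.
 */
#include <stdio.h>
#include <stdlib.h>

struct openowner {
	const char *name;
	struct openowner *next;		/* models the oo_perclient linkage */
};

struct client {
	struct openowner *owners;	/* per-client openowner list */
};

static struct openowner *alloc_and_hash(struct client *clp, const char *name)
{
	struct openowner *oo = malloc(sizeof(*oo));

	oo->name = name;
	oo->next = clp->owners;		/* models hashing the owner on the client */
	clp->owners = oo;
	return oo;
}

/* models unhash_openowner_locked(): drop the owner from the client list */
static void unhash(struct client *clp, struct openowner *oo)
{
	struct openowner **p = &clp->owners;

	while (*p && *p != oo)
		p = &(*p)->next;
	if (*p)
		*p = oo->next;
}

/* models __destroy_client(): free whatever is still reachable */
static void destroy_client(struct client *clp)
{
	while (clp->owners) {
		struct openowner *oo = clp->owners;

		clp->owners = oo->next;
		printf("freeing %s\n", oo->name);
		free(oo);
	}
}

int main(void)
{
	struct client clp = { 0 };

	/* nfsd1: find_or_alloc_open_stateowner() allocates and hashes oo1 */
	struct openowner *oo1 = alloc_and_hash(&clp, "oo1");

	/* nfsd2: finds oo1 unconfirmed, releases it (only the unhash is
	 * modelled here), then allocates oo2 */
	unhash(&clp, oo1);
	alloc_and_hash(&clp, "oo2");

	/* nfsd1 keeps using oo1 (init_open_stateid, nfs4_get_vfs_file, ...),
	 * but oo1 is no longer reachable from the client */

	/* echo 0 > /proc/fs/nfsd/threads: only oo2 is found and freed */
	destroy_client(&clp);
	printf("%s was never freed: leaked\n", oo1->name);
	return 0;
}

The point is only that once oo1 has been unhashed by the second thread,
__destroy_client() can no longer reach it, so oo1 and anything later
attached to it (stateid1, nfsd_file1, nfsd_file_mark1) can never be freed.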

Add nfs4_openowner_unhashed to help detect an unhashed nfs4_openowner, and
bail out of the nfsd4_open process to fix this problem.

Cc: stable@vger.kernel.org # 2.6
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Yang Erkun <yangerkun@huawei.com>
---
 fs/nfsd/nfs4state.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

v2->v3:
1. add reviewed-by
2. add lockdep_assert_held in nfs4_openowner_unhashed

Comments

Chuck Lever III Nov. 6, 2024, 1:35 p.m. UTC | #1
On Tue, Nov 05, 2024 at 07:03:14PM +0800, Yang Erkun wrote:
> From: Yang Erkun <yangerkun@huawei.com>
> 
> [...]
> 
> Add nfs4_openowner_unhashed to help detect an unhashed nfs4_openowner, and
> bail out of the nfsd4_open process to fix this problem.
> 
> Cc: stable@vger.kernel.org # 2.6

Hi -

Questions about the "stable@" tag:

 - You refer above to a leak of nfsd_file objects, but the nfsd_file
   cache was added in v5.4. Any thoughts about what might be leaked,
   if anything, in kernels earlier than v5.4?

 - Have you tried applying this patch to LTS kernels?


yangerkun Nov. 7, 2024, 1:22 a.m. UTC | #2
On 2024/11/6 21:35, Chuck Lever wrote:
> On Tue, Nov 05, 2024 at 07:03:14PM +0800, Yang Erkun wrote:
>> From: Yang Erkun <yangerkun@huawei.com>
>>
>> [...]
>>
>> Add nfs4_openowner_unhashed to help detect an unhashed nfs4_openowner, and
>> bail out of the nfsd4_open process to fix this problem.
>>
>> Cc: stable@vger.kernel.org # 2.6
> 
> Hi -
> 
> Questions about the "stable@" tag:
> 
>   - You refer above to a leak of nfsd_file objects, but the nfsd_file
>     cache was added in v5.4. Any thoughts about what might be leaked,
>     if anything, in kernels earlier than v5.4?

From the above analysis, it is actually the openowner that is leaked, and
every object associated with it is leaked too, including the nfsd_file; the
openowner code seems to have been there since 2.6....

> 
>   - Have you tried applying this patch to LTS kernels?

I have not tried to apply this to LTS; what I think is that every kernel
since 2.6 has this bug...


Chuck Lever III Nov. 7, 2024, 2:32 p.m. UTC | #3
On Thu, Nov 07, 2024 at 09:22:39AM +0800, yangerkun wrote:
> 
> 
> On 2024/11/6 21:35, Chuck Lever wrote:
> > On Tue, Nov 05, 2024 at 07:03:14PM +0800, Yang Erkun wrote:
> > > From: Yang Erkun <yangerkun@huawei.com>

> > > Add nfs4_openowner_unhashed to help detect an unhashed nfs4_openowner, and
> > > bail out of the nfsd4_open process to fix this problem.
> > > 
> > > Cc: stable@vger.kernel.org # 2.6
> > 
> > Hi -
> > 
> > Questions about the "stable@" tag:
> > 
> >   - You refer above to a leak of nfsd_file objects, but the nfsd_file
> >     cache was added in v5.4. Any thoughts about what might be leaked,
> >     if anything, in kernels earlier than v5.4?
> 
> From the above analysis, it is actually the openowner that is leaked, and
> every object associated with it is leaked too, including the nfsd_file;
> the openowner code seems to have been there since 2.6....

Before v5.4, openowners are leaked. After, openowners and nfsd_file
objects are leaked. Got it.


> >   - Have you tried applying this patch to LTS kernels?
> 
> I have not tried to apply this to LTS; what I think is that every kernel
> since 2.6 has this bug...

Understood.

Is "2.6" a guess, or do you know of a specific kernel version where
this problem started to appear? Generally if a problem goes back far
enough or there isn't sufficient evidence about where the problem
started, we don't want a "# xx.yy" annotation.

I expect the stable folks will pull this fix into LTS kernels
automatically, and I have nightly CI running on all of those. That
can catch problems with applying recent fixes to old code bases, but
it ain't perfect.
yangerkun Nov. 8, 2024, 1:34 a.m. UTC | #4
On 2024/11/7 22:32, Chuck Lever wrote:
> On Thu, Nov 07, 2024 at 09:22:39AM +0800, yangerkun wrote:
>>
>>
>> On 2024/11/6 21:35, Chuck Lever wrote:
>>> On Tue, Nov 05, 2024 at 07:03:14PM +0800, Yang Erkun wrote:
>>>> From: Yang Erkun <yangerkun@huawei.com>
> 
>>>> Add nfs4_openowner_unhashed to help detect an unhashed nfs4_openowner, and
>>>> bail out of the nfsd4_open process to fix this problem.
>>>>
>>>> Cc: stable@vger.kernel.org # 2.6
>>>
>>> Hi -
>>>
>>> Questions about the "stable@" tag:
>>>
>>>    - You refer above to a leak of nfsd_file objects, but the nfsd_file
>>>      cache was added in v5.4. Any thoughts about what might be leaked,
>>>      if anything, in kernels earlier than v5.4?
>>
>> From the above analysis, it is actually the openowner that is leaked, and
>> every object associated with it is leaked too, including the nfsd_file;
>> the openowner code seems to have been there since 2.6....
> 
> Before v5.4, openowners are leaked. After, openowners and nfsd_file
> objects are leaked. Got it.

Yes

> 
> 
>>>    - Have you tried applying this patch to LTS kernels?
>>
>> I have not tried to apply this to LTS; what I think is that every kernel
>> since 2.6 has this bug...
> 
> Understood.
> 
> Is "2.6" a guess, or do you know of a specific kernel version where
> this problem started to appear? Generally if a problem goes back far
> enough or there isn't sufficient evidence about where the problem
> started, we don't want a "# xx.yy" annotation.

Thanks for pointing that out! Yes, 2.6 is just a guess, and it's really
not appropriate to claim that the problem has been there since 2.6.

> 
> I expect the stable folks will pull this fix into LTS kernels
> automatically, and I have nightly CI running on all of those. That
> can catch problems with applying recent fixes to old code bases, but
> it ain't perfect.
> 
>
Chuck Lever Nov. 8, 2024, 1:34 p.m. UTC | #5
From: Chuck Lever <chuck.lever@oracle.com>

On Tue, 05 Nov 2024 19:03:14 +0800, Yang Erkun wrote:                                              
> A forced umount (umount -f) will attempt to kill all rpc_tasks, even though
> the umount operation may ultimately fail if some files remain open.
> Consequently, if a process is opening a file at the same time, it can
> potentially send two OPEN rpc_tasks to the NFS server.
> 
>                    NFS CLIENT
> thread1                             thread2
> open("file")
> ...
> nfs4_do_open
>  _nfs4_do_open
>   _nfs4_open_and_get_state
>    _nfs4_proc_open
>     nfs4_run_open_task
>      /* rpc_task1 */
>      rpc_run_task
>      rpc_wait_for_completion_task
> 
> [...]                                                                        

Applied to nfsd-next for v6.13, thanks!                                                                

[1/1] nfsd: fix nfs4_openowner leak when concurrent nfsd4_open occur
      commit: 519cdbfe501ddf4a29b9c72b6961c58a33afb041                                                                      

--                                                                              
Chuck Lever

Patch

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9ddb91d088ae..37888562b436 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1660,6 +1660,14 @@  static void release_open_stateid(struct nfs4_ol_stateid *stp)
 	free_ol_stateid_reaplist(&reaplist);
 }
 
+static bool nfs4_openowner_unhashed(struct nfs4_openowner *oo)
+{
+	lockdep_assert_held(&oo->oo_owner.so_client->cl_lock);
+
+	return list_empty(&oo->oo_owner.so_strhash) &&
+		list_empty(&oo->oo_perclient);
+}
+
 static void unhash_openowner_locked(struct nfs4_openowner *oo)
 {
 	struct nfs4_client *clp = oo->oo_owner.so_client;
@@ -4979,6 +4987,12 @@  init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
 	spin_lock(&oo->oo_owner.so_client->cl_lock);
 	spin_lock(&fp->fi_lock);
 
+	if (nfs4_openowner_unhashed(oo)) {
+		mutex_unlock(&stp->st_mutex);
+		stp = NULL;
+		goto out_unlock;
+	}
+
 	retstp = nfsd4_find_existing_open(fp, open);
 	if (retstp)
 		goto out_unlock;
@@ -6131,6 +6145,11 @@  nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 
 	if (!stp) {
 		stp = init_open_stateid(fp, open);
+		if (!stp) {
+			status = nfserr_jukebox;
+			goto out;
+		}
+
 		if (!open->op_stp)
 			new_stp = true;
 	}