diff mbox series

[2/3] NFSD: restore delegation's sc_count if nfsd4_run_cb fails

Message ID 1702667703-17978-3-git-send-email-dai.ngo@oracle.com (mailing list archive)
State New, archived
Headers show
Series Bug fixes for NFSD callback | expand

Commit Message

Dai Ngo Dec. 15, 2023, 7:15 p.m. UTC
Under some load conditions the callback work request can not be queued
and nfsd4_run_cb returns 0 to caller. When this happens, the sc_count
of the delegation state was left with an extra reference count preventing
the state to be freed later.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4state.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

Comments

Jeff Layton Dec. 15, 2023, 7:42 p.m. UTC | #1
On Fri, 2023-12-15 at 11:15 -0800, Dai Ngo wrote:
> Under some load conditions the callback work request can not be queued
> and nfsd4_run_cb returns 0 to caller. When this happens, the sc_count
> of the delegation state was left with an extra reference count preventing
> the state to be freed later.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  fs/nfsd/nfs4state.c | 17 +++++++++++++----
>  1 file changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 40415929e2ae..175f3e9f5822 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2947,8 +2947,14 @@ void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
>  
>  	if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
>  		return;
> +
>  	refcount_inc(&dp->dl_stid.sc_count);
> -	nfsd4_run_cb(&ncf->ncf_getattr);
> +	if (!nfsd4_run_cb(&ncf->ncf_getattr)) {
> +		refcount_dec(&dp->dl_stid.sc_count);
> +		clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
> +		wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
> +		WARN_ON_ONCE(1);
> +	}
>  }
>  
>  static struct nfs4_client *create_client(struct xdr_netobj name,
> @@ -4967,7 +4973,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
>  	 * we know it's safe to take a reference.
>  	 */
>  	refcount_inc(&dp->dl_stid.sc_count);
> -	WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
> +	if (!nfsd4_run_cb(&dp->dl_recall)) {
> +		refcount_dec(&dp->dl_stid.sc_count);
> +		WARN_ON_ONCE(1);
> +	}
>  }
>  
>  /* Called from break_lease() with flc_lock held. */
> @@ -8543,12 +8552,12 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
>  				return 0;
>  			}
>  break_lease:
> -			spin_unlock(&ctx->flc_lock);
>  			nfsd_stats_wdeleg_getattr_inc();
> -
>  			dp = fl->fl_owner;
>  			ncf = &dp->dl_cb_fattr;
>  			nfs4_cb_getattr(&dp->dl_cb_fattr);
> +			spin_unlock(&ctx->flc_lock);
> +

The other hunks in this patch make sense, but what's going on here with
moving the lock down? Do we really need to hold the spinlock there? If
so, I would have expected to see an explanation in the changelog.

>  			wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
>  			if (ncf->ncf_cb_status) {
>  				status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
Dai Ngo Dec. 15, 2023, 8 p.m. UTC | #2
On 12/15/23 11:42 AM, Jeff Layton wrote:
> On Fri, 2023-12-15 at 11:15 -0800, Dai Ngo wrote:
>> Under some load conditions the callback work request can not be queued
>> and nfsd4_run_cb returns 0 to caller. When this happens, the sc_count
>> of the delegation state was left with an extra reference count preventing
>> the state to be freed later.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   fs/nfsd/nfs4state.c | 17 +++++++++++++----
>>   1 file changed, 13 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 40415929e2ae..175f3e9f5822 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -2947,8 +2947,14 @@ void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
>>   
>>   	if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
>>   		return;
>> +
>>   	refcount_inc(&dp->dl_stid.sc_count);
>> -	nfsd4_run_cb(&ncf->ncf_getattr);
>> +	if (!nfsd4_run_cb(&ncf->ncf_getattr)) {
>> +		refcount_dec(&dp->dl_stid.sc_count);
>> +		clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
>> +		wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
>> +		WARN_ON_ONCE(1);
>> +	}
>>   }
>>   
>>   static struct nfs4_client *create_client(struct xdr_netobj name,
>> @@ -4967,7 +4973,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
>>   	 * we know it's safe to take a reference.
>>   	 */
>>   	refcount_inc(&dp->dl_stid.sc_count);
>> -	WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
>> +	if (!nfsd4_run_cb(&dp->dl_recall)) {
>> +		refcount_dec(&dp->dl_stid.sc_count);
>> +		WARN_ON_ONCE(1);
>> +	}
>>   }
>>   
>>   /* Called from break_lease() with flc_lock held. */
>> @@ -8543,12 +8552,12 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
>>   				return 0;
>>   			}
>>   break_lease:
>> -			spin_unlock(&ctx->flc_lock);
>>   			nfsd_stats_wdeleg_getattr_inc();
>> -
>>   			dp = fl->fl_owner;
>>   			ncf = &dp->dl_cb_fattr;
>>   			nfs4_cb_getattr(&dp->dl_cb_fattr);
>> +			spin_unlock(&ctx->flc_lock);
>> +
> The other hunks in this patch make sense, but what's going on here with
> moving the lock down? Do we really need to hold the spinlock there? If
> so, I would have expected to see an explanation in the changelog.

We need to hold the flc_lock to prevent the lease to be removed which
allows the delegation state to be released. We need to do this since
we just do the refcount_dec if nfsd4_run_cb fails, instead of doing
nfs4_put_stid to free the state if this is the last refcount.

This is done to match the logic in nfsd_break_deleg_cb which has an useful
comment in nfsd_break_one_deleg.

-Dai

>
>>   			wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
>>   			if (ncf->ncf_cb_status) {
>>   				status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
Jeff Layton Dec. 15, 2023, 8:15 p.m. UTC | #3
On Fri, 2023-12-15 at 12:00 -0800, dai.ngo@oracle.com wrote:
> On 12/15/23 11:42 AM, Jeff Layton wrote:
> > On Fri, 2023-12-15 at 11:15 -0800, Dai Ngo wrote:
> > > Under some load conditions the callback work request can not be queued
> > > and nfsd4_run_cb returns 0 to caller. When this happens, the sc_count
> > > of the delegation state was left with an extra reference count preventing
> > > the state to be freed later.
> > > 
> > > Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > > ---
> > >   fs/nfsd/nfs4state.c | 17 +++++++++++++----
> > >   1 file changed, 13 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index 40415929e2ae..175f3e9f5822 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -2947,8 +2947,14 @@ void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
> > >   
> > >   	if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
> > >   		return;
> > > +
> > >   	refcount_inc(&dp->dl_stid.sc_count);
> > > -	nfsd4_run_cb(&ncf->ncf_getattr);
> > > +	if (!nfsd4_run_cb(&ncf->ncf_getattr)) {
> > > +		refcount_dec(&dp->dl_stid.sc_count);
> > > +		clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
> > > +		wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
> > > +		WARN_ON_ONCE(1);
> > > +	}
> > >   }
> > >   
> > >   static struct nfs4_client *create_client(struct xdr_netobj name,
> > > @@ -4967,7 +4973,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
> > >   	 * we know it's safe to take a reference.
> > >   	 */
> > >   	refcount_inc(&dp->dl_stid.sc_count);
> > > -	WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
> > > +	if (!nfsd4_run_cb(&dp->dl_recall)) {
> > > +		refcount_dec(&dp->dl_stid.sc_count);
> > > +		WARN_ON_ONCE(1);
> > > +	}
> > >   }
> > >   
> > >   /* Called from break_lease() with flc_lock held. */
> > > @@ -8543,12 +8552,12 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
> > >   				return 0;
> > >   			}
> > >   break_lease:
> > > -			spin_unlock(&ctx->flc_lock);
> > >   			nfsd_stats_wdeleg_getattr_inc();
> > > -
> > >   			dp = fl->fl_owner;
> > >   			ncf = &dp->dl_cb_fattr;
> > >   			nfs4_cb_getattr(&dp->dl_cb_fattr);
> > > +			spin_unlock(&ctx->flc_lock);
> > > +
> > The other hunks in this patch make sense, but what's going on here with
> > moving the lock down? Do we really need to hold the spinlock there? If
> > so, I would have expected to see an explanation in the changelog.
> 
> We need to hold the flc_lock to prevent the lease to be removed which
> allows the delegation state to be released. We need to do this since
> we just do the refcount_dec if nfsd4_run_cb fails, instead of doing
> nfs4_put_stid to free the state if this is the last refcount.
> 
> This is done to match the logic in nfsd_break_deleg_cb which has an useful
> comment in nfsd_break_one_deleg.
> 
> -Dai
> 

So is this a race today? I think this deserves a mention in the
changelog at least, and maybe a Fixes: tag?

> > 
> > >   			wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
> > >   			if (ncf->ncf_cb_status) {
> > >   				status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
Dai Ngo Dec. 15, 2023, 8:22 p.m. UTC | #4
On 12/15/23 12:15 PM, Jeff Layton wrote:
> On Fri, 2023-12-15 at 12:00 -0800, dai.ngo@oracle.com wrote:
>> On 12/15/23 11:42 AM, Jeff Layton wrote:
>>> On Fri, 2023-12-15 at 11:15 -0800, Dai Ngo wrote:
>>>> Under some load conditions the callback work request can not be queued
>>>> and nfsd4_run_cb returns 0 to caller. When this happens, the sc_count
>>>> of the delegation state was left with an extra reference count preventing
>>>> the state to be freed later.
>>>>
>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>> ---
>>>>    fs/nfsd/nfs4state.c | 17 +++++++++++++----
>>>>    1 file changed, 13 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>> index 40415929e2ae..175f3e9f5822 100644
>>>> --- a/fs/nfsd/nfs4state.c
>>>> +++ b/fs/nfsd/nfs4state.c
>>>> @@ -2947,8 +2947,14 @@ void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
>>>>    
>>>>    	if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
>>>>    		return;
>>>> +
>>>>    	refcount_inc(&dp->dl_stid.sc_count);
>>>> -	nfsd4_run_cb(&ncf->ncf_getattr);
>>>> +	if (!nfsd4_run_cb(&ncf->ncf_getattr)) {
>>>> +		refcount_dec(&dp->dl_stid.sc_count);
>>>> +		clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
>>>> +		wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
>>>> +		WARN_ON_ONCE(1);
>>>> +	}
>>>>    }
>>>>    
>>>>    static struct nfs4_client *create_client(struct xdr_netobj name,
>>>> @@ -4967,7 +4973,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
>>>>    	 * we know it's safe to take a reference.
>>>>    	 */
>>>>    	refcount_inc(&dp->dl_stid.sc_count);
>>>> -	WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
>>>> +	if (!nfsd4_run_cb(&dp->dl_recall)) {
>>>> +		refcount_dec(&dp->dl_stid.sc_count);
>>>> +		WARN_ON_ONCE(1);
>>>> +	}
>>>>    }
>>>>    
>>>>    /* Called from break_lease() with flc_lock held. */
>>>> @@ -8543,12 +8552,12 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
>>>>    				return 0;
>>>>    			}
>>>>    break_lease:
>>>> -			spin_unlock(&ctx->flc_lock);
>>>>    			nfsd_stats_wdeleg_getattr_inc();
>>>> -
>>>>    			dp = fl->fl_owner;
>>>>    			ncf = &dp->dl_cb_fattr;
>>>>    			nfs4_cb_getattr(&dp->dl_cb_fattr);
>>>> +			spin_unlock(&ctx->flc_lock);
>>>> +
>>> The other hunks in this patch make sense, but what's going on here with
>>> moving the lock down? Do we really need to hold the spinlock there? If
>>> so, I would have expected to see an explanation in the changelog.
>> We need to hold the flc_lock to prevent the lease to be removed which
>> allows the delegation state to be released. We need to do this since
>> we just do the refcount_dec if nfsd4_run_cb fails, instead of doing
>> nfs4_put_stid to free the state if this is the last refcount.
>>
>> This is done to match the logic in nfsd_break_deleg_cb which has an useful
>> comment in nfsd_break_one_deleg.
>>
>> -Dai
>>
> So is this a race today? I think this deserves a mention in the
> changelog at least, and maybe a Fixes: tag?

I will add some comments in the changelog and add a Fixes tag in v2.

Thanks,
-Dai

>
>>>>    			wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
>>>>    			if (ncf->ncf_cb_status) {
>>>>    				status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
diff mbox series

Patch

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 40415929e2ae..175f3e9f5822 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2947,8 +2947,14 @@  void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
 
 	if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
 		return;
+
 	refcount_inc(&dp->dl_stid.sc_count);
-	nfsd4_run_cb(&ncf->ncf_getattr);
+	if (!nfsd4_run_cb(&ncf->ncf_getattr)) {
+		refcount_dec(&dp->dl_stid.sc_count);
+		clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
+		wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
+		WARN_ON_ONCE(1);
+	}
 }
 
 static struct nfs4_client *create_client(struct xdr_netobj name,
@@ -4967,7 +4973,10 @@  static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
 	 * we know it's safe to take a reference.
 	 */
 	refcount_inc(&dp->dl_stid.sc_count);
-	WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
+	if (!nfsd4_run_cb(&dp->dl_recall)) {
+		refcount_dec(&dp->dl_stid.sc_count);
+		WARN_ON_ONCE(1);
+	}
 }
 
 /* Called from break_lease() with flc_lock held. */
@@ -8543,12 +8552,12 @@  nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
 				return 0;
 			}
 break_lease:
-			spin_unlock(&ctx->flc_lock);
 			nfsd_stats_wdeleg_getattr_inc();
-
 			dp = fl->fl_owner;
 			ncf = &dp->dl_cb_fattr;
 			nfs4_cb_getattr(&dp->dl_cb_fattr);
+			spin_unlock(&ctx->flc_lock);
+
 			wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
 			if (ncf->ncf_cb_status) {
 				status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));