
nfsd: don't put blocked locks on LRU until after vfs_lock_file returns

Message ID 20211123122223.69236-1-jlayton@kernel.org (mailing list archive)
State New, archived
Series nfsd: don't put blocked locks on LRU until after vfs_lock_file returns

Commit Message

Jeff Layton Nov. 23, 2021, 12:22 p.m. UTC
Vasily reported a case where vfs_lock_file took a very long time to
return (longer than a lease period). The laundromat eventually ran and
reaped the thing and when the vfs_lock_file returned, it ended up
accessing freed memory.

Don't put entries onto the LRU until vfs_lock_file returns.

Reported-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4state.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
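
For readers unfamiliar with the other side of this race: the nfs4_laundromat worker periodically walks nn->blocked_locks_lru and frees entries that have been queued longer than a lease period. The following is a simplified, paraphrased sketch of that reaping loop (not the verbatim upstream code; the expiry check and surrounding logic are abbreviated), which shows why an nbl must not appear on the LRU while nfsd4_lock may still dereference it after vfs_lock_file() returns:

```c
/*
 * Paraphrased sketch of the laundromat's blocked-lock reaping, not the
 * verbatim upstream code. "cutoff" stands in for (now - lease period).
 */
static void reap_expired_blocked_locks(struct nfsd_net *nn, time64_t cutoff)
{
	struct nfsd4_blocked_lock *nbl;
	LIST_HEAD(reaplist);

	spin_lock(&nn->blocked_locks_lock);
	while (!list_empty(&nn->blocked_locks_lru)) {
		/* LRU is oldest-first: head entries expire first */
		nbl = list_first_entry(&nn->blocked_locks_lru,
				       struct nfsd4_blocked_lock, nbl_lru);
		if (nbl->nbl_time >= cutoff)
			break;	/* rest of the list is newer, stop here */
		list_move(&nbl->nbl_lru, &reaplist);
		list_del_init(&nbl->nbl_list);
	}
	spin_unlock(&nn->blocked_locks_lock);

	while (!list_empty(&reaplist)) {
		nbl = list_first_entry(&reaplist,
				       struct nfsd4_blocked_lock, nbl_lru);
		list_del_init(&nbl->nbl_lru);
		/*
		 * Frees the nbl. If nfsd4_lock is still inside
		 * vfs_lock_file() with this nbl, that thread now holds a
		 * dangling pointer -- the bug described above.
		 */
		free_blocked_lock(nbl);
	}
}
```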

Comments

Vasily Averin Nov. 23, 2021, 3:20 p.m. UTC | #1
On 23.11.2021 15:22, Jeff Layton wrote:
> Vasily reported a case where vfs_lock_file took a very long time to
> return (longer than a lease period). The laundromat eventually ran and
> reaped the thing and when the vfs_lock_file returned, it ended up
> accessing freed memory.
> 
> Don't put entries onto the LRU until vfs_lock_file returns.

Cc: stable@vger.kernel.org
Fixes: 7919d0a27f1e ("nfsd: add a LRU list for blocked locks")

> Reported-by: Vasily Averin <vvs@virtuozzo.com>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Vasily Averin <vvs@virtuozzo.com>

> ---
>  fs/nfsd/nfs4state.c | 6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index bfad94c70b84..8cfef84b9355 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -6966,10 +6966,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	}
>  
>  	if (fl_flags & FL_SLEEP) {
> -		nbl->nbl_time = ktime_get_boottime_seconds();
>  		spin_lock(&nn->blocked_locks_lock);
>  		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
> -		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
>  		spin_unlock(&nn->blocked_locks_lock);
>  	}
>  
> @@ -6982,6 +6980,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  			nn->somebody_reclaimed = true;
>  		break;
>  	case FILE_LOCK_DEFERRED:
> +		nbl->nbl_time = ktime_get_boottime_seconds();
> +		spin_lock(&nn->blocked_locks_lock);
> +		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
> +		spin_unlock(&nn->blocked_locks_lock);
>  		nbl = NULL;
>  		fallthrough;
>  	case -EAGAIN:		/* conflock holds conflicting lock */
>
Chuck Lever III Nov. 23, 2021, 3:21 p.m. UTC | #2
> On Nov 23, 2021, at 10:20 AM, Vasily Averin <vvs@virtuozzo.com> wrote:
> 
> On 23.11.2021 15:22, Jeff Layton wrote:
>> Vasily reported a case where vfs_lock_file took a very long time to
>> return (longer than a lease period). The laundromat eventually ran and
>> reaped the thing and when the vfs_lock_file returned, it ended up
>> accessing freed memory.
>> 
>> Don't put entries onto the LRU until vfs_lock_file returns.
> 
> Cc: stable@vger.kernel.org
> Fixes: 7919d0a27f1e ("nfsd: add a LRU list for blocked locks")
> 
>> Reported-by: Vasily Averin <vvs@virtuozzo.com>
>> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> Tested-by: Vasily Averin <vvs@virtuozzo.com>

I assume Bruce is grabbing this for v5.16-rc.


>> ---
>> fs/nfsd/nfs4state.c | 6 ++++--
>> 1 file changed, 4 insertions(+), 2 deletions(-)
>> 
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index bfad94c70b84..8cfef84b9355 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -6966,10 +6966,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>> 	}
>> 
>> 	if (fl_flags & FL_SLEEP) {
>> -		nbl->nbl_time = ktime_get_boottime_seconds();
>> 		spin_lock(&nn->blocked_locks_lock);
>> 		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
>> -		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
>> 		spin_unlock(&nn->blocked_locks_lock);
>> 	}
>> 
>> @@ -6982,6 +6980,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>> 			nn->somebody_reclaimed = true;
>> 		break;
>> 	case FILE_LOCK_DEFERRED:
>> +		nbl->nbl_time = ktime_get_boottime_seconds();
>> +		spin_lock(&nn->blocked_locks_lock);
>> +		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
>> +		spin_unlock(&nn->blocked_locks_lock);
>> 		nbl = NULL;
>> 		fallthrough;
>> 	case -EAGAIN:		/* conflock holds conflicting lock */
>> 
> 

--
Chuck Lever
J. Bruce Fields Nov. 23, 2021, 3:59 p.m. UTC | #3
On Tue, Nov 23, 2021 at 07:22:23AM -0500, Jeff Layton wrote:
> Vasily reported a case where vfs_lock_file took a very long time to
> return (longer than a lease period). The laundromat eventually ran and
> reaped the thing and when the vfs_lock_file returned, it ended up
> accessing freed memory.

By the way, once we've called vfs_lock_file(), is there anything
preventing nfsd4_cb_notify_lock_release() from freeing nbl before we get
here?
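
(For context: nfsd4_cb_notify_lock_release() is the release op for the CB_NOTIFY_LOCK callback that is sent once the blocked lock becomes grantable. My recollection of it, paraphrased and possibly not matching the upstream source exactly, is that it frees the nbl outright, so a callback completing early would leave nfsd4_lock holding a freed pointer just as in the laundromat case:)

```c
/*
 * Paraphrase of the CB_NOTIFY_LOCK release op, from memory; field and
 * helper names are my best recollection of the upstream code.
 */
static void
nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
{
	struct nfsd4_blocked_lock *nbl = container_of(cb,
				struct nfsd4_blocked_lock, nbl_cb);

	/* Frees the nbl unconditionally once the callback is done with it. */
	free_blocked_lock(nbl);
}
```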

> 
> Don't put entries onto the LRU until vfs_lock_file returns.
> 
> Reported-by: Vasily Averin <vvs@virtuozzo.com>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/nfsd/nfs4state.c | 6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index bfad94c70b84..8cfef84b9355 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -6966,10 +6966,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	}
>  
>  	if (fl_flags & FL_SLEEP) {
> -		nbl->nbl_time = ktime_get_boottime_seconds();
>  		spin_lock(&nn->blocked_locks_lock);
>  		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
> -		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
>  		spin_unlock(&nn->blocked_locks_lock);
>  	}
>  
> @@ -6982,6 +6980,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  			nn->somebody_reclaimed = true;
>  		break;
>  	case FILE_LOCK_DEFERRED:
> +		nbl->nbl_time = ktime_get_boottime_seconds();
> +		spin_lock(&nn->blocked_locks_lock);
> +		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
> +		spin_unlock(&nn->blocked_locks_lock);
>  		nbl = NULL;
>  		fallthrough;
>  	case -EAGAIN:		/* conflock holds conflicting lock */
> -- 
> 2.33.1
Jeff Layton Nov. 23, 2021, 4:31 p.m. UTC | #4
On Tue, 2021-11-23 at 10:59 -0500, J. Bruce Fields wrote:
> On Tue, Nov 23, 2021 at 07:22:23AM -0500, Jeff Layton wrote:
> > Vasily reported a case where vfs_lock_file took a very long time to
> > return (longer than a lease period). The laundromat eventually ran and
> > reaped the thing and when the vfs_lock_file returned, it ended up
> > accessing freed memory.
> 
> By the way, once we've called vfs_lock_file(), is there anything
> preventing nfsd4_cb_notify_lock_release() from freeing nbl before we get
> here?
> 

No, I don't think there is. Good catch.

Hmm...the only way I can see to fix that would be to add a refcount to
these things, in which case we probably don't need this patch since it
would prevent the original issue as well...
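
Purely as an illustration of what that might look like (a hypothetical sketch, not a posted patch, and not necessarily what any eventual fix did): embed a refcount in struct nfsd4_blocked_lock, assume the allocation path initializes it to 1, take an extra reference in nfsd4_lock before calling vfs_lock_file, and make every free path drop a reference instead of freeing directly:

```c
/* Hypothetical sketch only -- names and placement are illustrative. */
struct nfsd4_blocked_lock {
	/* ... existing fields ... */
	refcount_t		nbl_refcnt;	/* new: owns the nbl's lifetime */
};

static void
put_blocked_lock(struct nfsd4_blocked_lock *nbl)
{
	if (refcount_dec_and_test(&nbl->nbl_refcnt))
		free_blocked_lock(nbl);	/* existing destructor */
}

/* In nfsd4_lock(), around the blocking case: */
	if (fl_flags & FL_SLEEP)
		refcount_inc(&nbl->nbl_refcnt);	/* pin nbl across vfs_lock_file() */
	err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
	/* nbl is safe to touch here even if the laundromat or the
	 * CB_NOTIFY_LOCK release has already dropped its reference */
	if (fl_flags & FL_SLEEP)
		put_blocked_lock(nbl);		/* may free if no one else holds it */
```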

> > 
> > Don't put entries onto the LRU until vfs_lock_file returns.
> > 
> > Reported-by: Vasily Averin <vvs@virtuozzo.com>
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> >  fs/nfsd/nfs4state.c | 6 ++++--
> >  1 file changed, 4 insertions(+), 2 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index bfad94c70b84..8cfef84b9355 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -6966,10 +6966,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  	}
> >  
> >  	if (fl_flags & FL_SLEEP) {
> > -		nbl->nbl_time = ktime_get_boottime_seconds();
> >  		spin_lock(&nn->blocked_locks_lock);
> >  		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
> > -		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
> >  		spin_unlock(&nn->blocked_locks_lock);
> >  	}
> >  
> > @@ -6982,6 +6980,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  			nn->somebody_reclaimed = true;
> >  		break;
> >  	case FILE_LOCK_DEFERRED:
> > +		nbl->nbl_time = ktime_get_boottime_seconds();
> > +		spin_lock(&nn->blocked_locks_lock);
> > +		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
> > +		spin_unlock(&nn->blocked_locks_lock);
> >  		nbl = NULL;
> >  		fallthrough;
> >  	case -EAGAIN:		/* conflock holds conflicting lock */
> > -- 
> > 2.33.1
J. Bruce Fields Nov. 23, 2021, 4:41 p.m. UTC | #5
On Tue, Nov 23, 2021 at 11:31:01AM -0500, Jeff Layton wrote:
> On Tue, 2021-11-23 at 10:59 -0500, J. Bruce Fields wrote:
> > On Tue, Nov 23, 2021 at 07:22:23AM -0500, Jeff Layton wrote:
> > > Vasily reported a case where vfs_lock_file took a very long time to
> > > return (longer than a lease period). The laundromat eventually ran and
> > > reaped the thing and when the vfs_lock_file returned, it ended up
> > > accessing freed memory.
> > 
> > By the way, once we've called vfs_lock_file(), is there anything
> > preventing nfsd4_cb_notify_lock_release() from freeing nbl before we get
> > here?
> > 
> 
> No, I don't think there is. Good catch.

It may be a rare race (an rpc's not normally going to reply in that
time), but I wouldn't be surprised if there's some error condition where
it's possible.

> Hmm...the only way I can see to fix that would be to add a refcount to
> these things, in which case we probably don't need this patch since it
> would prevent the original issue as well...

Depending on how long that might take, I'd be OK with applying this as a
stopgap.

--b.

> 
> > > 
> > > Don't put entries onto the LRU until vfs_lock_file returns.
> > > 
> > > Reported-by: Vasily Averin <vvs@virtuozzo.com>
> > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > ---
> > >  fs/nfsd/nfs4state.c | 6 ++++--
> > >  1 file changed, 4 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index bfad94c70b84..8cfef84b9355 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -6966,10 +6966,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > >  	}
> > >  
> > >  	if (fl_flags & FL_SLEEP) {
> > > -		nbl->nbl_time = ktime_get_boottime_seconds();
> > >  		spin_lock(&nn->blocked_locks_lock);
> > >  		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
> > > -		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
> > >  		spin_unlock(&nn->blocked_locks_lock);
> > >  	}
> > >  
> > > @@ -6982,6 +6980,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > >  			nn->somebody_reclaimed = true;
> > >  		break;
> > >  	case FILE_LOCK_DEFERRED:
> > > +		nbl->nbl_time = ktime_get_boottime_seconds();
> > > +		spin_lock(&nn->blocked_locks_lock);
> > > +		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
> > > +		spin_unlock(&nn->blocked_locks_lock);
> > >  		nbl = NULL;
> > >  		fallthrough;
> > >  	case -EAGAIN:		/* conflock holds conflicting lock */
> > > -- 
> > > 2.33.1
> 
> -- 
> Jeff Layton <jlayton@kernel.org>
Jeff Layton Nov. 23, 2021, 5:02 p.m. UTC | #6
On Tue, 2021-11-23 at 11:41 -0500, J. Bruce Fields wrote:
> On Tue, Nov 23, 2021 at 11:31:01AM -0500, Jeff Layton wrote:
> > On Tue, 2021-11-23 at 10:59 -0500, J. Bruce Fields wrote:
> > > On Tue, Nov 23, 2021 at 07:22:23AM -0500, Jeff Layton wrote:
> > > > Vasily reported a case where vfs_lock_file took a very long time to
> > > > return (longer than a lease period). The laundromat eventually ran and
> > > > reaped the thing and when the vfs_lock_file returned, it ended up
> > > > accessing freed memory.
> > > 
> > > By the way, once we've called vfs_lock_file(), is there anything
> > > preventing nfsd4_cb_notify_lock_release() from freeing nbl before we get
> > > here?
> > > 
> > 
> > No, I don't think there is. Good catch.
> 
> It may be a rare race (an rpc's not normally going to reply in that
> time), but I wouldn't be surprised if there's some error condition where
> it's possible.
> 
> > Hmm...the only way I can see to fix that would be to add a refcount to
> > these things, in which case we probably don't need this patch since it
> > would prevent the original issue as well...
> 
> Depending on how long that might take, I'd be OK with applying this as a
> stopgap.
> 
> 

I won't have the cycles to do that anytime soon, unfortunately, so you
may want to.

> > 
> > > > 
> > > > Don't put entries onto the LRU until vfs_lock_file returns.
> > > > 
> > > > Reported-by: Vasily Averin <vvs@virtuozzo.com>
> > > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > > ---
> > > >  fs/nfsd/nfs4state.c | 6 ++++--
> > > >  1 file changed, 4 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > index bfad94c70b84..8cfef84b9355 100644
> > > > --- a/fs/nfsd/nfs4state.c
> > > > +++ b/fs/nfsd/nfs4state.c
> > > > @@ -6966,10 +6966,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > > >  	}
> > > >  
> > > >  	if (fl_flags & FL_SLEEP) {
> > > > -		nbl->nbl_time = ktime_get_boottime_seconds();
> > > >  		spin_lock(&nn->blocked_locks_lock);
> > > >  		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
> > > > -		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
> > > >  		spin_unlock(&nn->blocked_locks_lock);
> > > >  	}
> > > >  
> > > > @@ -6982,6 +6980,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > > >  			nn->somebody_reclaimed = true;
> > > >  		break;
> > > >  	case FILE_LOCK_DEFERRED:
> > > > +		nbl->nbl_time = ktime_get_boottime_seconds();
> > > > +		spin_lock(&nn->blocked_locks_lock);
> > > > +		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
> > > > +		spin_unlock(&nn->blocked_locks_lock);
> > > >  		nbl = NULL;
> > > >  		fallthrough;
> > > >  	case -EAGAIN:		/* conflock holds conflicting lock */
> > > > -- 
> > > > 2.33.1
> > 
> > -- 
> > Jeff Layton <jlayton@kernel.org>

Patch

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bfad94c70b84..8cfef84b9355 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6966,10 +6966,8 @@  nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	}
 
 	if (fl_flags & FL_SLEEP) {
-		nbl->nbl_time = ktime_get_boottime_seconds();
 		spin_lock(&nn->blocked_locks_lock);
 		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
-		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
 		spin_unlock(&nn->blocked_locks_lock);
 	}
 
@@ -6982,6 +6980,10 @@  nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			nn->somebody_reclaimed = true;
 		break;
 	case FILE_LOCK_DEFERRED:
+		nbl->nbl_time = ktime_get_boottime_seconds();
+		spin_lock(&nn->blocked_locks_lock);
+		list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
+		spin_unlock(&nn->blocked_locks_lock);
 		nbl = NULL;
 		fallthrough;
 	case -EAGAIN:		/* conflock holds conflicting lock */