diff mbox series

[RFC,v23,5/7] fs/lock: add 2 callbacks to lock_manager_operations to resolve conflict

Message ID 1651129595-6904-6-git-send-email-dai.ngo@oracle.com (mailing list archive)
State New, archived
Headers show
Series NFSD: Initial implementation of NFSv4 Courteous Server | expand

Commit Message

Dai Ngo April 28, 2022, 7:06 a.m. UTC
Add 2 new callbacks, lm_lock_expirable and lm_expire_lock, to
lock_manager_operations to allow the lock manager to take appropriate
action to resolve the lock conflict if possible.

A new field, lm_mod_owner, is also added to lock_manager_operations.
The lm_mod_owner is used by the fs/lock code to make sure the lock
manager module such as nfsd, is not freed while lock conflict is being
resolved.

lm_lock_expirable checks and returns true to indicate that the lock
conflict can be resolved else return false. This callback must be
called with the flc_lock held so it can not block.

lm_expire_lock is called to resolve the lock conflict if the returned
value from lm_lock_expirable is true. This callback is called without
the flc_lock held since it's allowed to block. Upon returning from
this callback, the lock conflict should be resolved and the caller is
expected to restart the conflict check from the beginning of the list.

Lock manager, such as NFSv4 courteous server, uses this callback to
resolve conflict by destroying lock owner, or the NFSv4 courtesy client
(client that has expired but is allowed to maintain its states) that owns
the lock.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 Documentation/filesystems/locking.rst |  4 ++++
 fs/locks.c                            | 45 +++++++++++++++++++++++++++++++----
 include/linux/fs.h                    |  3 +++
 3 files changed, 48 insertions(+), 4 deletions(-)

Comments

J. Bruce Fields April 29, 2022, 3:16 p.m. UTC | #1
On Thu, Apr 28, 2022 at 12:06:33AM -0700, Dai Ngo wrote:
> Add 2 new callbacks, lm_lock_expirable and lm_expire_lock, to
> lock_manager_operations to allow the lock manager to take appropriate
> action to resolve the lock conflict if possible.
> 
> A new field, lm_mod_owner, is also added to lock_manager_operations.
> The lm_mod_owner is used by the fs/lock code to make sure the lock
> manager module such as nfsd, is not freed while lock conflict is being
> resolved.
> 
> lm_lock_expirable checks and returns true to indicate that the lock
> conflict can be resolved else return false. This callback must be
> called with the flc_lock held so it can not block.
> 
> lm_expire_lock is called to resolve the lock conflict if the returned
> value from lm_lock_expirable is true. This callback is called without
> the flc_lock held since it's allowed to block. Upon returning from
> this callback, the lock conflict should be resolved and the caller is
> expected to restart the conflict check from the beginnning of the list.
> 
> Lock manager, such as NFSv4 courteous server, uses this callback to
> resolve conflict by destroying lock owner, or the NFSv4 courtesy client
> (client that has expired but allowed to maintains its states) that owns
> the lock.
> 
> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> ---
>  Documentation/filesystems/locking.rst |  4 ++++
>  fs/locks.c                            | 45 +++++++++++++++++++++++++++++++----
>  include/linux/fs.h                    |  3 +++
>  3 files changed, 48 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
> index c26d854275a0..0997a258361a 100644
> --- a/Documentation/filesystems/locking.rst
> +++ b/Documentation/filesystems/locking.rst
> @@ -428,6 +428,8 @@ prototypes::
>  	void (*lm_break)(struct file_lock *); /* break_lease callback */
>  	int (*lm_change)(struct file_lock **, int);
>  	bool (*lm_breaker_owns_lease)(struct file_lock *);
> +        bool (*lm_lock_expirable)(struct file_lock *);
> +        void (*lm_expire_lock)(void);
>  
>  locking rules:
>  
> @@ -439,6 +441,8 @@ lm_grant:		no		no			no
>  lm_break:		yes		no			no
>  lm_change		yes		no			no
>  lm_breaker_owns_lease:	yes     	no			no
> +lm_lock_expirable	yes		no			no
> +lm_expire_lock		no		no			yes
>  ======================	=============	=================	=========
>  
>  buffer_head
> diff --git a/fs/locks.c b/fs/locks.c
> index c369841ef7d1..d48c3f455657 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -896,6 +896,37 @@ static bool flock_locks_conflict(struct file_lock *caller_fl,
>  	return locks_conflict(caller_fl, sys_fl);
>  }
>  
> +static bool
> +resolve_lock_conflict_locked(struct file_lock_context *ctx,
> +			struct file_lock *cfl, bool rwsem)
> +{
> +	void *owner;
> +	bool ret;
> +	void (*func)(void);
> +
> +	if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable &&
> +				cfl->fl_lmops->lm_expire_lock) {
> +		ret = (*cfl->fl_lmops->lm_lock_expirable)(cfl);
> +		if (!ret)
> +			return false;
> +		owner = cfl->fl_lmops->lm_mod_owner;
> +		if (!owner)
> +			return false;
> +		func = cfl->fl_lmops->lm_expire_lock;
> +		__module_get(owner);
> +		if (rwsem)
> +			percpu_up_read(&file_rwsem);
> +		spin_unlock(&ctx->flc_lock);

Dropping and reacquiring locks inside a function like this makes me
nervous.  It means it's not obvious in the caller that the lock isn't
held throughout.

I know it's more verbose, but let's just open-code this logic in the
callers.

(And, thanks for catching the test_lock case, I'd forgotten it.)

Also: do we *really* need to drop the file_rwsem?  Were you seeing it
cause problems?  The only possible conflict is with someone trying
to read /proc/locks, and I'm surprised that it'd be a problem to let
them wait here.

--b.

> +		(*func)();
> +		module_put(owner);
> +		if (rwsem)
> +			percpu_down_read(&file_rwsem);
> +		spin_lock(&ctx->flc_lock);
> +		return true;
> +	}
> +	return false;
> +}
> +
>  void
>  posix_test_lock(struct file *filp, struct file_lock *fl)
>  {
> @@ -910,11 +941,14 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
>  	}
>  
>  	spin_lock(&ctx->flc_lock);
> +retry:
>  	list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
> -		if (posix_locks_conflict(fl, cfl)) {
> -			locks_copy_conflock(fl, cfl);
> -			goto out;
> -		}
> +		if (!posix_locks_conflict(fl, cfl))
> +			continue;
> +		if (resolve_lock_conflict_locked(ctx, cfl, false))
> +			goto retry;
> +		locks_copy_conflock(fl, cfl);
> +		goto out;
>  	}
>  	fl->fl_type = F_UNLCK;
>  out:
> @@ -1108,6 +1142,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
>  
>  	percpu_down_read(&file_rwsem);
>  	spin_lock(&ctx->flc_lock);
> +retry:
>  	/*
>  	 * New lock request. Walk all POSIX locks and look for conflicts. If
>  	 * there are any, either return error or put the request on the
> @@ -1117,6 +1152,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
>  		list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
>  			if (!posix_locks_conflict(request, fl))
>  				continue;
> +			if (resolve_lock_conflict_locked(ctx, fl, true))
> +				goto retry;
>  			if (conflock)
>  				locks_copy_conflock(conflock, fl);
>  			error = -EAGAIN;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index b8ed7f974fb4..aa6c1bbdb8c4 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1029,6 +1029,7 @@ struct file_lock_operations {
>  };
>  
>  struct lock_manager_operations {
> +	void *lm_mod_owner;
>  	fl_owner_t (*lm_get_owner)(fl_owner_t);
>  	void (*lm_put_owner)(fl_owner_t);
>  	void (*lm_notify)(struct file_lock *);	/* unblock callback */
> @@ -1037,6 +1038,8 @@ struct lock_manager_operations {
>  	int (*lm_change)(struct file_lock *, int, struct list_head *);
>  	void (*lm_setup)(struct file_lock *, void **);
>  	bool (*lm_breaker_owns_lease)(struct file_lock *);
> +	bool (*lm_lock_expirable)(struct file_lock *cfl);
> +	void (*lm_expire_lock)(void);
>  };
>  
>  struct lock_manager {
> -- 
> 2.9.5
Dai Ngo April 29, 2022, 5:24 p.m. UTC | #2
On 4/29/22 8:16 AM, J. Bruce Fields wrote:
> On Thu, Apr 28, 2022 at 12:06:33AM -0700, Dai Ngo wrote:
>> Add 2 new callbacks, lm_lock_expirable and lm_expire_lock, to
>> lock_manager_operations to allow the lock manager to take appropriate
>> action to resolve the lock conflict if possible.
>>
>> A new field, lm_mod_owner, is also added to lock_manager_operations.
>> The lm_mod_owner is used by the fs/lock code to make sure the lock
>> manager module such as nfsd, is not freed while lock conflict is being
>> resolved.
>>
>> lm_lock_expirable checks and returns true to indicate that the lock
>> conflict can be resolved else return false. This callback must be
>> called with the flc_lock held so it can not block.
>>
>> lm_expire_lock is called to resolve the lock conflict if the returned
>> value from lm_lock_expirable is true. This callback is called without
>> the flc_lock held since it's allowed to block. Upon returning from
>> this callback, the lock conflict should be resolved and the caller is
>> expected to restart the conflict check from the beginnning of the list.
>>
>> Lock manager, such as NFSv4 courteous server, uses this callback to
>> resolve conflict by destroying lock owner, or the NFSv4 courtesy client
>> (client that has expired but allowed to maintains its states) that owns
>> the lock.
>>
>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>> ---
>>   Documentation/filesystems/locking.rst |  4 ++++
>>   fs/locks.c                            | 45 +++++++++++++++++++++++++++++++----
>>   include/linux/fs.h                    |  3 +++
>>   3 files changed, 48 insertions(+), 4 deletions(-)
>>
>> diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
>> index c26d854275a0..0997a258361a 100644
>> --- a/Documentation/filesystems/locking.rst
>> +++ b/Documentation/filesystems/locking.rst
>> @@ -428,6 +428,8 @@ prototypes::
>>   	void (*lm_break)(struct file_lock *); /* break_lease callback */
>>   	int (*lm_change)(struct file_lock **, int);
>>   	bool (*lm_breaker_owns_lease)(struct file_lock *);
>> +        bool (*lm_lock_expirable)(struct file_lock *);
>> +        void (*lm_expire_lock)(void);
>>   
>>   locking rules:
>>   
>> @@ -439,6 +441,8 @@ lm_grant:		no		no			no
>>   lm_break:		yes		no			no
>>   lm_change		yes		no			no
>>   lm_breaker_owns_lease:	yes     	no			no
>> +lm_lock_expirable	yes		no			no
>> +lm_expire_lock		no		no			yes
>>   ======================	=============	=================	=========
>>   
>>   buffer_head
>> diff --git a/fs/locks.c b/fs/locks.c
>> index c369841ef7d1..d48c3f455657 100644
>> --- a/fs/locks.c
>> +++ b/fs/locks.c
>> @@ -896,6 +896,37 @@ static bool flock_locks_conflict(struct file_lock *caller_fl,
>>   	return locks_conflict(caller_fl, sys_fl);
>>   }
>>   
>> +static bool
>> +resolve_lock_conflict_locked(struct file_lock_context *ctx,
>> +			struct file_lock *cfl, bool rwsem)
>> +{
>> +	void *owner;
>> +	bool ret;
>> +	void (*func)(void);
>> +
>> +	if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable &&
>> +				cfl->fl_lmops->lm_expire_lock) {
>> +		ret = (*cfl->fl_lmops->lm_lock_expirable)(cfl);
>> +		if (!ret)
>> +			return false;
>> +		owner = cfl->fl_lmops->lm_mod_owner;
>> +		if (!owner)
>> +			return false;
>> +		func = cfl->fl_lmops->lm_expire_lock;
>> +		__module_get(owner);
>> +		if (rwsem)
>> +			percpu_up_read(&file_rwsem);
>> +		spin_unlock(&ctx->flc_lock);
> Dropping and reacquiring locks inside a function like this makes me
> nervous.  It means it's not obvious in the caller that the lock isn't
> held throughout.
>
> I know it's more verbose, but let's just open-code this logic in the
> callers.

fix in v24.

>
> (And, thanks for catching the test_lock case, I'd forgotten it.)
>
> Also: do we *really* need to drop the file_rwsem?  Were you seeing it
> that cause problems?  The only possible conflict is with someone trying
> to read /proc/locks, and I'm surprised that it'd be a problem to let
> them wait here.

Yes, apparently file_rwsem is used when the laundromat expires the
COURTESY client and causes deadlock.

-Dai

>
> --b.
>
>> +		(*func)();
>> +		module_put(owner);
>> +		if (rwsem)
>> +			percpu_down_read(&file_rwsem);
>> +		spin_lock(&ctx->flc_lock);
>> +		return true;
>> +	}
>> +	return false;
>> +}
>> +
>>   void
>>   posix_test_lock(struct file *filp, struct file_lock *fl)
>>   {
>> @@ -910,11 +941,14 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
>>   	}
>>   
>>   	spin_lock(&ctx->flc_lock);
>> +retry:
>>   	list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
>> -		if (posix_locks_conflict(fl, cfl)) {
>> -			locks_copy_conflock(fl, cfl);
>> -			goto out;
>> -		}
>> +		if (!posix_locks_conflict(fl, cfl))
>> +			continue;
>> +		if (resolve_lock_conflict_locked(ctx, cfl, false))
>> +			goto retry;
>> +		locks_copy_conflock(fl, cfl);
>> +		goto out;
>>   	}
>>   	fl->fl_type = F_UNLCK;
>>   out:
>> @@ -1108,6 +1142,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
>>   
>>   	percpu_down_read(&file_rwsem);
>>   	spin_lock(&ctx->flc_lock);
>> +retry:
>>   	/*
>>   	 * New lock request. Walk all POSIX locks and look for conflicts. If
>>   	 * there are any, either return error or put the request on the
>> @@ -1117,6 +1152,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
>>   		list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
>>   			if (!posix_locks_conflict(request, fl))
>>   				continue;
>> +			if (resolve_lock_conflict_locked(ctx, fl, true))
>> +				goto retry;
>>   			if (conflock)
>>   				locks_copy_conflock(conflock, fl);
>>   			error = -EAGAIN;
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index b8ed7f974fb4..aa6c1bbdb8c4 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -1029,6 +1029,7 @@ struct file_lock_operations {
>>   };
>>   
>>   struct lock_manager_operations {
>> +	void *lm_mod_owner;
>>   	fl_owner_t (*lm_get_owner)(fl_owner_t);
>>   	void (*lm_put_owner)(fl_owner_t);
>>   	void (*lm_notify)(struct file_lock *);	/* unblock callback */
>> @@ -1037,6 +1038,8 @@ struct lock_manager_operations {
>>   	int (*lm_change)(struct file_lock *, int, struct list_head *);
>>   	void (*lm_setup)(struct file_lock *, void **);
>>   	bool (*lm_breaker_owns_lease)(struct file_lock *);
>> +	bool (*lm_lock_expirable)(struct file_lock *cfl);
>> +	void (*lm_expire_lock)(void);
>>   };
>>   
>>   struct lock_manager {
>> -- 
>> 2.9.5
J. Bruce Fields April 29, 2022, 7:58 p.m. UTC | #3
On Fri, Apr 29, 2022 at 10:24:11AM -0700, dai.ngo@oracle.com wrote:
> 
> On 4/29/22 8:16 AM, J. Bruce Fields wrote:
> >On Thu, Apr 28, 2022 at 12:06:33AM -0700, Dai Ngo wrote:
> >>Add 2 new callbacks, lm_lock_expirable and lm_expire_lock, to
> >>lock_manager_operations to allow the lock manager to take appropriate
> >>action to resolve the lock conflict if possible.
> >>
> >>A new field, lm_mod_owner, is also added to lock_manager_operations.
> >>The lm_mod_owner is used by the fs/lock code to make sure the lock
> >>manager module such as nfsd, is not freed while lock conflict is being
> >>resolved.
> >>
> >>lm_lock_expirable checks and returns true to indicate that the lock
> >>conflict can be resolved else return false. This callback must be
> >>called with the flc_lock held so it can not block.
> >>
> >>lm_expire_lock is called to resolve the lock conflict if the returned
> >>value from lm_lock_expirable is true. This callback is called without
> >>the flc_lock held since it's allowed to block. Upon returning from
> >>this callback, the lock conflict should be resolved and the caller is
> >>expected to restart the conflict check from the beginnning of the list.
> >>
> >>Lock manager, such as NFSv4 courteous server, uses this callback to
> >>resolve conflict by destroying lock owner, or the NFSv4 courtesy client
> >>(client that has expired but allowed to maintains its states) that owns
> >>the lock.
> >>
> >>Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> >>---
> >>  Documentation/filesystems/locking.rst |  4 ++++
> >>  fs/locks.c                            | 45 +++++++++++++++++++++++++++++++----
> >>  include/linux/fs.h                    |  3 +++
> >>  3 files changed, 48 insertions(+), 4 deletions(-)
> >>
> >>diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
> >>index c26d854275a0..0997a258361a 100644
> >>--- a/Documentation/filesystems/locking.rst
> >>+++ b/Documentation/filesystems/locking.rst
> >>@@ -428,6 +428,8 @@ prototypes::
> >>  	void (*lm_break)(struct file_lock *); /* break_lease callback */
> >>  	int (*lm_change)(struct file_lock **, int);
> >>  	bool (*lm_breaker_owns_lease)(struct file_lock *);
> >>+        bool (*lm_lock_expirable)(struct file_lock *);
> >>+        void (*lm_expire_lock)(void);
> >>  locking rules:
> >>@@ -439,6 +441,8 @@ lm_grant:		no		no			no
> >>  lm_break:		yes		no			no
> >>  lm_change		yes		no			no
> >>  lm_breaker_owns_lease:	yes     	no			no
> >>+lm_lock_expirable	yes		no			no
> >>+lm_expire_lock		no		no			yes
> >>  ======================	=============	=================	=========
> >>  buffer_head
> >>diff --git a/fs/locks.c b/fs/locks.c
> >>index c369841ef7d1..d48c3f455657 100644
> >>--- a/fs/locks.c
> >>+++ b/fs/locks.c
> >>@@ -896,6 +896,37 @@ static bool flock_locks_conflict(struct file_lock *caller_fl,
> >>  	return locks_conflict(caller_fl, sys_fl);
> >>  }
> >>+static bool
> >>+resolve_lock_conflict_locked(struct file_lock_context *ctx,
> >>+			struct file_lock *cfl, bool rwsem)
> >>+{
> >>+	void *owner;
> >>+	bool ret;
> >>+	void (*func)(void);
> >>+
> >>+	if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable &&
> >>+				cfl->fl_lmops->lm_expire_lock) {
> >>+		ret = (*cfl->fl_lmops->lm_lock_expirable)(cfl);
> >>+		if (!ret)
> >>+			return false;
> >>+		owner = cfl->fl_lmops->lm_mod_owner;
> >>+		if (!owner)
> >>+			return false;
> >>+		func = cfl->fl_lmops->lm_expire_lock;
> >>+		__module_get(owner);
> >>+		if (rwsem)
> >>+			percpu_up_read(&file_rwsem);
> >>+		spin_unlock(&ctx->flc_lock);
> >Dropping and reacquiring locks inside a function like this makes me
> >nervous.  It means it's not obvious in the caller that the lock isn't
> >held throughout.
> >
> >I know it's more verbose, but let's just open-code this logic in the
> >callers.
> 
> fix in v24.
> 
> >
> >(And, thanks for catching the test_lock case, I'd forgotten it.)
> >
> >Also: do we *really* need to drop the file_rwsem?  Were you seeing it
> >that cause problems?  The only possible conflict is with someone trying
> >to read /proc/locks, and I'm surprised that it'd be a problem to let
> >them wait here.
> 
> Yes, apparently file_rwsem is used when the laundromat expires the
> COURTESY client client and causes deadlock.

It's taken, but only for read.  I'm rather surprised that would cause a
deadlock.  Do you have any kind of trace showing what happened?

Oh well, it's not a big deal to just open code this and set the "retry:"
before both lock acquisitions, that's probably best in fact.  I'm just
curious.

--b.

> 
> -Dai
> 
> >
> >--b.
> >
> >>+		(*func)();
> >>+		module_put(owner);
> >>+		if (rwsem)
> >>+			percpu_down_read(&file_rwsem);
> >>+		spin_lock(&ctx->flc_lock);
> >>+		return true;
> >>+	}
> >>+	return false;
> >>+}
> >>+
> >>  void
> >>  posix_test_lock(struct file *filp, struct file_lock *fl)
> >>  {
> >>@@ -910,11 +941,14 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
> >>  	}
> >>  	spin_lock(&ctx->flc_lock);
> >>+retry:
> >>  	list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
> >>-		if (posix_locks_conflict(fl, cfl)) {
> >>-			locks_copy_conflock(fl, cfl);
> >>-			goto out;
> >>-		}
> >>+		if (!posix_locks_conflict(fl, cfl))
> >>+			continue;
> >>+		if (resolve_lock_conflict_locked(ctx, cfl, false))
> >>+			goto retry;
> >>+		locks_copy_conflock(fl, cfl);
> >>+		goto out;
> >>  	}
> >>  	fl->fl_type = F_UNLCK;
> >>  out:
> >>@@ -1108,6 +1142,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
> >>  	percpu_down_read(&file_rwsem);
> >>  	spin_lock(&ctx->flc_lock);
> >>+retry:
> >>  	/*
> >>  	 * New lock request. Walk all POSIX locks and look for conflicts. If
> >>  	 * there are any, either return error or put the request on the
> >>@@ -1117,6 +1152,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
> >>  		list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
> >>  			if (!posix_locks_conflict(request, fl))
> >>  				continue;
> >>+			if (resolve_lock_conflict_locked(ctx, fl, true))
> >>+				goto retry;
> >>  			if (conflock)
> >>  				locks_copy_conflock(conflock, fl);
> >>  			error = -EAGAIN;
> >>diff --git a/include/linux/fs.h b/include/linux/fs.h
> >>index b8ed7f974fb4..aa6c1bbdb8c4 100644
> >>--- a/include/linux/fs.h
> >>+++ b/include/linux/fs.h
> >>@@ -1029,6 +1029,7 @@ struct file_lock_operations {
> >>  };
> >>  struct lock_manager_operations {
> >>+	void *lm_mod_owner;
> >>  	fl_owner_t (*lm_get_owner)(fl_owner_t);
> >>  	void (*lm_put_owner)(fl_owner_t);
> >>  	void (*lm_notify)(struct file_lock *);	/* unblock callback */
> >>@@ -1037,6 +1038,8 @@ struct lock_manager_operations {
> >>  	int (*lm_change)(struct file_lock *, int, struct list_head *);
> >>  	void (*lm_setup)(struct file_lock *, void **);
> >>  	bool (*lm_breaker_owns_lease)(struct file_lock *);
> >>+	bool (*lm_lock_expirable)(struct file_lock *cfl);
> >>+	void (*lm_expire_lock)(void);
> >>  };
> >>  struct lock_manager {
> >>-- 
> >>2.9.5
J. Bruce Fields April 30, 2022, 1:18 a.m. UTC | #4
On Fri, Apr 29, 2022 at 03:58:19PM -0400, J. Bruce Fields wrote:
> On Fri, Apr 29, 2022 at 10:24:11AM -0700, dai.ngo@oracle.com wrote:
> > 
> > On 4/29/22 8:16 AM, J. Bruce Fields wrote:
> > >On Thu, Apr 28, 2022 at 12:06:33AM -0700, Dai Ngo wrote:
> > >>Add 2 new callbacks, lm_lock_expirable and lm_expire_lock, to
> > >>lock_manager_operations to allow the lock manager to take appropriate
> > >>action to resolve the lock conflict if possible.
> > >>
> > >>A new field, lm_mod_owner, is also added to lock_manager_operations.
> > >>The lm_mod_owner is used by the fs/lock code to make sure the lock
> > >>manager module such as nfsd, is not freed while lock conflict is being
> > >>resolved.
> > >>
> > >>lm_lock_expirable checks and returns true to indicate that the lock
> > >>conflict can be resolved else return false. This callback must be
> > >>called with the flc_lock held so it can not block.
> > >>
> > >>lm_expire_lock is called to resolve the lock conflict if the returned
> > >>value from lm_lock_expirable is true. This callback is called without
> > >>the flc_lock held since it's allowed to block. Upon returning from
> > >>this callback, the lock conflict should be resolved and the caller is
> > >>expected to restart the conflict check from the beginnning of the list.
> > >>
> > >>Lock manager, such as NFSv4 courteous server, uses this callback to
> > >>resolve conflict by destroying lock owner, or the NFSv4 courtesy client
> > >>(client that has expired but allowed to maintains its states) that owns
> > >>the lock.
> > >>
> > >>Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
> > >>---
> > >>  Documentation/filesystems/locking.rst |  4 ++++
> > >>  fs/locks.c                            | 45 +++++++++++++++++++++++++++++++----
> > >>  include/linux/fs.h                    |  3 +++
> > >>  3 files changed, 48 insertions(+), 4 deletions(-)
> > >>
> > >>diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
> > >>index c26d854275a0..0997a258361a 100644
> > >>--- a/Documentation/filesystems/locking.rst
> > >>+++ b/Documentation/filesystems/locking.rst
> > >>@@ -428,6 +428,8 @@ prototypes::
> > >>  	void (*lm_break)(struct file_lock *); /* break_lease callback */
> > >>  	int (*lm_change)(struct file_lock **, int);
> > >>  	bool (*lm_breaker_owns_lease)(struct file_lock *);
> > >>+        bool (*lm_lock_expirable)(struct file_lock *);
> > >>+        void (*lm_expire_lock)(void);
> > >>  locking rules:
> > >>@@ -439,6 +441,8 @@ lm_grant:		no		no			no
> > >>  lm_break:		yes		no			no
> > >>  lm_change		yes		no			no
> > >>  lm_breaker_owns_lease:	yes     	no			no
> > >>+lm_lock_expirable	yes		no			no
> > >>+lm_expire_lock		no		no			yes
> > >>  ======================	=============	=================	=========
> > >>  buffer_head
> > >>diff --git a/fs/locks.c b/fs/locks.c
> > >>index c369841ef7d1..d48c3f455657 100644
> > >>--- a/fs/locks.c
> > >>+++ b/fs/locks.c
> > >>@@ -896,6 +896,37 @@ static bool flock_locks_conflict(struct file_lock *caller_fl,
> > >>  	return locks_conflict(caller_fl, sys_fl);
> > >>  }
> > >>+static bool
> > >>+resolve_lock_conflict_locked(struct file_lock_context *ctx,
> > >>+			struct file_lock *cfl, bool rwsem)
> > >>+{
> > >>+	void *owner;
> > >>+	bool ret;
> > >>+	void (*func)(void);
> > >>+
> > >>+	if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable &&
> > >>+				cfl->fl_lmops->lm_expire_lock) {
> > >>+		ret = (*cfl->fl_lmops->lm_lock_expirable)(cfl);
> > >>+		if (!ret)
> > >>+			return false;
> > >>+		owner = cfl->fl_lmops->lm_mod_owner;
> > >>+		if (!owner)
> > >>+			return false;
> > >>+		func = cfl->fl_lmops->lm_expire_lock;
> > >>+		__module_get(owner);
> > >>+		if (rwsem)
> > >>+			percpu_up_read(&file_rwsem);
> > >>+		spin_unlock(&ctx->flc_lock);
> > >Dropping and reacquiring locks inside a function like this makes me
> > >nervous.  It means it's not obvious in the caller that the lock isn't
> > >held throughout.
> > >
> > >I know it's more verbose, but let's just open-code this logic in the
> > >callers.
> > 
> > fix in v24.
> > 
> > >
> > >(And, thanks for catching the test_lock case, I'd forgotten it.)
> > >
> > >Also: do we *really* need to drop the file_rwsem?  Were you seeing it
> > >that cause problems?  The only possible conflict is with someone trying
> > >to read /proc/locks, and I'm surprised that it'd be a problem to let
> > >them wait here.
> > 
> > Yes, apparently file_rwsem is used when the laundromat expires the
> > COURTESY client client and causes deadlock.
> 
> It's taken, but only for read.  I'm rather surprised that would cause a
> deadlock.  Do you have any kind of trace showing what happened?
> 
> Oh well, it's not a big deal to just open code this and set the "retry:"
> before both lock acquisitions, that's probably best in fact.  I'm just
> curious.

I remember running across this:

	https://lore.kernel.org/linux-nfs/20210927201433.GA1704@fieldses.org/

though that didn't involve the laundromat.  Were you seeing an actual
deadlock with these new patches?  Or a lockdep warning like that one?

--b.
Dai Ngo April 30, 2022, 10:54 p.m. UTC | #5
On 4/29/22 6:18 PM, J. Bruce Fields wrote:
> On Fri, Apr 29, 2022 at 03:58:19PM -0400, J. Bruce Fields wrote:
>> On Fri, Apr 29, 2022 at 10:24:11AM -0700, dai.ngo@oracle.com wrote:
>>> On 4/29/22 8:16 AM, J. Bruce Fields wrote:
>>>> On Thu, Apr 28, 2022 at 12:06:33AM -0700, Dai Ngo wrote:
>>>>> Add 2 new callbacks, lm_lock_expirable and lm_expire_lock, to
>>>>> lock_manager_operations to allow the lock manager to take appropriate
>>>>> action to resolve the lock conflict if possible.
>>>>>
>>>>> A new field, lm_mod_owner, is also added to lock_manager_operations.
>>>>> The lm_mod_owner is used by the fs/lock code to make sure the lock
>>>>> manager module such as nfsd, is not freed while lock conflict is being
>>>>> resolved.
>>>>>
>>>>> lm_lock_expirable checks and returns true to indicate that the lock
>>>>> conflict can be resolved else return false. This callback must be
>>>>> called with the flc_lock held so it can not block.
>>>>>
>>>>> lm_expire_lock is called to resolve the lock conflict if the returned
>>>>> value from lm_lock_expirable is true. This callback is called without
>>>>> the flc_lock held since it's allowed to block. Upon returning from
>>>>> this callback, the lock conflict should be resolved and the caller is
>>>>> expected to restart the conflict check from the beginnning of the list.
>>>>>
>>>>> Lock manager, such as NFSv4 courteous server, uses this callback to
>>>>> resolve conflict by destroying lock owner, or the NFSv4 courtesy client
>>>>> (client that has expired but allowed to maintains its states) that owns
>>>>> the lock.
>>>>>
>>>>> Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
>>>>> ---
>>>>>   Documentation/filesystems/locking.rst |  4 ++++
>>>>>   fs/locks.c                            | 45 +++++++++++++++++++++++++++++++----
>>>>>   include/linux/fs.h                    |  3 +++
>>>>>   3 files changed, 48 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
>>>>> index c26d854275a0..0997a258361a 100644
>>>>> --- a/Documentation/filesystems/locking.rst
>>>>> +++ b/Documentation/filesystems/locking.rst
>>>>> @@ -428,6 +428,8 @@ prototypes::
>>>>>   	void (*lm_break)(struct file_lock *); /* break_lease callback */
>>>>>   	int (*lm_change)(struct file_lock **, int);
>>>>>   	bool (*lm_breaker_owns_lease)(struct file_lock *);
>>>>> +        bool (*lm_lock_expirable)(struct file_lock *);
>>>>> +        void (*lm_expire_lock)(void);
>>>>>   locking rules:
>>>>> @@ -439,6 +441,8 @@ lm_grant:		no		no			no
>>>>>   lm_break:		yes		no			no
>>>>>   lm_change		yes		no			no
>>>>>   lm_breaker_owns_lease:	yes     	no			no
>>>>> +lm_lock_expirable	yes		no			no
>>>>> +lm_expire_lock		no		no			yes
>>>>>   ======================	=============	=================	=========
>>>>>   buffer_head
>>>>> diff --git a/fs/locks.c b/fs/locks.c
>>>>> index c369841ef7d1..d48c3f455657 100644
>>>>> --- a/fs/locks.c
>>>>> +++ b/fs/locks.c
>>>>> @@ -896,6 +896,37 @@ static bool flock_locks_conflict(struct file_lock *caller_fl,
>>>>>   	return locks_conflict(caller_fl, sys_fl);
>>>>>   }
>>>>> +static bool
>>>>> +resolve_lock_conflict_locked(struct file_lock_context *ctx,
>>>>> +			struct file_lock *cfl, bool rwsem)
>>>>> +{
>>>>> +	void *owner;
>>>>> +	bool ret;
>>>>> +	void (*func)(void);
>>>>> +
>>>>> +	if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable &&
>>>>> +				cfl->fl_lmops->lm_expire_lock) {
>>>>> +		ret = (*cfl->fl_lmops->lm_lock_expirable)(cfl);
>>>>> +		if (!ret)
>>>>> +			return false;
>>>>> +		owner = cfl->fl_lmops->lm_mod_owner;
>>>>> +		if (!owner)
>>>>> +			return false;
>>>>> +		func = cfl->fl_lmops->lm_expire_lock;
>>>>> +		__module_get(owner);
>>>>> +		if (rwsem)
>>>>> +			percpu_up_read(&file_rwsem);
>>>>> +		spin_unlock(&ctx->flc_lock);
>>>> Dropping and reacquiring locks inside a function like this makes me
>>>> nervous.  It means it's not obvious in the caller that the lock isn't
>>>> held throughout.
>>>>
>>>> I know it's more verbose, but let's just open-code this logic in the
>>>> callers.
>>> fix in v24.
>>>
>>>> (And, thanks for catching the test_lock case, I'd forgotten it.)
>>>>
>>>> Also: do we *really* need to drop the file_rwsem?  Were you seeing it
>>>> that cause problems?  The only possible conflict is with someone trying
>>>> to read /proc/locks, and I'm surprised that it'd be a problem to let
>>>> them wait here.
>>> Yes, apparently file_rwsem is used when the laundromat expires the
>>> COURTESY client client and causes deadlock.
>> It's taken, but only for read.  I'm rather surprised that would cause a
>> deadlock.  Do you have any kind of trace showing what happened?
>>
>> Oh well, it's not a big deal to just open code this and set the "retry:"
>> before both lock acquisitions, that's probably best in fact.  I'm just
>> curious.
> I remember running across this:
>
> 	https://lore.kernel.org/linux-nfs/20210927201433.GA1704@fieldses.org/
>
> though that didn't involve the laundromat.  Were you seeing an actual
> deadlock with these new patches?  Or a lockdep warning like that one?

Here is the stack traces of the deadlock with the latest patches that
do not release file_rwsem before calling flush_workqueue:

Apr 30 15:12:15 nfsvmf24 kernel:
Apr 30 15:12:15 nfsvmf24 kernel: ======================================================
Apr 30 15:12:15 nfsvmf24 kernel: WARNING: possible circular locking dependency detected
Apr 30 15:12:15 nfsvmf24 kernel: 5.18.0-rc4_bf1+ #1 Not tainted
Apr 30 15:12:15 nfsvmf24 kernel: ------------------------------------------------------
Apr 30 15:12:15 nfsvmf24 kernel: kworker/u2:6/9099 is trying to acquire lock:
Apr 30 15:12:15 nfsvmf24 kernel: ffffffff991a8a50 (file_rwsem){.+.+}-{0:0}, at: locks_remove_posix+0x1af/0x3b0
Apr 30 15:12:15 nfsvmf24 kernel: #012but task is already holding lock:
Apr 30 15:12:15 nfsvmf24 kernel: ffff888115e37de0 ((work_completion)(&(&nn->laundromat_work)->work)){+.+.}-{0:0}, at: process_one_work+0x72d/0x12f0
Apr 30 15:12:15 nfsvmf24 kernel: #012which lock already depends on the new lock.
Apr 30 15:12:15 nfsvmf24 kernel: #012the existing dependency chain (in reverse order) is:
Apr 30 15:12:15 nfsvmf24 kernel: #012-> #2 ((work_completion)(&(&nn->laundromat_work)->work)){+.+.}-{0:0}:
Apr 30 15:12:15 nfsvmf24 kernel:       process_one_work+0x77f/0x12f0
Apr 30 15:12:15 nfsvmf24 kernel:       worker_thread+0x55d/0xe80
Apr 30 15:12:15 nfsvmf24 kernel:       kthread+0x29f/0x340
Apr 30 15:12:15 nfsvmf24 kernel:       ret_from_fork+0x22/0x30
Apr 30 15:12:15 nfsvmf24 kernel: #012-> #1 ((wq_completion)nfsd4){+.+.}-{0:0}:
Apr 30 15:12:15 nfsvmf24 kernel:       flush_workqueue+0xf2/0x1350
Apr 30 15:12:15 nfsvmf24 kernel:       posix_lock_inode+0x13b5/0x15e0
Apr 30 15:12:15 nfsvmf24 kernel:       nfsd4_lock+0xf28/0x3de0 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel:       nfsd4_proc_compound+0xd15/0x25a0 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel:       nfsd_dispatch+0x4ed/0xc30 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel:       svc_process_common+0xd8e/0x1b20 [sunrpc]
Apr 30 15:12:15 nfsvmf24 kernel:       svc_process+0x361/0x4f0 [sunrpc]
Apr 30 15:12:15 nfsvmf24 kernel:       nfsd+0x2d6/0x570 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel:       kthread+0x29f/0x340
Apr 30 15:12:15 nfsvmf24 kernel:       ret_from_fork+0x22/0x30
Apr 30 15:12:15 nfsvmf24 kernel: #012-> #0 (file_rwsem){.+.+}-{0:0}:
Apr 30 15:12:15 nfsvmf24 kernel:       __lock_acquire+0x318d/0x7830
Apr 30 15:12:15 nfsvmf24 kernel:       lock_acquire+0x1b0/0x490
Apr 30 15:12:15 nfsvmf24 kernel:       posix_lock_inode+0x136/0x15e0
Apr 30 15:12:15 nfsvmf24 kernel:       locks_remove_posix+0x1af/0x3b0
Apr 30 15:12:15 nfsvmf24 kernel:       filp_close+0xe7/0x120
Apr 30 15:12:15 nfsvmf24 kernel:       nfs4_free_lock_stateid+0xc0/0x100 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel:       free_ol_stateid_reaplist+0x131/0x210 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel:       release_openowner+0xf7/0x160 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel:       __destroy_client+0x3cc/0x740 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel:       laundromat_main+0x483/0x1cd0 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel:       process_one_work+0x7f6/0x12f0
Apr 30 15:12:15 nfsvmf24 kernel:       worker_thread+0x55d/0xe80
Apr 30 15:12:15 nfsvmf24 kernel:       kthread+0x29f/0x340
Apr 30 15:12:15 nfsvmf24 kernel:       ret_from_fork+0x22/0x30
Apr 30 15:12:15 nfsvmf24 kernel: #012other info that might help us debug this:
Apr 30 15:12:15 nfsvmf24 kernel: Chain exists of:#012  file_rwsem --> (wq_completion)nfsd4 --> (work_completion)(&(&nn->laundromat_work)->work)
Apr 30 15:12:15 nfsvmf24 kernel: Possible unsafe locking scenario:
Apr 30 15:12:15 nfsvmf24 kernel:       CPU0                    CPU1
Apr 30 15:12:15 nfsvmf24 kernel:       ----                    ----
Apr 30 15:12:15 nfsvmf24 kernel:  lock((work_completion)(&(&nn->laundromat_work)->work));
Apr 30 15:12:15 nfsvmf24 kernel:                               lock((wq_completion)nfsd4);
Apr 30 15:12:15 nfsvmf24 kernel:                               lock((work_completion)(&(&nn->laundromat_work)->work));
Apr 30 15:12:15 nfsvmf24 kernel:  lock(file_rwsem);
Apr 30 15:12:15 nfsvmf24 kernel: #012 *** DEADLOCK ***
Apr 30 15:12:15 nfsvmf24 kernel: 2 locks held by kworker/u2:6/9099:
Apr 30 15:12:15 nfsvmf24 kernel: #0: ffff888108cc6938 ((wq_completion)nfsd4){+.+.}-{0:0}, at: process_one_work+0x6ff/0x12f0
Apr 30 15:12:15 nfsvmf24 kernel: #1: ffff888115e37de0 ((work_completion)(&(&nn->laundromat_work)->work)){+.+.}-{0:0}, at: process_one_work+0x72d/0x12f0
Apr 30 15:12:15 nfsvmf24 kernel: #012stack backtrace:
Apr 30 15:12:15 nfsvmf24 kernel: CPU: 0 PID: 9099 Comm: kworker/u2:6 Kdump: loaded Not tainted 5.18.0-rc4_bf1+ #1
Apr 30 15:12:15 nfsvmf24 kernel: Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
Apr 30 15:12:15 nfsvmf24 kernel: Workqueue: nfsd4 laundromat_main [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel: Call Trace:
Apr 30 15:12:15 nfsvmf24 kernel: <TASK>
Apr 30 15:12:15 nfsvmf24 kernel: dump_stack_lvl+0x57/0x7d
Apr 30 15:12:15 nfsvmf24 kernel: check_noncircular+0x262/0x300
Apr 30 15:12:15 nfsvmf24 kernel: __lock_acquire+0x318d/0x7830
Apr 30 15:12:15 nfsvmf24 kernel: lock_acquire+0x1b0/0x490
Apr 30 15:12:15 nfsvmf24 kernel: posix_lock_inode+0x136/0x15e0
Apr 30 15:12:15 nfsvmf24 kernel: locks_remove_posix+0x1af/0x3b0
Apr 30 15:12:15 nfsvmf24 kernel: filp_close+0xe7/0x120
Apr 30 15:12:15 nfsvmf24 kernel: nfs4_free_lock_stateid+0xc0/0x100 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel: free_ol_stateid_reaplist+0x131/0x210 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel: release_openowner+0xf7/0x160 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel: __destroy_client+0x3cc/0x740 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel: laundromat_main+0x483/0x1cd0 [nfsd]
Apr 30 15:12:15 nfsvmf24 kernel: process_one_work+0x7f6/0x12f0
Apr 30 15:12:15 nfsvmf24 kernel: worker_thread+0x55d/0xe80
Apr 30 15:12:15 nfsvmf24 kernel: kthread+0x29f/0x340
Apr 30 15:12:15 nfsvmf24 kernel: ret_from_fork+0x22/0x30
Apr 30 15:12:15 nfsvmf24 kernel: </TASK>


I think the problem is lock ordering of file_rwsem and a lock in the
work queue (not sure which one). The posix_lock_inode thread acquires
file_rwsem and the lock in the work queue. The laundromat thread
acquires the lock in the work queue and then tries to acquire
file_rwsem in locks_remove_posix.

-Dai

>
> --b.
diff mbox series

Patch

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index c26d854275a0..0997a258361a 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -428,6 +428,8 @@  prototypes::
 	void (*lm_break)(struct file_lock *); /* break_lease callback */
 	int (*lm_change)(struct file_lock **, int);
 	bool (*lm_breaker_owns_lease)(struct file_lock *);
+	bool (*lm_lock_expirable)(struct file_lock *);
+	void (*lm_expire_lock)(void);
 
 locking rules:
 
@@ -439,6 +441,8 @@  lm_grant:		no		no			no
 lm_break:		yes		no			no
 lm_change		yes		no			no
 lm_breaker_owns_lease:	yes     	no			no
+lm_lock_expirable	yes		no			no
+lm_expire_lock		no		no			yes
 ======================	=============	=================	=========
 
 buffer_head
diff --git a/fs/locks.c b/fs/locks.c
index c369841ef7d1..d48c3f455657 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -896,6 +896,37 @@  static bool flock_locks_conflict(struct file_lock *caller_fl,
 	return locks_conflict(caller_fl, sys_fl);
 }
 
+static bool
+resolve_lock_conflict_locked(struct file_lock_context *ctx,
+			struct file_lock *cfl, bool rwsem)
+{
+	void *owner;
+	bool ret;
+	void (*func)(void);
+
+	if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable &&
+				cfl->fl_lmops->lm_expire_lock) {
+		ret = (*cfl->fl_lmops->lm_lock_expirable)(cfl);
+		if (!ret)
+			return false;
+		owner = cfl->fl_lmops->lm_mod_owner;
+		if (!owner)
+			return false;
+		func = cfl->fl_lmops->lm_expire_lock;
+		__module_get(owner);
+		if (rwsem)
+			percpu_up_read(&file_rwsem);
+		spin_unlock(&ctx->flc_lock);
+		(*func)();
+		module_put(owner);
+		if (rwsem)
+			percpu_down_read(&file_rwsem);
+		spin_lock(&ctx->flc_lock);
+		return true;
+	}
+	return false;
+}
+
 void
 posix_test_lock(struct file *filp, struct file_lock *fl)
 {
@@ -910,11 +941,14 @@  posix_test_lock(struct file *filp, struct file_lock *fl)
 	}
 
 	spin_lock(&ctx->flc_lock);
+retry:
 	list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
-		if (posix_locks_conflict(fl, cfl)) {
-			locks_copy_conflock(fl, cfl);
-			goto out;
-		}
+		if (!posix_locks_conflict(fl, cfl))
+			continue;
+		if (resolve_lock_conflict_locked(ctx, cfl, false))
+			goto retry;
+		locks_copy_conflock(fl, cfl);
+		goto out;
 	}
 	fl->fl_type = F_UNLCK;
 out:
@@ -1108,6 +1142,7 @@  static int posix_lock_inode(struct inode *inode, struct file_lock *request,
 
 	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
+retry:
 	/*
 	 * New lock request. Walk all POSIX locks and look for conflicts. If
 	 * there are any, either return error or put the request on the
@@ -1117,6 +1152,8 @@  static int posix_lock_inode(struct inode *inode, struct file_lock *request,
 		list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
 			if (!posix_locks_conflict(request, fl))
 				continue;
+			if (resolve_lock_conflict_locked(ctx, fl, true))
+				goto retry;
 			if (conflock)
 				locks_copy_conflock(conflock, fl);
 			error = -EAGAIN;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b8ed7f974fb4..aa6c1bbdb8c4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1029,6 +1029,7 @@  struct file_lock_operations {
 };
 
 struct lock_manager_operations {
+	void *lm_mod_owner;
 	fl_owner_t (*lm_get_owner)(fl_owner_t);
 	void (*lm_put_owner)(fl_owner_t);
 	void (*lm_notify)(struct file_lock *);	/* unblock callback */
@@ -1037,6 +1038,8 @@  struct lock_manager_operations {
 	int (*lm_change)(struct file_lock *, int, struct list_head *);
 	void (*lm_setup)(struct file_lock *, void **);
 	bool (*lm_breaker_owns_lease)(struct file_lock *);
+	bool (*lm_lock_expirable)(struct file_lock *cfl);
+	void (*lm_expire_lock)(void);
 };
 
 struct lock_manager {