diff mbox series

[v1,net-next,01/13] fs/lock: Revive LOCK_MAND.

Message ID 20220826000445.46552-2-kuniyu@amazon.com (mailing list archive)
State New, archived
Headers show
Series tcp/udp: Introduce optional per-netns hash table. | expand

Commit Message

Kuniyuki Iwashima Aug. 26, 2022, 12:04 a.m. UTC
The commit 90f7d7a0d0d6 ("locks: remove LOCK_MAND flock lock support")
removed LOCK_MAND support from the kernel because nothing checked the
flag, nor was there no use case.  This patch revives LOCK_MAND to
introduce a mandatory lock for read/write on /proc/sys.  Currently, it's
the only use case, so we added two changes while reverting the commit.

First, we used to allow any f_mode for LOCK_MAND, but now we don't get
it back.  Instead, we enforce being FMODE_READ|FMODE_WRITE as LOCK_SH
and LOCK_EX.

Second, when f_ops->flock() was called with LOCK_MAND, each function
returned -EOPNOTSUPP.  The following patch does not use f_ops->flock(),
so we put the validation before calling f_ops->flock().

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 fs/locks.c                       | 57 ++++++++++++++++++++------------
 include/uapi/asm-generic/fcntl.h |  5 ---
 2 files changed, 35 insertions(+), 27 deletions(-)

Comments

Jeff Layton Aug. 26, 2022, 10:02 a.m. UTC | #1
On Thu, 2022-08-25 at 17:04 -0700, Kuniyuki Iwashima wrote:
> The commit 90f7d7a0d0d6 ("locks: remove LOCK_MAND flock lock support")
> removed LOCK_MAND support from the kernel because nothing checked the
> flag, nor was there no use case.  This patch revives LOCK_MAND to
> introduce a mandatory lock for read/write on /proc/sys.  Currently, it's
> the only use case, so we added two changes while reverting the commit.
> 
> First, we used to allow any f_mode for LOCK_MAND, but now we don't get
> it back.  Instead, we enforce being FMODE_READ|FMODE_WRITE as LOCK_SH
> and LOCK_EX.
> 
> Second, when f_ops->flock() was called with LOCK_MAND, each function
> returned -EOPNOTSUPP.  The following patch does not use f_ops->flock(),
> so we put the validation before calling f_ops->flock().
> 
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> ---
>  fs/locks.c                       | 57 ++++++++++++++++++++------------
>  include/uapi/asm-generic/fcntl.h |  5 ---
>  2 files changed, 35 insertions(+), 27 deletions(-)
> 
> diff --git a/fs/locks.c b/fs/locks.c
> index c266cfdc3291..03ff10a3165e 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -421,6 +421,10 @@ static inline int flock_translate_cmd(int cmd) {
>  	case LOCK_UN:
>  		return F_UNLCK;
>  	}
> +
> +	if (cmd & LOCK_MAND)
> +		return cmd & (LOCK_MAND | LOCK_RW);
> +
>  	return -EINVAL;
>  }
>  
> @@ -879,6 +883,10 @@ static bool flock_locks_conflict(struct file_lock *caller_fl,
>  	if (caller_fl->fl_file == sys_fl->fl_file)
>  		return false;
>  
> +	if (caller_fl->fl_type & LOCK_MAND ||
> +	    sys_fl->fl_type & LOCK_MAND)
> +		return true;
> +
>  	return locks_conflict(caller_fl, sys_fl);
>  }
>  
> @@ -2077,9 +2085,7 @@ EXPORT_SYMBOL(locks_lock_inode_wait);
>   *	- %LOCK_SH -- a shared lock.
>   *	- %LOCK_EX -- an exclusive lock.
>   *	- %LOCK_UN -- remove an existing lock.
> - *	- %LOCK_MAND -- a 'mandatory' flock. (DEPRECATED)
> - *
> - *	%LOCK_MAND support has been removed from the kernel.
> + *	- %LOCK_MAND -- a 'mandatory' flock. (only supported on /proc/sys/)
>   */
>  SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
>  {
> @@ -2087,19 +2093,6 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
>  	struct file_lock fl;
>  	struct fd f;
>  
> -	/*
> -	 * LOCK_MAND locks were broken for a long time in that they never
> -	 * conflicted with one another and didn't prevent any sort of open,
> -	 * read or write activity.
> -	 *
> -	 * Just ignore these requests now, to preserve legacy behavior, but
> -	 * throw a warning to let people know that they don't actually work.
> -	 */
> -	if (cmd & LOCK_MAND) {
> -		pr_warn_once("Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n");
> -		return 0;
> -	}
> -
>  	type = flock_translate_cmd(cmd & ~LOCK_NB);
>  	if (type < 0)
>  		return type;
> @@ -2109,6 +2102,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
>  	if (!f.file)
>  		return error;
>  
> +	/* LOCK_MAND supports only read/write on proc_sysctl for now */
>  	if (type != F_UNLCK && !(f.file->f_mode & (FMODE_READ | FMODE_WRITE)))
>  		goto out_putf;
>  
> @@ -2122,12 +2116,18 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
>  	if (can_sleep)
>  		fl.fl_flags |= FL_SLEEP;
>  
> -	if (f.file->f_op->flock)
> +	if (f.file->f_op->flock) {
> +		if (cmd & LOCK_MAND) {
> +			error = -EOPNOTSUPP;
> +			goto out_putf;
> +		}
> +
>  		error = f.file->f_op->flock(f.file,
>  					    (can_sleep) ? F_SETLKW : F_SETLK,
>  					    &fl);
> -	else
> +	} else {
>  		error = locks_lock_file_wait(f.file, &fl);
> +	}
>  
>   out_putf:
>  	fdput(f);
> @@ -2711,7 +2711,11 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
>  		seq_printf(f, " %s ",
>  			     (inode == NULL) ? "*NOINODE*" : "ADVISORY ");
>  	} else if (IS_FLOCK(fl)) {
> -		seq_puts(f, "FLOCK  ADVISORY  ");
> +		if (fl->fl_type & LOCK_MAND) {
> +			seq_puts(f, "FLOCK  MANDATORY ");
> +		} else {
> +			seq_puts(f, "FLOCK  ADVISORY  ");
> +		}
>  	} else if (IS_LEASE(fl)) {
>  		if (fl->fl_flags & FL_DELEG)
>  			seq_puts(f, "DELEG  ");
> @@ -2727,10 +2731,19 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
>  	} else {
>  		seq_puts(f, "UNKNOWN UNKNOWN  ");
>  	}
> -	type = IS_LEASE(fl) ? target_leasetype(fl) : fl->fl_type;
>  
> -	seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" :
> -			     (type == F_RDLCK) ? "READ" : "UNLCK");
> +	if (fl->fl_type & LOCK_MAND) {
> +		seq_printf(f, "%s ",
> +			   (fl->fl_type & LOCK_READ)
> +			   ? (fl->fl_type & LOCK_WRITE) ? "RW   " : "READ "
> +			   : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE ");
> +	} else {
> +		type = IS_LEASE(fl) ? target_leasetype(fl) : fl->fl_type;
> +
> +		seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" :
> +			   (type == F_RDLCK) ? "READ" : "UNLCK");
> +	}
> +
>  	if (inode) {
>  		/* userspace relies on this representation of dev_t */
>  		seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
> diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
> index 1ecdb911add8..94fb8c6fd543 100644
> --- a/include/uapi/asm-generic/fcntl.h
> +++ b/include/uapi/asm-generic/fcntl.h
> @@ -180,11 +180,6 @@ struct f_owner_ex {
>  #define LOCK_NB		4	/* or'd with one of the above to prevent
>  				   blocking */
>  #define LOCK_UN		8	/* remove lock */
> -
> -/*
> - * LOCK_MAND support has been removed from the kernel. We leave the symbols
> - * here to not break legacy builds, but these should not be used in new code.
> - */
>  #define LOCK_MAND	32	/* This is a mandatory flock ... */
>  #define LOCK_READ	64	/* which allows concurrent read operations */
>  #define LOCK_WRITE	128	/* which allows concurrent write operations */

NACK.

This may break legacy userland code that sets LOCK_MAND on flock calls
(e.g. old versions of samba).

If you want to add a new mechanism that does something similar with a
new flag, then that may be possible, but please don't overload old flags
that could still be used in the field with new meanings.

If you do decide to use flock for this functionality (and I'm not sure
this is a good idea), then I'd also like to see a clear description of
the semantics this provides.
Kuniyuki Iwashima Aug. 26, 2022, 4:48 p.m. UTC | #2
From:   Jeff Layton <jlayton@kernel.org>
Date:   Fri, 26 Aug 2022 06:02:44 -0400
> On Thu, 2022-08-25 at 17:04 -0700, Kuniyuki Iwashima wrote:
> > The commit 90f7d7a0d0d6 ("locks: remove LOCK_MAND flock lock support")
> > removed LOCK_MAND support from the kernel because nothing checked the
> > flag, nor was there no use case.  This patch revives LOCK_MAND to
> > introduce a mandatory lock for read/write on /proc/sys.  Currently, it's
> > the only use case, so we added two changes while reverting the commit.
> > 
> > First, we used to allow any f_mode for LOCK_MAND, but now we don't get
> > it back.  Instead, we enforce being FMODE_READ|FMODE_WRITE as LOCK_SH
> > and LOCK_EX.
> > 
> > Second, when f_ops->flock() was called with LOCK_MAND, each function
> > returned -EOPNOTSUPP.  The following patch does not use f_ops->flock(),
> > so we put the validation before calling f_ops->flock().
> > 
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> > ---
> >  fs/locks.c                       | 57 ++++++++++++++++++++------------
> >  include/uapi/asm-generic/fcntl.h |  5 ---
> >  2 files changed, 35 insertions(+), 27 deletions(-)
> > 
> > diff --git a/fs/locks.c b/fs/locks.c
> > index c266cfdc3291..03ff10a3165e 100644
> > --- a/fs/locks.c
> > +++ b/fs/locks.c
> > @@ -421,6 +421,10 @@ static inline int flock_translate_cmd(int cmd) {
> >  	case LOCK_UN:
> >  		return F_UNLCK;
> >  	}
> > +
> > +	if (cmd & LOCK_MAND)
> > +		return cmd & (LOCK_MAND | LOCK_RW);
> > +
> >  	return -EINVAL;
> >  }
> >  
> > @@ -879,6 +883,10 @@ static bool flock_locks_conflict(struct file_lock *caller_fl,
> >  	if (caller_fl->fl_file == sys_fl->fl_file)
> >  		return false;
> >  
> > +	if (caller_fl->fl_type & LOCK_MAND ||
> > +	    sys_fl->fl_type & LOCK_MAND)
> > +		return true;
> > +
> >  	return locks_conflict(caller_fl, sys_fl);
> >  }
> >  
> > @@ -2077,9 +2085,7 @@ EXPORT_SYMBOL(locks_lock_inode_wait);
> >   *	- %LOCK_SH -- a shared lock.
> >   *	- %LOCK_EX -- an exclusive lock.
> >   *	- %LOCK_UN -- remove an existing lock.
> > - *	- %LOCK_MAND -- a 'mandatory' flock. (DEPRECATED)
> > - *
> > - *	%LOCK_MAND support has been removed from the kernel.
> > + *	- %LOCK_MAND -- a 'mandatory' flock. (only supported on /proc/sys/)
> >   */
> >  SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
> >  {
> > @@ -2087,19 +2093,6 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
> >  	struct file_lock fl;
> >  	struct fd f;
> >  
> > -	/*
> > -	 * LOCK_MAND locks were broken for a long time in that they never
> > -	 * conflicted with one another and didn't prevent any sort of open,
> > -	 * read or write activity.
> > -	 *
> > -	 * Just ignore these requests now, to preserve legacy behavior, but
> > -	 * throw a warning to let people know that they don't actually work.
> > -	 */
> > -	if (cmd & LOCK_MAND) {
> > -		pr_warn_once("Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n");
> > -		return 0;
> > -	}
> > -
> >  	type = flock_translate_cmd(cmd & ~LOCK_NB);
> >  	if (type < 0)
> >  		return type;
> > @@ -2109,6 +2102,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
> >  	if (!f.file)
> >  		return error;
> >  
> > +	/* LOCK_MAND supports only read/write on proc_sysctl for now */
> >  	if (type != F_UNLCK && !(f.file->f_mode & (FMODE_READ | FMODE_WRITE)))
> >  		goto out_putf;
> >  
> > @@ -2122,12 +2116,18 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
> >  	if (can_sleep)
> >  		fl.fl_flags |= FL_SLEEP;
> >  
> > -	if (f.file->f_op->flock)
> > +	if (f.file->f_op->flock) {
> > +		if (cmd & LOCK_MAND) {
> > +			error = -EOPNOTSUPP;
> > +			goto out_putf;
> > +		}
> > +
> >  		error = f.file->f_op->flock(f.file,
> >  					    (can_sleep) ? F_SETLKW : F_SETLK,
> >  					    &fl);
> > -	else
> > +	} else {
> >  		error = locks_lock_file_wait(f.file, &fl);
> > +	}
> >  
> >   out_putf:
> >  	fdput(f);
> > @@ -2711,7 +2711,11 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
> >  		seq_printf(f, " %s ",
> >  			     (inode == NULL) ? "*NOINODE*" : "ADVISORY ");
> >  	} else if (IS_FLOCK(fl)) {
> > -		seq_puts(f, "FLOCK  ADVISORY  ");
> > +		if (fl->fl_type & LOCK_MAND) {
> > +			seq_puts(f, "FLOCK  MANDATORY ");
> > +		} else {
> > +			seq_puts(f, "FLOCK  ADVISORY  ");
> > +		}
> >  	} else if (IS_LEASE(fl)) {
> >  		if (fl->fl_flags & FL_DELEG)
> >  			seq_puts(f, "DELEG  ");
> > @@ -2727,10 +2731,19 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
> >  	} else {
> >  		seq_puts(f, "UNKNOWN UNKNOWN  ");
> >  	}
> > -	type = IS_LEASE(fl) ? target_leasetype(fl) : fl->fl_type;
> >  
> > -	seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" :
> > -			     (type == F_RDLCK) ? "READ" : "UNLCK");
> > +	if (fl->fl_type & LOCK_MAND) {
> > +		seq_printf(f, "%s ",
> > +			   (fl->fl_type & LOCK_READ)
> > +			   ? (fl->fl_type & LOCK_WRITE) ? "RW   " : "READ "
> > +			   : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE ");
> > +	} else {
> > +		type = IS_LEASE(fl) ? target_leasetype(fl) : fl->fl_type;
> > +
> > +		seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" :
> > +			   (type == F_RDLCK) ? "READ" : "UNLCK");
> > +	}
> > +
> >  	if (inode) {
> >  		/* userspace relies on this representation of dev_t */
> >  		seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
> > diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
> > index 1ecdb911add8..94fb8c6fd543 100644
> > --- a/include/uapi/asm-generic/fcntl.h
> > +++ b/include/uapi/asm-generic/fcntl.h
> > @@ -180,11 +180,6 @@ struct f_owner_ex {
> >  #define LOCK_NB		4	/* or'd with one of the above to prevent
> >  				   blocking */
> >  #define LOCK_UN		8	/* remove lock */
> > -
> > -/*
> > - * LOCK_MAND support has been removed from the kernel. We leave the symbols
> > - * here to not break legacy builds, but these should not be used in new code.
> > - */
> >  #define LOCK_MAND	32	/* This is a mandatory flock ... */
> >  #define LOCK_READ	64	/* which allows concurrent read operations */
> >  #define LOCK_WRITE	128	/* which allows concurrent write operations */
> 
> NACK.
> 
> This may break legacy userland code that sets LOCK_MAND on flock calls
> (e.g. old versions of samba).
> 
> If you want to add a new mechanism that does something similar with a
> new flag, then that may be possible, but please don't overload old flags
> that could still be used in the field with new meanings.

Exactly, that makes sense.
Thanks for feedback!


> If you do decide to use flock for this functionality (and I'm not sure
> this is a good idea),

Actually, the patch 1-2 were experimental to show all available options
(flock()'s latency vs unshare()'s memory cost), and I like unshare().
If both of them were unacceptable, I would have added clone() BPF hook.

But it seems unshare() works at least, I'll drop this part in the next
spin.

Thank you.


> then I'd also like to see a clear description of
> the semantics this provides.
> -- 
> Jeff Layton <jlayton@kernel.org>
diff mbox series

Patch

diff --git a/fs/locks.c b/fs/locks.c
index c266cfdc3291..03ff10a3165e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -421,6 +421,10 @@  static inline int flock_translate_cmd(int cmd) {
 	case LOCK_UN:
 		return F_UNLCK;
 	}
+
+	if (cmd & LOCK_MAND)
+		return cmd & (LOCK_MAND | LOCK_RW);
+
 	return -EINVAL;
 }
 
@@ -879,6 +883,10 @@  static bool flock_locks_conflict(struct file_lock *caller_fl,
 	if (caller_fl->fl_file == sys_fl->fl_file)
 		return false;
 
+	if (caller_fl->fl_type & LOCK_MAND ||
+	    sys_fl->fl_type & LOCK_MAND)
+		return true;
+
 	return locks_conflict(caller_fl, sys_fl);
 }
 
@@ -2077,9 +2085,7 @@  EXPORT_SYMBOL(locks_lock_inode_wait);
  *	- %LOCK_SH -- a shared lock.
  *	- %LOCK_EX -- an exclusive lock.
  *	- %LOCK_UN -- remove an existing lock.
- *	- %LOCK_MAND -- a 'mandatory' flock. (DEPRECATED)
- *
- *	%LOCK_MAND support has been removed from the kernel.
+ *	- %LOCK_MAND -- a 'mandatory' flock. (only supported on /proc/sys/)
  */
 SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
 {
@@ -2087,19 +2093,6 @@  SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
 	struct file_lock fl;
 	struct fd f;
 
-	/*
-	 * LOCK_MAND locks were broken for a long time in that they never
-	 * conflicted with one another and didn't prevent any sort of open,
-	 * read or write activity.
-	 *
-	 * Just ignore these requests now, to preserve legacy behavior, but
-	 * throw a warning to let people know that they don't actually work.
-	 */
-	if (cmd & LOCK_MAND) {
-		pr_warn_once("Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n");
-		return 0;
-	}
-
 	type = flock_translate_cmd(cmd & ~LOCK_NB);
 	if (type < 0)
 		return type;
@@ -2109,6 +2102,7 @@  SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
 	if (!f.file)
 		return error;
 
+	/* LOCK_MAND supports only read/write on proc_sysctl for now */
 	if (type != F_UNLCK && !(f.file->f_mode & (FMODE_READ | FMODE_WRITE)))
 		goto out_putf;
 
@@ -2122,12 +2116,18 @@  SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
 	if (can_sleep)
 		fl.fl_flags |= FL_SLEEP;
 
-	if (f.file->f_op->flock)
+	if (f.file->f_op->flock) {
+		if (cmd & LOCK_MAND) {
+			error = -EOPNOTSUPP;
+			goto out_putf;
+		}
+
 		error = f.file->f_op->flock(f.file,
 					    (can_sleep) ? F_SETLKW : F_SETLK,
 					    &fl);
-	else
+	} else {
 		error = locks_lock_file_wait(f.file, &fl);
+	}
 
  out_putf:
 	fdput(f);
@@ -2711,7 +2711,11 @@  static void lock_get_status(struct seq_file *f, struct file_lock *fl,
 		seq_printf(f, " %s ",
 			     (inode == NULL) ? "*NOINODE*" : "ADVISORY ");
 	} else if (IS_FLOCK(fl)) {
-		seq_puts(f, "FLOCK  ADVISORY  ");
+		if (fl->fl_type & LOCK_MAND) {
+			seq_puts(f, "FLOCK  MANDATORY ");
+		} else {
+			seq_puts(f, "FLOCK  ADVISORY  ");
+		}
 	} else if (IS_LEASE(fl)) {
 		if (fl->fl_flags & FL_DELEG)
 			seq_puts(f, "DELEG  ");
@@ -2727,10 +2731,19 @@  static void lock_get_status(struct seq_file *f, struct file_lock *fl,
 	} else {
 		seq_puts(f, "UNKNOWN UNKNOWN  ");
 	}
-	type = IS_LEASE(fl) ? target_leasetype(fl) : fl->fl_type;
 
-	seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" :
-			     (type == F_RDLCK) ? "READ" : "UNLCK");
+	if (fl->fl_type & LOCK_MAND) {
+		seq_printf(f, "%s ",
+			   (fl->fl_type & LOCK_READ)
+			   ? (fl->fl_type & LOCK_WRITE) ? "RW   " : "READ "
+			   : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE ");
+	} else {
+		type = IS_LEASE(fl) ? target_leasetype(fl) : fl->fl_type;
+
+		seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" :
+			   (type == F_RDLCK) ? "READ" : "UNLCK");
+	}
+
 	if (inode) {
 		/* userspace relies on this representation of dev_t */
 		seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 1ecdb911add8..94fb8c6fd543 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -180,11 +180,6 @@  struct f_owner_ex {
 #define LOCK_NB		4	/* or'd with one of the above to prevent
 				   blocking */
 #define LOCK_UN		8	/* remove lock */
-
-/*
- * LOCK_MAND support has been removed from the kernel. We leave the symbols
- * here to not break legacy builds, but these should not be used in new code.
- */
 #define LOCK_MAND	32	/* This is a mandatory flock ... */
 #define LOCK_READ	64	/* which allows concurrent read operations */
 #define LOCK_WRITE	128	/* which allows concurrent write operations */