diff mbox series

[v3,10/11] make __set_open_fd() set cloexec state as well

Message ID 20241007174358.396114-10-viro@zeniv.linux.org.uk (mailing list archive)
State New
Headers show
Series [v3,01/11] get rid of ...lookup...fdget_rcu() family | expand

Commit Message

Al Viro Oct. 7, 2024, 5:43 p.m. UTC
->close_on_exec[] state is maintained only for opened descriptors;
as the result, anything that marks a descriptor opened has to
set its cloexec state explicitly.

As the result, all calls of __set_open_fd() are followed by
__set_close_on_exec(); might as well fold it into __set_open_fd()
so that cloexec state is defined as soon as the descriptor is
marked opened.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

Comments

Steven Price Oct. 9, 2024, 10:50 a.m. UTC | #1
On 07/10/2024 18:43, Al Viro wrote:
> ->close_on_exec[] state is maintained only for opened descriptors;
> as the result, anything that marks a descriptor opened has to
> set its cloexec state explicitly.
> 
> As the result, all calls of __set_open_fd() are followed by
> __set_close_on_exec(); might as well fold it into __set_open_fd()
> so that cloexec state is defined as soon as the descriptor is
> marked opened.
> 
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> ---
>  fs/file.c | 9 ++++-----
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/file.c b/fs/file.c
> index d8fccd4796a9..b63294ed85ec 100644
> --- a/fs/file.c
> +++ b/fs/file.c
> @@ -248,12 +248,13 @@ static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt,
>  	}
>  }
>  
> -static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
> +static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt, bool set)
>  {
>  	__set_bit(fd, fdt->open_fds);
>  	fd /= BITS_PER_LONG;

Here fd is being modified...

>  	if (!~fdt->open_fds[fd])
>  		__set_bit(fd, fdt->full_fds_bits);
> +	__set_close_on_exec(fd, fdt, set);

... which means fd here isn't the same as the passed in value. So this
call to __set_close_on_exec affects a different fd to the expected one.

Steve

>  }
>  
>  static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
> @@ -517,8 +518,7 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
>  	if (start <= files->next_fd)
>  		files->next_fd = fd + 1;
>  
> -	__set_open_fd(fd, fdt);
> -	__set_close_on_exec(fd, fdt, flags & O_CLOEXEC);
> +	__set_open_fd(fd, fdt, flags & O_CLOEXEC);
>  	error = fd;
>  
>  out:
> @@ -1186,8 +1186,7 @@ __releases(&files->file_lock)
>  		goto Ebusy;
>  	get_file(file);
>  	rcu_assign_pointer(fdt->fd[fd], file);
> -	__set_open_fd(fd, fdt);
> -	__set_close_on_exec(fd, fdt, flags & O_CLOEXEC);
> +	__set_open_fd(fd, fdt, flags & O_CLOEXEC);
>  	spin_unlock(&files->file_lock);
>  
>  	if (tofree)
Marek Szyprowski Oct. 9, 2024, 11:42 a.m. UTC | #2
On 07.10.2024 19:43, Al Viro wrote:
> ->close_on_exec[] state is maintained only for opened descriptors;
> as the result, anything that marks a descriptor opened has to
> set its cloexec state explicitly.
>
> As the result, all calls of __set_open_fd() are followed by
> __set_close_on_exec(); might as well fold it into __set_open_fd()
> so that cloexec state is defined as soon as the descriptor is
> marked opened.
>
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

This patch landed in today's linux-next as commit 218a562f273b ("make 
__set_open_fd() set cloexec state as well"). In my tests I found that it 
breaks booting of many of my test systems (arm 32bit, arm 64bit and 
riscv64). It's hard to describe what exactly is broken, but none of the 
affected boards reached the login shell. All crashed somewhere in the 
userspace during systemd startup. This can be easily reproduced even 
with qemu.

> ---
>   fs/file.c | 9 ++++-----
>   1 file changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/fs/file.c b/fs/file.c
> index d8fccd4796a9..b63294ed85ec 100644
> --- a/fs/file.c
> +++ b/fs/file.c
> @@ -248,12 +248,13 @@ static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt,
>   	}
>   }
>   
> -static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
> +static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt, bool set)
>   {
>   	__set_bit(fd, fdt->open_fds);
>   	fd /= BITS_PER_LONG;
>   	if (!~fdt->open_fds[fd])
>   		__set_bit(fd, fdt->full_fds_bits);
> +	__set_close_on_exec(fd, fdt, set);
>   }
>   
>   static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
> @@ -517,8 +518,7 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
>   	if (start <= files->next_fd)
>   		files->next_fd = fd + 1;
>   
> -	__set_open_fd(fd, fdt);
> -	__set_close_on_exec(fd, fdt, flags & O_CLOEXEC);
> +	__set_open_fd(fd, fdt, flags & O_CLOEXEC);
>   	error = fd;
>   
>   out:
> @@ -1186,8 +1186,7 @@ __releases(&files->file_lock)
>   		goto Ebusy;
>   	get_file(file);
>   	rcu_assign_pointer(fdt->fd[fd], file);
> -	__set_open_fd(fd, fdt);
> -	__set_close_on_exec(fd, fdt, flags & O_CLOEXEC);
> +	__set_open_fd(fd, fdt, flags & O_CLOEXEC);
>   	spin_unlock(&files->file_lock);
>   
>   	if (tofree)

Best regards
Christian Brauner Oct. 9, 2024, 2:19 p.m. UTC | #3
On Wed, Oct 09, 2024 at 11:50:36AM GMT, Steven Price wrote:
> On 07/10/2024 18:43, Al Viro wrote:
> > ->close_on_exec[] state is maintained only for opened descriptors;
> > as the result, anything that marks a descriptor opened has to
> > set its cloexec state explicitly.
> > 
> > As the result, all calls of __set_open_fd() are followed by
> > __set_close_on_exec(); might as well fold it into __set_open_fd()
> > so that cloexec state is defined as soon as the descriptor is
> > marked opened.
> > 
> > Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> > ---
> >  fs/file.c | 9 ++++-----
> >  1 file changed, 4 insertions(+), 5 deletions(-)
> > 
> > diff --git a/fs/file.c b/fs/file.c
> > index d8fccd4796a9..b63294ed85ec 100644
> > --- a/fs/file.c
> > +++ b/fs/file.c
> > @@ -248,12 +248,13 @@ static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt,
> >  	}
> >  }
> >  
> > -static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
> > +static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt, bool set)
> >  {
> >  	__set_bit(fd, fdt->open_fds);
> >  	fd /= BITS_PER_LONG;
> 
> Here fd is being modified...
> 
> >  	if (!~fdt->open_fds[fd])
> >  		__set_bit(fd, fdt->full_fds_bits);
> > +	__set_close_on_exec(fd, fdt, set);
> 
> ... which means fd here isn't the same as the passed in value. So this
> call to __set_close_on_exec affects a different fd to the expected one.

Good spot. Al, I folded the below fix so from my end you don't have to
do anything unless you want to nitpick how to fix it. Local variable
looked ugly to me.

Pushed and running through tests now.

diff --git a/fs/file.c b/fs/file.c
index c039e21c1cd1..52654415f17e 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -354,12 +354,17 @@ static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt,
        }
 }

-static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt, bool set)
+static inline void __set_open(unsigned int fd, struct fdtable *fdt, bool set)
 {
        __set_bit(fd, fdt->open_fds);
        fd /= BITS_PER_LONG;
        if (!~fdt->open_fds[fd])
                __set_bit(fd, fdt->full_fds_bits);
+}
+
+static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt, bool set)
+{
+       __set_open(fd, fdt, set);
        __set_close_on_exec(fd, fdt, set);
 }
Al Viro Oct. 9, 2024, 3:24 p.m. UTC | #4
On Wed, Oct 09, 2024 at 04:19:47PM +0200, Christian Brauner wrote:
> On Wed, Oct 09, 2024 at 11:50:36AM GMT, Steven Price wrote:
> > On 07/10/2024 18:43, Al Viro wrote:
> > > ->close_on_exec[] state is maintained only for opened descriptors;
> > > as the result, anything that marks a descriptor opened has to
> > > set its cloexec state explicitly.
> > > 
> > > As the result, all calls of __set_open_fd() are followed by
> > > __set_close_on_exec(); might as well fold it into __set_open_fd()
> > > so that cloexec state is defined as soon as the descriptor is
> > > marked opened.
> > > 
> > > Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> > > ---
> > >  fs/file.c | 9 ++++-----
> > >  1 file changed, 4 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/fs/file.c b/fs/file.c
> > > index d8fccd4796a9..b63294ed85ec 100644
> > > --- a/fs/file.c
> > > +++ b/fs/file.c
> > > @@ -248,12 +248,13 @@ static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt,
> > >  	}
> > >  }
> > >  
> > > -static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
> > > +static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt, bool set)
> > >  {
> > >  	__set_bit(fd, fdt->open_fds);
> > >  	fd /= BITS_PER_LONG;
> > 
> > Here fd is being modified...
> > 
> > >  	if (!~fdt->open_fds[fd])
> > >  		__set_bit(fd, fdt->full_fds_bits);
> > > +	__set_close_on_exec(fd, fdt, set);
> > 
> > ... which means fd here isn't the same as the passed in value. So this
> > call to __set_close_on_exec affects a different fd to the expected one.

ACK.

> Good spot. Al, I folded the below fix so from my end you don't have to
> do anything unless you want to nitpick how to fix it. Local variable
> looked ugly to me.

Wait, folded it _where_?  And how did it end up pushed to -next in the
first place?

<checks vfs/vfs.git>

FWIW, I do have a problem with your variant.  My preferred incremental would be
simply to deal with close_on_exec bitmap between updating open_fds and full_fds_bits:

diff --git a/fs/file.c b/fs/file.c
index 70cd6d8c6257..7251d215048d 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -249,10 +249,10 @@ static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt,
 static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt, bool set)
 {
 	__set_bit(fd, fdt->open_fds);
+	__set_close_on_exec(fd, fdt, set);
 	fd /= BITS_PER_LONG;
 	if (!~fdt->open_fds[fd])
 		__set_bit(fd, fdt->full_fds_bits);
-	__set_close_on_exec(fd, fdt, set);
 }
 
 static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
Al Viro Oct. 9, 2024, 3:36 p.m. UTC | #5
On Wed, Oct 09, 2024 at 04:24:11PM +0100, Al Viro wrote:

> Wait, folded it _where_?  And how did it end up pushed to -next in the
> first place?
> 
> <checks vfs/vfs.git>

Preferred variant force-pushed to #work.fdtable, and pushed to vfs/vfs.git#work.fdtable
as well.
Christian Brauner Oct. 10, 2024, 8:13 a.m. UTC | #6
On Wed, Oct 09, 2024 at 04:24:11PM +0100, Al Viro wrote:
> On Wed, Oct 09, 2024 at 04:19:47PM +0200, Christian Brauner wrote:
> > On Wed, Oct 09, 2024 at 11:50:36AM GMT, Steven Price wrote:
> > > On 07/10/2024 18:43, Al Viro wrote:
> > > > ->close_on_exec[] state is maintained only for opened descriptors;
> > > > as the result, anything that marks a descriptor opened has to
> > > > set its cloexec state explicitly.
> > > > 
> > > > As the result, all calls of __set_open_fd() are followed by
> > > > __set_close_on_exec(); might as well fold it into __set_open_fd()
> > > > so that cloexec state is defined as soon as the descriptor is
> > > > marked opened.
> > > > 
> > > > Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> > > > ---
> > > >  fs/file.c | 9 ++++-----
> > > >  1 file changed, 4 insertions(+), 5 deletions(-)
> > > > 
> > > > diff --git a/fs/file.c b/fs/file.c
> > > > index d8fccd4796a9..b63294ed85ec 100644
> > > > --- a/fs/file.c
> > > > +++ b/fs/file.c
> > > > @@ -248,12 +248,13 @@ static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt,
> > > >  	}
> > > >  }
> > > >  
> > > > -static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
> > > > +static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt, bool set)
> > > >  {
> > > >  	__set_bit(fd, fdt->open_fds);
> > > >  	fd /= BITS_PER_LONG;
> > > 
> > > Here fd is being modified...
> > > 
> > > >  	if (!~fdt->open_fds[fd])
> > > >  		__set_bit(fd, fdt->full_fds_bits);
> > > > +	__set_close_on_exec(fd, fdt, set);
> > > 
> > > ... which means fd here isn't the same as the passed in value. So this
> > > call to __set_close_on_exec affects a different fd to the expected one.
> 
> ACK.
> 
> > Good spot. Al, I folded the below fix so from my end you don't have to
> > do anything unless you want to nitpick how to fix it. Local variable
> > looked ugly to me.
> 
> Wait, folded it _where_?  And how did it end up pushed to -next in the
> first place?
> 
> <checks vfs/vfs.git>

Did you just not read:

https://lore.kernel.org/r/20241008-baufinanzierung-lakai-00b02ba0ac19@brauner

by any chance?
diff mbox series

Patch

diff --git a/fs/file.c b/fs/file.c
index d8fccd4796a9..b63294ed85ec 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -248,12 +248,13 @@  static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt,
 	}
 }
 
-static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
+static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt, bool set)
 {
 	__set_bit(fd, fdt->open_fds);
 	fd /= BITS_PER_LONG;
 	if (!~fdt->open_fds[fd])
 		__set_bit(fd, fdt->full_fds_bits);
+	__set_close_on_exec(fd, fdt, set);
 }
 
 static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
@@ -517,8 +518,7 @@  static int alloc_fd(unsigned start, unsigned end, unsigned flags)
 	if (start <= files->next_fd)
 		files->next_fd = fd + 1;
 
-	__set_open_fd(fd, fdt);
-	__set_close_on_exec(fd, fdt, flags & O_CLOEXEC);
+	__set_open_fd(fd, fdt, flags & O_CLOEXEC);
 	error = fd;
 
 out:
@@ -1186,8 +1186,7 @@  __releases(&files->file_lock)
 		goto Ebusy;
 	get_file(file);
 	rcu_assign_pointer(fdt->fd[fd], file);
-	__set_open_fd(fd, fdt);
-	__set_close_on_exec(fd, fdt, flags & O_CLOEXEC);
+	__set_open_fd(fd, fdt, flags & O_CLOEXEC);
 	spin_unlock(&files->file_lock);
 
 	if (tofree)