Message ID | 90bf6fd43343ca862e7f61b0834baf2bdbd0e24c.1586321767.git.josh@joshtriplett.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Support userspace-selected fds | expand |
On 2020-04-07, Josh Triplett <josh@joshtriplett.org> wrote: > Some applications want to prevent the usual "lowest available fd" > allocation from allocating certain file descriptors. For instance, they > may want to prevent allocation of a closed fd 0, 1, or 2 other than via > dup2/dup3, or reserve some low file descriptors for other purposes. > > Add a prctl to increase the minimum fd and return the previous minimum. > > System calls that allocate a specific file descriptor, such as > dup2/dup3, ignore this minimum. > > exec resets the minimum fd, to prevent one program from interfering with > another program's expectations about fd allocation. Why is it implemented as an "increase the value" interface? It feels like this is meant to avoid some kind of security trap (with a library reducing the value) but it means that if you want to temporarily raise the minimum fd number it's not possible (without re-exec()ing yourself, which is hardly a fun thing to do). Then again, this might've been discussed before and I missed it... 
> Test program: > > #include <err.h> > #include <fcntl.h> > #include <stdio.h> > #include <sys/prctl.h> > > int main(int argc, char *argv[]) > { > if (prctl(PR_INCREASE_MIN_FD, 100, 0, 0, 0) < 0) > err(1, "prctl"); > int fd = open("/dev/null", O_RDONLY); > if (fd < 0) > err(1, "open"); > printf("%d\n", fd); // prints 100 > return 0; > } > > Signed-off-by: Josh Triplett <josh@joshtriplett.org> > --- > fs/file.c | 23 +++++++++++++++++------ > include/linux/fdtable.h | 1 + > include/linux/file.h | 1 + > include/uapi/linux/prctl.h | 3 +++ > kernel/sys.c | 5 +++++ > 5 files changed, 27 insertions(+), 6 deletions(-) > > diff --git a/fs/file.c b/fs/file.c > index c8a4e4c86e55..ba06140d89af 100644 > --- a/fs/file.c > +++ b/fs/file.c > @@ -286,7 +286,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) > spin_lock_init(&newf->file_lock); > newf->resize_in_progress = false; > init_waitqueue_head(&newf->resize_wait); > - newf->next_fd = 0; > new_fdt = &newf->fdtab; > new_fdt->max_fds = NR_OPEN_DEFAULT; > new_fdt->close_on_exec = newf->close_on_exec_init; > @@ -295,6 +294,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) > new_fdt->fd = &newf->fd_array[0]; > > spin_lock(&oldf->file_lock); > + newf->next_fd = newf->min_fd = oldf->min_fd; > old_fdt = files_fdtable(oldf); > open_files = count_open_files(old_fdt); > > @@ -487,9 +487,7 @@ int __alloc_fd(struct files_struct *files, > spin_lock(&files->file_lock); > repeat: > fdt = files_fdtable(files); > - fd = start; > - if (fd < files->next_fd) > - fd = files->next_fd; > + fd = max3(start, files->min_fd, files->next_fd); > > if (fd < fdt->max_fds) > fd = find_next_fd(fdt, fd); > @@ -514,7 +512,7 @@ int __alloc_fd(struct files_struct *files, > goto repeat; > > if (start <= files->next_fd) > - files->next_fd = fd + 1; > + files->next_fd = max(fd + 1, files->min_fd); > > __set_open_fd(fd, fdt); > if (flags & O_CLOEXEC) > @@ -555,7 +553,7 @@ static void __put_unused_fd(struct files_struct 
*files, unsigned int fd) > { > struct fdtable *fdt = files_fdtable(files); > __clear_open_fd(fd, fdt); > - if (fd < files->next_fd) > + if (fd < files->next_fd && fd >= files->min_fd) > files->next_fd = fd; > } > > @@ -684,6 +682,7 @@ void do_close_on_exec(struct files_struct *files) > > /* exec unshares first */ > spin_lock(&files->file_lock); > + files->min_fd = 0; > for (i = 0; ; i++) { > unsigned long set; > unsigned fd = i * BITS_PER_LONG; > @@ -865,6 +864,18 @@ bool get_close_on_exec(unsigned int fd) > return res; > } > > +unsigned int increase_min_fd(unsigned int num) > +{ > + struct files_struct *files = current->files; > + unsigned int old_min_fd; > + > + spin_lock(&files->file_lock); > + old_min_fd = files->min_fd; > + files->min_fd += num; > + spin_unlock(&files->file_lock); > + return old_min_fd; > +} > + > static int do_dup2(struct files_struct *files, > struct file *file, unsigned fd, unsigned flags) > __releases(&files->file_lock) > diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h > index f07c55ea0c22..d1980443d8b3 100644 > --- a/include/linux/fdtable.h > +++ b/include/linux/fdtable.h > @@ -60,6 +60,7 @@ struct files_struct { > */ > spinlock_t file_lock ____cacheline_aligned_in_smp; > unsigned int next_fd; > + unsigned int min_fd; /* min for "lowest available fd" allocation */ > unsigned long close_on_exec_init[1]; > unsigned long open_fds_init[1]; > unsigned long full_fds_bits_init[1]; > diff --git a/include/linux/file.h b/include/linux/file.h > index 142d102f285e..b67986f818d2 100644 > --- a/include/linux/file.h > +++ b/include/linux/file.h > @@ -88,6 +88,7 @@ extern bool get_close_on_exec(unsigned int fd); > extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile); > extern int get_unused_fd_flags(unsigned flags); > extern void put_unused_fd(unsigned int fd); > +extern unsigned int increase_min_fd(unsigned int num); > > extern void fd_install(unsigned int fd, struct file *file); > > diff --git 
a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h > index 07b4f8131e36..916327272d21 100644 > --- a/include/uapi/linux/prctl.h > +++ b/include/uapi/linux/prctl.h > @@ -238,4 +238,7 @@ struct prctl_mm_map { > #define PR_SET_IO_FLUSHER 57 > #define PR_GET_IO_FLUSHER 58 > > +/* Increase minimum file descriptor for "lowest available fd" allocation */ > +#define PR_INCREASE_MIN_FD 59 > + > #endif /* _LINUX_PRCTL_H */ > diff --git a/kernel/sys.c b/kernel/sys.c > index d325f3ab624a..daa0ce43cecc 100644 > --- a/kernel/sys.c > +++ b/kernel/sys.c > @@ -2514,6 +2514,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, > > error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER; > break; > + case PR_INCREASE_MIN_FD: > + if (arg3 || arg4 || arg5) > + return -EINVAL; > + error = increase_min_fd((unsigned int)arg2); > + break; > default: > error = -EINVAL; > break; > -- > 2.26.0 >
On Wed, Apr 08, 2020 at 10:00:40PM +1000, Aleksa Sarai wrote:
> On 2020-04-07, Josh Triplett <josh@joshtriplett.org> wrote:
> > Some applications want to prevent the usual "lowest available fd"
> > allocation from allocating certain file descriptors. For instance, they
> > may want to prevent allocation of a closed fd 0, 1, or 2 other than via
> > dup2/dup3, or reserve some low file descriptors for other purposes.
> >
> > Add a prctl to increase the minimum fd and return the previous minimum.
> >
> > System calls that allocate a specific file descriptor, such as
> > dup2/dup3, ignore this minimum.
> >
> > exec resets the minimum fd, to prevent one program from interfering with
> > another program's expectations about fd allocation.
>
> Why is it implemented as an "increase the value" interface? It feels
> like this is meant to avoid some kind of security trap (with a library
> reducing the value) but it means that if you want to temporarily raise
> the minimum fd number it's not possible (without re-exec()ing yourself,
> which is hardly a fun thing to do).
>
> Then again, this might've been discussed before and I missed it...

It was: the previous version was a "get" and "set" interface. That
interface didn't allow for the possibility that something else in the
process had already set a minimum. This new atomic increase interface
(which also serves as a "get" interface if you pass 0) makes it possible
for a userspace library to reserve a range. (You have no guarantee about
previously allocated descriptors in that range, but you know that no
*new* automatically allocated descriptors will appear in that range,
which suffices; userspace can do the rest.)

- Josh Triplett
diff --git a/fs/file.c b/fs/file.c index c8a4e4c86e55..ba06140d89af 100644 --- a/fs/file.c +++ b/fs/file.c @@ -286,7 +286,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) spin_lock_init(&newf->file_lock); newf->resize_in_progress = false; init_waitqueue_head(&newf->resize_wait); - newf->next_fd = 0; new_fdt = &newf->fdtab; new_fdt->max_fds = NR_OPEN_DEFAULT; new_fdt->close_on_exec = newf->close_on_exec_init; @@ -295,6 +294,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) new_fdt->fd = &newf->fd_array[0]; spin_lock(&oldf->file_lock); + newf->next_fd = newf->min_fd = oldf->min_fd; old_fdt = files_fdtable(oldf); open_files = count_open_files(old_fdt); @@ -487,9 +487,7 @@ int __alloc_fd(struct files_struct *files, spin_lock(&files->file_lock); repeat: fdt = files_fdtable(files); - fd = start; - if (fd < files->next_fd) - fd = files->next_fd; + fd = max3(start, files->min_fd, files->next_fd); if (fd < fdt->max_fds) fd = find_next_fd(fdt, fd); @@ -514,7 +512,7 @@ int __alloc_fd(struct files_struct *files, goto repeat; if (start <= files->next_fd) - files->next_fd = fd + 1; + files->next_fd = max(fd + 1, files->min_fd); __set_open_fd(fd, fdt); if (flags & O_CLOEXEC) @@ -555,7 +553,7 @@ static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); __clear_open_fd(fd, fdt); - if (fd < files->next_fd) + if (fd < files->next_fd && fd >= files->min_fd) files->next_fd = fd; } @@ -684,6 +682,7 @@ void do_close_on_exec(struct files_struct *files) /* exec unshares first */ spin_lock(&files->file_lock); + files->min_fd = 0; for (i = 0; ; i++) { unsigned long set; unsigned fd = i * BITS_PER_LONG; @@ -865,6 +864,18 @@ bool get_close_on_exec(unsigned int fd) return res; } +unsigned int increase_min_fd(unsigned int num) +{ + struct files_struct *files = current->files; + unsigned int old_min_fd; + + spin_lock(&files->file_lock); + old_min_fd = files->min_fd; + files->min_fd += num; 
+ spin_unlock(&files->file_lock); + return old_min_fd; +} + static int do_dup2(struct files_struct *files, struct file *file, unsigned fd, unsigned flags) __releases(&files->file_lock) diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index f07c55ea0c22..d1980443d8b3 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -60,6 +60,7 @@ struct files_struct { */ spinlock_t file_lock ____cacheline_aligned_in_smp; unsigned int next_fd; + unsigned int min_fd; /* min for "lowest available fd" allocation */ unsigned long close_on_exec_init[1]; unsigned long open_fds_init[1]; unsigned long full_fds_bits_init[1]; diff --git a/include/linux/file.h b/include/linux/file.h index 142d102f285e..b67986f818d2 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -88,6 +88,7 @@ extern bool get_close_on_exec(unsigned int fd); extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile); extern int get_unused_fd_flags(unsigned flags); extern void put_unused_fd(unsigned int fd); +extern unsigned int increase_min_fd(unsigned int num); extern void fd_install(unsigned int fd, struct file *file); diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 07b4f8131e36..916327272d21 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -238,4 +238,7 @@ struct prctl_mm_map { #define PR_SET_IO_FLUSHER 57 #define PR_GET_IO_FLUSHER 58 +/* Increase minimum file descriptor for "lowest available fd" allocation */ +#define PR_INCREASE_MIN_FD 59 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index d325f3ab624a..daa0ce43cecc 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2514,6 +2514,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER; break; + case PR_INCREASE_MIN_FD: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = increase_min_fd((unsigned int)arg2); + break; default: error = -EINVAL; break;
Some applications want to prevent the usual "lowest available fd"
allocation from allocating certain file descriptors. For instance, they
may want to prevent allocation of a closed fd 0, 1, or 2 other than via
dup2/dup3, or reserve some low file descriptors for other purposes.

Add a prctl to increase the minimum fd and return the previous minimum.

System calls that allocate a specific file descriptor, such as
dup2/dup3, ignore this minimum.

exec resets the minimum fd, to prevent one program from interfering with
another program's expectations about fd allocation.

Test program:

    #include <err.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/prctl.h>

    int main(int argc, char *argv[])
    {
        if (prctl(PR_INCREASE_MIN_FD, 100, 0, 0, 0) < 0)
            err(1, "prctl");
        int fd = open("/dev/null", O_RDONLY);
        if (fd < 0)
            err(1, "open");
        printf("%d\n", fd); // prints 100
        return 0;
    }

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
---
 fs/file.c                  | 23 +++++++++++++++++------
 include/linux/fdtable.h    |  1 +
 include/linux/file.h       |  1 +
 include/uapi/linux/prctl.h |  3 +++
 kernel/sys.c               |  5 +++++
 5 files changed, 27 insertions(+), 6 deletions(-)