Message ID | 20240219183539.2926165-1-mic@digikod.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [RFC] fs: Add vfs_masks_device_ioctl*() helpers | expand |
Arnd, Christian, are you OK with this approach to identify VFS IOCTLs? If yes, Günther should include it in his next patch series. On Mon, Feb 19, 2024 at 07:35:39PM +0100, Mickaël Salaün wrote: > vfs_masks_device_ioctl() and vfs_masks_device_ioctl_compat() are useful > to differenciate between device driver IOCTL implementations and > filesystem ones. The goal is to be able to filter well-defined IOCTLs > from per-device (i.e. namespaced) IOCTLs and control such access. > > Add a new ioctl_compat() helper, similar to vfs_ioctl(), to wrap > compat_ioctl() calls and handle error conversions. > > Cc: Arnd Bergmann <arnd@arndb.de> > Cc: Christian Brauner <brauner@kernel.org> > Cc: Günther Noack <gnoack@google.com> > --- > fs/ioctl.c | 101 +++++++++++++++++++++++++++++++++++++++++---- > include/linux/fs.h | 12 ++++++ > 2 files changed, 105 insertions(+), 8 deletions(-) > > diff --git a/fs/ioctl.c b/fs/ioctl.c > index 76cf22ac97d7..f72c8da47d21 100644 > --- a/fs/ioctl.c > +++ b/fs/ioctl.c > @@ -763,6 +763,38 @@ static int ioctl_fssetxattr(struct file *file, void __user *argp) > return err; > } > > +/* > + * Safeguard to maintain a list of valid IOCTLs handled by do_vfs_ioctl() > + * instead of def_blk_fops or def_chr_fops (see init_special_inode). > + */ > +__attribute_const__ bool vfs_masked_device_ioctl(const unsigned int cmd) > +{ > + switch (cmd) { > + case FIOCLEX: > + case FIONCLEX: > + case FIONBIO: > + case FIOASYNC: > + case FIOQSIZE: > + case FIFREEZE: > + case FITHAW: > + case FS_IOC_FIEMAP: > + case FIGETBSZ: > + case FICLONE: > + case FICLONERANGE: > + case FIDEDUPERANGE: > + /* FIONREAD is forwarded to device implementations. */ > + case FS_IOC_GETFLAGS: > + case FS_IOC_SETFLAGS: > + case FS_IOC_FSGETXATTR: > + case FS_IOC_FSSETXATTR: > + /* file_ioctl()'s IOCTLs are forwarded to device implementations. */ > + return true; > + default: > + return false; > + } > +} > +EXPORT_SYMBOL(vfs_masked_device_ioctl); > + > /* > * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d. > * It's just a simple helper for sys_ioctl and compat_sys_ioctl. > @@ -858,6 +890,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) > { > struct fd f = fdget(fd); > int error; > + const struct inode *inode; > + bool is_device; > > if (!f.file) > return -EBADF; > @@ -866,9 +900,18 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) > if (error) > goto out; > > + inode = file_inode(f.file); > + is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode); > + if (is_device && !vfs_masked_device_ioctl(cmd)) { > + error = vfs_ioctl(f.file, cmd, arg); > + goto out; > + } > + > error = do_vfs_ioctl(f.file, fd, cmd, arg); > - if (error == -ENOIOCTLCMD) > + if (error == -ENOIOCTLCMD) { > + WARN_ON_ONCE(is_device); > error = vfs_ioctl(f.file, cmd, arg); > + } > > out: > fdput(f); > @@ -911,11 +954,49 @@ long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) > } > EXPORT_SYMBOL(compat_ptr_ioctl); > > +static long ioctl_compat(struct file *filp, unsigned int cmd, > + compat_ulong_t arg) > +{ > + int error = -ENOTTY; > + > + if (!filp->f_op->compat_ioctl) > + goto out; > + > + error = filp->f_op->compat_ioctl(filp, cmd, arg); > + if (error == -ENOIOCTLCMD) > + error = -ENOTTY; > + > +out: > + return error; > +} > + > +__attribute_const__ bool vfs_masked_device_ioctl_compat(const unsigned int cmd) > +{ > + switch (cmd) { > + case FICLONE: > +#if defined(CONFIG_X86_64) > + case FS_IOC_RESVSP_32: > + case FS_IOC_RESVSP64_32: > + case FS_IOC_UNRESVSP_32: > + case FS_IOC_UNRESVSP64_32: > + case FS_IOC_ZERO_RANGE_32: > +#endif > + case FS_IOC32_GETFLAGS: > + case FS_IOC32_SETFLAGS: > + return true; > + default: > + return vfs_masked_device_ioctl(cmd); > + } > +} > +EXPORT_SYMBOL(vfs_masked_device_ioctl_compat); > + > COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, > compat_ulong_t, arg) > { > struct fd f = fdget(fd); > int error; > + const struct inode *inode; > + bool is_device; > > if (!f.file) > return -EBADF; > @@ -924,6 +1005,13 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, > if (error) > goto out; > > + inode = file_inode(f.file); > + is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode); > + if (is_device && !vfs_masked_device_ioctl_compat(cmd)) { > + error = ioctl_compat(f.file, cmd, arg); > + goto out; > + } > + > switch (cmd) { > /* FICLONE takes an int argument, so don't use compat_ptr() */ > case FICLONE: > @@ -964,13 +1052,10 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, > default: > error = do_vfs_ioctl(f.file, fd, cmd, > (unsigned long)compat_ptr(arg)); > - if (error != -ENOIOCTLCMD) > - break; > - > - if (f.file->f_op->compat_ioctl) > - error = f.file->f_op->compat_ioctl(f.file, cmd, arg); > - if (error == -ENOIOCTLCMD) > - error = -ENOTTY; > + if (error == -ENOIOCTLCMD) { > + WARN_ON_ONCE(is_device); > + error = ioctl_compat(f.file, cmd, arg); > + } > break; > } > > diff --git a/include/linux/fs.h b/include/linux/fs.h > index ed5966a70495..b620d0c00e16 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1902,6 +1902,18 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, > #define compat_ptr_ioctl NULL > #endif > > +extern __attribute_const__ bool vfs_masked_device_ioctl(const unsigned int cmd); > +#ifdef CONFIG_COMPAT > +extern __attribute_const__ bool > +vfs_masked_device_ioctl_compat(const unsigned int cmd); > +#else /* CONFIG_COMPAT */ > +static inline __attribute_const__ bool > +vfs_masked_device_ioctl_compat(const unsigned int cmd) > +{ > + return vfs_masked_device_ioctl(cmd); > +} > +#endif /* CONFIG_COMPAT */ > + > /* > * VFS file helper functions. > */ > -- > 2.43.0 > >
On Mon, Feb 19, 2024, at 19:35, Mickaël Salaün wrote: > vfs_masks_device_ioctl() and vfs_masks_device_ioctl_compat() are useful > to differenciate between device driver IOCTL implementations and > filesystem ones. The goal is to be able to filter well-defined IOCTLs > from per-device (i.e. namespaced) IOCTLs and control such access. > > Add a new ioctl_compat() helper, similar to vfs_ioctl(), to wrap > compat_ioctl() calls and handle error conversions. I'm still a bit confused by what your goal is here. I see the code getting more complex but don't see the payoff in this patch. > Cc: Arnd Bergmann <arnd@arndb.de> > Cc: Christian Brauner <brauner@kernel.org> > Cc: Günther Noack <gnoack@google.com> I assume the missing Signed-off-by is intentional while you are still gathering feedback? > +/* > + * Safeguard to maintain a list of valid IOCTLs handled by > do_vfs_ioctl() > + * instead of def_blk_fops or def_chr_fops (see init_special_inode). > + */ > +__attribute_const__ bool vfs_masked_device_ioctl(const unsigned int > cmd) > +{ > + switch (cmd) { > + case FIOCLEX: > + case FIONCLEX: > + case FIONBIO: > + case FIOASYNC: > + case FIOQSIZE: > + case FIFREEZE: > + case FITHAW: > + case FS_IOC_FIEMAP: > + case FIGETBSZ: > + case FICLONE: > + case FICLONERANGE: > + case FIDEDUPERANGE: > + /* FIONREAD is forwarded to device implementations. */ > + case FS_IOC_GETFLAGS: > + case FS_IOC_SETFLAGS: > + case FS_IOC_FSGETXATTR: > + case FS_IOC_FSSETXATTR: > + /* file_ioctl()'s IOCTLs are forwarded to device implementations. */ > + return true; > + default: > + return false; > + } > +} > +EXPORT_SYMBOL(vfs_masked_device_ioctl); It looks like this gets added into the hot path of every ioctl command, which is not ideal, especially when this no longer gets inlined into the caller. > + inode = file_inode(f.file); > + is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode); > + if (is_device && !vfs_masked_device_ioctl(cmd)) { > + error = vfs_ioctl(f.file, cmd, arg); > + goto out; > + } The S_ISBLK() || S_ISCHR() check here looks like it changes behavior, at least for sockets. If that is intentional, it should probably be a separate patch with a detailed explanation. > error = do_vfs_ioctl(f.file, fd, cmd, arg); > - if (error == -ENOIOCTLCMD) > + if (error == -ENOIOCTLCMD) { > + WARN_ON_ONCE(is_device); > error = vfs_ioctl(f.file, cmd, arg); > + } The WARN_ON_ONCE() looks like it can be triggered from userspace, which is generally a bad idea. > +extern __attribute_const__ bool vfs_masked_device_ioctl(const unsigned > int cmd); > +#ifdef CONFIG_COMPAT > +extern __attribute_const__ bool > +vfs_masked_device_ioctl_compat(const unsigned int cmd); > +#else /* CONFIG_COMPAT */ > +static inline __attribute_const__ bool > +vfs_masked_device_ioctl_compat(const unsigned int cmd) > +{ > + return vfs_masked_device_ioctl(cmd); > +} > +#endif /* CONFIG_COMPAT */ I don't understand the purpose of the #else path here, this should not be needed. Arnd
On Fri, Mar 01, 2024 at 05:24:52PM +0100, Arnd Bergmann wrote: > On Mon, Feb 19, 2024, at 19:35, Mickaël Salaün wrote: > > vfs_masks_device_ioctl() and vfs_masks_device_ioctl_compat() are useful > > to differenciate between device driver IOCTL implementations and > > filesystem ones. The goal is to be able to filter well-defined IOCTLs > > from per-device (i.e. namespaced) IOCTLs and control such access. > > > > Add a new ioctl_compat() helper, similar to vfs_ioctl(), to wrap > > compat_ioctl() calls and handle error conversions. > > I'm still a bit confused by what your goal is here. I see > the code getting more complex but don't see the payoff in this > patch. The main idea is to be able to identify if an IOCTL is handled by a device driver (i.e. block or character devices) or not. This would be used at least by Landlock to control such IOCTL (according to the char/block device file, which can already be identified) while allowing other VFS/FS IOCTLs. > > > Cc: Arnd Bergmann <arnd@arndb.de> > > Cc: Christian Brauner <brauner@kernel.org> > > Cc: Günther Noack <gnoack@google.com> > > I assume the missing Signed-off-by is intentional while you are > still gathering feedback? No, I sent it too quickly and forgot to add it. Signed-off-by: Mickaël Salaün <mic@digikod.net> > > > +/* > > + * Safeguard to maintain a list of valid IOCTLs handled by > > do_vfs_ioctl() > > + * instead of def_blk_fops or def_chr_fops (see init_special_inode). > > + */ > > +__attribute_const__ bool vfs_masked_device_ioctl(const unsigned int > > cmd) > > +{ > > + switch (cmd) { > > + case FIOCLEX: > > + case FIONCLEX: > > + case FIONBIO: > > + case FIOASYNC: > > + case FIOQSIZE: > > + case FIFREEZE: > > + case FITHAW: > > + case FS_IOC_FIEMAP: > > + case FIGETBSZ: > > + case FICLONE: > > + case FICLONERANGE: > > + case FIDEDUPERANGE: > > + /* FIONREAD is forwarded to device implementations. */ > > + case FS_IOC_GETFLAGS: > > + case FS_IOC_SETFLAGS: > > + case FS_IOC_FSGETXATTR: > > + case FS_IOC_FSSETXATTR: > > + /* file_ioctl()'s IOCTLs are forwarded to device implementations. */ > > + return true; > > + default: > > + return false; > > + } > > +} > > +EXPORT_SYMBOL(vfs_masked_device_ioctl); > > It looks like this gets added into the hot path of every > ioctl command, which is not ideal, especially when this > no longer gets inlined into the caller. I'm looking for a way to guarantee that the list of IOCTLs in this helper will be kept up-to-date. This kind of run time check might be too much though. Do you have other suggestions? Do you think a simple comment to remind contributors to update this helper would be enough? I guess VFS IOCTLs should not be added often, but I'm worried that this list could get out of sync... > > > + inode = file_inode(f.file); > > + is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode); > > + if (is_device && !vfs_masked_device_ioctl(cmd)) { > > + error = vfs_ioctl(f.file, cmd, arg); > > + goto out; > > + } > > The S_ISBLK() || S_ISCHR() check here looks like it changes > behavior, at least for sockets. If that is intentional, > it should probably be a separate patch with a detailed > explanation. I don't think this changes the behavior for sockets, and at least that's not intentionnal. This patch should not change the current behavior at all. The path to reach socket IOCTLs goes through a vfs_ioctl() call, which is... > > > error = do_vfs_ioctl(f.file, fd, cmd, arg); > > - if (error == -ENOIOCTLCMD) > > + if (error == -ENOIOCTLCMD) { > > + WARN_ON_ONCE(is_device); > > error = vfs_ioctl(f.file, cmd, arg); ...here! > > + } > > The WARN_ON_ONCE() looks like it can be triggered from > userspace, which is generally a bad idea. This WARN_ON_ONCE() should never be triggered because if the it is a device IOCTL it goes through the previous vfs_ioctl() (for device-specific command) call or through this do_vfs_ioctl() call (for VFS-specific command). > > > +extern __attribute_const__ bool vfs_masked_device_ioctl(const unsigned > > int cmd); > > +#ifdef CONFIG_COMPAT > > +extern __attribute_const__ bool > > +vfs_masked_device_ioctl_compat(const unsigned int cmd); > > +#else /* CONFIG_COMPAT */ > > +static inline __attribute_const__ bool > > +vfs_masked_device_ioctl_compat(const unsigned int cmd) > > +{ > > + return vfs_masked_device_ioctl(cmd); > > +} > > +#endif /* CONFIG_COMPAT */ > > I don't understand the purpose of the #else path here, > this should not be needed. Correct, this else branch is useless. > > Arnd >
Hello! More questions than answers in this code review, but maybe this discusison will help to get a clearer picture about what we are going for here. On Mon, Feb 19, 2024 at 07:35:39PM +0100, Mickaël Salaün wrote: > vfs_masks_device_ioctl() and vfs_masks_device_ioctl_compat() are useful > to differenciate between device driver IOCTL implementations and > filesystem ones. The goal is to be able to filter well-defined IOCTLs > from per-device (i.e. namespaced) IOCTLs and control such access. > > Add a new ioctl_compat() helper, similar to vfs_ioctl(), to wrap > compat_ioctl() calls and handle error conversions. > > Cc: Arnd Bergmann <arnd@arndb.de> > Cc: Christian Brauner <brauner@kernel.org> > Cc: Günther Noack <gnoack@google.com> > --- > fs/ioctl.c | 101 +++++++++++++++++++++++++++++++++++++++++---- > include/linux/fs.h | 12 ++++++ > 2 files changed, 105 insertions(+), 8 deletions(-) > > diff --git a/fs/ioctl.c b/fs/ioctl.c > index 76cf22ac97d7..f72c8da47d21 100644 > --- a/fs/ioctl.c > +++ b/fs/ioctl.c > @@ -763,6 +763,38 @@ static int ioctl_fssetxattr(struct file *file, void __user *argp) > return err; > } > > +/* > + * Safeguard to maintain a list of valid IOCTLs handled by do_vfs_ioctl() > + * instead of def_blk_fops or def_chr_fops (see init_special_inode). > + */ > +__attribute_const__ bool vfs_masked_device_ioctl(const unsigned int cmd) > +{ > + switch (cmd) { > + case FIOCLEX: > + case FIONCLEX: > + case FIONBIO: > + case FIOASYNC: > + case FIOQSIZE: > + case FIFREEZE: > + case FITHAW: > + case FS_IOC_FIEMAP: > + case FIGETBSZ: > + case FICLONE: > + case FICLONERANGE: > + case FIDEDUPERANGE: > + /* FIONREAD is forwarded to device implementations. */ > + case FS_IOC_GETFLAGS: > + case FS_IOC_SETFLAGS: > + case FS_IOC_FSGETXATTR: > + case FS_IOC_FSSETXATTR: > + /* file_ioctl()'s IOCTLs are forwarded to device implementations. */ > + return true; > + default: > + return false; > + } > +} > +EXPORT_SYMBOL(vfs_masked_device_ioctl); [ Technical implementation notes about this function: the list of IOCTLs here are the same ones which do_vfs_ioctl() implements directly. There are only two cases in which do_vfs_ioctl() does more complicated handling: (1) FIONREAD falls back to the device's ioctl implemenetation. Therefore, we omit FIONREAD in our own list - we do not want to allow that. (2) The default case falls back to the file_ioctl() function, but *only* for S_ISREG() files, so it does not matter for the Landlock case. ] ## What we are actually trying to do (?) Let me try to take a step back and paraphrase what I think we are *actually* trying to do here -- please correct me if I am wrong about that: I think what we *really* are trying to do is to control from the Landlock LSM whether the filp->f_op->unlocked_ioctl() or filp->f_op->ioctl_compat() operations are getting called for device files. So in a world where we cared only about correctness, we could create a new LSM hook security_file_vfs_ioctl(), which gets checked just before these two f_op operations get called. With that, we could permit all IOCTLs that are implemented in fs/ioctl.c, and we could deny all IOCTL commands that are implemented in the device implementation. I guess the reasons why we are not using that approach are performance, and that it might mess up the LSM hook interface with special cases that only Landlcok needs? But it seems like it would be easier to reason about..? Or maybe we can find a middle ground, where we have the existing hook return a special value with the meaning "permit this IOCTL, but do not invoke the f_op hook"? ## What we implemented Of course, the existing security_file_ioctl LSM hook works differently, and so with that hook, we need to make our blocking decision purely based on the struct file*, the IOCTL command number and the IOCTL argument. So in order to make that decision correctly based on that information, we end up listing all the IOCTLs which are directly(!) implemented in do_vfs_ioctl(), because for Landlock, this is the list of IOCTL commands which is safe to permit on device files. And we need to keep that list in sync with fs/ioctl.c, which is why it ended up in the same place in this commit. (Is it maybe possible to check with a KUnit test whether such lists are in sync? It sounds superficially like it should be feasible to create a device file which records whether its ioctl implementation was called. So we could at least check that the Landlock command list is a subset of the do_vfs_ioctl() one.) > + > /* > * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d. > * It's just a simple helper for sys_ioctl and compat_sys_ioctl. > @@ -858,6 +890,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) > { > struct fd f = fdget(fd); > int error; > + const struct inode *inode; > + bool is_device; > > if (!f.file) > return -EBADF; > @@ -866,9 +900,18 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) > if (error) > goto out; > > + inode = file_inode(f.file); > + is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode); > + if (is_device && !vfs_masked_device_ioctl(cmd)) { > + error = vfs_ioctl(f.file, cmd, arg); > + goto out; > + } > + > error = do_vfs_ioctl(f.file, fd, cmd, arg); > - if (error == -ENOIOCTLCMD) > + if (error == -ENOIOCTLCMD) { > + WARN_ON_ONCE(is_device); > error = vfs_ioctl(f.file, cmd, arg); > + } It is not obvious at first that adding this list requires a change to the ioctl syscall implementations. If I understand this right, the idea is that you want to be 100% sure that we are not calling vfs_ioctl() for the commands in that list. And there is a scenario where this could potentially happen: do_vfs_ioctl() implements most things like this: static int do_vfs_ioctl(...) { switch (cmd) { /* many cases like the following: */ case FITHAW: return ioctl_fsthaw(filp); /* ... */ } return -ENOIOCTLCMD; } So I believe the scenario you want to avoid is the one where ioctl_fsthaw() or one of the other functions return -ENOIOCTLCMD by accident, and where that will then make the surrounding syscall implementation fall back to vfs_ioctl() despite the cmd being listed as safe for Landlock? Is that right? Looking at do_vfs_ioctl() and its helper functions, I am getting the impression that -ENOIOCTLCMD is only supposed to be returned at the very end of it, but not by any of the helper functions? If that were the case, we could maybe just as well just solve that problem local to do_vfs_ioctl()? A bit inelegant maybe, but just to get the idea across: static int sanitize_enoioctlcmd(int res) { if (res == -ENOIOCTLCMD) return ENOTTY; return res; } static int do_vfs_ioctl(...) { switch (cmd) { /* many cases like the following: */ case FITHAW: return sanitize_enoioctlcmd(ioctl_fsthaw(filp)); /* ... */ } return -ENOIOCTLCMD; } Would that be better? —Günther
On Tue, Mar 05, 2024 at 07:13:33PM +0100, Günther Noack wrote: > Hello! > > More questions than answers in this code review, but maybe this discusison will > help to get a clearer picture about what we are going for here. > > On Mon, Feb 19, 2024 at 07:35:39PM +0100, Mickaël Salaün wrote: > > vfs_masks_device_ioctl() and vfs_masks_device_ioctl_compat() are useful > > to differenciate between device driver IOCTL implementations and > > filesystem ones. The goal is to be able to filter well-defined IOCTLs > > from per-device (i.e. namespaced) IOCTLs and control such access. > > > > Add a new ioctl_compat() helper, similar to vfs_ioctl(), to wrap > > compat_ioctl() calls and handle error conversions. > > > > Cc: Arnd Bergmann <arnd@arndb.de> > > Cc: Christian Brauner <brauner@kernel.org> > > Cc: Günther Noack <gnoack@google.com> > > --- > > fs/ioctl.c | 101 +++++++++++++++++++++++++++++++++++++++++---- > > include/linux/fs.h | 12 ++++++ > > 2 files changed, 105 insertions(+), 8 deletions(-) > > > > diff --git a/fs/ioctl.c b/fs/ioctl.c > > index 76cf22ac97d7..f72c8da47d21 100644 > > --- a/fs/ioctl.c > > +++ b/fs/ioctl.c > > @@ -763,6 +763,38 @@ static int ioctl_fssetxattr(struct file *file, void __user *argp) > > return err; > > } > > > > +/* > > + * Safeguard to maintain a list of valid IOCTLs handled by do_vfs_ioctl() > > + * instead of def_blk_fops or def_chr_fops (see init_special_inode). > > + */ > > +__attribute_const__ bool vfs_masked_device_ioctl(const unsigned int cmd) > > +{ > > + switch (cmd) { > > + case FIOCLEX: > > + case FIONCLEX: > > + case FIONBIO: > > + case FIOASYNC: > > + case FIOQSIZE: > > + case FIFREEZE: > > + case FITHAW: > > + case FS_IOC_FIEMAP: > > + case FIGETBSZ: > > + case FICLONE: > > + case FICLONERANGE: > > + case FIDEDUPERANGE: > > + /* FIONREAD is forwarded to device implementations. */ > > + case FS_IOC_GETFLAGS: > > + case FS_IOC_SETFLAGS: > > + case FS_IOC_FSGETXATTR: > > + case FS_IOC_FSSETXATTR: > > + /* file_ioctl()'s IOCTLs are forwarded to device implementations. */ > > + return true; > > + default: > > + return false; > > + } > > +} > > +EXPORT_SYMBOL(vfs_masked_device_ioctl); > > [ > Technical implementation notes about this function: the list of IOCTLs here are > the same ones which do_vfs_ioctl() implements directly. > > There are only two cases in which do_vfs_ioctl() does more complicated handling: > > (1) FIONREAD falls back to the device's ioctl implemenetation. > Therefore, we omit FIONREAD in our own list - we do not want to allow that. > (2) The default case falls back to the file_ioctl() function, but *only* for > S_ISREG() files, so it does not matter for the Landlock case. > ] > > > ## What we are actually trying to do (?) > > Let me try to take a step back and paraphrase what I think we are *actually* > trying to do here -- please correct me if I am wrong about that: > > I think what we *really* are trying to do is to control from the Landlock LSM > whether the filp->f_op->unlocked_ioctl() or filp->f_op->ioctl_compat() > operations are getting called for device files. > > So in a world where we cared only about correctness, we could create a new LSM > hook security_file_vfs_ioctl(), which gets checked just before these two f_op > operations get called. With that, we could permit all IOCTLs that are > implemented in fs/ioctl.c, and we could deny all IOCTL commands that are > implemented in the device implementation. > > I guess the reasons why we are not using that approach are performance, and that > it might mess up the LSM hook interface with special cases that only Landlcok > needs? But it seems like it would be easier to reason about..? Or maybe we can > find a middle ground, where we have the existing hook return a special value > with the meaning "permit this IOCTL, but do not invoke the f_op hook"? Your security_file_vfs_ioctl() approach is simpler and better, I like it! From a performance point of view it should not change much because either an LSM would use the current IOCTL hook or this new one. Using a flag with the current IOCTL hook would be a missed opportunity for performance improvements because this hook could be called even if it is not needed. I don't think it would be worth it to create a new hook for compat and non-compat mode because we want to control these IOCTLs the same way for now, so it would not have a performance impact, but for consistency with the current IOCTL hooks I guess Paul would prefer two new hooks: security_file_vfs_ioctl() and security_file_vfs_ioctl_compat()? Another approach would be to split the IOCTL hook into two: one for the VFS layer and another for the underlying implementations. However, it looks like a difficult and brittle approach according to the current IOCTL implementations. Arnd, Christian, Paul, are you OK with this new hook proposal? > > > ## What we implemented > > Of course, the existing security_file_ioctl LSM hook works differently, and so > with that hook, we need to make our blocking decision purely based on the struct > file*, the IOCTL command number and the IOCTL argument. > > So in order to make that decision correctly based on that information, we end up > listing all the IOCTLs which are directly(!) implemented in do_vfs_ioctl(), > because for Landlock, this is the list of IOCTL commands which is safe to permit > on device files. And we need to keep that list in sync with fs/ioctl.c, which > is why it ended up in the same place in this commit. > > > (Is it maybe possible to check with a KUnit test whether such lists are in sync? > It sounds superficially like it should be feasible to create a device file which > records whether its ioctl implementation was called. So we could at least check > that the Landlock command list is a subset of the do_vfs_ioctl() one.) > > > > + > > /* > > * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d. > > * It's just a simple helper for sys_ioctl and compat_sys_ioctl. > > @@ -858,6 +890,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) > > { > > struct fd f = fdget(fd); > > int error; > > + const struct inode *inode; > > + bool is_device; > > > > if (!f.file) > > return -EBADF; > > @@ -866,9 +900,18 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) > > if (error) > > goto out; > > > > + inode = file_inode(f.file); > > + is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode); > > + if (is_device && !vfs_masked_device_ioctl(cmd)) { > > + error = vfs_ioctl(f.file, cmd, arg); > > + goto out; > > + } > > + > > error = do_vfs_ioctl(f.file, fd, cmd, arg); > > - if (error == -ENOIOCTLCMD) > > + if (error == -ENOIOCTLCMD) { > > + WARN_ON_ONCE(is_device); > > error = vfs_ioctl(f.file, cmd, arg); > > + } > > It is not obvious at first that adding this list requires a change to the ioctl > syscall implementations. If I understand this right, the idea is that you want > to be 100% sure that we are not calling vfs_ioctl() for the commands in that > list. Correct > And there is a scenario where this could potentially happen: > > do_vfs_ioctl() implements most things like this: > > static int do_vfs_ioctl(...) { > switch (cmd) { > /* many cases like the following: */ > case FITHAW: > return ioctl_fsthaw(filp); > /* ... */ > } > return -ENOIOCTLCMD; > } > > So I believe the scenario you want to avoid is the one where ioctl_fsthaw() or > one of the other functions return -ENOIOCTLCMD by accident, and where that will > then make the surrounding syscall implementation fall back to vfs_ioctl() > despite the cmd being listed as safe for Landlock? Is that right? Yes > > Looking at do_vfs_ioctl() and its helper functions, I am getting the impression > that -ENOIOCTLCMD is only supposed to be returned at the very end of it, but not > by any of the helper functions? If that were the case, we could maybe just as > well just solve that problem local to do_vfs_ioctl()? > > A bit inelegant maybe, but just to get the idea across: > > static int sanitize_enoioctlcmd(int res) { > if (res == -ENOIOCTLCMD) > return ENOTTY; > return res; > } > > static int do_vfs_ioctl(...) { > switch (cmd) { > /* many cases like the following: */ > case FITHAW: > return sanitize_enoioctlcmd(ioctl_fsthaw(filp)); > /* ... */ > } > return -ENOIOCTLCMD; > } > > Would that be better? I guess so, but a bit more intrusive. Anyway, the new LSM hook would be much cleaner and would require less intrusive changes in fs/ioctl.c The ioctl_compat() helper from this patch could still be useful though. > > —Günther > >
On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote: > On Tue, Mar 05, 2024 at 07:13:33PM +0100, Günther Noack wrote: >> On Mon, Feb 19, 2024 at 07:35:39PM +0100, Mickaël Salaün wrote: >> > + case FS_IOC_FSGETXATTR: >> > + case FS_IOC_FSSETXATTR: >> > + /* file_ioctl()'s IOCTLs are forwarded to device implementations. */ >> > + return true; >> > + default: >> > + return false; >> > + } >> > +} >> > +EXPORT_SYMBOL(vfs_masked_device_ioctl); >> >> [ >> Technical implementation notes about this function: the list of IOCTLs here are >> the same ones which do_vfs_ioctl() implements directly. >> >> There are only two cases in which do_vfs_ioctl() does more complicated handling: >> >> (1) FIONREAD falls back to the device's ioctl implemenetation. >> Therefore, we omit FIONREAD in our own list - we do not want to allow that. >> (2) The default case falls back to the file_ioctl() function, but *only* for >> S_ISREG() files, so it does not matter for the Landlock case. How about changing do_vfs_ioctl() to return -ENOIOCTLCMD for FIONREAD on special files? That way, the two cases become the same. >> I guess the reasons why we are not using that approach are performance, and that >> it might mess up the LSM hook interface with special cases that only Landlcok >> needs? But it seems like it would be easier to reason about..? Or maybe we can >> find a middle ground, where we have the existing hook return a special value >> with the meaning "permit this IOCTL, but do not invoke the f_op hook"? > > Your security_file_vfs_ioctl() approach is simpler and better, I like > it! From a performance point of view it should not change much because > either an LSM would use the current IOCTL hook or this new one. Using a > flag with the current IOCTL hook would be a missed opportunity for > performance improvements because this hook could be called even if it is > not needed. > > I don't think it would be worth it to create a new hook for compat and > non-compat mode because we want to control these IOCTLs the same way for > now, so it would not have a performance impact, but for consistency with > the current IOCTL hooks I guess Paul would prefer two new hooks: > security_file_vfs_ioctl() and security_file_vfs_ioctl_compat()? > > Another approach would be to split the IOCTL hook into two: one for the > VFS layer and another for the underlying implementations. However, it > looks like a difficult and brittle approach according to the current > IOCTL implementations. > > Arnd, Christian, Paul, are you OK with this new hook proposal? I think this sounds better. It would fit more closely into the overall structure of the ioctl handlers with their multiple levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl, you have the same structure for sockets and blockdev, and then additional levels below that and some weirdness for things like tty, scsi or cdrom. >> And there is a scenario where this could potentially happen: >> >> do_vfs_ioctl() implements most things like this: >> >> static int do_vfs_ioctl(...) { >> switch (cmd) { >> /* many cases like the following: */ >> case FITHAW: >> return ioctl_fsthaw(filp); >> /* ... */ >> } >> return -ENOIOCTLCMD; >> } >> >> So I believe the scenario you want to avoid is the one where ioctl_fsthaw() or >> one of the other functions return -ENOIOCTLCMD by accident, and where that will >> then make the surrounding syscall implementation fall back to vfs_ioctl() >> despite the cmd being listed as safe for Landlock? Is that right? > > Yes This does go against the normal structure a bit then, where any of the commands is allowed to return -ENOIOCTLCMD specifically for the purpose of passing control to the next level of callbacks. Having the landlock hook explicitly at the place where the callback is entered, as Günther suggested makes much more sense to me then. Arnd
On Wed, Mar 06, 2024 at 04:18:53PM +0100, Arnd Bergmann wrote: > On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote: > > On Tue, Mar 05, 2024 at 07:13:33PM +0100, Günther Noack wrote: > >> On Mon, Feb 19, 2024 at 07:35:39PM +0100, Mickaël Salaün wrote: > > >> > + case FS_IOC_FSGETXATTR: > >> > + case FS_IOC_FSSETXATTR: > >> > + /* file_ioctl()'s IOCTLs are forwarded to device implementations. */ > >> > + return true; > >> > + default: > >> > + return false; > >> > + } > >> > +} > >> > +EXPORT_SYMBOL(vfs_masked_device_ioctl); > >> > >> [ > >> Technical implementation notes about this function: the list of IOCTLs here are > >> the same ones which do_vfs_ioctl() implements directly. > >> > >> There are only two cases in which do_vfs_ioctl() does more complicated handling: > >> > >> (1) FIONREAD falls back to the device's ioctl implemenetation. > >> Therefore, we omit FIONREAD in our own list - we do not want to allow that. > > >> (2) The default case falls back to the file_ioctl() function, but *only* for > >> S_ISREG() files, so it does not matter for the Landlock case. > > How about changing do_vfs_ioctl() to return -ENOIOCTLCMD for > FIONREAD on special files? That way, the two cases become the > same. > > >> I guess the reasons why we are not using that approach are performance, and that > >> it might mess up the LSM hook interface with special cases that only Landlcok > >> needs? But it seems like it would be easier to reason about..? Or maybe we can > >> find a middle ground, where we have the existing hook return a special value > >> with the meaning "permit this IOCTL, but do not invoke the f_op hook"? > > > > Your security_file_vfs_ioctl() approach is simpler and better, I like > > it! From a performance point of view it should not change much because > > either an LSM would use the current IOCTL hook or this new one. Using a > > flag with the current IOCTL hook would be a missed opportunity for > > performance improvements because this hook could be called even if it is > > not needed. > > > > I don't think it would be worth it to create a new hook for compat and > > non-compat mode because we want to control these IOCTLs the same way for > > now, so it would not have a performance impact, but for consistency with > > the current IOCTL hooks I guess Paul would prefer two new hooks: > > security_file_vfs_ioctl() and security_file_vfs_ioctl_compat()? > > > > Another approach would be to split the IOCTL hook into two: one for the > > VFS layer and another for the underlying implementations. However, it > > looks like a difficult and brittle approach according to the current > > IOCTL implementations. > > > > Arnd, Christian, Paul, are you OK with this new hook proposal? > > I think this sounds better. It would fit more closely into > the overall structure of the ioctl handlers with their multiple > levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl, > you have the same structure for sockets and blockdev, and > then additional levels below that and some weirdness for > things like tty, scsi or cdrom. So an additional security hook called from tty, scsi, or cdrom? And the original hook is left where it is right now?
On Thu, Mar 7, 2024, at 13:15, Christian Brauner wrote: > On Wed, Mar 06, 2024 at 04:18:53PM +0100, Arnd Bergmann wrote: >> On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote: >> > >> > Arnd, Christian, Paul, are you OK with this new hook proposal? >> >> I think this sounds better. It would fit more closely into >> the overall structure of the ioctl handlers with their multiple >> levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl, >> you have the same structure for sockets and blockdev, and >> then additional levels below that and some weirdness for >> things like tty, scsi or cdrom. > > So an additional security hook called from tty, scsi, or cdrom? > And the original hook is left where it is right now? For the moment, I think adding another hook in vfs_ioctl() and the corresponding compat path would do what Mickaël wants. Beyond that, we could consider having hooks in socket and block ioctls if needed as they are easy to filter out based on inode->i_mode. The tty/scsi/cdrom hooks would be harder to do, let's assume for now that we don't need them. Arnd
On Thu, Mar 07, 2024 at 01:21:48PM +0100, Arnd Bergmann wrote: > On Thu, Mar 7, 2024, at 13:15, Christian Brauner wrote: > > On Wed, Mar 06, 2024 at 04:18:53PM +0100, Arnd Bergmann wrote: > >> On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote: > >> > > >> > Arnd, Christian, Paul, are you OK with this new hook proposal? > >> > >> I think this sounds better. It would fit more closely into > >> the overall structure of the ioctl handlers with their multiple > >> levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl, > >> you have the same structure for sockets and blockdev, and > >> then additional levels below that and some weirdness for > >> things like tty, scsi or cdrom. > > > > So an additional security hook called from tty, scsi, or cdrom? > > And the original hook is left where it is right now? > > For the moment, I think adding another hook in vfs_ioctl() > and the corresponding compat path would do what Mickaël > wants. Beyond that, we could consider having hooks in > socket and block ioctls if needed as they are easy to > filter out based on inode->i_mode. > > The tty/scsi/cdrom hooks would be harder to do, let's assume > for now that we don't need them. Thank you all for the help! Yes, tty/scsi/cdrom are just examples. We do not need special features for these for Landlock right now. What I would do is to invoke the new LSM hook in the following two places in fs/ioctl.c: 1) at the top of vfs_ioctl() 2) at the top of ioctl_compat() (Both of these functions are just invoking the f_op->unlocked_ioctl() and f_op->compat_ioctl() operations with a safeguard for that being a NULL pointer.) The intent is that the new hook gets called everytime before an ioctl is sent to these IOCTL operations in f_op, so that the LSM can distinguish cleanly between the "safe" IOCTLs that are implemented fully within fs/ioctl.c and the "potentially unsafe" IOCTLs which are implemented by these hooks (as it is unrealistic for us to holistically reason about the safety of all possible implementations). The alternative approach where we try to do the same based on the existing LSM IOCTL hook resulted in the patch further up in this mail thread - it involves maintaining a list of "safe" IOCTL commands, and it is difficult to guarantee that these lists of IOCTL commands stay in sync. Christian, does that make sense in your mind? Thanks, —Günther
On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote: > On Thu, Mar 07, 2024 at 01:21:48PM +0100, Arnd Bergmann wrote: > > On Thu, Mar 7, 2024, at 13:15, Christian Brauner wrote: > > > On Wed, Mar 06, 2024 at 04:18:53PM +0100, Arnd Bergmann wrote: > > >> On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote: > > >> > > > >> > Arnd, Christian, Paul, are you OK with this new hook proposal? > > >> > > >> I think this sounds better. It would fit more closely into > > >> the overall structure of the ioctl handlers with their multiple > > >> levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl, > > >> you have the same structure for sockets and blockdev, and > > >> then additional levels below that and some weirdness for > > >> things like tty, scsi or cdrom. > > > > > > So an additional security hook called from tty, scsi, or cdrom? > > > And the original hook is left where it is right now? > > > > For the moment, I think adding another hook in vfs_ioctl() > > and the corresponding compat path would do what Mickaël > > wants. Beyond that, we could consider having hooks in > > socket and block ioctls if needed as they are easy to > > filter out based on inode->i_mode. > > > > The tty/scsi/cdrom hooks would be harder to do, let's assume > > for now that we don't need them. > > Thank you all for the help! > > Yes, tty/scsi/cdrom are just examples. We do not need special features for > these for Landlock right now. > > What I would do is to invoke the new LSM hook in the following two places in > fs/ioctl.c: > > 1) at the top of vfs_ioctl() > 2) at the top of ioctl_compat() > > (Both of these functions are just invoking the f_op->unlocked_ioctl() and > f_op->compat_ioctl() operations with a safeguard for that being a NULL pointer.) > > The intent is that the new hook gets called everytime before an ioctl is sent to > these IOCTL operations in f_op, so that the LSM can distinguish cleanly between > the "safe" IOCTLs that are implemented fully within fs/ioctl.c and the > "potentially unsafe" IOCTLs which are implemented by these hooks (as it is > unrealistic for us to holistically reason about the safety of all possible > implementations). > > The alternative approach where we try to do the same based on the existing LSM > IOCTL hook resulted in the patch further up in this mail thread - it involves > maintaining a list of "safe" IOCTL commands, and it is difficult to guarantee > that these lists of IOCTL commands stay in sync. I need some more convincing as to why we need to introduce these new hooks, or even the vfs_masked_device_ioctl() classifier as originally proposed at the top of this thread. I believe I understand why Landlock wants this, but I worry that we all might have different definitions of a "safe" ioctl list, and encoding a definition into the LSM hooks seems like a bad idea to me. At this point in time, I think I'd rather see LSMs that care about ioctls maintaining their own list of "safe" ioctls and after a while if it looks like everyone is in agreement (VFS folks, individual LSMs, etc.) we can look into either an ioctl classifier or multiple LSM ioctl hooks focused on different categories of ioctls.
On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote: > On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote: > > On Thu, Mar 07, 2024 at 01:21:48PM +0100, Arnd Bergmann wrote: > > > On Thu, Mar 7, 2024, at 13:15, Christian Brauner wrote: > > > > On Wed, Mar 06, 2024 at 04:18:53PM +0100, Arnd Bergmann wrote: > > > >> On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote: > > > >> > > > > >> > Arnd, Christian, Paul, are you OK with this new hook proposal? > > > >> > > > >> I think this sounds better. It would fit more closely into > > > >> the overall structure of the ioctl handlers with their multiple > > > >> levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl, > > > >> you have the same structure for sockets and blockdev, and > > > >> then additional levels below that and some weirdness for > > > >> things like tty, scsi or cdrom. > > > > > > > > So an additional security hook called from tty, scsi, or cdrom? > > > > And the original hook is left where it is right now? > > > > > > For the moment, I think adding another hook in vfs_ioctl() > > > and the corresponding compat path would do what Mickaël > > > wants. Beyond that, we could consider having hooks in > > > socket and block ioctls if needed as they are easy to > > > filter out based on inode->i_mode. > > > > > > The tty/scsi/cdrom hooks would be harder to do, let's assume > > > for now that we don't need them. > > > > Thank you all for the help! > > > > Yes, tty/scsi/cdrom are just examples. We do not need special features for > > these for Landlock right now. > > > > What I would do is to invoke the new LSM hook in the following two places in > > fs/ioctl.c: > > > > 1) at the top of vfs_ioctl() > > 2) at the top of ioctl_compat() > > > > (Both of these functions are just invoking the f_op->unlocked_ioctl() and > > f_op->compat_ioctl() operations with a safeguard for that being a NULL pointer.) > > > > The intent is that the new hook gets called everytime before an ioctl is sent to > > these IOCTL operations in f_op, so that the LSM can distinguish cleanly between > > the "safe" IOCTLs that are implemented fully within fs/ioctl.c and the > > "potentially unsafe" IOCTLs which are implemented by these hooks (as it is > > unrealistic for us to holistically reason about the safety of all possible > > implementations). > > > > The alternative approach where we try to do the same based on the existing LSM > > IOCTL hook resulted in the patch further up in this mail thread - it involves > > maintaining a list of "safe" IOCTL commands, and it is difficult to guarantee > > that these lists of IOCTL commands stay in sync. > > I need some more convincing as to why we need to introduce these new > hooks, or even the vfs_masked_device_ioctl() classifier as originally > proposed at the top of this thread. I believe I understand why > Landlock wants this, but I worry that we all might have different > definitions of a "safe" ioctl list, and encoding a definition into the > LSM hooks seems like a bad idea to me. I have no idea what a "safe" ioctl means here. Subsystems already restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so "safe" clearly means something different here. > At this point in time, I think I'd rather see LSMs that care about > ioctls maintaining their own list of "safe" ioctls and after a while > if it looks like everyone is in agreement (VFS folks, individual LSMs, > etc.) we can look into either an ioctl classifier or multiple LSM > ioctl hooks focused on different categories of ioctls. From the perspective of a VFS and subsystem developer, I really have no clue what would make a "safe" ioctl from a LSM perspective, and I very much doubt an LSM developer has any clue whether deep, dark subsystem ioctls are "safe" to allow, or even what would stop working if they decided something was not "safe". This just seems like a complex recipe for creating unusable and/or impossible to configure/secure systems to me. -Dave.
On Thu, Mar 7, 2024 at 6:09 PM Dave Chinner <david@fromorbit.com> wrote: > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote: > > On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote: > > > On Thu, Mar 07, 2024 at 01:21:48PM +0100, Arnd Bergmann wrote: > > > > On Thu, Mar 7, 2024, at 13:15, Christian Brauner wrote: > > > > > On Wed, Mar 06, 2024 at 04:18:53PM +0100, Arnd Bergmann wrote: > > > > >> On Wed, Mar 6, 2024, at 14:47, Mickaël Salaün wrote: > > > > >> > > > > > >> > Arnd, Christian, Paul, are you OK with this new hook proposal? > > > > >> > > > > >> I think this sounds better. It would fit more closely into > > > > >> the overall structure of the ioctl handlers with their multiple > > > > >> levels, where below vfs_ioctl() calling into f_ops->unlocked_ioctl, > > > > >> you have the same structure for sockets and blockdev, and > > > > >> then additional levels below that and some weirdness for > > > > >> things like tty, scsi or cdrom. > > > > > > > > > > So an additional security hook called from tty, scsi, or cdrom? > > > > > And the original hook is left where it is right now? > > > > > > > > For the moment, I think adding another hook in vfs_ioctl() > > > > and the corresponding compat path would do what Mickaël > > > > wants. Beyond that, we could consider having hooks in > > > > socket and block ioctls if needed as they are easy to > > > > filter out based on inode->i_mode. > > > > > > > > The tty/scsi/cdrom hooks would be harder to do, let's assume > > > > for now that we don't need them. > > > > > > Thank you all for the help! > > > > > > Yes, tty/scsi/cdrom are just examples. We do not need special features for > > > these for Landlock right now. > > > > > > What I would do is to invoke the new LSM hook in the following two places in > > > fs/ioctl.c: > > > > > > 1) at the top of vfs_ioctl() > > > 2) at the top of ioctl_compat() > > > > > > (Both of these functions are just invoking the f_op->unlocked_ioctl() and > > > f_op->compat_ioctl() operations with a safeguard for that being a NULL pointer.) > > > > > > The intent is that the new hook gets called everytime before an ioctl is sent to > > > these IOCTL operations in f_op, so that the LSM can distinguish cleanly between > > > the "safe" IOCTLs that are implemented fully within fs/ioctl.c and the > > > "potentially unsafe" IOCTLs which are implemented by these hooks (as it is > > > unrealistic for us to holistically reason about the safety of all possible > > > implementations). > > > > > > The alternative approach where we try to do the same based on the existing LSM > > > IOCTL hook resulted in the patch further up in this mail thread - it involves > > > maintaining a list of "safe" IOCTL commands, and it is difficult to guarantee > > > that these lists of IOCTL commands stay in sync. > > > > I need some more convincing as to why we need to introduce these new > > hooks, or even the vfs_masked_device_ioctl() classifier as originally > > proposed at the top of this thread. I believe I understand why > > Landlock wants this, but I worry that we all might have different > > definitions of a "safe" ioctl list, and encoding a definition into the > > LSM hooks seems like a bad idea to me. > > I have no idea what a "safe" ioctl means here. Subsystems already > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so > "safe" clearly means something different here. That's the point I was trying to make. I'm not sure exactly what Günther meant either (I was simply copying his idea of a "safe" ioctl, complete with all of the associations around the double quotes), which helps underscore the idea that different groups are likely to have different ideas of what ioctls they want to allow based on their security model, environment, etc. > > At this point in time, I think I'd rather see LSMs that care about > > ioctls maintaining their own list of "safe" ioctls and after a while > > if it looks like everyone is in agreement (VFS folks, individual LSMs, > > etc.) we can look into either an ioctl classifier or multiple LSM > > ioctl hooks focused on different categories of ioctls. > > From the perspective of a VFS and subsystem developer, I really have > no clue what would make a "safe" ioctl from a LSM perspective ... We also need to keep in mind that we have multiple LSM implementations and we need to support different ideas around how to control access to ioctls, including which ioctls are "safe" for multiple definitions of the word. > ... and I > very much doubt an LSM developer has any clue whether deep, dark > subsystem ioctls are "safe" to allow, or even what would stop > working if they decided something was not "safe". ... or for those LSMs with configurable security policies, which ioctls are allowed by the LSM's policy developer to fit their particular needs. > This just seems like a complex recipe for creating unusable and/or > impossible to configure/secure systems to me. FWIW, Android has been using the existing LSM ioctl controls for several years now to help increase the security of Android devices. https://security.googleblog.com/2016/07/protecting-android-with-more-linux.html https://kernsec.org/files/lss2015/vanderstoep.pdf
On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote: > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote: >> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote: >> I need some more convincing as to why we need to introduce these new >> hooks, or even the vfs_masked_device_ioctl() classifier as originally >> proposed at the top of this thread. I believe I understand why >> Landlock wants this, but I worry that we all might have different >> definitions of a "safe" ioctl list, and encoding a definition into the >> LSM hooks seems like a bad idea to me. > > I have no idea what a "safe" ioctl means here. Subsystems already > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so > "safe" clearly means something different here. That was my problem with the first version as well, but I think drawing the line between "implemented in fs/ioctl.c" and "implemented in a random device driver fops->unlock_ioctl()" seems like a more helpful definition. This won't just protect from calling into drivers that are lacking a CAP_SYS_ADMIN check, but also from those that end up being harmful regardless of the ioctl command code passed into them because of stupid driver bugs. Arnd
On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote: > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote: > > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote: > >> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote: > >> I need some more convincing as to why we need to introduce these new > >> hooks, or even the vfs_masked_device_ioctl() classifier as originally > >> proposed at the top of this thread. I believe I understand why > >> Landlock wants this, but I worry that we all might have different > >> definitions of a "safe" ioctl list, and encoding a definition into the > >> LSM hooks seems like a bad idea to me. > > > > I have no idea what a "safe" ioctl means here. Subsystems already > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so > > "safe" clearly means something different here. > > That was my problem with the first version as well, but I think > drawing the line between "implemented in fs/ioctl.c" and > "implemented in a random device driver fops->unlock_ioctl()" > seems like a more helpful definition. > > This won't just protect from calling into drivers that are lacking > a CAP_SYS_ADMIN check, but also from those that end up being > harmful regardless of the ioctl command code passed into them > because of stupid driver bugs. Indeed. "safe" is definitely not the right word, it is too broad, relative to use cases and threat models. There is no "safe" IOCTL. Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock sandbox". Our assumptions are (in the context of Landlock): 1. There are IOCTLs tied to file types (e.g. block device with major/minor) that can easily be identified from user space (e.g. with the path name and file's metadata). /dev/* files make sense for user space and they scope to a specific use case (with relative privileges). This category of IOCTLs is implemented in standalone device drivers (for most of them). 2. Most user space processes should not be denied access to IOCTLs that are managed by the VFS layer or the underlying filesystem implementations. For instance, the do_vfs_ioctl()'s ones (e.g. FIOCLEX, FIONREAD) should always be allowed because they may be required to legitimately use files, and for performance and security reasons (e.g. fs-crypt, fsverity implemented at the filesystem layer). Moreover, these IOCTLs should already check the read/write permission (on the related FD), which is not the case for most block/char device IOCTL. 3. IOCTLs to pipes and sockets are out of scope. They should always be allowed for now because they don't directly expose files' data but IPCs instead, and we are focusing on FS access rights for now. We want to add a new LANDLOCK_ACCESS_FS_IOCTL_DEV right that could match on char/block device's specific IOCTLs, but it would not have any impact on other IOCTLs which would then always be allowed (if the sandboxed process is allowed to open the file). Because IOCTLs are implemented in layers and all IOCTLs commands live in the same 32-bit namespace, we need a way to identify the layer implemented by block and character devices. The new LSM hook proposal enables us to cleanly and efficiently identify the char/block device IOCTL layer with an additional check on the file type.
On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote: > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote: > > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote: > >> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote: > >> I need some more convincing as to why we need to introduce these new > >> hooks, or even the vfs_masked_device_ioctl() classifier as originally > >> proposed at the top of this thread. I believe I understand why > >> Landlock wants this, but I worry that we all might have different > >> definitions of a "safe" ioctl list, and encoding a definition into the > >> LSM hooks seems like a bad idea to me. > > > > I have no idea what a "safe" ioctl means here. Subsystems already > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so > > "safe" clearly means something different here. > > That was my problem with the first version as well, but I think > drawing the line between "implemented in fs/ioctl.c" and > "implemented in a random device driver fops->unlock_ioctl()" > seems like a more helpful definition. Yes, sorry for the confusion - that is exactly what I meant to say with "safe".: Those are the IOCTL commands implemented in fs/ioctl.c which do not go through f_ops->unlocked_ioctl (or the compat equivalent). We want to give people a way with Landlock so that they can restrict the use of device-driver implemented IOCTLs, but where they can keep using the bulk of more harmless IOCTLs in fs/ioctl.c. > This won't just protect from calling into drivers that are lacking > a CAP_SYS_ADMIN check, but also from those that end up being > harmful regardless of the ioctl command code passed into them > because of stupid driver bugs. Exactly -- there is a surprising number of f_ops->unlocked_ioctl implementations that already run various resource management and locking logic before even looking at the command number. That means that even the command numbers that are not implemented there are executing code in the driver layer, before the IOCTL returns with an error. So the f_ops->unlocked_ioctl() invocation is in itself increasing the surface of exposed functionality, even completely independent of the command number. Which makes the invocation of f_ops->unlocked_ioctl() a security boundary that we would like to restrict. —Günther
On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote: > On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote: > > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote: > > > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote: > > >> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote: > > >> I need some more convincing as to why we need to introduce these new > > >> hooks, or even the vfs_masked_device_ioctl() classifier as originally > > >> proposed at the top of this thread. I believe I understand why > > >> Landlock wants this, but I worry that we all might have different > > >> definitions of a "safe" ioctl list, and encoding a definition into the > > >> LSM hooks seems like a bad idea to me. > > > > > > I have no idea what a "safe" ioctl means here. Subsystems already > > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so > > > "safe" clearly means something different here. > > > > That was my problem with the first version as well, but I think > > drawing the line between "implemented in fs/ioctl.c" and > > "implemented in a random device driver fops->unlock_ioctl()" > > seems like a more helpful definition. > > > > This won't just protect from calling into drivers that are lacking > > a CAP_SYS_ADMIN check, but also from those that end up being > > harmful regardless of the ioctl command code passed into them > > because of stupid driver bugs. > > Indeed. > > "safe" is definitely not the right word, it is too broad, relative to > use cases and threat models. There is no "safe" IOCTL. > > Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock > sandbox". Which is a problem from a LSM perspective as we want to avoid hooks which are tightly bound to a single LSM or security model. It's okay if a new hook only has a single LSM implementation, but the hook's definition should be such that it is reasonably generalized to support multiple LSM/models. > Our assumptions are (in the context of Landlock): > > 1. There are IOCTLs tied to file types (e.g. block device with > major/minor) that can easily be identified from user space (e.g. with > the path name and file's metadata). /dev/* files make sense for user > space and they scope to a specific use case (with relative > privileges). This category of IOCTLs is implemented in standalone > device drivers (for most of them). > > 2. Most user space processes should not be denied access to IOCTLs that > are managed by the VFS layer or the underlying filesystem > implementations. For instance, the do_vfs_ioctl()'s ones (e.g. > FIOCLEX, FIONREAD) should always be allowed because they may be > required to legitimately use files, and for performance and security > reasons (e.g. fs-crypt, fsverity implemented at the filesystem layer). > Moreover, these IOCTLs should already check the read/write permission > (on the related FD), which is not the case for most block/char device > IOCTL. > > 3. IOCTLs to pipes and sockets are out of scope. They should always be > allowed for now because they don't directly expose files' data but > IPCs instead, and we are focusing on FS access rights for now. > > We want to add a new LANDLOCK_ACCESS_FS_IOCTL_DEV right that could match > on char/block device's specific IOCTLs, but it would not have any impact > on other IOCTLs which would then always be allowed (if the sandboxed > process is allowed to open the file). > > Because IOCTLs are implemented in layers and all IOCTLs commands live in > the same 32-bit namespace, we need a way to identify the layer > implemented by block and character devices. The new LSM hook proposal > enables us to cleanly and efficiently identify the char/block device > IOCTL layer with an additional check on the file type. I guess I should wait until there is an actual patch, but as of right now a VFS ioctl specific LSM hook looks far too limited to me and isn't something I can support at this point in time. It's obviously limited to only a subset of the ioctls, meaning that in order to have comprehensive coverage we would either need to implement a full range of subsystem ioctl hooks (ugh), or just use the existing security_file_ioctl(). I understand that this makes things a bit more complicated for Landlock's initial ioctl implementation, but considering my thoughts above and the fact that Landlock's ioctl protections are still evolving I'd rather not add a lot of extra hooks right now.
On Fri, Mar 08, 2024 at 02:22:58PM -0500, Paul Moore wrote: > On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote: > > On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote: > > > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote: > > > > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote: > > > >> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote: > > > >> I need some more convincing as to why we need to introduce these new > > > >> hooks, or even the vfs_masked_device_ioctl() classifier as originally > > > >> proposed at the top of this thread. I believe I understand why > > > >> Landlock wants this, but I worry that we all might have different > > > >> definitions of a "safe" ioctl list, and encoding a definition into the > > > >> LSM hooks seems like a bad idea to me. > > > > > > > > I have no idea what a "safe" ioctl means here. Subsystems already > > > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so > > > > "safe" clearly means something different here. > > > > > > That was my problem with the first version as well, but I think > > > drawing the line between "implemented in fs/ioctl.c" and > > > "implemented in a random device driver fops->unlock_ioctl()" > > > seems like a more helpful definition. > > > > > > This won't just protect from calling into drivers that are lacking > > > a CAP_SYS_ADMIN check, but also from those that end up being > > > harmful regardless of the ioctl command code passed into them > > > because of stupid driver bugs. > > > > Indeed. > > > > "safe" is definitely not the right word, it is too broad, relative to > > use cases and threat models. There is no "safe" IOCTL. > > > > Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock > > sandbox". > > Which is a problem from a LSM perspective as we want to avoid hooks > which are tightly bound to a single LSM or security model. It's okay > if a new hook only has a single LSM implementation, but the hook's > definition should be such that it is reasonably generalized to support > multiple LSM/models. As any new hook, there is a first user. Obviously this new hook would not be restricted to Landlock, it is a generic approach. I'm pretty sure a few hooks are only used by one LSM though. ;) > > > Our assumptions are (in the context of Landlock): > > > > 1. There are IOCTLs tied to file types (e.g. block device with > > major/minor) that can easily be identified from user space (e.g. with > > the path name and file's metadata). /dev/* files make sense for user > > space and they scope to a specific use case (with relative > > privileges). This category of IOCTLs is implemented in standalone > > device drivers (for most of them). > > > > 2. Most user space processes should not be denied access to IOCTLs that > > are managed by the VFS layer or the underlying filesystem > > implementations. For instance, the do_vfs_ioctl()'s ones (e.g. > > FIOCLEX, FIONREAD) should always be allowed because they may be > > required to legitimately use files, and for performance and security > > reasons (e.g. fs-crypt, fsverity implemented at the filesystem layer). > > Moreover, these IOCTLs should already check the read/write permission > > (on the related FD), which is not the case for most block/char device > > IOCTL. > > > > 3. IOCTLs to pipes and sockets are out of scope. They should always be > > allowed for now because they don't directly expose files' data but > > IPCs instead, and we are focusing on FS access rights for now. > > > > We want to add a new LANDLOCK_ACCESS_FS_IOCTL_DEV right that could match > > on char/block device's specific IOCTLs, but it would not have any impact > > on other IOCTLs which would then always be allowed (if the sandboxed > > process is allowed to open the file). > > > > Because IOCTLs are implemented in layers and all IOCTLs commands live in > > the same 32-bit namespace, we need a way to identify the layer > > implemented by block and character devices. The new LSM hook proposal > > enables us to cleanly and efficiently identify the char/block device > > IOCTL layer with an additional check on the file type. > > I guess I should wait until there is an actual patch, but as of right > now a VFS ioctl specific LSM hook looks far too limited to me and > isn't something I can support at this point in time. It's obviously > limited to only a subset of the ioctls, meaning that in order to have > comprehensive coverage we would either need to implement a full range > of subsystem ioctl hooks (ugh), or just use the existing > security_file_ioctl(). I think there is a misunderstanding. The subset of IOCTL commands the new hook will see would be 99% of them (i.e. all except those implemented in fs/ioctl.c). Being able to only handle this (big) subset would empower LSMs to control IOCTL commands without collision (e.g. the same command/value may have different meanings according to the implementation/layer), which is not currently possible (without manual tweaking). This proposal is to add a new hook for the layer just beneath the VFS catch-all IOCTL implementation. This layer can then differentiate between the underlying implementation according to the file properties. There is no need for additional hooks for other layers/subsystems. The existing security_file_ioctl() hook is useful to catch all IOCTL commands, but it doesn't enable to identify the underlying target and then the semantic of the command. Furthermore, as Günther said, an IOCTL call can already do kernel operations without looking at the command, but we would then be able to identify that by looking at the char/block device file for instance. > I understand that this makes things a bit more > complicated for Landlock's initial ioctl implementation, but > considering my thoughts above and the fact that Landlock's ioctl > protections are still evolving I'd rather not add a lot of extra hooks > right now. Without this hook, we'll need to rely on a list of allowed IOCTLs, which will be out-of-sync eventually. It would be a maintenance burden and an hacky approach. We're definitely open to new proposals, but until now this is the best approach we found from a maintenance, performance, and security point of view.
On 3/8/2024 12:12 PM, Mickaël Salaün wrote: > On Fri, Mar 08, 2024 at 02:22:58PM -0500, Paul Moore wrote: >> On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote: >>> On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote: >>>> On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote: >>>>> On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote: >>>>>> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote: >>>>>> I need some more convincing as to why we need to introduce these new >>>>>> hooks, or even the vfs_masked_device_ioctl() classifier as originally >>>>>> proposed at the top of this thread. I believe I understand why >>>>>> Landlock wants this, but I worry that we all might have different >>>>>> definitions of a "safe" ioctl list, and encoding a definition into the >>>>>> LSM hooks seems like a bad idea to me. >>>>> I have no idea what a "safe" ioctl means here. Subsystems already >>>>> restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so >>>>> "safe" clearly means something different here. >>>> That was my problem with the first version as well, but I think >>>> drawing the line between "implemented in fs/ioctl.c" and >>>> "implemented in a random device driver fops->unlock_ioctl()" >>>> seems like a more helpful definition. >>>> >>>> This won't just protect from calling into drivers that are lacking >>>> a CAP_SYS_ADMIN check, but also from those that end up being >>>> harmful regardless of the ioctl command code passed into them >>>> because of stupid driver bugs. >>> Indeed. >>> >>> "safe" is definitely not the right word, it is too broad, relative to >>> use cases and threat models. There is no "safe" IOCTL. >>> >>> Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock >>> sandbox". >> Which is a problem from a LSM perspective as we want to avoid hooks >> which are tightly bound to a single LSM or security model. It's okay >> if a new hook only has a single LSM implementation, but the hook's >> definition should be such that it is reasonably generalized to support >> multiple LSM/models. I've been watching this thread with some interest, as one of my side projects has been an attempt to address the "CAP_SYS_ADMIN problem", and there looks to be a lot of similarity between that and the "ioctl problem". In both cases it comes down to a matter of: 1. uniquely identifying the action 2. providing the information to code that can act upon it 3. providing "policy" to determine what to do about it My thought for the CAP_SYS_ADMIN case was to provide a new LSM hook security_sysadmin() that takes a single parameter which is the action ID. I called the action ID a "chit", because it's short and all the good, more descriptive words where taken. Calls to cap_able(CAP_SYS_ADMIN) could be replaced by calls to security_sysadmin(chit). security_sysadmin() would first call cap_able(CAP_SYSADMIN) and, if that succeeded, allow LSMs with registered hooks (selinux_sysadmin() etc) the opportunity to disallow the operation. I planned to include a small LSM (chits) that would allow the operation only if the process had the chit on its chit list. Landlock could add policy to deal with chits if so inclined. A generalization of this scheme would be to leave the cap_able(CAP_SYS_ADMIN) checks as they are and add an optional security_chit() hook for places where additional enforcement information is desired. Adding security_chit(CHIT_IOCTL_TTY_SOMETHING) to an ioctl would allow any LSM to make policy decisions about that ioctl operation. Adding security_chit(CHIT_ERASE_TAPE_REGISTERS) after a cap_able(CAP_SYS_ADMIN) could appease the driver writer who would otherwise be begging for CAP_ERASE_TAPE_REGISTERS. My biggest concern with this scheme is the management of chit values, which would have to be kept in a uapi header. A major advantage of this is that the security_chit() calls would only have to be added where someone wants to take advantage of the mechanism. People who are happy with CAP_SYS_ADMIN or ioctl as it is don't have to do anything, and their code won't get churned for the new world order. The downside is the potentially onerous process of deciding if an LSM cares about an action known only by its number. I have patches close to ready. The chit LSM isn't passing the laugh test quite yet, so I'm holding it back for now. I wanted to bring this up before we go too far down a more complicated path.
On Fri, Mar 8, 2024 at 3:12 PM Mickaël Salaün <mic@digikod.net> wrote: > On Fri, Mar 08, 2024 at 02:22:58PM -0500, Paul Moore wrote: > > On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote: > > > On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote: > > > > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote: > > > > > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote: > > > > >> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote: > > > > >> I need some more convincing as to why we need to introduce these new > > > > >> hooks, or even the vfs_masked_device_ioctl() classifier as originally > > > > >> proposed at the top of this thread. I believe I understand why > > > > >> Landlock wants this, but I worry that we all might have different > > > > >> definitions of a "safe" ioctl list, and encoding a definition into the > > > > >> LSM hooks seems like a bad idea to me. > > > > > > > > > > I have no idea what a "safe" ioctl means here. Subsystems already > > > > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so > > > > > "safe" clearly means something different here. > > > > > > > > That was my problem with the first version as well, but I think > > > > drawing the line between "implemented in fs/ioctl.c" and > > > > "implemented in a random device driver fops->unlock_ioctl()" > > > > seems like a more helpful definition. > > > > > > > > This won't just protect from calling into drivers that are lacking > > > > a CAP_SYS_ADMIN check, but also from those that end up being > > > > harmful regardless of the ioctl command code passed into them > > > > because of stupid driver bugs. > > > > > > Indeed. > > > > > > "safe" is definitely not the right word, it is too broad, relative to > > > use cases and threat models. There is no "safe" IOCTL. > > > > > > Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock > > > sandbox". > > > > Which is a problem from a LSM perspective as we want to avoid hooks > > which are tightly bound to a single LSM or security model. It's okay > > if a new hook only has a single LSM implementation, but the hook's > > definition should be such that it is reasonably generalized to support > > multiple LSM/models. > > As any new hook, there is a first user. Obviously this new hook would > not be restricted to Landlock, it is a generic approach. I'm pretty > sure a few hooks are only used by one LSM though. ;) Sure, as I said above, it's okay for there to only be a single LSM implementation, but the basic idea behind the hook needs to have some hope of being generic. Your "let's redefine a safe ioctl as 'IOCTL always allowed in a Landlock sandbox'" doesn't fill me with confidence about the hook being generic; who knows, maybe it will be, but in the absence of a patch, I'm left with descriptions like those. > > > Our assumptions are (in the context of Landlock): > > > > > > 1. There are IOCTLs tied to file types (e.g. block device with > > > major/minor) that can easily be identified from user space (e.g. with > > > the path name and file's metadata). /dev/* files make sense for user > > > space and they scope to a specific use case (with relative > > > privileges). This category of IOCTLs is implemented in standalone > > > device drivers (for most of them). > > > > > > 2. Most user space processes should not be denied access to IOCTLs that > > > are managed by the VFS layer or the underlying filesystem > > > implementations. For instance, the do_vfs_ioctl()'s ones (e.g. > > > FIOCLEX, FIONREAD) should always be allowed because they may be > > > required to legitimately use files, and for performance and security > > > reasons (e.g. fs-crypt, fsverity implemented at the filesystem layer). > > > Moreover, these IOCTLs should already check the read/write permission > > > (on the related FD), which is not the case for most block/char device > > > IOCTL. > > > > > > 3. IOCTLs to pipes and sockets are out of scope. They should always be > > > allowed for now because they don't directly expose files' data but > > > IPCs instead, and we are focusing on FS access rights for now. > > > > > > We want to add a new LANDLOCK_ACCESS_FS_IOCTL_DEV right that could match > > > on char/block device's specific IOCTLs, but it would not have any impact > > > on other IOCTLs which would then always be allowed (if the sandboxed > > > process is allowed to open the file). > > > > > > Because IOCTLs are implemented in layers and all IOCTLs commands live in > > > the same 32-bit namespace, we need a way to identify the layer > > > implemented by block and character devices. The new LSM hook proposal > > > enables us to cleanly and efficiently identify the char/block device > > > IOCTL layer with an additional check on the file type. > > > > I guess I should wait until there is an actual patch, but as of right > > now a VFS ioctl specific LSM hook looks far too limited to me and > > isn't something I can support at this point in time. It's obviously > > limited to only a subset of the ioctls, meaning that in order to have > > comprehensive coverage we would either need to implement a full range > > of subsystem ioctl hooks (ugh), or just use the existing > > security_file_ioctl(). > > I think there is a misunderstanding. The subset of IOCTL commands the > new hook will see would be 99% of them (i.e. all except those > implemented in fs/ioctl.c). *cough* 99% != 100% *cough* > Being able to only handle this (big) subset > would empower LSMs to control IOCTL commands without collision (e.g. the > same command/value may have different meanings according to the > implementation/layer), which is not currently possible (without manual > tweaking). > > This proposal is to add a new hook for the layer just beneath the VFS > catch-all IOCTL implementation. This layer can then differentiate > between the underlying implementation according to the file properties. > There is no need for additional hooks for other layers/subsystems. I'm not sure how you reconcile less than 100% coverage, the need for a generic hook, and the idea that there will not be a need for additional hooks. That still seems like a problem to me. > The existing security_file_ioctl() hook is useful to catch all IOCTL > commands, but it doesn't enable to identify the underlying target and > then the semantic of the command. The LSM hook gets the file pointer, the command, and the argument, how is a LSM not able to identify the underlying target? > Furthermore, as Günther said, an > IOCTL call can already do kernel operations without looking at the > command, but we would then be able to identify that by looking at the > char/block device file for instance. > > > I understand that this makes things a bit more > > complicated for Landlock's initial ioctl implementation, but > > considering my thoughts above and the fact that Landlock's ioctl > > protections are still evolving I'd rather not add a lot of extra hooks > > right now. > > Without this hook, we'll need to rely on a list of allowed IOCTLs, which > will be out-of-sync eventually. It would be a maintenance burden and an > hacky approach. Welcome to the painful world of a LSM developer, ioctls are not the only place where this is a problem, and it should be easy enough to watch for changes in the ioctl list and update your favorite LSM accordingly. Honestly, I think that is kinda the right thing anyway, I'm skeptical that one could have a generic solution that would automatically allow or disallow a new ioctl without potentially breaking your favorite LSM's security model. If a new ioctl is introduced it seems like having someone manually review it's impact on your LSM would be a good idea. > We're definitely open to new proposals, but until now this is the best > approach we found from a maintenance, performance, and security point of > view. At this point it's probably a good idea to post another RFC patch with your revised idea, if nothing else it will help rule out any confusion. While I remain skeptical, perhaps I am misunderstanding the design and you'll get my apology and an ACK, but be warned that as of right now I'm not convinced. -- paul-moore.com
On Fri, Mar 08, 2024 at 05:25:21PM -0500, Paul Moore wrote: > On Fri, Mar 8, 2024 at 3:12 PM Mickaël Salaün <mic@digikod.net> wrote: > > On Fri, Mar 08, 2024 at 02:22:58PM -0500, Paul Moore wrote: > > > On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote: > > > > Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock > > > > sandbox". > > > > > > Which is a problem from a LSM perspective as we want to avoid hooks > > > which are tightly bound to a single LSM or security model. It's okay > > > if a new hook only has a single LSM implementation, but the hook's > > > definition should be such that it is reasonably generalized to support > > > multiple LSM/models. > > > > As any new hook, there is a first user. Obviously this new hook would > > not be restricted to Landlock, it is a generic approach. I'm pretty > > sure a few hooks are only used by one LSM though. ;) > > Sure, as I said above, it's okay for there to only be a single LSM > implementation, but the basic idea behind the hook needs to have some > hope of being generic. Your "let's redefine a safe ioctl as 'IOCTL > always allowed in a Landlock sandbox'" doesn't fill me with confidence > about the hook being generic; who knows, maybe it will be, but in the > absence of a patch, I'm left with descriptions like those. FWIW, the existing IOCTL hook is used in the following places: * TOMOYO: seemingly configurable per IOCTL command? (I did not dig deeper) * SELinux: has a hardcoded switch of IOCTL commands, some with special checks. These are also a subset of the do_vfs_ioctl() commands, plus KDSKBENT, KDSKBSENT (from ioctl_console(2)). * Smack: Decomposes the IOCTL command number to look at the _IOC_WRITE and _IOC_READ bits. (This is a known problematic approach, because (1) these bits describe whether the argument is getting read or written, not whether the operation is a mutating one, and (2) some IOCTL commands do not adhere to the convention and don't use these macros) AppArmor does not use the LSM IOCTL hook. > > > I understand that this makes things a bit more > > > complicated for Landlock's initial ioctl implementation, but > > > considering my thoughts above and the fact that Landlock's ioctl > > > protections are still evolving I'd rather not add a lot of extra hooks > > > right now. > > > > Without this hook, we'll need to rely on a list of allowed IOCTLs, which > > will be out-of-sync eventually. It would be a maintenance burden and an > > hacky approach. > > Welcome to the painful world of a LSM developer, ioctls are not the > only place where this is a problem, and it should be easy enough to > watch for changes in the ioctl list and update your favorite LSM > accordingly. Honestly, I think that is kinda the right thing anyway, > I'm skeptical that one could have a generic solution that would > automatically allow or disallow a new ioctl without potentially > breaking your favorite LSM's security model. If a new ioctl is > introduced it seems like having someone manually review it's impact on > your LSM would be a good idea. We are concerned that we will miss a change in do_vfs_ioctl(), which we would like to reflect in the matching Landlock code. Do other LSMs have any approaches for that which go beyond just watching the do_vfs_ioctl() implementation for changes? > > We're definitely open to new proposals, but until now this is the best > > approach we found from a maintenance, performance, and security point of > > view. > > At this point it's probably a good idea to post another RFC patch with > your revised idea, if nothing else it will help rule out any > confusion. While I remain skeptical, perhaps I am misunderstanding > the design and you'll get my apology and an ACK, but be warned that as > of right now I'm not convinced. Thanks you for your feedback! Here is V10 with the approach where we use a new LSM hook: https://lore.kernel.org/all/20240309075320.160128-1-gnoack@google.com/ I hope this helps to clarify the approach a bit. I'm explaining it in more detail again in the commit which adds the LSM hook, including a call graph, and avoiding the word "safe" this time ;-) Let me know what you think! Thanks! —Günther
On 3/9/2024 12:14 AM, Günther Noack wrote: > On Fri, Mar 08, 2024 at 05:25:21PM -0500, Paul Moore wrote: >> On Fri, Mar 8, 2024 at 3:12 PM Mickaël Salaün <mic@digikod.net> wrote: >>> On Fri, Mar 08, 2024 at 02:22:58PM -0500, Paul Moore wrote: >>>> On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote: >>>>> Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock >>>>> sandbox". >>>> Which is a problem from a LSM perspective as we want to avoid hooks >>>> which are tightly bound to a single LSM or security model. It's okay >>>> if a new hook only has a single LSM implementation, but the hook's >>>> definition should be such that it is reasonably generalized to support >>>> multiple LSM/models. >>> As any new hook, there is a first user. Obviously this new hook would >>> not be restricted to Landlock, it is a generic approach. I'm pretty >>> sure a few hooks are only used by one LSM though. ;) >> Sure, as I said above, it's okay for there to only be a single LSM >> implementation, but the basic idea behind the hook needs to have some >> hope of being generic. Your "let's redefine a safe ioctl as 'IOCTL >> always allowed in a Landlock sandbox'" doesn't fill me with confidence >> about the hook being generic; who knows, maybe it will be, but in the >> absence of a patch, I'm left with descriptions like those. > FWIW, the existing IOCTL hook is used in the following places: > > * TOMOYO: seemingly configurable per IOCTL command? (I did not dig deeper) > * SELinux: has a hardcoded switch of IOCTL commands, some with special checks. > These are also a subset of the do_vfs_ioctl() commands, > plus KDSKBENT, KDSKBSENT (from ioctl_console(2)). > * Smack: Decomposes the IOCTL command number to look at the _IOC_WRITE and > _IOC_READ bits. (This is a known problematic approach, because (1) these bits > describe whether the argument is getting read or written, not whether the > operation is a mutating one, and (2) some IOCTL commands do not adhere to the > convention and don't use these macros) These shortcomings are well understood. It's a whole lot better than what was done originally, but definitely not up to formal scrutiny. Back in the bad old days of UNIX security evaluations we spent as much time on ioctl() as we did on the rest of the system. Or so it seemed. > > AppArmor does not use the LSM IOCTL hook. > > >>>> I understand that this makes things a bit more >>>> complicated for Landlock's initial ioctl implementation, but >>>> considering my thoughts above and the fact that Landlock's ioctl >>>> protections are still evolving I'd rather not add a lot of extra hooks >>>> right now. >>> Without this hook, we'll need to rely on a list of allowed IOCTLs, which >>> will be out-of-sync eventually. It would be a maintenance burden and an >>> hacky approach. >> Welcome to the painful world of a LSM developer, ioctls are not the >> only place where this is a problem, and it should be easy enough to >> watch for changes in the ioctl list and update your favorite LSM >> accordingly. Honestly, I think that is kinda the right thing anyway, >> I'm skeptical that one could have a generic solution that would >> automatically allow or disallow a new ioctl without potentially >> breaking your favorite LSM's security model. If a new ioctl is >> introduced it seems like having someone manually review it's impact on >> your LSM would be a good idea. > We are concerned that we will miss a change in do_vfs_ioctl(), which we would > like to reflect in the matching Landlock code. Do other LSMs have any > approaches for that which go beyond just watching the do_vfs_ioctl() > implementation for changes? > > >>> We're definitely open to new proposals, but until now this is the best >>> approach we found from a maintenance, performance, and security point of >>> view. >> At this point it's probably a good idea to post another RFC patch with >> your revised idea, if nothing else it will help rule out any >> confusion. While I remain skeptical, perhaps I am misunderstanding >> the design and you'll get my apology and an ACK, but be warned that as >> of right now I'm not convinced. > Thanks you for your feedback! > > Here is V10 with the approach where we use a new LSM hook: > https://lore.kernel.org/all/20240309075320.160128-1-gnoack@google.com/ > > I hope this helps to clarify the approach a bit. I'm explaining it in more > detail again in the commit which adds the LSM hook, including a call graph, and > avoiding the word "safe" this time ;-) > > Let me know what you think! > > Thanks! > —Günther >
On Fri, Mar 08, 2024 at 12:03:01PM +0100, Günther Noack wrote: > On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote: > > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote: > > > On Thu, Mar 07, 2024 at 03:40:44PM -0500, Paul Moore wrote: > > >> On Thu, Mar 7, 2024 at 7:57 AM Günther Noack <gnoack@google.com> wrote: > > >> I need some more convincing as to why we need to introduce these new > > >> hooks, or even the vfs_masked_device_ioctl() classifier as originally > > >> proposed at the top of this thread. I believe I understand why > > >> Landlock wants this, but I worry that we all might have different > > >> definitions of a "safe" ioctl list, and encoding a definition into the > > >> LSM hooks seems like a bad idea to me. > > > > > > I have no idea what a "safe" ioctl means here. Subsystems already > > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so > > > "safe" clearly means something different here. > > > > That was my problem with the first version as well, but I think > > drawing the line between "implemented in fs/ioctl.c" and > > "implemented in a random device driver fops->unlock_ioctl()" > > seems like a more helpful definition. > > Yes, sorry for the confusion - that is exactly what I meant to say with "safe".: > > Those are the IOCTL commands implemented in fs/ioctl.c which do not go through > f_ops->unlocked_ioctl (or the compat equivalent). Which means all the ioctls we wrequire for to manage filesystems are going to be considered "unsafe" and barred, yes? That means you'll break basic commands like 'xfs_info' that tell you the configuration of the filesystem. It will prevent things like online growing and shrinking, online defrag, fstrim, online scrubbing and repair, etc will not worki anymore. It will break backup utilities like xfsdump, and break -all- the device management of btrfs and bcachefs filesystems. Further, all the setup and management of -VFS functionality- like fsverity and fscrypt is actually done at the filesystem level (i.e through ->unlocked_ioctl, no do_vfs_ioctl()) so those are all going to get broken as well despite them being "vfs features". Hence from a filesystem perspective, this is a fundamentally unworkable definition of "safe". > We want to give people a way with Landlock so that they can restrict the use of > device-driver implemented IOCTLs, but where they can keep using the bulk of > more harmless IOCTLs in fs/ioctl.c. Hah! There's plenty of "harm" that can be done through those ioctls. It's the entry point for things like filesystem freeze/thaw, FIEMAP (returns physical data location information), file cloning, deduplication and per-inode feature manipulation. Lots of this stuff is under CAP_SYS_ADMIN because they aren't safe for to be exposed to general users... So, yeah, I don't think this definition of "safe" is actually useful in any way. It's arbitrary, and will require both widespread whitelisting of ioctls to maintain a useful working system and widespread blacklisting to create a secure system.... -Dave.
On Mon, Mar 11, 2024 at 12:03:13PM +1100, Dave Chinner wrote: > On Fri, Mar 08, 2024 at 12:03:01PM +0100, Günther Noack wrote: > > On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote: > > > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote: > > > > I have no idea what a "safe" ioctl means here. Subsystems already > > > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so > > > > "safe" clearly means something different here. > > > > > > That was my problem with the first version as well, but I think > > > drawing the line between "implemented in fs/ioctl.c" and > > > "implemented in a random device driver fops->unlock_ioctl()" > > > seems like a more helpful definition. > > > > Yes, sorry for the confusion - that is exactly what I meant to say with "safe".: > > > > Those are the IOCTL commands implemented in fs/ioctl.c which do not go through > > f_ops->unlocked_ioctl (or the compat equivalent). > > Which means all the ioctls we wrequire for to manage filesystems are > going to be considered "unsafe" and barred, yes? > > That means you'll break basic commands like 'xfs_info' that tell you > the configuration of the filesystem. It will prevent things like > online growing and shrinking, online defrag, fstrim, online > scrubbing and repair, etc will not worki anymore. It will break > backup utilities like xfsdump, and break -all- the device management > of btrfs and bcachefs filesystems. > > Further, all the setup and management of -VFS functionality- like > fsverity and fscrypt is actually done at the filesystem level (i.e > through ->unlocked_ioctl, no do_vfs_ioctl()) so those are all going > to get broken as well despite them being "vfs features". > > Hence from a filesystem perspective, this is a fundamentally > unworkable definition of "safe". As discussed further up in this thread[1], we want to only apply the IOCTL command filtering to block and character devices. I think this should resolve your concerns about file system specific IOCTLs? This is implemented in patch V10 going forward[2]. [1] https://lore.kernel.org/all/20240219.chu4Yeegh3oo@digikod.net/ [2] https://lore.kernel.org/all/20240309075320.160128-1-gnoack@google.com/ > > We want to give people a way with Landlock so that they can restrict the use of > > device-driver implemented IOCTLs, but where they can keep using the bulk of > > more harmless IOCTLs in fs/ioctl.c. > > Hah! There's plenty of "harm" that can be done through those ioctls. > It's the entry point for things like filesystem freeze/thaw, FIEMAP > (returns physical data location information), file cloning, > deduplication and per-inode feature manipulation. Lots of this stuff > is under CAP_SYS_ADMIN because they aren't safe for to be exposed to > general users... The operations themselves are not all harmless, but they are harmless to permit from the Landlock perspective, because (as you point out as well) their use is already adequately controlled in their existing implementations. The proposed patch v10 only influences IOCTL operations on device files, so the "reflink" deduplication IOCTLs, FIEMAP, etc. should not matter. —Günther
On Sat, Mar 9, 2024 at 3:14 AM Günther Noack <gnoack@google.com> wrote: > On Fri, Mar 08, 2024 at 05:25:21PM -0500, Paul Moore wrote: > > On Fri, Mar 8, 2024 at 3:12 PM Mickaël Salaün <mic@digikod.net> wrote: > > > On Fri, Mar 08, 2024 at 02:22:58PM -0500, Paul Moore wrote: > > > > On Fri, Mar 8, 2024 at 4:29 AM Mickaël Salaün <mic@digikod.net> wrote: > > > > > Let's replace "safe IOCTL" with "IOCTL always allowed in a Landlock > > > > > sandbox". > > > > > > > > Which is a problem from a LSM perspective as we want to avoid hooks > > > > which are tightly bound to a single LSM or security model. It's okay > > > > if a new hook only has a single LSM implementation, but the hook's > > > > definition should be such that it is reasonably generalized to support > > > > multiple LSM/models. > > > > > > As any new hook, there is a first user. Obviously this new hook would > > > not be restricted to Landlock, it is a generic approach. I'm pretty > > > sure a few hooks are only used by one LSM though. ;) > > > > Sure, as I said above, it's okay for there to only be a single LSM > > implementation, but the basic idea behind the hook needs to have some > > hope of being generic. Your "let's redefine a safe ioctl as 'IOCTL > > always allowed in a Landlock sandbox'" doesn't fill me with confidence > > about the hook being generic; who knows, maybe it will be, but in the > > absence of a patch, I'm left with descriptions like those. > > FWIW, the existing IOCTL hook is used in the following places: > > * TOMOYO: seemingly configurable per IOCTL command? (I did not dig deeper) > * SELinux: has a hardcoded switch of IOCTL commands, some with special checks. > These are also a subset of the do_vfs_ioctl() commands, > plus KDSKBENT, KDSKBSENT (from ioctl_console(2)). One should be careful using the term "hardcoded" here as I believe it is misleading in the SELinux case. SELinux has 11 explicitly defined ioctls, with an additional two configurable on a per-policy basis depending on the state of the SELinux IOCTL_SKIP_CLOEXEC policy capability. The security policy associated with these explicit ioctl checks is not hardcoded into the kernel, it is defined as part of the greater SELinux security policy. One could make an argument that FIONBIO and FIOASYNC look a bit hardcoded, but there is some subtlety there that is probably not worth exploring further in this context but I'm happy to discuss in a different thread if that is helpful. All the ioctls that are not explicitly defined in the SELinux code, are still subject to SELinux policy through both the file/ioctl permission and the extended permission (xperm) functionality. The SELinux xperm functionality, when tied to an ioctl operation, allows policy developers to allow or deny specific ioctl operations on a per-domain basis. > * Smack: Decomposes the IOCTL command number to look at the _IOC_WRITE and > _IOC_READ bits. (This is a known problematic approach, because (1) these bits > describe whether the argument is getting read or written, not whether the > operation is a mutating one, and (2) some IOCTL commands do not adhere to the > convention and don't use these macros) > > AppArmor does not use the LSM IOCTL hook.
On Mon, Mar 11, 2024 at 10:01:33AM +0100, Günther Noack wrote: > On Mon, Mar 11, 2024 at 12:03:13PM +1100, Dave Chinner wrote: > > On Fri, Mar 08, 2024 at 12:03:01PM +0100, Günther Noack wrote: > > > On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote: > > > > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote: > > > > > I have no idea what a "safe" ioctl means here. Subsystems already > > > > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so > > > > > "safe" clearly means something different here. > > > > > > > > That was my problem with the first version as well, but I think > > > > drawing the line between "implemented in fs/ioctl.c" and > > > > "implemented in a random device driver fops->unlock_ioctl()" > > > > seems like a more helpful definition. > > > > > > Yes, sorry for the confusion - that is exactly what I meant to say with "safe".: > > > > > > Those are the IOCTL commands implemented in fs/ioctl.c which do not go through > > > f_ops->unlocked_ioctl (or the compat equivalent). > > > > Which means all the ioctls we wrequire for to manage filesystems are > > going to be considered "unsafe" and barred, yes? > > > > That means you'll break basic commands like 'xfs_info' that tell you > > the configuration of the filesystem. It will prevent things like > > online growing and shrinking, online defrag, fstrim, online > > scrubbing and repair, etc will not worki anymore. It will break > > backup utilities like xfsdump, and break -all- the device management > > of btrfs and bcachefs filesystems. > > > > Further, all the setup and management of -VFS functionality- like > > fsverity and fscrypt is actually done at the filesystem level (i.e > > through ->unlocked_ioctl, no do_vfs_ioctl()) so those are all going > > to get broken as well despite them being "vfs features". > > > > Hence from a filesystem perspective, this is a fundamentally > > unworkable definition of "safe". > > As discussed further up in this thread[1], we want to only apply the IOCTL > command filtering to block and character devices. I think this should resolve > your concerns about file system specific IOCTLs? This is implemented in patch > V10 going forward[2]. I think you misunderstand. I used filesystem ioctls as an obvious counter argument to this "VFS-only ioctls are safe" proposal to show that it fundamentally breaks core filesystem boot and management interfaces. Operations to prepare filesystems for mount may require block device ioctls to be run. i.e. block device ioctls are required core boot and management interfaces. Disallowing ioctls on block devices will break udev rules that set up block devices on kernel device instantiation events. It will break partitioning tools that need to read/modify/rescan the partition table. This will prevent discard, block zeroing and *secure erase* operations. It may prevent libblkid from reporting optimal device IO parameters to filesystem utilities like mkfs. You won't be able to mark block devices as read only. Management of zoned block devices will be impossible. Then stuff like DM and MD devices (e.g. LVM, RAID, etc) simply won't appear on the system because they can't be scanned, configured, assembled, etc. And so on. The fundamental fact is that system critical block device ioctls are implemented by generic infrastructure below the VFS layer. They have their own generic ioctl layer - blkdev_ioctl() is equivalent of do_vfs_ioctl() for the block layer. But if we cut off everything below ->unlocked_ioctl() at the VFS, then we simply can't run any of these generic block device ioctls. As I said: this proposal is fundamentally unworkable without extensive white- and black-listing of individual ioctls in the security policies. That's not really a viable situation, because we're going to change code and hence likely silently break those security policy lists regularly.... -Dave.
On Tue, Mar 12, 2024 at 09:12:28AM +1100, Dave Chinner wrote: > On Mon, Mar 11, 2024 at 10:01:33AM +0100, Günther Noack wrote: > > On Mon, Mar 11, 2024 at 12:03:13PM +1100, Dave Chinner wrote: > > > On Fri, Mar 08, 2024 at 12:03:01PM +0100, Günther Noack wrote: > > > > On Fri, Mar 08, 2024 at 08:02:13AM +0100, Arnd Bergmann wrote: > > > > > On Fri, Mar 8, 2024, at 00:09, Dave Chinner wrote: > > > > > > I have no idea what a "safe" ioctl means here. Subsystems already > > > > > > restrict ioctls that can do damage if misused to CAP_SYS_ADMIN, so > > > > > > "safe" clearly means something different here. > > > > > > > > > > That was my problem with the first version as well, but I think > > > > > drawing the line between "implemented in fs/ioctl.c" and > > > > > "implemented in a random device driver fops->unlock_ioctl()" > > > > > seems like a more helpful definition. > > > > > > > > Yes, sorry for the confusion - that is exactly what I meant to say with "safe".: > > > > > > > > Those are the IOCTL commands implemented in fs/ioctl.c which do not go through > > > > f_ops->unlocked_ioctl (or the compat equivalent). > > > > > > Which means all the ioctls we wrequire for to manage filesystems are > > > going to be considered "unsafe" and barred, yes? > > > > > > That means you'll break basic commands like 'xfs_info' that tell you > > > the configuration of the filesystem. It will prevent things like > > > online growing and shrinking, online defrag, fstrim, online > > > scrubbing and repair, etc will not worki anymore. It will break > > > backup utilities like xfsdump, and break -all- the device management > > > of btrfs and bcachefs filesystems. > > > > > > Further, all the setup and management of -VFS functionality- like > > > fsverity and fscrypt is actually done at the filesystem level (i.e > > > through ->unlocked_ioctl, no do_vfs_ioctl()) so those are all going > > > to get broken as well despite them being "vfs features". > > > > > > Hence from a filesystem perspective, this is a fundamentally > > > unworkable definition of "safe". > > > > As discussed further up in this thread[1], we want to only apply the IOCTL > > command filtering to block and character devices. I think this should resolve > > your concerns about file system specific IOCTLs? This is implemented in patch > > V10 going forward[2]. > > I think you misunderstand. I used filesystem ioctls as an obvious > counter argument to this "VFS-only ioctls are safe" proposal to show > that it fundamentally breaks core filesystem boot and management > interfaces. Operations to prepare filesystems for mount may require > block device ioctls to be run. i.e. block device ioctls are required > core boot and management interfaces. > > Disallowing ioctls on block devices will break udev rules that set > up block devices on kernel device instantiation events. It will > break partitioning tools that need to read/modify/rescan the > partition table. This will prevent discard, block zeroing and > *secure erase* operations. It may prevent libblkid from reporting > optimal device IO parameters to filesystem utilities like mkfs. You > won't be able to mark block devices as read only. Management of > zoned block devices will be impossible. > > Then stuff like DM and MD devices (e.g. LVM, RAID, etc) simply won't > appear on the system because they can't be scanned, configured, > assembled, etc. > > And so on. > > The fundamental fact is that system critical block device ioctls are > implemented by generic infrastructure below the VFS layer. They have > their own generic ioctl layer - blkdev_ioctl() is equivalent of > do_vfs_ioctl() for the block layer. But if we cut off everything > below ->unlocked_ioctl() at the VFS, then we simply can't run any > of these generic block device ioctls. > > As I said: this proposal is fundamentally unworkable without > extensive white- and black-listing of individual ioctls in the > security policies. That's not really a viable situation, because > we're going to change code and hence likely silently break those > security policy lists regularly.... Landlock is an optional sandboxing mechanism targeting unprivileged users/processes (even if it can of course be used by privileged ones). This means that there is no global security policy for the whole system (unlike SELinux, AppArmor...). System administrators that need to manage a file system or any block devices would just not sandbox themselves. Moreover, most block devices should only be accessible to the root user (which makes root the only one able to send IOCTL commands to these block devices). In a nutshell, processes using boot and management interfaces are already privileged and they don't use Landlock. For instance, a landlocked process cannot do any mount action, which is documented and it makes sense for the sandboxing use case (to avoid sandbox bypass). However, it would be interesting to know if unprivileged users can request legitimate IOCTL commands on block devices (on a generic distro), and if this is required for a common file system use (i.e. excluding administration tasks). I think all required IOCTL for common file system use are available through the file system, not block devices, but please correct me if I'm wrong. What is nice with this LANDLOCK_ACCESS_FS_IOCTL_DEV approach is that user space can identify (with path and dev major/minor) on which device IOCTLs should be allowed. This is simple to understand and the information to identify such devices is already well known. We can also allow IOCTLs on a set of devices, e.g. /dev/snd/. The goal of this patch series is to enable applications to sandbox themselves and avoid an attacker (exploiting a bug in this application) to send arbitrary IOCTL commands to any devices available to the user running this application. For this sandboxing use case, I think it wouldn't be useful to differentiate between blkdev_ioctl()'s commands and device-specific commands because we want to either allow all IOCTL on a block device or deny most of them (not those handled by do_vfs_ioctl(), e.g. FIOCLEX, but that's a detail because of the file access rights). This is a trade off to ease sandboxing while being able to limit access to unneeded features (which could potentially be used to bypass the sandbox, e.g. TTY's IOCTLs).
diff --git a/fs/ioctl.c b/fs/ioctl.c index 76cf22ac97d7..f72c8da47d21 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -763,6 +763,38 @@ static int ioctl_fssetxattr(struct file *file, void __user *argp) return err; } +/* + * Safeguard to maintain a list of valid IOCTLs handled by do_vfs_ioctl() + * instead of def_blk_fops or def_chr_fops (see init_special_inode). + */ +__attribute_const__ bool vfs_masked_device_ioctl(const unsigned int cmd) +{ + switch (cmd) { + case FIOCLEX: + case FIONCLEX: + case FIONBIO: + case FIOASYNC: + case FIOQSIZE: + case FIFREEZE: + case FITHAW: + case FS_IOC_FIEMAP: + case FIGETBSZ: + case FICLONE: + case FICLONERANGE: + case FIDEDUPERANGE: + /* FIONREAD is forwarded to device implementations. */ + case FS_IOC_GETFLAGS: + case FS_IOC_SETFLAGS: + case FS_IOC_FSGETXATTR: + case FS_IOC_FSSETXATTR: + /* file_ioctl()'s IOCTLs are forwarded to device implementations. */ + return true; + default: + return false; + } +} +EXPORT_SYMBOL(vfs_masked_device_ioctl); + /* * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d. * It's just a simple helper for sys_ioctl and compat_sys_ioctl. @@ -858,6 +890,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct fd f = fdget(fd); int error; + const struct inode *inode; + bool is_device; if (!f.file) return -EBADF; @@ -866,9 +900,18 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) if (error) goto out; + inode = file_inode(f.file); + is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode); + if (is_device && !vfs_masked_device_ioctl(cmd)) { + error = vfs_ioctl(f.file, cmd, arg); + goto out; + } + error = do_vfs_ioctl(f.file, fd, cmd, arg); - if (error == -ENOIOCTLCMD) + if (error == -ENOIOCTLCMD) { + WARN_ON_ONCE(is_device); error = vfs_ioctl(f.file, cmd, arg); + } out: fdput(f); @@ -911,11 +954,49 @@ long compat_ptr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } EXPORT_SYMBOL(compat_ptr_ioctl); +static long ioctl_compat(struct file *filp, unsigned int cmd, + compat_ulong_t arg) +{ + int error = -ENOTTY; + + if (!filp->f_op->compat_ioctl) + goto out; + + error = filp->f_op->compat_ioctl(filp, cmd, arg); + if (error == -ENOIOCTLCMD) + error = -ENOTTY; + +out: + return error; +} + +__attribute_const__ bool vfs_masked_device_ioctl_compat(const unsigned int cmd) +{ + switch (cmd) { + case FICLONE: +#if defined(CONFIG_X86_64) + case FS_IOC_RESVSP_32: + case FS_IOC_RESVSP64_32: + case FS_IOC_UNRESVSP_32: + case FS_IOC_UNRESVSP64_32: + case FS_IOC_ZERO_RANGE_32: +#endif + case FS_IOC32_GETFLAGS: + case FS_IOC32_SETFLAGS: + return true; + default: + return vfs_masked_device_ioctl(cmd); + } +} +EXPORT_SYMBOL(vfs_masked_device_ioctl_compat); + COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { struct fd f = fdget(fd); int error; + const struct inode *inode; + bool is_device; if (!f.file) return -EBADF; @@ -924,6 +1005,13 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, if (error) goto out; + inode = file_inode(f.file); + is_device = S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode); + if (is_device && !vfs_masked_device_ioctl_compat(cmd)) { + error = ioctl_compat(f.file, cmd, arg); + goto out; + } + switch (cmd) { /* FICLONE takes an int argument, so don't use compat_ptr() */ case FICLONE: @@ -964,13 +1052,10 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, default: error = do_vfs_ioctl(f.file, fd, cmd, (unsigned long)compat_ptr(arg)); - if (error != -ENOIOCTLCMD) - break; - - if (f.file->f_op->compat_ioctl) - error = f.file->f_op->compat_ioctl(f.file, cmd, arg); - if (error == -ENOIOCTLCMD) - error = -ENOTTY; + if (error == -ENOIOCTLCMD) { + WARN_ON_ONCE(is_device); + error = ioctl_compat(f.file, cmd, arg); + } break; } diff --git a/include/linux/fs.h b/include/linux/fs.h index ed5966a70495..b620d0c00e16 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1902,6 +1902,18 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, #define compat_ptr_ioctl NULL #endif +extern __attribute_const__ bool vfs_masked_device_ioctl(const unsigned int cmd); +#ifdef CONFIG_COMPAT +extern __attribute_const__ bool +vfs_masked_device_ioctl_compat(const unsigned int cmd); +#else /* CONFIG_COMPAT */ +static inline __attribute_const__ bool +vfs_masked_device_ioctl_compat(const unsigned int cmd) +{ + return vfs_masked_device_ioctl(cmd); +} +#endif /* CONFIG_COMPAT */ + /* * VFS file helper functions. */