diff mbox series

[v2,2/2] rust: miscdevice: add base miscdevice abstraction

Message ID 20241001-b4-miscdevice-v2-2-330d760041fa@google.com (mailing list archive)
State New
Headers show
Series Miscdevices in Rust | expand

Commit Message

Alice Ryhl Oct. 1, 2024, 8:22 a.m. UTC
Provide a `MiscDevice` trait that lets you specify the file operations
that you wish to provide for your misc device. For now, only three file
operations are provided: open, close, ioctl.

These abstractions only support MISC_DYNAMIC_MINOR. This enforces that
new miscdevices should not hard-code a minor number.

When implementing ioctl, the Result type is used. This means that you
can choose to return either of:
* An integer of type isize.
* An errno using the kernel::error::Error type.
When returning an isize, the integer is returned verbatim. It's mainly
intended for returning positive integers to userspace. However, it is
technically possible to return errors via the isize return value too.

To avoid having a dependency on files, this patch does not provide the
file operations callbacks a pointer to the file. This means that they
cannot check file properties such as O_NONBLOCK (which Binder needs).
Support for that can be added as a follow-up.

To avoid having a dependency on vma, this patch does not provide any way
to implement mmap (which Binder needs). Support for that can be added as
a follow-up.

Rust Binder will use these abstractions to create the /dev/binder file
when binderfs is disabled.

Signed-off-by: Alice Ryhl <aliceryhl@google.com>
---
 rust/bindings/bindings_helper.h |   1 +
 rust/kernel/lib.rs              |   1 +
 rust/kernel/miscdevice.rs       | 241 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 243 insertions(+)

Comments

Dirk Behme Oct. 1, 2024, 8:53 a.m. UTC | #1
On 01.10.2024 10:22, Alice Ryhl wrote:
....
> diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs
> new file mode 100644
> index 000000000000..cbd5249b5b45
> --- /dev/null
> +++ b/rust/kernel/miscdevice.rs
...
> +/// Trait implemented by the private data of an open misc device.
> +#[vtable]
> +pub trait MiscDevice {
> +    /// What kind of pointer should `Self` be wrapped in.
> +    type Ptr: ForeignOwnable + Send + Sync;
> +
> +    /// Called when the misc device is opened.
> +    ///
> +    /// The returned pointer will be stored as the private data for the file.
> +    fn open() -> Result<Self::Ptr>;
> +
> +    /// Called when the misc device is released.
> +    fn release(device: Self::Ptr) {
> +        drop(device);
> +    }
> +
> +    /// Handler for ioctls.
> +    ///
> +    /// The `cmd` argument is usually manipulated using the utilties in [`kernel::ioctl`].

Nit: utilties -> utilities

Dirk
Alice Ryhl Oct. 1, 2024, 9:13 a.m. UTC | #2
On Tue, Oct 1, 2024 at 10:54 AM Dirk Behme <dirk.behme@de.bosch.com> wrote:
>
> On 01.10.2024 10:22, Alice Ryhl wrote:
> ....
> > diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs
> > new file mode 100644
> > index 000000000000..cbd5249b5b45
> > --- /dev/null
> > +++ b/rust/kernel/miscdevice.rs
> ...
> > +/// Trait implemented by the private data of an open misc device.
> > +#[vtable]
> > +pub trait MiscDevice {
> > +    /// What kind of pointer should `Self` be wrapped in.
> > +    type Ptr: ForeignOwnable + Send + Sync;
> > +
> > +    /// Called when the misc device is opened.
> > +    ///
> > +    /// The returned pointer will be stored as the private data for the file.
> > +    fn open() -> Result<Self::Ptr>;
> > +
> > +    /// Called when the misc device is released.
> > +    fn release(device: Self::Ptr) {
> > +        drop(device);
> > +    }
> > +
> > +    /// Handler for ioctls.
> > +    ///
> > +    /// The `cmd` argument is usually manipulated using the utilties in [`kernel::ioctl`].
>
> Nit: utilties -> utilities

Thanks!

Alice
Alice Ryhl Oct. 1, 2024, 11:28 a.m. UTC | #3
On Tue, Oct 1, 2024 at 10:22 AM Alice Ryhl <aliceryhl@google.com> wrote:
> +impl<T: MiscDevice> MiscDeviceRegistration<T> {
> +    /// Register a misc device.
> +    pub fn register(opts: MiscDeviceOptions) -> impl PinInit<Self, Error> {
> +        try_pin_init!(Self {
> +            inner <- Opaque::try_ffi_init(move |slot: *mut bindings::miscdevice| {
> +                // SAFETY: The initializer can write to the provided `slot`.
> +                unsafe { slot.write(opts.into_raw::<T>()) };
> +
> +                // SAFETY: We just wrote the misc device options to the slot. The miscdevice will
> +                // get unregistered before `slot` is deallocated because the memory is pinned and
> +                // the destructor of this type deallocates the memory.
> +                // INVARIANT: If this returns `Ok(())`, then the `slot` will contain a registered
> +                // misc device.
> +                to_result(unsafe { bindings::misc_register(slot) })
> +            }),
> +            _t: PhantomData,
> +        })
> +    }

Note that right now this can only be used in the module init function
if the registration is stored in a pinned box. We need the in-place
initialization change to fix that.

Does anyone want to revive the in-place initialization patch?

Link: https://lore.kernel.org/rust-for-linux/20240328195457.225001-1-wedsonaf@gmail.com/

Alice
Arnd Bergmann Oct. 2, 2024, 12:48 p.m. UTC | #4
On Tue, Oct 1, 2024, at 08:22, Alice Ryhl wrote:
> +#[cfg(CONFIG_COMPAT)]
> +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>(
> +    file: *mut bindings::file,
> +    cmd: c_uint,
> +    arg: c_ulong,
> +) -> c_long {
> +    // SAFETY: The compat ioctl call of a file can access the private 
> data.
> +    let private = unsafe { (*file).private_data };
> +    // SAFETY: Ioctl calls can borrow the private data of the file.
> +    let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) 
> };
> +
> +    match T::compat_ioctl(device, cmd as u32, arg as usize) {
> +        Ok(ret) => ret as c_long,
> +        Err(err) => err.to_errno() as c_long,
> +    }
> +}

I think this works fine as a 1:1 mapping of the C API, so this
is certainly something we can do. On the other hand, it would be
nice to improve the interface in some way and make it better than
the C version.

The changes that I think would be straightforward and helpful are:

- combine native and compat handlers and pass a flag argument
  that the callback can check in case it has to do something
  special for compat mode

- pass the 'arg' value as both a __user pointer and a 'long'
  value to avoid having to cast. This specifically simplifies
  the compat version since that needs different types of
  64-bit extension for incoming 32-bit values.

On top of that, my ideal implementation would significantly
simplify writing safe ioctl handlers by using the information
encoded in the command word:

 - copy the __user data into a kernel buffer for _IOW()
   and back for _IOR() type commands, or both for _IOWR()
 - check that the argument size matches the size of the
   structure it gets assigned to

We have a couple of subsystems in the kernel that already
do something like this, but they all do it differently.
For newly written drivers in rust, we could try to do
this well from the start and only offer a single reliable
way to do it. For drivers implementing existing ioctl
commands, an additional complication is that there are
many command codes that encode incorrect size/direction
data, or none at all.

I don't know if there is a good way to do that last bit
in rust, and even if there is, we may well decide to not
do it at first in order to get something working.

      Arnd
Alice Ryhl Oct. 2, 2024, 12:58 p.m. UTC | #5
On Wed, Oct 2, 2024 at 2:48 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Tue, Oct 1, 2024, at 08:22, Alice Ryhl wrote:
> > +#[cfg(CONFIG_COMPAT)]
> > +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>(
> > +    file: *mut bindings::file,
> > +    cmd: c_uint,
> > +    arg: c_ulong,
> > +) -> c_long {
> > +    // SAFETY: The compat ioctl call of a file can access the private
> > data.
> > +    let private = unsafe { (*file).private_data };
> > +    // SAFETY: Ioctl calls can borrow the private data of the file.
> > +    let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private)
> > };
> > +
> > +    match T::compat_ioctl(device, cmd as u32, arg as usize) {
> > +        Ok(ret) => ret as c_long,
> > +        Err(err) => err.to_errno() as c_long,
> > +    }
> > +}
>
> I think this works fine as a 1:1 mapping of the C API, so this
> is certainly something we can do. On the other hand, it would be
> nice to improve the interface in some way and make it better than
> the C version.
>
> The changes that I think would be straightforward and helpful are:
>
> - combine native and compat handlers and pass a flag argument
>   that the callback can check in case it has to do something
>   special for compat mode
>
> - pass the 'arg' value as both a __user pointer and a 'long'
>   value to avoid having to cast. This specifically simplifies
>   the compat version since that needs different types of
>   64-bit extension for incoming 32-bit values.
>
> On top of that, my ideal implementation would significantly
> simplify writing safe ioctl handlers by using the information
> encoded in the command word:
>
>  - copy the __user data into a kernel buffer for _IOW()
>    and back for _IOR() type commands, or both for _IOWR()
>  - check that the argument size matches the size of the
>    structure it gets assigned to
>
> We have a couple of subsystems in the kernel that already
> do something like this, but they all do it differently.
> For newly written drivers in rust, we could try to do
> this well from the start and only offer a single reliable
> way to do it. For drivers implementing existing ioctl
> commands, an additional complication is that there are
> many command codes that encode incorrect size/direction
> data, or none at all.
>
> I don't know if there is a good way to do that last bit
> in rust, and even if there is, we may well decide to not
> do it at first in order to get something working.

A quick sketch.

One option is to do something along these lines:

struct IoctlParams {
    pub cmd: u32,
    pub arg: usize,
}

impl IoctlParams {
    fn user_slice(&self) -> IoctlUser {
        let userslice = UserSlice::new(self.arg, _IOC_SIZE(self.cmd));
        match _IOC_DIR(self.cmd) {
            _IOC_READ => IoctlParams::Read(userslice.reader()),
            _IOC_WRITE => IoctlParams::Write(userslice.writer()),
            _IOC_READ|_IOC_WRITE => IoctlParams::WriteRead(userslice),
            _ => unreachable!(),
        }
    }
}

enum IoctlUser {
    Read(UserSliceReader),
    Write(UserSliceWriter),
    WriteRead(UserSlice),
}

Then ioctl implementations can use a match statement like this:

match ioc_params.user_slice() {
    IoctlUser::Read(slice) => {},
    IoctlUser::Write(slice) => {},
    IoctlUser::WriteRead(slice) => {},
}

Where each branch of the match handles that case.

Alice
Christian Brauner Oct. 2, 2024, 1:24 p.m. UTC | #6
On Wed, Oct 02, 2024 at 12:48:12PM GMT, Arnd Bergmann wrote:
> On Tue, Oct 1, 2024, at 08:22, Alice Ryhl wrote:
> > +#[cfg(CONFIG_COMPAT)]
> > +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>(
> > +    file: *mut bindings::file,
> > +    cmd: c_uint,
> > +    arg: c_ulong,
> > +) -> c_long {
> > +    // SAFETY: The compat ioctl call of a file can access the private 
> > data.
> > +    let private = unsafe { (*file).private_data };
> > +    // SAFETY: Ioctl calls can borrow the private data of the file.
> > +    let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) 
> > };
> > +
> > +    match T::compat_ioctl(device, cmd as u32, arg as usize) {
> > +        Ok(ret) => ret as c_long,
> > +        Err(err) => err.to_errno() as c_long,
> > +    }
> > +}
> 
> I think this works fine as a 1:1 mapping of the C API, so this
> is certainly something we can do. On the other hand, it would be
> nice to improve the interface in some way and make it better than
> the C version.
> 
> The changes that I think would be straightforward and helpful are:
> 
> - combine native and compat handlers and pass a flag argument
>   that the callback can check in case it has to do something
>   special for compat mode
> 
> - pass the 'arg' value as both a __user pointer and a 'long'
>   value to avoid having to cast. This specifically simplifies
>   the compat version since that needs different types of
>   64-bit extension for incoming 32-bit values.
> 
> On top of that, my ideal implementation would significantly
> simplify writing safe ioctl handlers by using the information
> encoded in the command word:
> 
>  - copy the __user data into a kernel buffer for _IOW()
>    and back for _IOR() type commands, or both for _IOWR()
>  - check that the argument size matches the size of the
>    structure it gets assigned to

- Handle versioning by size for ioctl()s correctly so stuff like:

        /* extensible ioctls */
        switch (_IOC_NR(ioctl)) {
        case _IOC_NR(NS_MNT_GET_INFO): {
                struct mnt_ns_info kinfo = {};
                struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg;
                size_t usize = _IOC_SIZE(ioctl);

                if (ns->ops->type != CLONE_NEWNS)
                        return -EINVAL;

                if (!uinfo)
                        return -EINVAL;

                if (usize < MNT_NS_INFO_SIZE_VER0)
                        return -EINVAL;

                return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo);
        }

This is not well-known and noone versions ioctl()s correctly and if they
do it's their own hand-rolled thing. Ideally, this would be a first
class concept with Rust bindings and versioning like this would be
universally enforced.

> 
> We have a couple of subsystems in the kernel that already
> do something like this, but they all do it differently.
> For newly written drivers in rust, we could try to do
> this well from the start and only offer a single reliable
> way to do it. For drivers implementing existing ioctl
> commands, an additional complication is that there are
> many command codes that encode incorrect size/direction
> data, or none at all.
> 
> I don't know if there is a good way to do that last bit
> in rust, and even if there is, we may well decide to not
> do it at first in order to get something working.
> 
>       Arnd
Arnd Bergmann Oct. 2, 2024, 1:24 p.m. UTC | #7
On Wed, Oct 2, 2024, at 12:58, Alice Ryhl wrote:
> On Wed, Oct 2, 2024 at 2:48 PM Arnd Bergmann <arnd@arndb.de> wrote:
> A quick sketch.
>
> One option is to do something along these lines:

This does seem promising, at least if I read your sketch
correctly. I'd probably need a more concrete example to
understand better how this would be used in a driver.

> struct IoctlParams {
>     pub cmd: u32,
>     pub arg: usize,
> }
>
> impl IoctlParams {
>     fn user_slice(&self) -> IoctlUser {
>         let userslice = UserSlice::new(self.arg, _IOC_SIZE(self.cmd));
>         match _IOC_DIR(self.cmd) {
>             _IOC_READ => IoctlParams::Read(userslice.reader()),
>             _IOC_WRITE => IoctlParams::Write(userslice.writer()),
>             _IOC_READ|_IOC_WRITE => IoctlParams::WriteRead(userslice),
>             _ => unreachable!(),

Does the unreachable() here mean that something bad happens
if userspace passes something other than one of the three,
or are the 'cmd' values here in-kernel constants that are
always valid?

> enum IoctlUser {
>     Read(UserSliceReader),
>     Write(UserSliceWriter),
>     WriteRead(UserSlice),
> }
>
> Then ioctl implementations can use a match statement like this:
>
> match ioc_params.user_slice() {
>     IoctlUser::Read(slice) => {},
>     IoctlUser::Write(slice) => {},
>     IoctlUser::WriteRead(slice) => {},
> }
>
> Where each branch of the match handles that case.

This is where I fail to see how that would fit in. If there
is a match statement in a driver, I would assume that it would
always match on the entire cmd code, but never have a command
that could with more than one _IOC_DIR type.

      Arnd
Alice Ryhl Oct. 2, 2024, 1:31 p.m. UTC | #8
On Wed, Oct 2, 2024 at 3:25 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Wed, Oct 2, 2024, at 12:58, Alice Ryhl wrote:
> > On Wed, Oct 2, 2024 at 2:48 PM Arnd Bergmann <arnd@arndb.de> wrote:
> > A quick sketch.
> >
> > One option is to do something along these lines:
>
> This does seem promising, at least if I read your sketch
> correctly. I'd probably need a more concrete example to
> understand better how this would be used in a driver.

Could you point me at a driver that uses all of the features we want
to support? Then I can try to sketch it.

> > struct IoctlParams {
> >     pub cmd: u32,
> >     pub arg: usize,
> > }
> >
> > impl IoctlParams {
> >     fn user_slice(&self) -> IoctlUser {
> >         let userslice = UserSlice::new(self.arg, _IOC_SIZE(self.cmd));
> >         match _IOC_DIR(self.cmd) {
> >             _IOC_READ => IoctlParams::Read(userslice.reader()),
> >             _IOC_WRITE => IoctlParams::Write(userslice.writer()),
> >             _IOC_READ|_IOC_WRITE => IoctlParams::WriteRead(userslice),
> >             _ => unreachable!(),
>
> Does the unreachable() here mean that something bad happens
> if userspace passes something other than one of the three,
> or are the 'cmd' values here in-kernel constants that are
> always valid?

The unreachable!() macro is equivalent to a call to BUG() .. we
probably need to handle the fourth case too so that userspace can't
trigger it ... but _IOC_DIR only has 4 possible return values.

> > enum IoctlUser {
> >     Read(UserSliceReader),
> >     Write(UserSliceWriter),
> >     WriteRead(UserSlice),
> > }
> >
> > Then ioctl implementations can use a match statement like this:
> >
> > match ioc_params.user_slice() {
> >     IoctlUser::Read(slice) => {},
> >     IoctlUser::Write(slice) => {},
> >     IoctlUser::WriteRead(slice) => {},
> > }
> >
> > Where each branch of the match handles that case.
>
> This is where I fail to see how that would fit in. If there
> is a match statement in a driver, I would assume that it would
> always match on the entire cmd code, but never have a command
> that could with more than one _IOC_DIR type.

Here's what Rust Binder does today:

/// The ioctl handler.
impl Process {
    /// Ioctls that are write-only from the perspective of userspace.
    ///
    /// The kernel will only read from the pointer that userspace
provided to us.
    fn ioctl_write_only(
        this: ArcBorrow<'_, Process>,
        _file: &File,
        cmd: u32,
        reader: &mut UserSliceReader,
    ) -> Result {
        let thread = this.get_current_thread()?;
        match cmd {
            bindings::BINDER_SET_MAX_THREADS =>
this.set_max_threads(reader.read()?),
            bindings::BINDER_THREAD_EXIT => this.remove_thread(thread),
            bindings::BINDER_SET_CONTEXT_MGR =>
this.set_as_manager(None, &thread)?,
            bindings::BINDER_SET_CONTEXT_MGR_EXT => {
                this.set_as_manager(Some(reader.read()?), &thread)?
            }
            bindings::BINDER_ENABLE_ONEWAY_SPAM_DETECTION => {
                this.set_oneway_spam_detection_enabled(reader.read()?)
            }
            bindings::BINDER_FREEZE => ioctl_freeze(reader)?,
            _ => return Err(EINVAL),
        }
        Ok(())
    }

    /// Ioctls that are read/write from the perspective of userspace.
    ///
    /// The kernel will both read from and write to the pointer that
userspace provided to us.
    fn ioctl_write_read(
        this: ArcBorrow<'_, Process>,
        file: &File,
        cmd: u32,
        data: UserSlice,
    ) -> Result {
        let thread = this.get_current_thread()?;
        let blocking = (file.flags() & file::flags::O_NONBLOCK) == 0;
        match cmd {
            bindings::BINDER_WRITE_READ => thread.write_read(data, blocking)?,
            bindings::BINDER_GET_NODE_DEBUG_INFO =>
this.get_node_debug_info(data)?,
            bindings::BINDER_GET_NODE_INFO_FOR_REF =>
this.get_node_info_from_ref(data)?,
            bindings::BINDER_VERSION => this.version(data)?,
            bindings::BINDER_GET_FROZEN_INFO => get_frozen_status(data)?,
            bindings::BINDER_GET_EXTENDED_ERROR =>
thread.get_extended_error(data)?,
            _ => return Err(EINVAL),
        }
        Ok(())
    }

    pub(crate) fn ioctl(this: ArcBorrow<'_, Process>, file: &File,
cmd: u32, arg: usize) -> Result {
        use kernel::ioctl::{_IOC_DIR, _IOC_SIZE};
        use kernel::uapi::{_IOC_READ, _IOC_WRITE};

        crate::trace::trace_ioctl(cmd, arg as usize);

        let user_slice = UserSlice::new(arg, _IOC_SIZE(cmd));

        const _IOC_READ_WRITE: u32 = _IOC_READ | _IOC_WRITE;

        let res = match _IOC_DIR(cmd) {
            _IOC_WRITE => Self::ioctl_write_only(this, file, cmd, &mut
user_slice.reader()),
            _IOC_READ_WRITE => Self::ioctl_write_read(this, file, cmd,
user_slice),
            _ => Err(EINVAL),
        };

        crate::trace::trace_ioctl_done(res);
        res
    }
}

Alice
Christian Brauner Oct. 2, 2024, 1:31 p.m. UTC | #9
On Tue, Oct 01, 2024 at 08:22:22AM GMT, Alice Ryhl wrote:
> Provide a `MiscDevice` trait that lets you specify the file operations
> that you wish to provide for your misc device. For now, only three file
> operations are provided: open, close, ioctl.
> 
> These abstractions only support MISC_DYNAMIC_MINOR. This enforces that
> new miscdevices should not hard-code a minor number.
> 
> When implementing ioctl, the Result type is used. This means that you
> can choose to return either of:
> * An integer of type isize.
> * An errno using the kernel::error::Error type.
> When returning an isize, the integer is returned verbatim. It's mainly
> intended for returning positive integers to userspace. However, it is
> technically possible to return errors via the isize return value too.
> 
> To avoid having a dependency on files, this patch does not provide the
> file operations callbacks a pointer to the file. This means that they
> cannot check file properties such as O_NONBLOCK (which Binder needs).
> Support for that can be added as a follow-up.
> 
> To avoid having a dependency on vma, this patch does not provide any way
> to implement mmap (which Binder needs). Support for that can be added as
> a follow-up.
> 
> Rust Binder will use these abstractions to create the /dev/binder file
> when binderfs is disabled.

Do you really need to expose both CONFIG_BINDERFS and the hard-coded
device creation stuff in the Kconfig that was done before that? Seems a
bit pointless to me.

> 
> Signed-off-by: Alice Ryhl <aliceryhl@google.com>
> ---
>  rust/bindings/bindings_helper.h |   1 +
>  rust/kernel/lib.rs              |   1 +
>  rust/kernel/miscdevice.rs       | 241 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 243 insertions(+)
> 
> diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
> index ae82e9c941af..84303bf221dd 100644
> --- a/rust/bindings/bindings_helper.h
> +++ b/rust/bindings/bindings_helper.h
> @@ -15,6 +15,7 @@
>  #include <linux/firmware.h>
>  #include <linux/jiffies.h>
>  #include <linux/mdio.h>
> +#include <linux/miscdevice.h>
>  #include <linux/phy.h>
>  #include <linux/refcount.h>
>  #include <linux/sched.h>
> diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
> index 22a3bfa5a9e9..e268eae54c81 100644
> --- a/rust/kernel/lib.rs
> +++ b/rust/kernel/lib.rs
> @@ -39,6 +39,7 @@
>  #[cfg(CONFIG_KUNIT)]
>  pub mod kunit;
>  pub mod list;
> +pub mod miscdevice;
>  #[cfg(CONFIG_NET)]
>  pub mod net;
>  pub mod page;
> diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs
> new file mode 100644
> index 000000000000..cbd5249b5b45
> --- /dev/null
> +++ b/rust/kernel/miscdevice.rs
> @@ -0,0 +1,241 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +// Copyright (C) 2024 Google LLC.
> +
> +//! Miscdevice support.
> +//!
> +//! C headers: [`include/linux/miscdevice.h`](srctree/include/linux/miscdevice.h).
> +//!
> +//! Reference: <https://www.kernel.org/doc/html/latest/driver-api/misc_devices.html>
> +
> +use crate::{
> +    bindings,
> +    error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR},
> +    prelude::*,
> +    str::CStr,
> +    types::{ForeignOwnable, Opaque},
> +};
> +use core::{
> +    ffi::{c_int, c_long, c_uint, c_ulong},
> +    marker::PhantomData,
> +    mem::MaybeUninit,
> +    pin::Pin,
> +};
> +
> +/// Options for creating a misc device.
> +#[derive(Copy, Clone)]
> +pub struct MiscDeviceOptions {
> +    /// The name of the miscdevice.
> +    pub name: &'static CStr,
> +}
> +
> +impl MiscDeviceOptions {
> +    /// Create a raw `struct miscdev` ready for registration.
> +    pub const fn into_raw<T: MiscDevice>(self) -> bindings::miscdevice {
> +        // SAFETY: All zeros is valid for this C type.
> +        let mut result: bindings::miscdevice = unsafe { MaybeUninit::zeroed().assume_init() };
> +        result.minor = bindings::MISC_DYNAMIC_MINOR as _;
> +        result.name = self.name.as_char_ptr();
> +        result.fops = create_vtable::<T>();
> +        result
> +    }
> +}
> +
> +/// A registration of a miscdevice.
> +///
> +/// # Invariants
> +///
> +/// `inner` is a registered misc device.
> +#[repr(transparent)]
> +#[pin_data(PinnedDrop)]
> +pub struct MiscDeviceRegistration<T> {
> +    #[pin]
> +    inner: Opaque<bindings::miscdevice>,
> +    _t: PhantomData<T>,
> +}
> +
> +// SAFETY: It is allowed to call `misc_deregister` on a different thread from where you called
> +// `misc_register`.
> +unsafe impl<T> Send for MiscDeviceRegistration<T> {}
> +// SAFETY: All `&self` methods on this type are written to ensure that it is safe to call them in
> +// parallel.
> +unsafe impl<T> Sync for MiscDeviceRegistration<T> {}
> +
> +impl<T: MiscDevice> MiscDeviceRegistration<T> {
> +    /// Register a misc device.
> +    pub fn register(opts: MiscDeviceOptions) -> impl PinInit<Self, Error> {
> +        try_pin_init!(Self {
> +            inner <- Opaque::try_ffi_init(move |slot: *mut bindings::miscdevice| {
> +                // SAFETY: The initializer can write to the provided `slot`.
> +                unsafe { slot.write(opts.into_raw::<T>()) };
> +
> +                // SAFETY: We just wrote the misc device options to the slot. The miscdevice will
> +                // get unregistered before `slot` is deallocated because the memory is pinned and
> +                // the destructor of this type deallocates the memory.
> +                // INVARIANT: If this returns `Ok(())`, then the `slot` will contain a registered
> +                // misc device.
> +                to_result(unsafe { bindings::misc_register(slot) })
> +            }),
> +            _t: PhantomData,
> +        })
> +    }
> +
> +    /// Returns a raw pointer to the misc device.
> +    pub fn as_raw(&self) -> *mut bindings::miscdevice {
> +        self.inner.get()
> +    }
> +}
> +
> +#[pinned_drop]
> +impl<T> PinnedDrop for MiscDeviceRegistration<T> {
> +    fn drop(self: Pin<&mut Self>) {
> +        // SAFETY: We know that the device is registered by the type invariants.
> +        unsafe { bindings::misc_deregister(self.inner.get()) };
> +    }
> +}
> +
> +/// Trait implemented by the private data of an open misc device.
> +#[vtable]
> +pub trait MiscDevice {
> +    /// What kind of pointer should `Self` be wrapped in.
> +    type Ptr: ForeignOwnable + Send + Sync;
> +
> +    /// Called when the misc device is opened.
> +    ///
> +    /// The returned pointer will be stored as the private data for the file.
> +    fn open() -> Result<Self::Ptr>;
> +
> +    /// Called when the misc device is released.
> +    fn release(device: Self::Ptr) {
> +        drop(device);
> +    }
> +
> +    /// Handler for ioctls.
> +    ///
> +    /// The `cmd` argument is usually manipulated using the utilties in [`kernel::ioctl`].
> +    ///
> +    /// [`kernel::ioctl`]: mod@crate::ioctl
> +    fn ioctl(
> +        _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
> +        _cmd: u32,
> +        _arg: usize,
> +    ) -> Result<isize> {
> +        kernel::build_error(VTABLE_DEFAULT_ERROR)
> +    }
> +
> +    /// Handler for ioctls.
> +    ///
> +    /// Used for 32-bit userspace on 64-bit platforms.
> +    ///
> +    /// This method is optional and only needs to be provided if the ioctl relies on structures
> +    /// that have different layout on 32-bit and 64-bit userspace. If no implementation is
> +    /// provided, then `compat_ptr_ioctl` will be used instead.
> +    #[cfg(CONFIG_COMPAT)]
> +    fn compat_ioctl(
> +        _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
> +        _cmd: u32,
> +        _arg: usize,
> +    ) -> Result<isize> {
> +        kernel::build_error(VTABLE_DEFAULT_ERROR)
> +    }
> +}
> +
> +const fn create_vtable<T: MiscDevice>() -> &'static bindings::file_operations {
> +    const fn maybe_fn<T: Copy>(check: bool, func: T) -> Option<T> {
> +        if check {
> +            Some(func)
> +        } else {
> +            None
> +        }
> +    }
> +
> +    struct VtableHelper<T: MiscDevice> {
> +        _t: PhantomData<T>,
> +    }
> +    impl<T: MiscDevice> VtableHelper<T> {
> +        const VTABLE: bindings::file_operations = bindings::file_operations {
> +            open: Some(fops_open::<T>),
> +            release: Some(fops_release::<T>),
> +            unlocked_ioctl: maybe_fn(T::HAS_IOCTL, fops_ioctl::<T>),
> +            #[cfg(CONFIG_COMPAT)]
> +            compat_ioctl: if T::HAS_COMPAT_IOCTL {
> +                Some(fops_compat_ioctl::<T>)
> +            } else if T::HAS_IOCTL {
> +                Some(bindings::compat_ptr_ioctl)
> +            } else {
> +                None
> +            },
> +            ..unsafe { MaybeUninit::zeroed().assume_init() }
> +        };
> +    }
> +
> +    &VtableHelper::<T>::VTABLE
> +}
> +
> +unsafe extern "C" fn fops_open<T: MiscDevice>(
> +    inode: *mut bindings::inode,
> +    file: *mut bindings::file,
> +) -> c_int {
> +    // SAFETY: The pointers are valid and for a file being opened.
> +    let ret = unsafe { bindings::generic_file_open(inode, file) };
> +    if ret != 0 {
> +        return ret;
> +    }
> +
> +    let ptr = match T::open() {
> +        Ok(ptr) => ptr,
> +        Err(err) => return err.to_errno(),
> +    };
> +
> +    // SAFETY: The open call of a file owns the private data.
> +    unsafe { (*file).private_data = ptr.into_foreign().cast_mut() };
> +
> +    0
> +}
> +
> +unsafe extern "C" fn fops_release<T: MiscDevice>(
> +    _inode: *mut bindings::inode,
> +    file: *mut bindings::file,
> +) -> c_int {
> +    // SAFETY: The release call of a file owns the private data.
> +    let private = unsafe { (*file).private_data };
> +    // SAFETY: The release call of a file owns the private data.
> +    let ptr = unsafe { <T::Ptr as ForeignOwnable>::from_foreign(private) };
> +
> +    T::release(ptr);
> +
> +    0
> +}
> +
> +unsafe extern "C" fn fops_ioctl<T: MiscDevice>(
> +    file: *mut bindings::file,
> +    cmd: c_uint,
> +    arg: c_ulong,
> +) -> c_long {
> +    // SAFETY: The ioctl call of a file can access the private data.
> +    let private = unsafe { (*file).private_data };
> +    // SAFETY: Ioctl calls can borrow the private data of the file.
> +    let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
> +
> +    match T::ioctl(device, cmd as u32, arg as usize) {
> +        Ok(ret) => ret as c_long,
> +        Err(err) => err.to_errno() as c_long,
> +    }
> +}
> +
> +#[cfg(CONFIG_COMPAT)]
> +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>(
> +    file: *mut bindings::file,
> +    cmd: c_uint,
> +    arg: c_ulong,
> +) -> c_long {
> +    // SAFETY: The compat ioctl call of a file can access the private data.
> +    let private = unsafe { (*file).private_data };
> +    // SAFETY: Ioctl calls can borrow the private data of the file.
> +    let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
> +
> +    match T::compat_ioctl(device, cmd as u32, arg as usize) {
> +        Ok(ret) => ret as c_long,
> +        Err(err) => err.to_errno() as c_long,
> +    }
> +}
> 
> -- 
> 2.46.1.824.gd892dcdcdd-goog
>
Alice Ryhl Oct. 2, 2024, 1:36 p.m. UTC | #10
On Wed, Oct 2, 2024 at 3:24 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Wed, Oct 02, 2024 at 12:48:12PM GMT, Arnd Bergmann wrote:
> > On Tue, Oct 1, 2024, at 08:22, Alice Ryhl wrote:
> > > +#[cfg(CONFIG_COMPAT)]
> > > +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>(
> > > +    file: *mut bindings::file,
> > > +    cmd: c_uint,
> > > +    arg: c_ulong,
> > > +) -> c_long {
> > > +    // SAFETY: The compat ioctl call of a file can access the private
> > > data.
> > > +    let private = unsafe { (*file).private_data };
> > > +    // SAFETY: Ioctl calls can borrow the private data of the file.
> > > +    let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private)
> > > };
> > > +
> > > +    match T::compat_ioctl(device, cmd as u32, arg as usize) {
> > > +        Ok(ret) => ret as c_long,
> > > +        Err(err) => err.to_errno() as c_long,
> > > +    }
> > > +}
> >
> > I think this works fine as a 1:1 mapping of the C API, so this
> > is certainly something we can do. On the other hand, it would be
> > nice to improve the interface in some way and make it better than
> > the C version.
> >
> > The changes that I think would be straightforward and helpful are:
> >
> > - combine native and compat handlers and pass a flag argument
> >   that the callback can check in case it has to do something
> >   special for compat mode
> >
> > - pass the 'arg' value as both a __user pointer and a 'long'
> >   value to avoid having to cast. This specifically simplifies
> >   the compat version since that needs different types of
> >   64-bit extension for incoming 32-bit values.
> >
> > On top of that, my ideal implementation would significantly
> > simplify writing safe ioctl handlers by using the information
> > encoded in the command word:
> >
> >  - copy the __user data into a kernel buffer for _IOW()
> >    and back for _IOR() type commands, or both for _IOWR()
> >  - check that the argument size matches the size of the
> >    structure it gets assigned to
>
> - Handle versioning by size for ioctl()s correctly so stuff like:
>
>         /* extensible ioctls */
>         switch (_IOC_NR(ioctl)) {
>         case _IOC_NR(NS_MNT_GET_INFO): {
>                 struct mnt_ns_info kinfo = {};
>                 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg;
>                 size_t usize = _IOC_SIZE(ioctl);
>
>                 if (ns->ops->type != CLONE_NEWNS)
>                         return -EINVAL;
>
>                 if (!uinfo)
>                         return -EINVAL;
>
>                 if (usize < MNT_NS_INFO_SIZE_VER0)
>                         return -EINVAL;
>
>                 return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo);
>         }
>
> This is not well-known and noone versions ioctl()s correctly and if they
> do it's their own hand-rolled thing. Ideally, this would be a first
> class concept with Rust bindings and versioning like this would be
> universally enforced.

Could you point me at some more complete documentation or example of
how to correctly do versioning?

Alice
Arnd Bergmann Oct. 2, 2024, 1:57 p.m. UTC | #11
On Wed, Oct 2, 2024, at 13:31, Alice Ryhl wrote:
> On Wed, Oct 2, 2024 at 3:25 PM Arnd Bergmann <arnd@arndb.de> wrote:
>>
>> On Wed, Oct 2, 2024, at 12:58, Alice Ryhl wrote:
>> > On Wed, Oct 2, 2024 at 2:48 PM Arnd Bergmann <arnd@arndb.de> wrote:
>> > A quick sketch.
>> >
>> > One option is to do something along these lines:
>>
>> This does seem promising, at least if I read your sketch
>> correctly. I'd probably need a more concrete example to
>> understand better how this would be used in a driver.
>
> Could you point me at a driver that uses all of the features we want
> to support? Then I can try to sketch it.

drivers/media/v4l2-core/v4l2-ioctl.c probably has all of the
things we want here, plus more. This is a big ugly for having
to pass a function pointer into the video_usercopy() function
and then have both functions know about particular commands.

You can also see the effects of the compat handlers there,
e.g. VIDIOC_QUERYBUF has three possible sizes associated
with it, depending on sizeof(long) and sizeof(time_t).

There is a small optimization for buffers up to 128 bytes
to avoid the dynamic allocation, and this is likely a good
idea elsewhere as well.

>> > struct IoctlParams {
>> >     pub cmd: u32,
>> >     pub arg: usize,
>> > }
>> >
>> > impl IoctlParams {
>> >     fn user_slice(&self) -> IoctlUser {
>> >         let userslice = UserSlice::new(self.arg, _IOC_SIZE(self.cmd));
>> >         match _IOC_DIR(self.cmd) {
>> >             _IOC_READ => IoctlParams::Read(userslice.reader()),
>> >             _IOC_WRITE => IoctlParams::Write(userslice.writer()),
>> >             _IOC_READ|_IOC_WRITE => IoctlParams::WriteRead(userslice),
>> >             _ => unreachable!(),
>>
>> Does the unreachable() here mean that something bad happens
>> if userspace passes something other than one of the three,
>> or are the 'cmd' values here in-kernel constants that are
>> always valid?
>
> The unreachable!() macro is equivalent to a call to BUG() .. we
> probably need to handle the fourth case too so that userspace can't
> trigger it ... but _IOC_DIR only has 4 possible return values.

As a small complication, _IOC_DIR is architecture specific,
and sometimes uses three bits that lead to four additional values
that are all invalid but could be passed by userspace.

>>
>> This is where I fail to see how that would fit in. If there
>> is a match statement in a driver, I would assume that it would
>> always match on the entire cmd code, but never have a command
>> that could with more than one _IOC_DIR type.
>
> Here's what Rust Binder does today:
>
> /// The ioctl handler.
> impl Process {
>     /// Ioctls that are write-only from the perspective of userspace.
>     ///
>     /// The kernel will only read from the pointer that userspace
> provided to us.
>     fn ioctl_write_only(
>         this: ArcBorrow<'_, Process>,
>         _file: &File,
>         cmd: u32,
>         reader: &mut UserSliceReader,
>     ) -> Result {
>         let thread = this.get_current_thread()?;
>         match cmd {
>             bindings::BINDER_SET_MAX_THREADS =>
> this.set_max_threads(reader.read()?),
>             bindings::BINDER_THREAD_EXIT => this.remove_thread(thread),
>             bindings::BINDER_SET_CONTEXT_MGR =>
> this.set_as_manager(None, &thread)?,
>             bindings::BINDER_SET_CONTEXT_MGR_EXT => {
>                 this.set_as_manager(Some(reader.read()?), &thread)?
>             }
>             bindings::BINDER_ENABLE_ONEWAY_SPAM_DETECTION => {
>                 this.set_oneway_spam_detection_enabled(reader.read()?)
>             }
>             bindings::BINDER_FREEZE => ioctl_freeze(reader)?,
>             _ => return Err(EINVAL),
>         }
>         Ok(())
>     }

I see. So the 'match cmd' bit is what we want to have
for certain, this is a sensible way to structure things.

Having the split into none/read/write/readwrite functions
feels odd to me, and this means we can't group a pair of
get/set commands together in one place, but I can also see
how this makes sense from the perspective of writing the
output buffer back to userspace.

It seems like it should be possible to validate the size of
the argument against _IOC_SIZE(cmd) at compile time, but this
is not currently done, right?

     Arnd
Christian Brauner Oct. 2, 2024, 2:06 p.m. UTC | #12
On Tue, Oct 01, 2024 at 08:22:22AM GMT, Alice Ryhl wrote:
> Provide a `MiscDevice` trait that lets you specify the file operations
> that you wish to provide for your misc device. For now, only three file
> operations are provided: open, close, ioctl.
> 
> These abstractions only support MISC_DYNAMIC_MINOR. This enforces that
> new miscdevices should not hard-code a minor number.
> 
> When implementing ioctl, the Result type is used. This means that you
> can choose to return either of:
> * An integer of type isize.
> * An errno using the kernel::error::Error type.
> When returning an isize, the integer is returned verbatim. It's mainly
> intended for returning positive integers to userspace. However, it is
> technically possible to return errors via the isize return value too.
> 
> To avoid having a dependency on files, this patch does not provide the
> file operations callbacks a pointer to the file. This means that they
> cannot check file properties such as O_NONBLOCK (which Binder needs).
> Support for that can be added as a follow-up.
> 
> To avoid having a dependency on vma, this patch does not provide any way
> to implement mmap (which Binder needs). Support for that can be added as
> a follow-up.
> 
> Rust Binder will use these abstractions to create the /dev/binder file
> when binderfs is disabled.
> 
> Signed-off-by: Alice Ryhl <aliceryhl@google.com>
> ---
>  rust/bindings/bindings_helper.h |   1 +
>  rust/kernel/lib.rs              |   1 +
>  rust/kernel/miscdevice.rs       | 241 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 243 insertions(+)
> 
> diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
> index ae82e9c941af..84303bf221dd 100644
> --- a/rust/bindings/bindings_helper.h
> +++ b/rust/bindings/bindings_helper.h
> @@ -15,6 +15,7 @@
>  #include <linux/firmware.h>
>  #include <linux/jiffies.h>
>  #include <linux/mdio.h>
> +#include <linux/miscdevice.h>
>  #include <linux/phy.h>
>  #include <linux/refcount.h>
>  #include <linux/sched.h>
> diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
> index 22a3bfa5a9e9..e268eae54c81 100644
> --- a/rust/kernel/lib.rs
> +++ b/rust/kernel/lib.rs
> @@ -39,6 +39,7 @@
>  #[cfg(CONFIG_KUNIT)]
>  pub mod kunit;
>  pub mod list;
> +pub mod miscdevice;
>  #[cfg(CONFIG_NET)]
>  pub mod net;
>  pub mod page;
> diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs
> new file mode 100644
> index 000000000000..cbd5249b5b45
> --- /dev/null
> +++ b/rust/kernel/miscdevice.rs
> @@ -0,0 +1,241 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +// Copyright (C) 2024 Google LLC.
> +
> +//! Miscdevice support.
> +//!
> +//! C headers: [`include/linux/miscdevice.h`](srctree/include/linux/miscdevice.h).
> +//!
> +//! Reference: <https://www.kernel.org/doc/html/latest/driver-api/misc_devices.html>
> +
> +use crate::{
> +    bindings,
> +    error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR},
> +    prelude::*,
> +    str::CStr,
> +    types::{ForeignOwnable, Opaque},
> +};
> +use core::{
> +    ffi::{c_int, c_long, c_uint, c_ulong},
> +    marker::PhantomData,
> +    mem::MaybeUninit,
> +    pin::Pin,
> +};
> +
> +/// Options for creating a misc device.
> +#[derive(Copy, Clone)]
> +pub struct MiscDeviceOptions {
> +    /// The name of the miscdevice.
> +    pub name: &'static CStr,
> +}
> +
> +impl MiscDeviceOptions {
> +    /// Create a raw `struct miscdev` ready for registration.
> +    pub const fn into_raw<T: MiscDevice>(self) -> bindings::miscdevice {
> +        // SAFETY: All zeros is valid for this C type.
> +        let mut result: bindings::miscdevice = unsafe { MaybeUninit::zeroed().assume_init() };
> +        result.minor = bindings::MISC_DYNAMIC_MINOR as _;
> +        result.name = self.name.as_char_ptr();
> +        result.fops = create_vtable::<T>();
> +        result
> +    }
> +}
> +
> +/// A registration of a miscdevice.
> +///
> +/// # Invariants
> +///
> +/// `inner` is a registered misc device.
> +#[repr(transparent)]
> +#[pin_data(PinnedDrop)]
> +pub struct MiscDeviceRegistration<T> {
> +    #[pin]
> +    inner: Opaque<bindings::miscdevice>,
> +    _t: PhantomData<T>,
> +}
> +
> +// SAFETY: It is allowed to call `misc_deregister` on a different thread from where you called
> +// `misc_register`.
> +unsafe impl<T> Send for MiscDeviceRegistration<T> {}
> +// SAFETY: All `&self` methods on this type are written to ensure that it is safe to call them in
> +// parallel.
> +unsafe impl<T> Sync for MiscDeviceRegistration<T> {}
> +
> +impl<T: MiscDevice> MiscDeviceRegistration<T> {
> +    /// Register a misc device.
> +    pub fn register(opts: MiscDeviceOptions) -> impl PinInit<Self, Error> {
> +        try_pin_init!(Self {
> +            inner <- Opaque::try_ffi_init(move |slot: *mut bindings::miscdevice| {
> +                // SAFETY: The initializer can write to the provided `slot`.
> +                unsafe { slot.write(opts.into_raw::<T>()) };
> +
> +                // SAFETY: We just wrote the misc device options to the slot. The miscdevice will
> +                // get unregistered before `slot` is deallocated because the memory is pinned and
> +                // the destructor of this type deallocates the memory.
> +                // INVARIANT: If this returns `Ok(())`, then the `slot` will contain a registered
> +                // misc device.
> +                to_result(unsafe { bindings::misc_register(slot) })
> +            }),
> +            _t: PhantomData,
> +        })
> +    }
> +
> +    /// Returns a raw pointer to the misc device.
> +    pub fn as_raw(&self) -> *mut bindings::miscdevice {
> +        self.inner.get()
> +    }
> +}
> +
> +#[pinned_drop]
> +impl<T> PinnedDrop for MiscDeviceRegistration<T> {
> +    fn drop(self: Pin<&mut Self>) {
> +        // SAFETY: We know that the device is registered by the type invariants.
> +        unsafe { bindings::misc_deregister(self.inner.get()) };
> +    }
> +}
> +
> +/// Trait implemented by the private data of an open misc device.
> +#[vtable]
> +pub trait MiscDevice {
> +    /// What kind of pointer should `Self` be wrapped in.
> +    type Ptr: ForeignOwnable + Send + Sync;
> +
> +    /// Called when the misc device is opened.
> +    ///
> +    /// The returned pointer will be stored as the private data for the file.
> +    fn open() -> Result<Self::Ptr>;
> +
> +    /// Called when the misc device is released.
> +    fn release(device: Self::Ptr) {
> +        drop(device);
> +    }
> +
> +    /// Handler for ioctls.
> +    ///
> +    /// The `cmd` argument is usually manipulated using the utilties in [`kernel::ioctl`].
> +    ///
> +    /// [`kernel::ioctl`]: mod@crate::ioctl
> +    fn ioctl(
> +        _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
> +        _cmd: u32,
> +        _arg: usize,
> +    ) -> Result<isize> {
> +        kernel::build_error(VTABLE_DEFAULT_ERROR)
> +    }
> +
> +    /// Handler for ioctls.
> +    ///
> +    /// Used for 32-bit userspace on 64-bit platforms.
> +    ///
> +    /// This method is optional and only needs to be provided if the ioctl relies on structures
> +    /// that have different layout on 32-bit and 64-bit userspace. If no implementation is
> +    /// provided, then `compat_ptr_ioctl` will be used instead.
> +    #[cfg(CONFIG_COMPAT)]
> +    fn compat_ioctl(
> +        _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
> +        _cmd: u32,
> +        _arg: usize,
> +    ) -> Result<isize> {
> +        kernel::build_error(VTABLE_DEFAULT_ERROR)
> +    }
> +}
> +
> +const fn create_vtable<T: MiscDevice>() -> &'static bindings::file_operations {
> +    const fn maybe_fn<T: Copy>(check: bool, func: T) -> Option<T> {
> +        if check {
> +            Some(func)
> +        } else {
> +            None
> +        }
> +    }
> +
> +    struct VtableHelper<T: MiscDevice> {
> +        _t: PhantomData<T>,
> +    }
> +    impl<T: MiscDevice> VtableHelper<T> {
> +        const VTABLE: bindings::file_operations = bindings::file_operations {
> +            open: Some(fops_open::<T>),
> +            release: Some(fops_release::<T>),
> +            unlocked_ioctl: maybe_fn(T::HAS_IOCTL, fops_ioctl::<T>),
> +            #[cfg(CONFIG_COMPAT)]
> +            compat_ioctl: if T::HAS_COMPAT_IOCTL {
> +                Some(fops_compat_ioctl::<T>)
> +            } else if T::HAS_IOCTL {
> +                Some(bindings::compat_ptr_ioctl)
> +            } else {
> +                None
> +            },
> +            ..unsafe { MaybeUninit::zeroed().assume_init() }
> +        };
> +    }
> +
> +    &VtableHelper::<T>::VTABLE
> +}
> +
> +unsafe extern "C" fn fops_open<T: MiscDevice>(
> +    inode: *mut bindings::inode,
> +    file: *mut bindings::file,
> +) -> c_int {
> +    // SAFETY: The pointers are valid and for a file being opened.
> +    let ret = unsafe { bindings::generic_file_open(inode, file) };
> +    if ret != 0 {
> +        return ret;
> +    }

Do you have code where that function is used? Because this looks wrong
or at least I don't understand from just a glance whether that
generic_file_open() call makes sense.

Illustrating how we get from opening /dev/binder to this call would
help.

> +
> +    let ptr = match T::open() {
> +        Ok(ptr) => ptr,
> +        Err(err) => return err.to_errno(),
> +    };
> +
> +    // SAFETY: The open call of a file owns the private data.
> +    unsafe { (*file).private_data = ptr.into_foreign().cast_mut() };
> +
> +    0
> +}
> +
> +unsafe extern "C" fn fops_release<T: MiscDevice>(
> +    _inode: *mut bindings::inode,
> +    file: *mut bindings::file,
> +) -> c_int {
> +    // SAFETY: The release call of a file owns the private data.
> +    let private = unsafe { (*file).private_data };
> +    // SAFETY: The release call of a file owns the private data.
> +    let ptr = unsafe { <T::Ptr as ForeignOwnable>::from_foreign(private) };
> +
> +    T::release(ptr);
> +
> +    0
> +}
> +
> +unsafe extern "C" fn fops_ioctl<T: MiscDevice>(
> +    file: *mut bindings::file,
> +    cmd: c_uint,
> +    arg: c_ulong,
> +) -> c_long {
> +    // SAFETY: The ioctl call of a file can access the private data.
> +    let private = unsafe { (*file).private_data };
> +    // SAFETY: Ioctl calls can borrow the private data of the file.
> +    let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
> +
> +    match T::ioctl(device, cmd as u32, arg as usize) {
> +        Ok(ret) => ret as c_long,
> +        Err(err) => err.to_errno() as c_long,
> +    }
> +}
> +
> +#[cfg(CONFIG_COMPAT)]
> +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>(
> +    file: *mut bindings::file,
> +    cmd: c_uint,
> +    arg: c_ulong,
> +) -> c_long {
> +    // SAFETY: The compat ioctl call of a file can access the private data.
> +    let private = unsafe { (*file).private_data };
> +    // SAFETY: Ioctl calls can borrow the private data of the file.
> +    let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
> +
> +    match T::compat_ioctl(device, cmd as u32, arg as usize) {
> +        Ok(ret) => ret as c_long,
> +        Err(err) => err.to_errno() as c_long,
> +    }
> +}
> 
> -- 
> 2.46.1.824.gd892dcdcdd-goog
>
Alice Ryhl Oct. 2, 2024, 2:23 p.m. UTC | #13
On Wed, Oct 2, 2024 at 3:59 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Wed, Oct 2, 2024, at 13:31, Alice Ryhl wrote:
> > On Wed, Oct 2, 2024 at 3:25 PM Arnd Bergmann <arnd@arndb.de> wrote:
> >>
> >> On Wed, Oct 2, 2024, at 12:58, Alice Ryhl wrote:
> >> > On Wed, Oct 2, 2024 at 2:48 PM Arnd Bergmann <arnd@arndb.de> wrote:
> >> > A quick sketch.
> >> >
> >> > One option is to do something along these lines:
> >>
> >> This does seem promising, at least if I read your sketch
> >> correctly. I'd probably need a more concrete example to
> >> understand better how this would be used in a driver.
> >
> > Could you point me at a driver that uses all of the features we want
> > to support? Then I can try to sketch it.
>
> drivers/media/v4l2-core/v4l2-ioctl.c probably has all of the
> things we want here, plus more. This is a big ugly for having
> to pass a function pointer into the video_usercopy() function
> and then have both functions know about particular commands.
>
> You can also see the effects of the compat handlers there,
> e.g. VIDIOC_QUERYBUF has three possible sizes associated
> with it, depending on sizeof(long) and sizeof(time_t).
>
> There is a small optimization for buffers up to 128 bytes
> to avoid the dynamic allocation, and this is likely a good
> idea elsewhere as well.

Oh, my. That seems like a rather sophisticated ioctl handler.

Do we want all new ioctl handlers to work along those lines?

> >> > struct IoctlParams {
> >> >     pub cmd: u32,
> >> >     pub arg: usize,
> >> > }
> >> >
> >> > impl IoctlParams {
> >> >     fn user_slice(&self) -> IoctlUser {
> >> >         let userslice = UserSlice::new(self.arg, _IOC_SIZE(self.cmd));
> >> >         match _IOC_DIR(self.cmd) {
> >> >             _IOC_READ => IoctlParams::Read(userslice.reader()),
> >> >             _IOC_WRITE => IoctlParams::Write(userslice.writer()),
> >> >             _IOC_READ|_IOC_WRITE => IoctlParams::WriteRead(userslice),
> >> >             _ => unreachable!(),
> >>
> >> Does the unreachable() here mean that something bad happens
> >> if userspace passes something other than one of the three,
> >> or are the 'cmd' values here in-kernel constants that are
> >> always valid?
> >
> > The unreachable!() macro is equivalent to a call to BUG() .. we
> > probably need to handle the fourth case too so that userspace can't
> > trigger it ... but _IOC_DIR only has 4 possible return values.
>
> As a small complication, _IOC_DIR is architecture specific,
> and sometimes uses three bits that lead to four additional values
> that are all invalid but could be passed by userspace.

Interesting. I did not know that.

> >> This is where I fail to see how that would fit in. If there
> >> is a match statement in a driver, I would assume that it would
> >> always match on the entire cmd code, but never have a command
> >> that could with more than one _IOC_DIR type.
> >
> > Here's what Rust Binder does today:
> >
> > /// The ioctl handler.
> > impl Process {
> >     /// Ioctls that are write-only from the perspective of userspace.
> >     ///
> >     /// The kernel will only read from the pointer that userspace
> > provided to us.
> >     fn ioctl_write_only(
> >         this: ArcBorrow<'_, Process>,
> >         _file: &File,
> >         cmd: u32,
> >         reader: &mut UserSliceReader,
> >     ) -> Result {
> >         let thread = this.get_current_thread()?;
> >         match cmd {
> >             bindings::BINDER_SET_MAX_THREADS =>
> > this.set_max_threads(reader.read()?),
> >             bindings::BINDER_THREAD_EXIT => this.remove_thread(thread),
> >             bindings::BINDER_SET_CONTEXT_MGR =>
> > this.set_as_manager(None, &thread)?,
> >             bindings::BINDER_SET_CONTEXT_MGR_EXT => {
> >                 this.set_as_manager(Some(reader.read()?), &thread)?
> >             }
> >             bindings::BINDER_ENABLE_ONEWAY_SPAM_DETECTION => {
> >                 this.set_oneway_spam_detection_enabled(reader.read()?)
> >             }
> >             bindings::BINDER_FREEZE => ioctl_freeze(reader)?,
> >             _ => return Err(EINVAL),
> >         }
> >         Ok(())
> >     }
>
> I see. So the 'match cmd' bit is what we want to have
> for certain, this is a sensible way to structure things.
>
> Having the split into none/read/write/readwrite functions
> feels odd to me, and this means we can't group a pair of
> get/set commands together in one place, but I can also see
> how this makes sense from the perspective of writing the
> output buffer back to userspace.

It's the most convenient way to do it without having any
infrastructure for helping with writing ioctls. I imagine that adding
something to help with that could eliminate the reason for matching
twice in this way.

> It seems like it should be possible to validate the size of
> the argument against _IOC_SIZE(cmd) at compile time, but this
> is not currently done, right?

No, right now that validation happens at runtime. The ioctl handler
tries to use the UserSliceReader to read a struct, which fails if the
struct is too large.

I wonder if we could go for something more comprehensive than the
super simple thing I just put together. I'm sure we can validate more
things at compile time.

Alice
Christian Brauner Oct. 2, 2024, 2:23 p.m. UTC | #14
On Wed, Oct 02, 2024 at 03:36:33PM GMT, Alice Ryhl wrote:
> On Wed, Oct 2, 2024 at 3:24 PM Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Wed, Oct 02, 2024 at 12:48:12PM GMT, Arnd Bergmann wrote:
> > > On Tue, Oct 1, 2024, at 08:22, Alice Ryhl wrote:
> > > > +#[cfg(CONFIG_COMPAT)]
> > > > +unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>(
> > > > +    file: *mut bindings::file,
> > > > +    cmd: c_uint,
> > > > +    arg: c_ulong,
> > > > +) -> c_long {
> > > > +    // SAFETY: The compat ioctl call of a file can access the private
> > > > data.
> > > > +    let private = unsafe { (*file).private_data };
> > > > +    // SAFETY: Ioctl calls can borrow the private data of the file.
> > > > +    let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private)
> > > > };
> > > > +
> > > > +    match T::compat_ioctl(device, cmd as u32, arg as usize) {
> > > > +        Ok(ret) => ret as c_long,
> > > > +        Err(err) => err.to_errno() as c_long,
> > > > +    }
> > > > +}
> > >
> > > I think this works fine as a 1:1 mapping of the C API, so this
> > > is certainly something we can do. On the other hand, it would be
> > > nice to improve the interface in some way and make it better than
> > > the C version.
> > >
> > > The changes that I think would be straightforward and helpful are:
> > >
> > > - combine native and compat handlers and pass a flag argument
> > >   that the callback can check in case it has to do something
> > >   special for compat mode
> > >
> > > - pass the 'arg' value as both a __user pointer and a 'long'
> > >   value to avoid having to cast. This specifically simplifies
> > >   the compat version since that needs different types of
> > >   64-bit extension for incoming 32-bit values.
> > >
> > > On top of that, my ideal implementation would significantly
> > > simplify writing safe ioctl handlers by using the information
> > > encoded in the command word:
> > >
> > >  - copy the __user data into a kernel buffer for _IOW()
> > >    and back for _IOR() type commands, or both for _IOWR()
> > >  - check that the argument size matches the size of the
> > >    structure it gets assigned to
> >
> > - Handle versioning by size for ioctl()s correctly so stuff like:
> >
> >         /* extensible ioctls */
> >         switch (_IOC_NR(ioctl)) {
> >         case _IOC_NR(NS_MNT_GET_INFO): {
> >                 struct mnt_ns_info kinfo = {};
> >                 struct mnt_ns_info __user *uinfo = (struct mnt_ns_info __user *)arg;
> >                 size_t usize = _IOC_SIZE(ioctl);
> >
> >                 if (ns->ops->type != CLONE_NEWNS)
> >                         return -EINVAL;
> >
> >                 if (!uinfo)
> >                         return -EINVAL;
> >
> >                 if (usize < MNT_NS_INFO_SIZE_VER0)
> >                         return -EINVAL;
> >
> >                 return copy_ns_info_to_user(to_mnt_ns(ns), uinfo, usize, &kinfo);
> >         }
> >
> > This is not well-known and noone versions ioctl()s correctly and if they
> > do it's their own hand-rolled thing. Ideally, this would be a first
> > class concept with Rust bindings and versioning like this would be
> > universally enforced.
> 
> Could you point me at some more complete documentation or example of
> how to correctly do versioning?

So I don't want you to lead astray so if this is out of reach for now I
understand but basically we do have the concept of versioning structs by
size.

So I'm taking an example from the mount_setattr() man page though
openat2() would also work:

   Extensibility
       In order to allow for future extensibility, mount_setattr()
       requires the user-space application to specify the size of the
       mount_attr structure that it is passing.  By  providing  this
       information,  it  is  possible for mount_setattr() to provide
       both forwards- and backwards-compatibility, with size acting as
       an implicit version number.  (Because new extension fields will
       always be appended, the structure size will always increase.)
       This extensibility design is very similar  to  other  system
       calls  such  as  perf_setattr(2),  perf_event_open(2), clone3(2)
       and openat2(2).

       Let  usize  be the size of the structure as specified by the
       user-space application, and let ksize be the size of the
       structure which the kernel supports, then there are three cases
       to consider:

       •  If ksize equals usize, then there is no version mismatch and
          attr can be used verbatim.

       •  If ksize is larger than usize, then there are some extension
	  fields that the kernel supports which the user-space
	  application is unaware of.  Because a zero value in any  added
	  extension field signifies a no-op, the kernel treats all of
	  the extension fields not provided by the user-space
	  application as having zero values.  This provides
	  backwards-compatibility.

       •  If ksize is smaller than usize, then there are some extension
	  fields which the user-space application is aware of but which
	  the kernel does not support.  Because any extension field must
	  have  its  zero  values signify a no-op, the kernel can safely
	  ignore the unsupported extension fields if they are all zero.
	  If any unsupported extension fields are non-zero, then -1 is
	  returned and errno is set to E2BIG.  This provides
	  forwards-compatibility.

   [...]

In essence ioctl()s are already versioned by size because the size of
the passed argument is encoded in the ioctl cmd:

struct my_struct {
	__u64 a;
};

ioctl(fd, MY_IOCTL, &my_struct);

then _IOC_SIZE(MY_IOCTL) will give you the expected size.

If the kernel extends the struct to:

struct my_struct {
	__u64 a;
	__u64 b;
};

then the value of MY_IOCTL changes. Most code currently cannot deal with
such an extension because it's coded as a simple switch on the ioctl
command:

switch (cmd) {
	case MY_IOCTL:
		/* do something */
		break;
}

So on an older kernel the ioctl would now fail because it won't be able
to handle MY_STRUCT with an increased struct my_struct size because the
switch wouldn't trigger.

The correct way to handle this is to grab the actual ioctl number out
from the ioctl command:

switch (_IOC_NR(cmd)) {
        case _IOC_NR(MY_STRUCT): {

and then grab the size of the ioctl:

        size_t usize = _IOC_SIZE(ioctl);

perform sanity checks:

	// garbage
        if (usize < MY_STRUCT_SIZE_VER0)
                return -EINVAL;

	// ¿qué?
	if (usize > PAGE_SIZE)
		return -EINVAL;

and then copy the stuff via copy_struct_from_user() or copy back out to
user via other means.

This way you can safely extend ioctl()s in a backward and forward
compatible manner and if we can enforce this for new drivers then I
think that's what we should do.
Alice Ryhl Oct. 2, 2024, 2:24 p.m. UTC | #15
On Wed, Oct 2, 2024 at 4:06 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Tue, Oct 01, 2024 at 08:22:22AM GMT, Alice Ryhl wrote:
> > +unsafe extern "C" fn fops_open<T: MiscDevice>(
> > +    inode: *mut bindings::inode,
> > +    file: *mut bindings::file,
> > +) -> c_int {
> > +    // SAFETY: The pointers are valid and for a file being opened.
> > +    let ret = unsafe { bindings::generic_file_open(inode, file) };
> > +    if ret != 0 {
> > +        return ret;
> > +    }
>
> Do you have code where that function is used? Because this looks wrong
> or at least I don't understand from just a glance whether that
> generic_file_open() call makes sense.
>
> Illustrating how we get from opening /dev/binder to this call would
> help.

Hmm. I wrote this by comparing with the ashmem open callback. Now that
you mention it you are right that Binder does not call
generic_file_open ... I have to admit that I don't know what
generic_file_open does.

Alice
Arnd Bergmann Oct. 2, 2024, 3:31 p.m. UTC | #16
On Wed, Oct 2, 2024, at 14:23, Alice Ryhl wrote:
> On Wed, Oct 2, 2024 at 3:59 PM Arnd Bergmann <arnd@arndb.de> wrote:
>>
>> On Wed, Oct 2, 2024, at 13:31, Alice Ryhl wrote:
>> > On Wed, Oct 2, 2024 at 3:25 PM Arnd Bergmann <arnd@arndb.de> wrote:
>> >>
>> You can also see the effects of the compat handlers there,
>> e.g. VIDIOC_QUERYBUF has three possible sizes associated
>> with it, depending on sizeof(long) and sizeof(time_t).
>>
>> There is a small optimization for buffers up to 128 bytes
>> to avoid the dynamic allocation, and this is likely a good
>> idea elsewhere as well.
>
> Oh, my. That seems like a rather sophisticated ioctl handler.
>
> Do we want all new ioctl handlers to work along those lines?

It was intentionally an example to demonstrate the worst
case one might hit, and I would hope that most drivers end
up not having to worry about them. 

To clarify: the file I mentioned is itself a piece of
infrastructure that is used to make the actual drivers
simpler, in this case by having drivers just fill out
a 'struct v4l2_ioctl_ops' with the command specific callbacks.

This works because video_ioctl2() has a clearly defined set
of commands that is shared by a large number of drivers.
For a generic bit of infrastructure, we obviously wouldn't
do anything that knows about specific commands, but the way
the get_user/put_user part works based on the size can be
quite similar.

There is similar piece of infrastructure in
drivers/gpu/drm/drm_ioctl.c, which is a bit simpler.

>> It seems like it should be possible to validate the size of
>> the argument against _IOC_SIZE(cmd) at compile time, but this
>> is not currently done, right?
>
> No, right now that validation happens at runtime. The ioctl handler
> tries to use the UserSliceReader to read a struct, which fails if the
> struct is too large.

Ok.

> I wonder if we could go for something more comprehensive than the
> super simple thing I just put together. I'm sure we can validate more
> things at compile time.



     Arnd
Arnd Bergmann Oct. 2, 2024, 3:45 p.m. UTC | #17
On Wed, Oct 2, 2024, at 14:23, Christian Brauner wrote:

> and then copy the stuff via copy_struct_from_user() or copy back out to
> user via other means.
>
> This way you can safely extend ioctl()s in a backward and forward
> compatible manner and if we can enforce this for new drivers then I
> think that's what we should do.

I don't see much value in building generic code for ioctl around
this specific variant of extensibility. Extending ioctl commands
by having a larger structure that results in a new cmd code
constant is fine, but there is little difference between doing
this with the same or a different 'nr' value. Most drivers just
always use a new nr here, and I see no reason to discourage that.

There is actually a small risk in your example where it can
break if you have the same size between native and compat
variants of the same command, like

struct old {
    long a;
};

struct new {
    long a;
    int b;
};

Here, the 64-bit 'old' has the same size as the 32-bit 'new',
so if we try to handle them in a shared native/compat ioctl
function, this needs an extra in_conmpat_syscall() check that
adds complexity and is easy to forget.

    Arnd
Greg KH Oct. 2, 2024, 4:04 p.m. UTC | #18
On Wed, Oct 02, 2024 at 03:45:08PM +0000, Arnd Bergmann wrote:
> On Wed, Oct 2, 2024, at 14:23, Christian Brauner wrote:
> 
> > and then copy the stuff via copy_struct_from_user() or copy back out to
> > user via other means.
> >
> > This way you can safely extend ioctl()s in a backward and forward
> > compatible manner and if we can enforce this for new drivers then I
> > think that's what we should do.
> 
> I don't see much value in building generic code for ioctl around
> this specific variant of extensibility. Extending ioctl commands
> by having a larger structure that results in a new cmd code
> constant is fine, but there is little difference between doing
> this with the same or a different 'nr' value. Most drivers just
> always use a new nr here, and I see no reason to discourage that.
> 
> There is actually a small risk in your example where it can
> break if you have the same size between native and compat
> variants of the same command, like
> 
> struct old {
>     long a;
> };
> 
> struct new {
>     long a;
>     int b;
> };
> 
> Here, the 64-bit 'old' has the same size as the 32-bit 'new',
> so if we try to handle them in a shared native/compat ioctl
> function, this needs an extra in_conmpat_syscall() check that
> adds complexity and is easy to forget.

Agreed, "extending" ioctls is considered a bad thing and it's just
easier to create a new one.  Or use some flags and reserved fields, if
you remember to add them in the beginning...

Anyway, this is all great, but for now, I'll take this series in my tree
and we can add onto it from there.  I'll dig up some sample code that
uses this too, so that we make sure it works properly.  Give me a few
days to catch up before it lands in my trees...

thanks,

greg k-h
Arnd Bergmann Oct. 2, 2024, 8:08 p.m. UTC | #19
On Wed, Oct 2, 2024, at 16:04, Greg Kroah-Hartman wrote:
> On Wed, Oct 02, 2024 at 03:45:08PM +0000, Arnd Bergmann wrote:
>> On Wed, Oct 2, 2024, at 14:23, Christian Brauner wrote:
>> 
>> Here, the 64-bit 'old' has the same size as the 32-bit 'new',
>> so if we try to handle them in a shared native/compat ioctl
>> function, this needs an extra in_conmpat_syscall() check that
>> adds complexity and is easy to forget.
>
> Agreed, "extending" ioctls is considered a bad thing and it's just
> easier to create a new one.  Or use some flags and reserved fields, if
> you remember to add them in the beginning...
>
> Anyway, this is all great, but for now, I'll take this series in my tree
> and we can add onto it from there.  I'll dig up some sample code that
> uses this too, so that we make sure it works properly.  Give me a few
> days to catch up before it lands in my trees...

Sounds good to me, it's clear we don't get a quick solution and
there is nothing stopping us from revisiting this after we have a
couple of drivers using ioctl.

      Arnd
Christian Brauner Oct. 3, 2024, 8:09 a.m. UTC | #20
On Wed, Oct 02, 2024 at 03:45:08PM GMT, Arnd Bergmann wrote:
> On Wed, Oct 2, 2024, at 14:23, Christian Brauner wrote:
> 
> > and then copy the stuff via copy_struct_from_user() or copy back out to
> > user via other means.
> >
> > This way you can safely extend ioctl()s in a backward and forward
> > compatible manner and if we can enforce this for new drivers then I
> > think that's what we should do.
> 
> I don't see much value in building generic code for ioctl around
> this specific variant of extensibility. Extending ioctl commands
> by having a larger structure that results in a new cmd code
> constant is fine, but there is little difference between doing
> this with the same or a different 'nr' value. Most drivers just
> always use a new nr here, and I see no reason to discourage that.
> 
> There is actually a small risk in your example where it can
> break if you have the same size between native and compat
> variants of the same command, like
> 
> struct old {
>     long a;
> };
> 
> struct new {
>     long a;
>     int b;
> };
> 
> Here, the 64-bit 'old' has the same size as the 32-bit 'new',
> so if we try to handle them in a shared native/compat ioctl
> function, this needs an extra in_conmpat_syscall() check that
> adds complexity and is easy to forget.

This presupposes that we will have Rust drivers - not C drivers - that
define structs like it's 1990. You yourself and me included try to
enforce that structs are correctly aligned and padded. So I see this as
a non-argument. We wouldn't let this slide in new system calls so I
don't see why we would in new ioctls.
Christian Brauner Oct. 3, 2024, 8:19 a.m. UTC | #21
On Wed, Oct 02, 2024 at 06:04:38PM GMT, Greg Kroah-Hartman wrote:
> On Wed, Oct 02, 2024 at 03:45:08PM +0000, Arnd Bergmann wrote:
> > On Wed, Oct 2, 2024, at 14:23, Christian Brauner wrote:
> > 
> > > and then copy the stuff via copy_struct_from_user() or copy back out to
> > > user via other means.
> > >
> > > This way you can safely extend ioctl()s in a backward and forward
> > > compatible manner and if we can enforce this for new drivers then I
> > > think that's what we should do.
> > 
> > I don't see much value in building generic code for ioctl around
> > this specific variant of extensibility. Extending ioctl commands
> > by having a larger structure that results in a new cmd code
> > constant is fine, but there is little difference between doing
> > this with the same or a different 'nr' value. Most drivers just
> > always use a new nr here, and I see no reason to discourage that.
> > 
> > There is actually a small risk in your example where it can
> > break if you have the same size between native and compat
> > variants of the same command, like
> > 
> > struct old {
> >     long a;
> > };
> > 
> > struct new {
> >     long a;
> >     int b;
> > };
> > 
> > Here, the 64-bit 'old' has the same size as the 32-bit 'new',
> > so if we try to handle them in a shared native/compat ioctl
> > function, this needs an extra in_conmpat_syscall() check that
> > adds complexity and is easy to forget.
> 
> Agreed, "extending" ioctls is considered a bad thing and it's just
> easier to create a new one.  Or use some flags and reserved fields, if
> you remember to add them in the beginning...

This statement misses the reality of what has been happening outside of
arbitrary drivers for years. Let alone that it simply asserts that it's
a bad thing.
Christian Brauner Oct. 3, 2024, 8:33 a.m. UTC | #22
On Wed, Oct 02, 2024 at 04:24:58PM GMT, Alice Ryhl wrote:
> On Wed, Oct 2, 2024 at 4:06 PM Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Tue, Oct 01, 2024 at 08:22:22AM GMT, Alice Ryhl wrote:
> > > +unsafe extern "C" fn fops_open<T: MiscDevice>(
> > > +    inode: *mut bindings::inode,
> > > +    file: *mut bindings::file,
> > > +) -> c_int {
> > > +    // SAFETY: The pointers are valid and for a file being opened.
> > > +    let ret = unsafe { bindings::generic_file_open(inode, file) };
> > > +    if ret != 0 {
> > > +        return ret;
> > > +    }
> >
> > Do you have code where that function is used? Because this looks wrong
> > or at least I don't understand from just a glance whether that
> > generic_file_open() call makes sense.
> >
> > Illustrating how we get from opening /dev/binder to this call would
> > help.
> 
> Hmm. I wrote this by comparing with the ashmem open callback. Now that
> you mention it you are right that Binder does not call
> generic_file_open ... I have to admit that I don't know what
> generic_file_open does.

It's irrelevant for binder.
Miguel Ojeda Oct. 21, 2024, 10:34 a.m. UTC | #23
Hi Alice, Greg,

On Tue, Oct 1, 2024 at 10:23 AM Alice Ryhl <aliceryhl@google.com> wrote:
>
> +            compat_ioctl: if T::HAS_COMPAT_IOCTL {
> +                Some(fops_compat_ioctl::<T>)
> +            } else if T::HAS_IOCTL {
> +                Some(bindings::compat_ptr_ioctl)
> +            } else {
> +                None
> +            },
> +            ..unsafe { MaybeUninit::zeroed().assume_init() }

With the lints series queued for the next cycle, Clippy spots the
missing `// SAFETY` comment here...

> +unsafe extern "C" fn fops_open<T: MiscDevice>(
> +    inode: *mut bindings::inode,
> +    file: *mut bindings::file,
> +) -> c_int {

...as well as the missing `# Safety` section for each of these.

It can be seen in e.g. today's -next.

I hope that helps!

Cheers,
Miguel
Alice Ryhl Oct. 22, 2024, 1:15 p.m. UTC | #24
On Mon, Oct 21, 2024 at 12:34 PM Miguel Ojeda
<miguel.ojeda.sandonis@gmail.com> wrote:
>
> Hi Alice, Greg,
>
> On Tue, Oct 1, 2024 at 10:23 AM Alice Ryhl <aliceryhl@google.com> wrote:
> >
> > +            compat_ioctl: if T::HAS_COMPAT_IOCTL {
> > +                Some(fops_compat_ioctl::<T>)
> > +            } else if T::HAS_IOCTL {
> > +                Some(bindings::compat_ptr_ioctl)
> > +            } else {
> > +                None
> > +            },
> > +            ..unsafe { MaybeUninit::zeroed().assume_init() }
>
> With the lints series queued for the next cycle, Clippy spots the
> missing `// SAFETY` comment here...
>
> > +unsafe extern "C" fn fops_open<T: MiscDevice>(
> > +    inode: *mut bindings::inode,
> > +    file: *mut bindings::file,
> > +) -> c_int {
>
> ...as well as the missing `# Safety` section for each of these.
>
> It can be seen in e.g. today's -next.
>
> I hope that helps!

I sent https://lore.kernel.org/all/20241022-miscdevice-unsafe-warn-fix-v1-1-a78fde1740d6@google.com/

Thanks!
Alice
diff mbox series

Patch

diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index ae82e9c941af..84303bf221dd 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -15,6 +15,7 @@ 
 #include <linux/firmware.h>
 #include <linux/jiffies.h>
 #include <linux/mdio.h>
+#include <linux/miscdevice.h>
 #include <linux/phy.h>
 #include <linux/refcount.h>
 #include <linux/sched.h>
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 22a3bfa5a9e9..e268eae54c81 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -39,6 +39,7 @@ 
 #[cfg(CONFIG_KUNIT)]
 pub mod kunit;
 pub mod list;
+pub mod miscdevice;
 #[cfg(CONFIG_NET)]
 pub mod net;
 pub mod page;
diff --git a/rust/kernel/miscdevice.rs b/rust/kernel/miscdevice.rs
new file mode 100644
index 000000000000..cbd5249b5b45
--- /dev/null
+++ b/rust/kernel/miscdevice.rs
@@ -0,0 +1,241 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+// Copyright (C) 2024 Google LLC.
+
+//! Miscdevice support.
+//!
+//! C headers: [`include/linux/miscdevice.h`](srctree/include/linux/miscdevice.h).
+//!
+//! Reference: <https://www.kernel.org/doc/html/latest/driver-api/misc_devices.html>
+
+use crate::{
+    bindings,
+    error::{to_result, Error, Result, VTABLE_DEFAULT_ERROR},
+    prelude::*,
+    str::CStr,
+    types::{ForeignOwnable, Opaque},
+};
+use core::{
+    ffi::{c_int, c_long, c_uint, c_ulong},
+    marker::PhantomData,
+    mem::MaybeUninit,
+    pin::Pin,
+};
+
+/// Options for creating a misc device.
+#[derive(Copy, Clone)]
+pub struct MiscDeviceOptions {
+    /// The name of the miscdevice.
+    pub name: &'static CStr,
+}
+
+impl MiscDeviceOptions {
+    /// Create a raw `struct miscdev` ready for registration.
+    pub const fn into_raw<T: MiscDevice>(self) -> bindings::miscdevice {
+        // SAFETY: All zeros is valid for this C type.
+        let mut result: bindings::miscdevice = unsafe { MaybeUninit::zeroed().assume_init() };
+        result.minor = bindings::MISC_DYNAMIC_MINOR as _;
+        result.name = self.name.as_char_ptr();
+        result.fops = create_vtable::<T>();
+        result
+    }
+}
+
+/// A registration of a miscdevice.
+///
+/// # Invariants
+///
+/// `inner` is a registered misc device.
+#[repr(transparent)]
+#[pin_data(PinnedDrop)]
+pub struct MiscDeviceRegistration<T> {
+    #[pin]
+    inner: Opaque<bindings::miscdevice>,
+    _t: PhantomData<T>,
+}
+
+// SAFETY: It is allowed to call `misc_deregister` on a different thread from where you called
+// `misc_register`.
+unsafe impl<T> Send for MiscDeviceRegistration<T> {}
+// SAFETY: All `&self` methods on this type are written to ensure that it is safe to call them in
+// parallel.
+unsafe impl<T> Sync for MiscDeviceRegistration<T> {}
+
+impl<T: MiscDevice> MiscDeviceRegistration<T> {
+    /// Register a misc device.
+    pub fn register(opts: MiscDeviceOptions) -> impl PinInit<Self, Error> {
+        try_pin_init!(Self {
+            inner <- Opaque::try_ffi_init(move |slot: *mut bindings::miscdevice| {
+                // SAFETY: The initializer can write to the provided `slot`.
+                unsafe { slot.write(opts.into_raw::<T>()) };
+
+                // SAFETY: We just wrote the misc device options to the slot. The miscdevice will
+                // get unregistered before `slot` is deallocated because the memory is pinned and
+                // the destructor of this type deallocates the memory.
+                // INVARIANT: If this returns `Ok(())`, then the `slot` will contain a registered
+                // misc device.
+                to_result(unsafe { bindings::misc_register(slot) })
+            }),
+            _t: PhantomData,
+        })
+    }
+
+    /// Returns a raw pointer to the misc device.
+    pub fn as_raw(&self) -> *mut bindings::miscdevice {
+        self.inner.get()
+    }
+}
+
+#[pinned_drop]
+impl<T> PinnedDrop for MiscDeviceRegistration<T> {
+    fn drop(self: Pin<&mut Self>) {
+        // SAFETY: We know that the device is registered by the type invariants.
+        unsafe { bindings::misc_deregister(self.inner.get()) };
+    }
+}
+
+/// Trait implemented by the private data of an open misc device.
+#[vtable]
+pub trait MiscDevice {
+    /// What kind of pointer should `Self` be wrapped in.
+    type Ptr: ForeignOwnable + Send + Sync;
+
+    /// Called when the misc device is opened.
+    ///
+    /// The returned pointer will be stored as the private data for the file.
+    fn open() -> Result<Self::Ptr>;
+
+    /// Called when the misc device is released.
+    fn release(device: Self::Ptr) {
+        drop(device);
+    }
+
+    /// Handler for ioctls.
+    ///
+    /// The `cmd` argument is usually manipulated using the utilties in [`kernel::ioctl`].
+    ///
+    /// [`kernel::ioctl`]: mod@crate::ioctl
+    fn ioctl(
+        _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
+        _cmd: u32,
+        _arg: usize,
+    ) -> Result<isize> {
+        kernel::build_error(VTABLE_DEFAULT_ERROR)
+    }
+
+    /// Handler for ioctls.
+    ///
+    /// Used for 32-bit userspace on 64-bit platforms.
+    ///
+    /// This method is optional and only needs to be provided if the ioctl relies on structures
+    /// that have different layout on 32-bit and 64-bit userspace. If no implementation is
+    /// provided, then `compat_ptr_ioctl` will be used instead.
+    #[cfg(CONFIG_COMPAT)]
+    fn compat_ioctl(
+        _device: <Self::Ptr as ForeignOwnable>::Borrowed<'_>,
+        _cmd: u32,
+        _arg: usize,
+    ) -> Result<isize> {
+        kernel::build_error(VTABLE_DEFAULT_ERROR)
+    }
+}
+
+const fn create_vtable<T: MiscDevice>() -> &'static bindings::file_operations {
+    const fn maybe_fn<T: Copy>(check: bool, func: T) -> Option<T> {
+        if check {
+            Some(func)
+        } else {
+            None
+        }
+    }
+
+    struct VtableHelper<T: MiscDevice> {
+        _t: PhantomData<T>,
+    }
+    impl<T: MiscDevice> VtableHelper<T> {
+        const VTABLE: bindings::file_operations = bindings::file_operations {
+            open: Some(fops_open::<T>),
+            release: Some(fops_release::<T>),
+            unlocked_ioctl: maybe_fn(T::HAS_IOCTL, fops_ioctl::<T>),
+            #[cfg(CONFIG_COMPAT)]
+            compat_ioctl: if T::HAS_COMPAT_IOCTL {
+                Some(fops_compat_ioctl::<T>)
+            } else if T::HAS_IOCTL {
+                Some(bindings::compat_ptr_ioctl)
+            } else {
+                None
+            },
+            ..unsafe { MaybeUninit::zeroed().assume_init() }
+        };
+    }
+
+    &VtableHelper::<T>::VTABLE
+}
+
+unsafe extern "C" fn fops_open<T: MiscDevice>(
+    inode: *mut bindings::inode,
+    file: *mut bindings::file,
+) -> c_int {
+    // SAFETY: The pointers are valid and for a file being opened.
+    let ret = unsafe { bindings::generic_file_open(inode, file) };
+    if ret != 0 {
+        return ret;
+    }
+
+    let ptr = match T::open() {
+        Ok(ptr) => ptr,
+        Err(err) => return err.to_errno(),
+    };
+
+    // SAFETY: The open call of a file owns the private data.
+    unsafe { (*file).private_data = ptr.into_foreign().cast_mut() };
+
+    0
+}
+
+unsafe extern "C" fn fops_release<T: MiscDevice>(
+    _inode: *mut bindings::inode,
+    file: *mut bindings::file,
+) -> c_int {
+    // SAFETY: The release call of a file owns the private data.
+    let private = unsafe { (*file).private_data };
+    // SAFETY: The release call of a file owns the private data.
+    let ptr = unsafe { <T::Ptr as ForeignOwnable>::from_foreign(private) };
+
+    T::release(ptr);
+
+    0
+}
+
+unsafe extern "C" fn fops_ioctl<T: MiscDevice>(
+    file: *mut bindings::file,
+    cmd: c_uint,
+    arg: c_ulong,
+) -> c_long {
+    // SAFETY: The ioctl call of a file can access the private data.
+    let private = unsafe { (*file).private_data };
+    // SAFETY: Ioctl calls can borrow the private data of the file.
+    let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
+
+    match T::ioctl(device, cmd as u32, arg as usize) {
+        Ok(ret) => ret as c_long,
+        Err(err) => err.to_errno() as c_long,
+    }
+}
+
+#[cfg(CONFIG_COMPAT)]
+unsafe extern "C" fn fops_compat_ioctl<T: MiscDevice>(
+    file: *mut bindings::file,
+    cmd: c_uint,
+    arg: c_ulong,
+) -> c_long {
+    // SAFETY: The compat ioctl call of a file can access the private data.
+    let private = unsafe { (*file).private_data };
+    // SAFETY: Ioctl calls can borrow the private data of the file.
+    let device = unsafe { <T::Ptr as ForeignOwnable>::borrow(private) };
+
+    match T::compat_ioctl(device, cmd as u32, arg as usize) {
+        Ok(ret) => ret as c_long,
+        Err(err) => err.to_errno() as c_long,
+    }
+}