diff mbox series

[RFC,v2,30/30] WIP: fs: ext2: add rust ro ext2 implementation

Message ID 20240514131711.379322-31-wedsonaf@gmail.com (mailing list archive)
State New, archived
Headers show
Series Rust abstractions for VFS | expand

Commit Message

Wedson Almeida Filho May 14, 2024, 1:17 p.m. UTC
From: Wedson Almeida Filho <walmeida@microsoft.com>

Signed-off-by: Wedson Almeida Filho <walmeida@microsoft.com>
---
 fs/Kconfig            |   1 +
 fs/Makefile           |   1 +
 fs/rust-ext2/Kconfig  |  13 +
 fs/rust-ext2/Makefile |   8 +
 fs/rust-ext2/defs.rs  | 173 +++++++++++++
 fs/rust-ext2/ext2.rs  | 551 ++++++++++++++++++++++++++++++++++++++++++
 rust/kernel/lib.rs    |   3 +
 7 files changed, 750 insertions(+)
 create mode 100644 fs/rust-ext2/Kconfig
 create mode 100644 fs/rust-ext2/Makefile
 create mode 100644 fs/rust-ext2/defs.rs
 create mode 100644 fs/rust-ext2/ext2.rs

Comments

Darrick J. Wong May 20, 2024, 8:01 p.m. UTC | #1
On Tue, May 14, 2024 at 10:17:11AM -0300, Wedson Almeida Filho wrote:
> From: Wedson Almeida Filho <walmeida@microsoft.com>
> 
> Signed-off-by: Wedson Almeida Filho <walmeida@microsoft.com>
> ---
>  fs/Kconfig            |   1 +
>  fs/Makefile           |   1 +
>  fs/rust-ext2/Kconfig  |  13 +
>  fs/rust-ext2/Makefile |   8 +
>  fs/rust-ext2/defs.rs  | 173 +++++++++++++
>  fs/rust-ext2/ext2.rs  | 551 ++++++++++++++++++++++++++++++++++++++++++
>  rust/kernel/lib.rs    |   3 +
>  7 files changed, 750 insertions(+)
>  create mode 100644 fs/rust-ext2/Kconfig
>  create mode 100644 fs/rust-ext2/Makefile
>  create mode 100644 fs/rust-ext2/defs.rs
>  create mode 100644 fs/rust-ext2/ext2.rs
> 
> diff --git a/fs/Kconfig b/fs/Kconfig
> index 2cbd99d6784c..cf0cac5c5b1e 100644
> --- a/fs/Kconfig
> +++ b/fs/Kconfig
> @@ -338,6 +338,7 @@ source "fs/ufs/Kconfig"
>  source "fs/erofs/Kconfig"
>  source "fs/vboxsf/Kconfig"
>  source "fs/tarfs/Kconfig"
> +source "fs/rust-ext2/Kconfig"
>  
>  endif # MISC_FILESYSTEMS
>  
> diff --git a/fs/Makefile b/fs/Makefile
> index d8bbda73e3a9..c1a3007efc7d 100644
> --- a/fs/Makefile
> +++ b/fs/Makefile
> @@ -130,3 +130,4 @@ obj-$(CONFIG_EROFS_FS)		+= erofs/
>  obj-$(CONFIG_VBOXSF_FS)		+= vboxsf/
>  obj-$(CONFIG_ZONEFS_FS)		+= zonefs/
>  obj-$(CONFIG_TARFS_FS)		+= tarfs/
> +obj-$(CONFIG_RUST_EXT2_FS)	+= rust-ext2/
> diff --git a/fs/rust-ext2/Kconfig b/fs/rust-ext2/Kconfig
> new file mode 100644
> index 000000000000..976371655ca6
> --- /dev/null
> +++ b/fs/rust-ext2/Kconfig
> @@ -0,0 +1,13 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +#
> +
> +config RUST_EXT2_FS
> +	tristate "Rust second extended fs support"
> +	depends on RUST && BLOCK
> +	help
> +	  Ext2 is a standard Linux file system for hard disks.
> +
> +	  To compile this file system support as a module, choose M here: the
> +	  module will be called rust_ext2.
> +
> +	  If unsure, say Y.
> diff --git a/fs/rust-ext2/Makefile b/fs/rust-ext2/Makefile
> new file mode 100644
> index 000000000000..ac960b5f89d7
> --- /dev/null
> +++ b/fs/rust-ext2/Makefile
> @@ -0,0 +1,8 @@
> +# SPDX-License-Identifier: GPL-2.0
> +#
> +# Makefile for the linux tarfs filesystem routines.
> +#
> +
> +obj-$(CONFIG_RUST_EXT2_FS) += rust_ext2.o
> +
> +rust_ext2-y := ext2.o
> diff --git a/fs/rust-ext2/defs.rs b/fs/rust-ext2/defs.rs
> new file mode 100644
> index 000000000000..5f84852b4961
> --- /dev/null
> +++ b/fs/rust-ext2/defs.rs
> @@ -0,0 +1,173 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +//! Definitions of tarfs structures.
> +
> +use kernel::types::LE;
> +
> +pub(crate) const EXT2_SUPER_MAGIC: u16 = 0xEF53;
> +
> +pub(crate) const EXT2_MAX_BLOCK_LOG_SIZE: u32 = 16;
> +
> +pub(crate) const EXT2_GOOD_OLD_REV: u32 = 0; /* The good old (original) format */
> +pub(crate) const EXT2_DYNAMIC_REV: u32 = 1; /* V2 format w/ dynamic inode sizes */
> +
> +pub(crate) const EXT2_GOOD_OLD_INODE_SIZE: u16 = 128;
> +
> +pub(crate) const EXT2_ROOT_INO: u32 = 2; /* Root inode */
> +
> +/* First non-reserved inode for old ext2 filesystems. */
> +pub(crate) const EXT2_GOOD_OLD_FIRST_INO: u32 = 11;
> +
> +pub(crate) const EXT2_FEATURE_INCOMPAT_FILETYPE: u32 = 0x0002;
> +
> +/*
> + * Constants relative to the data blocks
> + */
> +pub(crate) const EXT2_NDIR_BLOCKS: usize = 12;
> +pub(crate) const EXT2_IND_BLOCK: usize = EXT2_NDIR_BLOCKS;
> +pub(crate) const EXT2_DIND_BLOCK: usize = EXT2_IND_BLOCK + 1;
> +pub(crate) const EXT2_TIND_BLOCK: usize = EXT2_DIND_BLOCK + 1;
> +pub(crate) const EXT2_N_BLOCKS: usize = EXT2_TIND_BLOCK + 1;
> +
> +kernel::derive_readable_from_bytes! {
> +    #[repr(C)]
> +    pub(crate) struct Super {
> +        pub(crate) inodes_count: LE<u32>,
> +        pub(crate) blocks_count: LE<u32>,
> +        pub(crate) r_blocks_count: LE<u32>,
> +        pub(crate) free_blocks_count: LE<u32>, /* Free blocks count */
> +        pub(crate) free_inodes_count: LE<u32>, /* Free inodes count */
> +        pub(crate) first_data_block: LE<u32>,  /* First Data Block */
> +        pub(crate) log_block_size: LE<u32>,    /* Block size */
> +        pub(crate) log_frag_size: LE<u32>,     /* Fragment size */
> +        pub(crate) blocks_per_group: LE<u32>,  /* # Blocks per group */
> +        pub(crate) frags_per_group: LE<u32>,   /* # Fragments per group */
> +        pub(crate) inodes_per_group: LE<u32>,  /* # Inodes per group */
> +        pub(crate) mtime: LE<u32>,             /* Mount time */
> +        pub(crate) wtime: LE<u32>,             /* Write time */
> +        pub(crate) mnt_count: LE<u16>,         /* Mount count */
> +        pub(crate) max_mnt_count: LE<u16>,     /* Maximal mount count */
> +        pub(crate) magic: LE<u16>,             /* Magic signature */
> +        pub(crate) state: LE<u16>,             /* File system state */
> +        pub(crate) errors: LE<u16>,            /* Behaviour when detecting errors */
> +        pub(crate) minor_rev_level: LE<u16>,   /* minor revision level */
> +        pub(crate) lastcheck: LE<u32>,         /* time of last check */
> +        pub(crate) checkinterval: LE<u32>,     /* max. time between checks */
> +        pub(crate) creator_os: LE<u32>,        /* OS */
> +        pub(crate) rev_level: LE<u32>,         /* Revision level */
> +        pub(crate) def_resuid: LE<u16>,        /* Default uid for reserved blocks */
> +        pub(crate) def_resgid: LE<u16>,        /* Default gid for reserved blocks */
> +        /*
> +         * These fields are for EXT2_DYNAMIC_REV superblocks only.
> +         *
> +         * Note: the difference between the compatible feature set and
> +         * the incompatible feature set is that if there is a bit set
> +         * in the incompatible feature set that the kernel doesn't
> +         * know about, it should refuse to mount the filesystem.
> +         *
> +         * e2fsck's requirements are more strict; if it doesn't know
> +         * about a feature in either the compatible or incompatible
> +         * feature set, it must abort and not try to meddle with
> +         * things it doesn't understand...
> +         */
> +        pub(crate) first_ino: LE<u32>,              /* First non-reserved inode */
> +        pub(crate) inode_size: LE<u16>,             /* size of inode structure */
> +        pub(crate) block_group_nr: LE<u16>,         /* block group # of this superblock */
> +        pub(crate) feature_compat: LE<u32>,         /* compatible feature set */
> +        pub(crate) feature_incompat: LE<u32>,       /* incompatible feature set */
> +        pub(crate) feature_ro_compat: LE<u32>,      /* readonly-compatible feature set */
> +        pub(crate) uuid: [u8; 16],                  /* 128-bit uuid for volume */
> +        pub(crate) volume_name: [u8; 16],           /* volume name */
> +        pub(crate) last_mounted: [u8; 64],          /* directory where last mounted */
> +        pub(crate) algorithm_usage_bitmap: LE<u32>, /* For compression */
> +        /*
> +         * Performance hints.  Directory preallocation should only
> +         * happen if the EXT2_COMPAT_PREALLOC flag is on.
> +         */
> +        pub(crate) prealloc_blocks: u8,    /* Nr of blocks to try to preallocate*/
> +        pub(crate) prealloc_dir_blocks: u8,        /* Nr to preallocate for dirs */
> +        padding1: u16,
> +        /*
> +         * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
> +         */
> +        pub(crate) journal_uuid: [u8; 16],      /* uuid of journal superblock */
> +        pub(crate) journal_inum: u32,           /* inode number of journal file */
> +        pub(crate) journal_dev: u32,            /* device number of journal file */
> +        pub(crate) last_orphan: u32,            /* start of list of inodes to delete */
> +        pub(crate) hash_seed: [u32; 4],         /* HTREE hash seed */
> +        pub(crate) def_hash_version: u8,        /* Default hash version to use */
> +        pub(crate) reserved_char_pad: u8,
> +        pub(crate) reserved_word_pad: u16,
> +        pub(crate) default_mount_opts: LE<u32>,
> +        pub(crate) first_meta_bg: LE<u32>,      /* First metablock block group */
> +        reserved: [u32; 190],                   /* Padding to the end of the block */
> +    }
> +
> +    #[repr(C)]
> +    #[derive(Clone, Copy)]
> +    pub(crate) struct Group {

Might want to call these GroupDescriptor to match(ish) the ext2
structure?  I dunno, it's going to be hard to remember to change
"struct ext2_group_desc" in my head to "struct ext2::GroupDescriptor" or
even "struct ext2::group_desc".

> +        /// Blocks bitmap block.
> +        pub block_bitmap: LE<u32>,
> +
> +        /// Inodes bitmap block.
> +        pub inode_bitmap: LE<u32>,
> +
> +        /// Inodes table block.
> +        pub inode_table: LE<u32>,
> +
> +        /// Number of free blocks.
> +        pub free_blocks_count: LE<u16>,
> +
> +        /// Number of free inodes.
> +        pub free_inodes_count: LE<u16>,
> +
> +        /// Number of directories.
> +        pub used_dirs_count: LE<u16>,
> +
> +        pad: LE<u16>,
> +        reserved: [u32; 3],
> +    }
> +
> +    #[repr(C)]
> +    pub(crate) struct INode {
> +        pub mode: LE<u16>,                  /* File mode */
> +        pub uid: LE<u16>,                   /* Low 16 bits of Owner Uid */
> +        pub size: LE<u32>,                  /* Size in bytes */
> +        pub atime: LE<u32>,                 /* Access time */
> +        pub ctime: LE<u32>,                 /* Creation time */
> +        pub mtime: LE<u32>,                 /* Modification time */
> +        pub dtime: LE<u32>,                 /* Deletion Time */
> +        pub gid: LE<u16>,                   /* Low 16 bits of Group Id */
> +        pub links_count: LE<u16>,           /* Links count */
> +        pub blocks: LE<u32>,                /* Blocks count */
> +        pub flags: LE<u32>,                 /* File flags */
> +        pub reserved1: LE<u32>,
> +        pub block: [LE<u32>; EXT2_N_BLOCKS],/* Pointers to blocks */
> +        pub generation: LE<u32>,            /* File version (for NFS) */
> +        pub file_acl: LE<u32>,              /* File ACL */
> +        pub dir_acl: LE<u32>,               /* Directory ACL */
> +        pub faddr: LE<u32>,                 /* Fragment address */
> +        pub frag: u8,	                    /* Fragment number */
> +        pub fsize: u8,	                    /* Fragment size */
> +        pub pad1: LE<u16>,
> +        pub uid_high: LE<u16>,
> +        pub gid_high: LE<u16>,
> +        pub reserved2: LE<u32>,
> +    }
> +
> +    #[repr(C)]
> +    pub(crate) struct DirEntry {
> +        pub(crate) inode: LE<u32>,       /* Inode number */
> +        pub(crate) rec_len: LE<u16>,     /* Directory entry length */
> +        pub(crate) name_len: u8,         /* Name length */
> +        pub(crate) file_type: u8,        /* Only if the "filetype" feature flag is set. */
> +    }
> +}
> +
> +pub(crate) const FT_REG_FILE: u8 = 1;
> +pub(crate) const FT_DIR: u8 = 2;
> +pub(crate) const FT_CHRDEV: u8 = 3;
> +pub(crate) const FT_BLKDEV: u8 = 4;
> +pub(crate) const FT_FIFO: u8 = 5;
> +pub(crate) const FT_SOCK: u8 = 6;
> +pub(crate) const FT_SYMLINK: u8 = 7;
> diff --git a/fs/rust-ext2/ext2.rs b/fs/rust-ext2/ext2.rs
> new file mode 100644
> index 000000000000..2d6b1e7ca156
> --- /dev/null
> +++ b/fs/rust-ext2/ext2.rs
> @@ -0,0 +1,551 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +//! Ext2 file system.
> +
> +use alloc::vec::Vec;
> +use core::mem::size_of;
> +use defs::*;
> +use kernel::fs::{
> +    self, address_space, dentry, dentry::DEntry, file, file::File, inode, inode::INode, iomap, sb,
> +    sb::SuperBlock, Offset,
> +};
> +use kernel::types::{ARef, Either, FromBytes, Locked, LE};
> +use kernel::{block, c_str, prelude::*, str::CString, time::Timespec, user, PAGE_SIZE};
> +
> +pub mod defs;
> +
> +kernel::module_fs! {
> +    type: Ext2Fs,
> +    name: "ext2",
> +    author: "Wedson Almeida Filho <walmeida@microsoft.com>",
> +    description: "ext2 file system",
> +    license: "GPL",
> +}
> +
> +const SB_OFFSET: Offset = 1024;
> +
> +struct INodeData {
> +    data_blocks: [u32; defs::EXT2_N_BLOCKS],
> +}
> +
> +struct Ext2Fs {
> +    mapper: inode::Mapper,
> +    block_size: u32,
> +    has_file_type: bool,
> +    _block_size_bits: u32,
> +    inodes_per_block: u32,
> +    inodes_per_group: u32,
> +    inode_count: u32,
> +    inode_size: u16,
> +    first_ino: u32,
> +    group: Vec<defs::Group>,
> +}
> +
> +impl Ext2Fs {
> +    fn iget(sb: &SuperBlock<Self>, ino: u32) -> Result<ARef<INode<Self>>> {
> +        let s = sb.data();
> +        if (ino != EXT2_ROOT_INO && ino < s.first_ino) || ino > s.inode_count {
> +            return Err(ENOENT);
> +        }
> +        let group = ((ino - 1) / s.inodes_per_group) as usize;
> +        let offset = (ino - 1) % s.inodes_per_group;
> +
> +        if group >= s.group.len() {
> +            return Err(ENOENT);
> +        }
> +
> +        // Create an inode or find an existing (cached) one.
> +        let mut inode = match sb.get_or_create_inode(ino.into())? {
> +            Either::Left(existing) => return Ok(existing),
> +            Either::Right(new) => new,
> +        };
> +
> +        let inodes_block = Offset::from(s.group[group].inode_table.value());
> +        let inode_block = inodes_block + Offset::from(offset / s.inodes_per_block);
> +        let offset = (offset % s.inodes_per_block) as usize;
> +        let b = sb
> +            .data()
> +            .mapper
> +            .mapped_folio(inode_block * Offset::from(s.block_size))?;

It almost feels like you need a buffer cache here for fs metadata... ;)

> +        let idata = defs::INode::from_bytes(&b, offset * s.inode_size as usize).ok_or(EIO)?;
> +        let mode = idata.mode.value();
> +
> +        if idata.links_count.value() == 0 && (mode == 0 || idata.dtime.value() != 0) {
> +            return Err(ESTALE);
> +        }
> +
> +        const DIR_FOPS: file::Ops<Ext2Fs> = file::Ops::new::<Ext2Fs>();
> +        const DIR_IOPS: inode::Ops<Ext2Fs> = inode::Ops::new::<Ext2Fs>();
> +        const FILE_AOPS: address_space::Ops<Ext2Fs> = iomap::ro_aops::<Ext2Fs>();
> +
> +        let mut size = idata.size.value().into();
> +        let typ = match mode & fs::mode::S_IFMT {
> +            fs::mode::S_IFREG => {
> +                size |= Offset::from(idata.dir_acl.value())
> +                    .checked_shl(32)
> +                    .ok_or(EUCLEAN)?;

I wonder, is there a clean way to log these kinds of corruption errors?
ext4 (and soon xfs) have the ability to log health problems and pass
those kinds of errors to a monitoring daemon via fanotify.

> +                inode
> +                    .set_aops(FILE_AOPS)
> +                    .set_fops(file::Ops::generic_ro_file());
> +                inode::Type::Reg
> +            }
> +            fs::mode::S_IFDIR => {
> +                inode
> +                    .set_iops(DIR_IOPS)
> +                    .set_fops(DIR_FOPS)
> +                    .set_aops(FILE_AOPS);
> +                inode::Type::Dir
> +            }
> +            fs::mode::S_IFLNK => {
> +                if idata.blocks.value() == 0 {
> +                    const OFFSET: usize = core::mem::offset_of!(defs::INode, block);
> +                    let name = &b[offset * usize::from(s.inode_size) + OFFSET..];
> +                    let name_len = size as usize;
> +                    if name_len > name.len() || name_len == 0 {
> +                        return Err(EIO);
> +                    }
> +                    inode.set_iops(inode::Ops::simple_symlink_inode());
> +                    inode::Type::Lnk(Some(CString::try_from(&name[..name_len])?))
> +                } else {
> +                    inode
> +                        .set_aops(FILE_AOPS)
> +                        .set_iops(inode::Ops::page_symlink_inode());
> +                    inode::Type::Lnk(None)
> +                }
> +            }
> +            fs::mode::S_IFSOCK => inode::Type::Sock,
> +            fs::mode::S_IFIFO => inode::Type::Fifo,
> +            fs::mode::S_IFCHR => {
> +                let (major, minor) = decode_dev(&idata.block);
> +                inode::Type::Chr(major, minor)
> +            }
> +            fs::mode::S_IFBLK => {
> +                let (major, minor) = decode_dev(&idata.block);
> +                inode::Type::Blk(major, minor)
> +            }
> +            _ => return Err(ENOENT),
> +        };
> +        inode.init(inode::Params {
> +            typ,
> +            mode: mode & 0o777,
> +            size,
> +            blocks: idata.blocks.value().into(),
> +            nlink: idata.links_count.value().into(),
> +            uid: u32::from(idata.uid.value()) | u32::from(idata.uid_high.value()) << 16,
> +            gid: u32::from(idata.gid.value()) | u32::from(idata.gid_high.value()) << 16,
> +            ctime: Timespec::new(idata.ctime.value().into(), 0)?,
> +            mtime: Timespec::new(idata.mtime.value().into(), 0)?,
> +            atime: Timespec::new(idata.atime.value().into(), 0)?,
> +            value: INodeData {
> +                data_blocks: core::array::from_fn(|i| idata.block[i].value()),
> +            },
> +        })
> +    }
> +
> +    fn offsets<'a>(&self, mut block: u64, out: &'a mut [u32]) -> Option<&'a [u32]> {
> +        let ptrs = u64::from(self.block_size / size_of::<u32>() as u32);
> +        let ptr_mask = ptrs - 1;
> +        let ptr_bits = ptrs.trailing_zeros();
> +
> +        if block < EXT2_NDIR_BLOCKS as u64 {
> +            out[0] = block as u32;
> +            return Some(&out[..1]);
> +        }
> +
> +        block -= EXT2_NDIR_BLOCKS as u64;
> +        if block < ptrs {
> +            out[0] = EXT2_IND_BLOCK as u32;
> +            out[1] = block as u32;
> +            return Some(&out[..2]);
> +        }
> +
> +        block -= ptrs;
> +        if block < (1 << (2 * ptr_bits)) {
> +            out[0] = EXT2_DIND_BLOCK as u32;
> +            out[1] = (block >> ptr_bits) as u32;
> +            out[2] = (block & ptr_mask) as u32;
> +            return Some(&out[..3]);
> +        }
> +
> +        block -= ptrs * ptrs;
> +        if block < ptrs * ptrs * ptrs {
> +            out[0] = EXT2_TIND_BLOCK as u32;
> +            out[1] = (block >> (2 * ptr_bits)) as u32;
> +            out[2] = ((block >> ptr_bits) & ptr_mask) as u32;
> +            out[3] = (block & ptr_mask) as u32;
> +            return Some(&out[..4]);
> +        }
> +
> +        None
> +    }
> +
> +    fn offset_to_block(inode: &INode<Self>, block: Offset) -> Result<u64> {
> +        let s = inode.super_block().data();
> +        let mut indices = [0u32; 4];
> +        let boffsets = s.offsets(block as u64, &mut indices).ok_or(EIO)?;
> +        let mut boffset = inode.data().data_blocks[boffsets[0] as usize];
> +        let mapper = &s.mapper;
> +        for i in &boffsets[1..] {
> +            let b = mapper.mapped_folio(Offset::from(boffset) * Offset::from(s.block_size))?;
> +            let table = LE::<u32>::from_bytes_to_slice(&b).ok_or(EIO)?;
> +            boffset = table[*i as usize].value();
> +        }
> +        Ok(boffset.into())
> +    }
> +
> +    fn check_descriptors(s: &Super, groups: &[Group]) -> Result {

It's ... very odd to mix file space mapping functions and group
descriptors into the same structure.  Does offset_to_block belong in a
Ext2Inode structure?

I was also wondering, is there a convenient way to make it so that the
compiler can enforce that a directory inode can only be passed an
Operations that actually has all the directory operations initialized?
Or that a regular file can't have a lookup function in its iops?

> +        for (i, g) in groups.iter().enumerate() {
> +            let first = i as u32 * s.blocks_per_group.value() + s.first_data_block.value();
> +            let last = if i == groups.len() - 1 {
> +                s.blocks_count.value()
> +            } else {
> +                first + s.blocks_per_group.value() - 1
> +            };
> +
> +            if g.block_bitmap.value() < first || g.block_bitmap.value() > last {
> +                pr_err!(
> +                    "Block bitmap for group {i} no in group (block {})\n",
> +                    g.block_bitmap.value()
> +                );
> +                return Err(EINVAL);
> +            }
> +
> +            if g.inode_bitmap.value() < first || g.inode_bitmap.value() > last {
> +                pr_err!(
> +                    "Inode bitmap for group {i} no in group (block {})\n",
> +                    g.inode_bitmap.value()
> +                );
> +                return Err(EINVAL);
> +            }
> +
> +            if g.inode_table.value() < first || g.inode_table.value() > last {
> +                pr_err!(
> +                    "Inode table for group {i} no in group (block {})\n",
> +                    g.inode_table.value()
> +                );
> +                return Err(EINVAL);
> +            }
> +        }
> +        Ok(())
> +    }
> +}
> +
> +impl fs::FileSystem for Ext2Fs {
> +    type Data = Box<Self>;
> +    type INodeData = INodeData;
> +    const NAME: &'static CStr = c_str!("rust-ext2");
> +    const SUPER_TYPE: sb::Type = sb::Type::BlockDev;
> +
> +    fn fill_super(
> +        sb: &mut SuperBlock<Self, sb::New>,
> +        mapper: Option<inode::Mapper>,
> +    ) -> Result<Self::Data> {
> +        let Some(mapper) = mapper else {
> +            return Err(EINVAL);
> +        };
> +
> +        if sb.min_blocksize(PAGE_SIZE as i32) == 0 {
> +            pr_err!("Unable to set block size\n");
> +            return Err(EINVAL);
> +        }
> +
> +        // Map the super block and check the magic number.
> +        let mapped = mapper.mapped_folio(SB_OFFSET)?;
> +        let s = Super::from_bytes(&mapped, 0).ok_or(EIO)?;
> +
> +        if s.magic.value() != EXT2_SUPER_MAGIC {
> +            return Err(EINVAL);
> +        }
> +
> +        // Check for unsupported flags.
> +        let mut has_file_type = false;
> +        if s.rev_level.value() >= EXT2_DYNAMIC_REV {
> +            let features = s.feature_incompat.value();
> +            if features & !EXT2_FEATURE_INCOMPAT_FILETYPE != 0 {
> +                pr_err!("Unsupported incompatible feature: {:x}\n", features);
> +                return Err(EINVAL);
> +            }
> +
> +            has_file_type = features & EXT2_FEATURE_INCOMPAT_FILETYPE != 0;
> +
> +            let features = s.feature_ro_compat.value();
> +            if !sb.rdonly() && features != 0 {
> +                pr_err!("Unsupported rw incompatible feature: {:x}\n", features);
> +                return Err(EINVAL);
> +            }
> +        }
> +
> +        // Set the block size.
> +        let block_size_bits = s.log_block_size.value();
> +        if block_size_bits > EXT2_MAX_BLOCK_LOG_SIZE - 10 {
> +            pr_err!("Invalid log block size: {}\n", block_size_bits);
> +            return Err(EINVAL);
> +        }
> +
> +        let block_size = 1024u32 << block_size_bits;
> +        if sb.min_blocksize(block_size as i32) != block_size as i32 {
> +            pr_err!("Bad block size: {}\n", block_size);
> +            return Err(ENXIO);
> +        }
> +
> +        // Get the first inode and the inode size.
> +        let (inode_size, first_ino) = if s.rev_level.value() == EXT2_GOOD_OLD_REV {
> +            (EXT2_GOOD_OLD_INODE_SIZE, EXT2_GOOD_OLD_FIRST_INO)
> +        } else {
> +            let size = s.inode_size.value();
> +            if size < EXT2_GOOD_OLD_INODE_SIZE
> +                || !size.is_power_of_two()
> +                || u32::from(size) > block_size
> +            {
> +                pr_err!("Unsupported inode size: {}\n", size);
> +                return Err(EINVAL);
> +            }
> +            (size, s.first_ino.value())
> +        };
> +
> +        // Get the number of inodes per group and per block.
> +        let inode_count = s.inodes_count.value();
> +        let inodes_per_group = s.inodes_per_group.value();
> +        let inodes_per_block = block_size / u32::from(inode_size);
> +        if inodes_per_group == 0 || inodes_per_block == 0 {
> +            return Err(EINVAL);
> +        }
> +
> +        if inodes_per_group > block_size * 8 || inodes_per_group < inodes_per_block {
> +            pr_err!("Bad inodes per group: {}\n", inodes_per_group);
> +            return Err(EINVAL);
> +        }
> +
> +        // Check the size of the groups.
> +        let itb_per_group = inodes_per_group / inodes_per_block;
> +        let blocks_per_group = s.blocks_per_group.value();
> +        if blocks_per_group > block_size * 8 || blocks_per_group <= itb_per_group + 3 {
> +            pr_err!("Bad blocks per group: {}\n", blocks_per_group);
> +            return Err(EINVAL);
> +        }
> +
> +        let blocks_count = s.blocks_count.value();
> +        if block::Sector::from(blocks_count) > sb.sector_count() >> (1 + block_size_bits) {
> +            pr_err!(
> +                "Block count ({blocks_count}) exceeds size of device ({})\n",
> +                sb.sector_count() >> (1 + block_size_bits)
> +            );
> +            return Err(EINVAL);
> +        }
> +
> +        let group_count = (blocks_count - s.first_data_block.value() - 1) / blocks_per_group + 1;
> +        if group_count * inodes_per_group != inode_count {
> +            pr_err!(
> +                "Unexpected inode count: {inode_count} vs {}",
> +                group_count * inodes_per_group
> +            );
> +            return Err(EINVAL);
> +        }
> +
> +        let mut groups = Vec::new();
> +        groups.reserve(group_count as usize, GFP_NOFS)?;

Why not GFP_KERNEL here?

Oh wow the C ext2 driver pins a bunch of buffer heads doesn't it...
/me runs

> +
> +        let mut remain = group_count;
> +        let mut offset = (SB_OFFSET / Offset::from(block_size) + 1) * Offset::from(block_size);
> +        while remain > 0 {
> +            let b = mapper.mapped_folio(offset)?;
> +            for g in Group::from_bytes_to_slice(&b).ok_or(EIO)? {
> +                groups.push(*g, GFP_NOFS)?;
> +                remain -= 1;
> +                if remain == 0 {
> +                    break;
> +                }
> +            }
> +            offset += Offset::try_from(b.len())?;
> +        }
> +
> +        Self::check_descriptors(s, &groups)?;
> +
> +        sb.set_magic(s.magic.value().into());
> +        drop(mapped);
> +        Ok(Box::new(
> +            Ext2Fs {
> +                mapper,
> +                block_size,
> +                _block_size_bits: block_size_bits,
> +                has_file_type,
> +                inodes_per_group,
> +                inodes_per_block,
> +                inode_count,
> +                inode_size,
> +                first_ino,
> +                group: groups,
> +            },
> +            GFP_KERNEL,
> +        )?)
> +    }
> +
> +    fn init_root(sb: &SuperBlock<Self>) -> Result<dentry::Root<Self>> {
> +        let inode = Self::iget(sb, EXT2_ROOT_INO)?;
> +        dentry::Root::try_new(inode)
> +    }
> +}
> +
> +fn rec_len(d: &DirEntry) -> u32 {
> +    let len = d.rec_len.value();
> +
> +    if PAGE_SIZE >= 65536 && len == u16::MAX {
> +        1u32 << 16
> +    } else {
> +        len.into()
> +    }
> +}
> +
> +#[vtable]
> +impl file::Operations for Ext2Fs {
> +    type FileSystem = Self;
> +
> +    fn seek(file: &File<Self>, offset: Offset, whence: file::Whence) -> Result<Offset> {
> +        file::generic_seek(file, offset, whence)
> +    }
> +
> +    fn read(_: &File<Self>, _: &mut user::Writer, _: &mut Offset) -> Result<usize> {
> +        Err(EISDIR)
> +    }
> +
> +    fn read_dir(

Wait, does this imply that regular files also have read_dir that you can
call?

> +        _file: &File<Self>,
> +        inode: &Locked<&INode<Self>, inode::ReadSem>,
> +        emitter: &mut file::DirEmitter,
> +    ) -> Result {
> +        let has_file_type = inode.super_block().data().has_file_type;
> +
> +        inode.for_each_page(emitter.pos(), Offset::MAX, |data| {

Neat that Rust can turn an indirect call to a lambda function into a
direct call.  I hear C can do that now too?

> +            let mut offset = 0usize;
> +            let mut acc: Offset = 0;
> +            let limit = data.len().saturating_sub(size_of::<DirEntry>());
> +            while offset < limit {
> +                let dirent = DirEntry::from_bytes(data, offset).ok_or(EIO)?;
> +                offset += size_of::<DirEntry>();
> +
> +                let name_len = usize::from(dirent.name_len);
> +                if data.len() - offset < name_len {
> +                    return Err(EIO);
> +                }
> +
> +                let name = &data[offset..][..name_len];
> +                let rec_len = rec_len(dirent);
> +                offset = offset - size_of::<DirEntry>() + rec_len as usize;
> +                if rec_len == 0 || offset > data.len() {
> +                    return Err(EIO);
> +                }
> +
> +                acc += Offset::from(rec_len);
> +                let ino = dirent.inode.value();
> +                if ino == 0 {
> +                    continue;
> +                }
> +
> +                let t = if !has_file_type {
> +                    file::DirEntryType::Unknown
> +                } else {
> +                    match dirent.file_type {
> +                        FT_REG_FILE => file::DirEntryType::Reg,
> +                        FT_DIR => file::DirEntryType::Dir,
> +                        FT_SYMLINK => file::DirEntryType::Lnk,
> +                        FT_CHRDEV => file::DirEntryType::Chr,
> +                        FT_BLKDEV => file::DirEntryType::Blk,
> +                        FT_FIFO => file::DirEntryType::Fifo,
> +                        FT_SOCK => file::DirEntryType::Sock,
> +                        _ => continue,

Isn't this a directory corruption?  return Err(EFSCORRUPTED) ?

> +                    }
> +                };
> +
> +                if !emitter.emit(acc, name, ino.into(), t) {
> +                    return Ok(Some(()));
> +                }
> +                acc = 0;
> +            }
> +            Ok(None)
> +        })?;
> +        Ok(())
> +    }
> +}
> +
> +#[vtable]
> +impl inode::Operations for Ext2Fs {
> +    type FileSystem = Self;
> +
> +    fn lookup(
> +        parent: &Locked<&INode<Self>, inode::ReadSem>,
> +        dentry: dentry::Unhashed<'_, Self>,
> +    ) -> Result<Option<ARef<DEntry<Self>>>> {
> +        let inode = parent.for_each_page(0, Offset::MAX, |data| {
> +            let mut offset = 0usize;
> +            while data.len() - offset > size_of::<DirEntry>() {
> +                let dirent = DirEntry::from_bytes(data, offset).ok_or(EIO)?;
> +                offset += size_of::<DirEntry>();
> +
> +                let name_len = usize::from(dirent.name_len);
> +                if data.len() - offset < name_len {
> +                    return Err(EIO);
> +                }
> +
> +                let name = &data[offset..][..name_len];
> +
> +                offset = offset - size_of::<DirEntry>() + usize::from(dirent.rec_len.value());
> +                if offset > data.len() {
> +                    return Err(EIO);
> +                }
> +
> +                let ino = dirent.inode.value();
> +                if ino != 0 && name == dentry.name() {
> +                    return Ok(Some(Self::iget(parent.super_block(), ino)?));
> +                }
> +            }
> +            Ok(None)
> +        })?;
> +
> +        dentry.splice_alias(inode)
> +    }
> +}
> +
> +impl iomap::Operations for Ext2Fs {
> +    type FileSystem = Self;
> +
> +    fn begin<'a>(
> +        inode: &'a INode<Self>,
> +        pos: Offset,
> +        length: Offset,
> +        _flags: u32,
> +        map: &mut iomap::Map<'a>,
> +        _srcmap: &mut iomap::Map<'a>,
> +    ) -> Result {
> +        let size = inode.size();
> +        if pos >= size {
> +            map.set_offset(pos)
> +                .set_length(length.try_into()?)
> +                .set_flags(iomap::map_flags::MERGED)
> +                .set_type(iomap::Type::Hole);
> +            return Ok(());
> +        }
> +
> +        let block_size = inode.super_block().data().block_size as Offset;
> +        let block = pos / block_size;
> +
> +        let boffset = Self::offset_to_block(inode, block)?;
> +        map.set_offset(block * block_size)
> +            .set_length(block_size as u64)
> +            .set_flags(iomap::map_flags::MERGED)
> +            .set_type(iomap::Type::Mapped)
> +            .set_bdev(Some(inode.super_block().bdev()))
> +            .set_addr(boffset * block_size as u64);

Neat use of chaining here.

> +
> +        Ok(())
> +    }
> +}
> +
> +fn decode_dev(block: &[LE<u32>]) -> (u32, u32) {
> +    let v = block[0].value();
> +    if v != 0 {
> +        ((v >> 8) & 255, v & 255)
> +    } else {
> +        let v = block[1].value();
> +        ((v & 0xfff00) >> 8, (v & 0xff) | ((v >> 12) & 0xfff00))

Nice not to have leXX_to_cpu calls everywhere here.

--D

> +    }
> +}
> diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
> index 445599d4bff6..732bc9939f7f 100644
> --- a/rust/kernel/lib.rs
> +++ b/rust/kernel/lib.rs
> @@ -165,3 +165,6 @@ macro_rules! container_of {
>          ptr.wrapping_sub(offset) as *const $type
>      }}
>  }
> +
> +/// The size in bytes of a page of memory.
> +pub const PAGE_SIZE: usize = bindings::PAGE_SIZE;
> -- 
> 2.34.1
> 
>
diff mbox series

Patch

diff --git a/fs/Kconfig b/fs/Kconfig
index 2cbd99d6784c..cf0cac5c5b1e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -338,6 +338,7 @@  source "fs/ufs/Kconfig"
 source "fs/erofs/Kconfig"
 source "fs/vboxsf/Kconfig"
 source "fs/tarfs/Kconfig"
+source "fs/rust-ext2/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
diff --git a/fs/Makefile b/fs/Makefile
index d8bbda73e3a9..c1a3007efc7d 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -130,3 +130,4 @@  obj-$(CONFIG_EROFS_FS)		+= erofs/
 obj-$(CONFIG_VBOXSF_FS)		+= vboxsf/
 obj-$(CONFIG_ZONEFS_FS)		+= zonefs/
 obj-$(CONFIG_TARFS_FS)		+= tarfs/
+obj-$(CONFIG_RUST_EXT2_FS)	+= rust-ext2/
diff --git a/fs/rust-ext2/Kconfig b/fs/rust-ext2/Kconfig
new file mode 100644
index 000000000000..976371655ca6
--- /dev/null
+++ b/fs/rust-ext2/Kconfig
@@ -0,0 +1,13 @@ 
+# SPDX-License-Identifier: GPL-2.0-only
+#
+
+config RUST_EXT2_FS
+	tristate "Rust second extended fs support"
+	depends on RUST && BLOCK
+	help
+	  Ext2 is a standard Linux file system for hard disks.
+
+	  To compile this file system support as a module, choose M here: the
+	  module will be called rust_ext2.
+
+	  If unsure, say Y.
diff --git a/fs/rust-ext2/Makefile b/fs/rust-ext2/Makefile
new file mode 100644
index 000000000000..ac960b5f89d7
--- /dev/null
+++ b/fs/rust-ext2/Makefile
@@ -0,0 +1,8 @@ 
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux tarfs filesystem routines.
+#
+
+obj-$(CONFIG_RUST_EXT2_FS) += rust_ext2.o
+
+rust_ext2-y := ext2.o
diff --git a/fs/rust-ext2/defs.rs b/fs/rust-ext2/defs.rs
new file mode 100644
index 000000000000..5f84852b4961
--- /dev/null
+++ b/fs/rust-ext2/defs.rs
@@ -0,0 +1,173 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+//! Definitions of tarfs structures.
+
+use kernel::types::LE;
+
+pub(crate) const EXT2_SUPER_MAGIC: u16 = 0xEF53;
+
+pub(crate) const EXT2_MAX_BLOCK_LOG_SIZE: u32 = 16;
+
+pub(crate) const EXT2_GOOD_OLD_REV: u32 = 0; /* The good old (original) format */
+pub(crate) const EXT2_DYNAMIC_REV: u32 = 1; /* V2 format w/ dynamic inode sizes */
+
+pub(crate) const EXT2_GOOD_OLD_INODE_SIZE: u16 = 128;
+
+pub(crate) const EXT2_ROOT_INO: u32 = 2; /* Root inode */
+
+/* First non-reserved inode for old ext2 filesystems. */
+pub(crate) const EXT2_GOOD_OLD_FIRST_INO: u32 = 11;
+
+pub(crate) const EXT2_FEATURE_INCOMPAT_FILETYPE: u32 = 0x0002;
+
+/*
+ * Constants relative to the data blocks
+ */
+pub(crate) const EXT2_NDIR_BLOCKS: usize = 12;
+pub(crate) const EXT2_IND_BLOCK: usize = EXT2_NDIR_BLOCKS;
+pub(crate) const EXT2_DIND_BLOCK: usize = EXT2_IND_BLOCK + 1;
+pub(crate) const EXT2_TIND_BLOCK: usize = EXT2_DIND_BLOCK + 1;
+pub(crate) const EXT2_N_BLOCKS: usize = EXT2_TIND_BLOCK + 1;
+
+kernel::derive_readable_from_bytes! {
+    #[repr(C)]
+    pub(crate) struct Super {
+        pub(crate) inodes_count: LE<u32>,
+        pub(crate) blocks_count: LE<u32>,
+        pub(crate) r_blocks_count: LE<u32>,
+        pub(crate) free_blocks_count: LE<u32>, /* Free blocks count */
+        pub(crate) free_inodes_count: LE<u32>, /* Free inodes count */
+        pub(crate) first_data_block: LE<u32>,  /* First Data Block */
+        pub(crate) log_block_size: LE<u32>,    /* Block size */
+        pub(crate) log_frag_size: LE<u32>,     /* Fragment size */
+        pub(crate) blocks_per_group: LE<u32>,  /* # Blocks per group */
+        pub(crate) frags_per_group: LE<u32>,   /* # Fragments per group */
+        pub(crate) inodes_per_group: LE<u32>,  /* # Inodes per group */
+        pub(crate) mtime: LE<u32>,             /* Mount time */
+        pub(crate) wtime: LE<u32>,             /* Write time */
+        pub(crate) mnt_count: LE<u16>,         /* Mount count */
+        pub(crate) max_mnt_count: LE<u16>,     /* Maximal mount count */
+        pub(crate) magic: LE<u16>,             /* Magic signature */
+        pub(crate) state: LE<u16>,             /* File system state */
+        pub(crate) errors: LE<u16>,            /* Behaviour when detecting errors */
+        pub(crate) minor_rev_level: LE<u16>,   /* minor revision level */
+        pub(crate) lastcheck: LE<u32>,         /* time of last check */
+        pub(crate) checkinterval: LE<u32>,     /* max. time between checks */
+        pub(crate) creator_os: LE<u32>,        /* OS */
+        pub(crate) rev_level: LE<u32>,         /* Revision level */
+        pub(crate) def_resuid: LE<u16>,        /* Default uid for reserved blocks */
+        pub(crate) def_resgid: LE<u16>,        /* Default gid for reserved blocks */
+        /*
+         * These fields are for EXT2_DYNAMIC_REV superblocks only.
+         *
+         * Note: the difference between the compatible feature set and
+         * the incompatible feature set is that if there is a bit set
+         * in the incompatible feature set that the kernel doesn't
+         * know about, it should refuse to mount the filesystem.
+         *
+         * e2fsck's requirements are more strict; if it doesn't know
+         * about a feature in either the compatible or incompatible
+         * feature set, it must abort and not try to meddle with
+         * things it doesn't understand...
+         */
+        pub(crate) first_ino: LE<u32>,              /* First non-reserved inode */
+        pub(crate) inode_size: LE<u16>,             /* size of inode structure */
+        pub(crate) block_group_nr: LE<u16>,         /* block group # of this superblock */
+        pub(crate) feature_compat: LE<u32>,         /* compatible feature set */
+        pub(crate) feature_incompat: LE<u32>,       /* incompatible feature set */
+        pub(crate) feature_ro_compat: LE<u32>,      /* readonly-compatible feature set */
+        pub(crate) uuid: [u8; 16],                  /* 128-bit uuid for volume */
+        pub(crate) volume_name: [u8; 16],           /* volume name */
+        pub(crate) last_mounted: [u8; 64],          /* directory where last mounted */
+        pub(crate) algorithm_usage_bitmap: LE<u32>, /* For compression */
+        /*
+         * Performance hints.  Directory preallocation should only
+         * happen if the EXT2_COMPAT_PREALLOC flag is on.
+         */
+        pub(crate) prealloc_blocks: u8,    /* Nr of blocks to try to preallocate*/
+        pub(crate) prealloc_dir_blocks: u8,        /* Nr to preallocate for dirs */
+        padding1: u16,
+        /*
+         * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
+         */
+        pub(crate) journal_uuid: [u8; 16],      /* uuid of journal superblock */
+        pub(crate) journal_inum: u32,           /* inode number of journal file */
+        pub(crate) journal_dev: u32,            /* device number of journal file */
+        pub(crate) last_orphan: u32,            /* start of list of inodes to delete */
+        pub(crate) hash_seed: [u32; 4],         /* HTREE hash seed */
+        pub(crate) def_hash_version: u8,        /* Default hash version to use */
+        pub(crate) reserved_char_pad: u8,
+        pub(crate) reserved_word_pad: u16,
+        pub(crate) default_mount_opts: LE<u32>,
+        pub(crate) first_meta_bg: LE<u32>,      /* First metablock block group */
+        reserved: [u32; 190],                   /* Padding to the end of the block */
+    }
+
+    #[repr(C)]
+    #[derive(Clone, Copy)]
+    pub(crate) struct Group {
+        /// Blocks bitmap block.
+        pub block_bitmap: LE<u32>,
+
+        /// Inodes bitmap block.
+        pub inode_bitmap: LE<u32>,
+
+        /// Inodes table block.
+        pub inode_table: LE<u32>,
+
+        /// Number of free blocks.
+        pub free_blocks_count: LE<u16>,
+
+        /// Number of free inodes.
+        pub free_inodes_count: LE<u16>,
+
+        /// Number of directories.
+        pub used_dirs_count: LE<u16>,
+
+        pad: LE<u16>,
+        reserved: [u32; 3],
+    }
+
+    #[repr(C)]
+    pub(crate) struct INode {
+        pub mode: LE<u16>,                  /* File mode */
+        pub uid: LE<u16>,                   /* Low 16 bits of Owner Uid */
+        pub size: LE<u32>,                  /* Size in bytes */
+        pub atime: LE<u32>,                 /* Access time */
+        pub ctime: LE<u32>,                 /* Creation time */
+        pub mtime: LE<u32>,                 /* Modification time */
+        pub dtime: LE<u32>,                 /* Deletion Time */
+        pub gid: LE<u16>,                   /* Low 16 bits of Group Id */
+        pub links_count: LE<u16>,           /* Links count */
+        pub blocks: LE<u32>,                /* Blocks count */
+        pub flags: LE<u32>,                 /* File flags */
+        pub reserved1: LE<u32>,
+        pub block: [LE<u32>; EXT2_N_BLOCKS],/* Pointers to blocks */
+        pub generation: LE<u32>,            /* File version (for NFS) */
+        pub file_acl: LE<u32>,              /* File ACL */
+        pub dir_acl: LE<u32>,               /* Directory ACL */
+        pub faddr: LE<u32>,                 /* Fragment address */
+        pub frag: u8,	                    /* Fragment number */
+        pub fsize: u8,	                    /* Fragment size */
+        pub pad1: LE<u16>,
+        pub uid_high: LE<u16>,
+        pub gid_high: LE<u16>,
+        pub reserved2: LE<u32>,
+    }
+
+    #[repr(C)]
+    pub(crate) struct DirEntry {
+        pub(crate) inode: LE<u32>,       /* Inode number */
+        pub(crate) rec_len: LE<u16>,     /* Directory entry length */
+        pub(crate) name_len: u8,         /* Name length */
+        pub(crate) file_type: u8,        /* Only if the "filetype" feature flag is set. */
+    }
+}
+
+pub(crate) const FT_REG_FILE: u8 = 1;
+pub(crate) const FT_DIR: u8 = 2;
+pub(crate) const FT_CHRDEV: u8 = 3;
+pub(crate) const FT_BLKDEV: u8 = 4;
+pub(crate) const FT_FIFO: u8 = 5;
+pub(crate) const FT_SOCK: u8 = 6;
+pub(crate) const FT_SYMLINK: u8 = 7;
diff --git a/fs/rust-ext2/ext2.rs b/fs/rust-ext2/ext2.rs
new file mode 100644
index 000000000000..2d6b1e7ca156
--- /dev/null
+++ b/fs/rust-ext2/ext2.rs
@@ -0,0 +1,551 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+//! Ext2 file system.
+
+use alloc::vec::Vec;
+use core::mem::size_of;
+use defs::*;
+use kernel::fs::{
+    self, address_space, dentry, dentry::DEntry, file, file::File, inode, inode::INode, iomap, sb,
+    sb::SuperBlock, Offset,
+};
+use kernel::types::{ARef, Either, FromBytes, Locked, LE};
+use kernel::{block, c_str, prelude::*, str::CString, time::Timespec, user, PAGE_SIZE};
+
+pub mod defs;
+
+kernel::module_fs! {
+    type: Ext2Fs,
+    name: "ext2",
+    author: "Wedson Almeida Filho <walmeida@microsoft.com>",
+    description: "ext2 file system",
+    license: "GPL",
+}
+
+const SB_OFFSET: Offset = 1024;
+
+struct INodeData {
+    data_blocks: [u32; defs::EXT2_N_BLOCKS],
+}
+
+struct Ext2Fs {
+    mapper: inode::Mapper,
+    block_size: u32,
+    has_file_type: bool,
+    _block_size_bits: u32,
+    inodes_per_block: u32,
+    inodes_per_group: u32,
+    inode_count: u32,
+    inode_size: u16,
+    first_ino: u32,
+    group: Vec<defs::Group>,
+}
+
+impl Ext2Fs {
+    fn iget(sb: &SuperBlock<Self>, ino: u32) -> Result<ARef<INode<Self>>> {
+        let s = sb.data();
+        if (ino != EXT2_ROOT_INO && ino < s.first_ino) || ino > s.inode_count {
+            return Err(ENOENT);
+        }
+        let group = ((ino - 1) / s.inodes_per_group) as usize;
+        let offset = (ino - 1) % s.inodes_per_group;
+
+        if group >= s.group.len() {
+            return Err(ENOENT);
+        }
+
+        // Create an inode or find an existing (cached) one.
+        let mut inode = match sb.get_or_create_inode(ino.into())? {
+            Either::Left(existing) => return Ok(existing),
+            Either::Right(new) => new,
+        };
+
+        let inodes_block = Offset::from(s.group[group].inode_table.value());
+        let inode_block = inodes_block + Offset::from(offset / s.inodes_per_block);
+        let offset = (offset % s.inodes_per_block) as usize;
+        let b = sb
+            .data()
+            .mapper
+            .mapped_folio(inode_block * Offset::from(s.block_size))?;
+        let idata = defs::INode::from_bytes(&b, offset * s.inode_size as usize).ok_or(EIO)?;
+        let mode = idata.mode.value();
+
+        if idata.links_count.value() == 0 && (mode == 0 || idata.dtime.value() != 0) {
+            return Err(ESTALE);
+        }
+
+        const DIR_FOPS: file::Ops<Ext2Fs> = file::Ops::new::<Ext2Fs>();
+        const DIR_IOPS: inode::Ops<Ext2Fs> = inode::Ops::new::<Ext2Fs>();
+        const FILE_AOPS: address_space::Ops<Ext2Fs> = iomap::ro_aops::<Ext2Fs>();
+
+        let mut size = idata.size.value().into();
+        let typ = match mode & fs::mode::S_IFMT {
+            fs::mode::S_IFREG => {
+                size |= Offset::from(idata.dir_acl.value())
+                    .checked_shl(32)
+                    .ok_or(EUCLEAN)?;
+                inode
+                    .set_aops(FILE_AOPS)
+                    .set_fops(file::Ops::generic_ro_file());
+                inode::Type::Reg
+            }
+            fs::mode::S_IFDIR => {
+                inode
+                    .set_iops(DIR_IOPS)
+                    .set_fops(DIR_FOPS)
+                    .set_aops(FILE_AOPS);
+                inode::Type::Dir
+            }
+            fs::mode::S_IFLNK => {
+                if idata.blocks.value() == 0 {
+                    const OFFSET: usize = core::mem::offset_of!(defs::INode, block);
+                    let name = &b[offset * usize::from(s.inode_size) + OFFSET..];
+                    let name_len = size as usize;
+                    if name_len > name.len() || name_len == 0 {
+                        return Err(EIO);
+                    }
+                    inode.set_iops(inode::Ops::simple_symlink_inode());
+                    inode::Type::Lnk(Some(CString::try_from(&name[..name_len])?))
+                } else {
+                    inode
+                        .set_aops(FILE_AOPS)
+                        .set_iops(inode::Ops::page_symlink_inode());
+                    inode::Type::Lnk(None)
+                }
+            }
+            fs::mode::S_IFSOCK => inode::Type::Sock,
+            fs::mode::S_IFIFO => inode::Type::Fifo,
+            fs::mode::S_IFCHR => {
+                let (major, minor) = decode_dev(&idata.block);
+                inode::Type::Chr(major, minor)
+            }
+            fs::mode::S_IFBLK => {
+                let (major, minor) = decode_dev(&idata.block);
+                inode::Type::Blk(major, minor)
+            }
+            _ => return Err(ENOENT),
+        };
+        inode.init(inode::Params {
+            typ,
+            mode: mode & 0o777,
+            size,
+            blocks: idata.blocks.value().into(),
+            nlink: idata.links_count.value().into(),
+            uid: u32::from(idata.uid.value()) | u32::from(idata.uid_high.value()) << 16,
+            gid: u32::from(idata.gid.value()) | u32::from(idata.gid_high.value()) << 16,
+            ctime: Timespec::new(idata.ctime.value().into(), 0)?,
+            mtime: Timespec::new(idata.mtime.value().into(), 0)?,
+            atime: Timespec::new(idata.atime.value().into(), 0)?,
+            value: INodeData {
+                data_blocks: core::array::from_fn(|i| idata.block[i].value()),
+            },
+        })
+    }
+
+    fn offsets<'a>(&self, mut block: u64, out: &'a mut [u32]) -> Option<&'a [u32]> {
+        let ptrs = u64::from(self.block_size / size_of::<u32>() as u32);
+        let ptr_mask = ptrs - 1;
+        let ptr_bits = ptrs.trailing_zeros();
+
+        if block < EXT2_NDIR_BLOCKS as u64 {
+            out[0] = block as u32;
+            return Some(&out[..1]);
+        }
+
+        block -= EXT2_NDIR_BLOCKS as u64;
+        if block < ptrs {
+            out[0] = EXT2_IND_BLOCK as u32;
+            out[1] = block as u32;
+            return Some(&out[..2]);
+        }
+
+        block -= ptrs;
+        if block < (1 << (2 * ptr_bits)) {
+            out[0] = EXT2_DIND_BLOCK as u32;
+            out[1] = (block >> ptr_bits) as u32;
+            out[2] = (block & ptr_mask) as u32;
+            return Some(&out[..3]);
+        }
+
+        block -= ptrs * ptrs;
+        if block < ptrs * ptrs * ptrs {
+            out[0] = EXT2_TIND_BLOCK as u32;
+            out[1] = (block >> (2 * ptr_bits)) as u32;
+            out[2] = ((block >> ptr_bits) & ptr_mask) as u32;
+            out[3] = (block & ptr_mask) as u32;
+            return Some(&out[..4]);
+        }
+
+        None
+    }
+
+    fn offset_to_block(inode: &INode<Self>, block: Offset) -> Result<u64> {
+        let s = inode.super_block().data();
+        let mut indices = [0u32; 4];
+        let boffsets = s.offsets(block as u64, &mut indices).ok_or(EIO)?;
+        let mut boffset = inode.data().data_blocks[boffsets[0] as usize];
+        let mapper = &s.mapper;
+        for i in &boffsets[1..] {
+            let b = mapper.mapped_folio(Offset::from(boffset) * Offset::from(s.block_size))?;
+            let table = LE::<u32>::from_bytes_to_slice(&b).ok_or(EIO)?;
+            boffset = table[*i as usize].value();
+        }
+        Ok(boffset.into())
+    }
+
+    fn check_descriptors(s: &Super, groups: &[Group]) -> Result {
+        for (i, g) in groups.iter().enumerate() {
+            let first = i as u32 * s.blocks_per_group.value() + s.first_data_block.value();
+            let last = if i == groups.len() - 1 {
+                s.blocks_count.value()
+            } else {
+                first + s.blocks_per_group.value() - 1
+            };
+
+            if g.block_bitmap.value() < first || g.block_bitmap.value() > last {
+                pr_err!(
+                    "Block bitmap for group {i} no in group (block {})\n",
+                    g.block_bitmap.value()
+                );
+                return Err(EINVAL);
+            }
+
+            if g.inode_bitmap.value() < first || g.inode_bitmap.value() > last {
+                pr_err!(
+                    "Inode bitmap for group {i} no in group (block {})\n",
+                    g.inode_bitmap.value()
+                );
+                return Err(EINVAL);
+            }
+
+            if g.inode_table.value() < first || g.inode_table.value() > last {
+                pr_err!(
+                    "Inode table for group {i} no in group (block {})\n",
+                    g.inode_table.value()
+                );
+                return Err(EINVAL);
+            }
+        }
+        Ok(())
+    }
+}
+
+impl fs::FileSystem for Ext2Fs {
+    type Data = Box<Self>;
+    type INodeData = INodeData;
+    const NAME: &'static CStr = c_str!("rust-ext2");
+    const SUPER_TYPE: sb::Type = sb::Type::BlockDev;
+
+    fn fill_super(
+        sb: &mut SuperBlock<Self, sb::New>,
+        mapper: Option<inode::Mapper>,
+    ) -> Result<Self::Data> {
+        let Some(mapper) = mapper else {
+            return Err(EINVAL);
+        };
+
+        if sb.min_blocksize(PAGE_SIZE as i32) == 0 {
+            pr_err!("Unable to set block size\n");
+            return Err(EINVAL);
+        }
+
+        // Map the super block and check the magic number.
+        let mapped = mapper.mapped_folio(SB_OFFSET)?;
+        let s = Super::from_bytes(&mapped, 0).ok_or(EIO)?;
+
+        if s.magic.value() != EXT2_SUPER_MAGIC {
+            return Err(EINVAL);
+        }
+
+        // Check for unsupported flags.
+        let mut has_file_type = false;
+        if s.rev_level.value() >= EXT2_DYNAMIC_REV {
+            let features = s.feature_incompat.value();
+            if features & !EXT2_FEATURE_INCOMPAT_FILETYPE != 0 {
+                pr_err!("Unsupported incompatible feature: {:x}\n", features);
+                return Err(EINVAL);
+            }
+
+            has_file_type = features & EXT2_FEATURE_INCOMPAT_FILETYPE != 0;
+
+            let features = s.feature_ro_compat.value();
+            if !sb.rdonly() && features != 0 {
+                pr_err!("Unsupported rw incompatible feature: {:x}\n", features);
+                return Err(EINVAL);
+            }
+        }
+
+        // Set the block size.
+        let block_size_bits = s.log_block_size.value();
+        if block_size_bits > EXT2_MAX_BLOCK_LOG_SIZE - 10 {
+            pr_err!("Invalid log block size: {}\n", block_size_bits);
+            return Err(EINVAL);
+        }
+
+        let block_size = 1024u32 << block_size_bits;
+        if sb.min_blocksize(block_size as i32) != block_size as i32 {
+            pr_err!("Bad block size: {}\n", block_size);
+            return Err(ENXIO);
+        }
+
+        // Get the first inode and the inode size.
+        let (inode_size, first_ino) = if s.rev_level.value() == EXT2_GOOD_OLD_REV {
+            (EXT2_GOOD_OLD_INODE_SIZE, EXT2_GOOD_OLD_FIRST_INO)
+        } else {
+            let size = s.inode_size.value();
+            if size < EXT2_GOOD_OLD_INODE_SIZE
+                || !size.is_power_of_two()
+                || u32::from(size) > block_size
+            {
+                pr_err!("Unsupported inode size: {}\n", size);
+                return Err(EINVAL);
+            }
+            (size, s.first_ino.value())
+        };
+
+        // Get the number of inodes per group and per block.
+        let inode_count = s.inodes_count.value();
+        let inodes_per_group = s.inodes_per_group.value();
+        let inodes_per_block = block_size / u32::from(inode_size);
+        if inodes_per_group == 0 || inodes_per_block == 0 {
+            return Err(EINVAL);
+        }
+
+        if inodes_per_group > block_size * 8 || inodes_per_group < inodes_per_block {
+            pr_err!("Bad inodes per group: {}\n", inodes_per_group);
+            return Err(EINVAL);
+        }
+
+        // Check the size of the groups.
+        let itb_per_group = inodes_per_group / inodes_per_block;
+        let blocks_per_group = s.blocks_per_group.value();
+        if blocks_per_group > block_size * 8 || blocks_per_group <= itb_per_group + 3 {
+            pr_err!("Bad blocks per group: {}\n", blocks_per_group);
+            return Err(EINVAL);
+        }
+
+        let blocks_count = s.blocks_count.value();
+        if block::Sector::from(blocks_count) > sb.sector_count() >> (1 + block_size_bits) {
+            pr_err!(
+                "Block count ({blocks_count}) exceeds size of device ({})\n",
+                sb.sector_count() >> (1 + block_size_bits)
+            );
+            return Err(EINVAL);
+        }
+
+        let group_count = (blocks_count - s.first_data_block.value() - 1) / blocks_per_group + 1;
+        if group_count * inodes_per_group != inode_count {
+            pr_err!(
+                "Unexpected inode count: {inode_count} vs {}",
+                group_count * inodes_per_group
+            );
+            return Err(EINVAL);
+        }
+
+        let mut groups = Vec::new();
+        groups.reserve(group_count as usize, GFP_NOFS)?;
+
+        let mut remain = group_count;
+        let mut offset = (SB_OFFSET / Offset::from(block_size) + 1) * Offset::from(block_size);
+        while remain > 0 {
+            let b = mapper.mapped_folio(offset)?;
+            for g in Group::from_bytes_to_slice(&b).ok_or(EIO)? {
+                groups.push(*g, GFP_NOFS)?;
+                remain -= 1;
+                if remain == 0 {
+                    break;
+                }
+            }
+            offset += Offset::try_from(b.len())?;
+        }
+
+        Self::check_descriptors(s, &groups)?;
+
+        sb.set_magic(s.magic.value().into());
+        drop(mapped);
+        Ok(Box::new(
+            Ext2Fs {
+                mapper,
+                block_size,
+                _block_size_bits: block_size_bits,
+                has_file_type,
+                inodes_per_group,
+                inodes_per_block,
+                inode_count,
+                inode_size,
+                first_ino,
+                group: groups,
+            },
+            GFP_KERNEL,
+        )?)
+    }
+
+    fn init_root(sb: &SuperBlock<Self>) -> Result<dentry::Root<Self>> {
+        let inode = Self::iget(sb, EXT2_ROOT_INO)?;
+        dentry::Root::try_new(inode)
+    }
+}
+
+fn rec_len(d: &DirEntry) -> u32 {
+    let len = d.rec_len.value();
+
+    if PAGE_SIZE >= 65536 && len == u16::MAX {
+        1u32 << 16
+    } else {
+        len.into()
+    }
+}
+
+#[vtable]
+impl file::Operations for Ext2Fs {
+    type FileSystem = Self;
+
+    fn seek(file: &File<Self>, offset: Offset, whence: file::Whence) -> Result<Offset> {
+        file::generic_seek(file, offset, whence)
+    }
+
+    fn read(_: &File<Self>, _: &mut user::Writer, _: &mut Offset) -> Result<usize> {
+        Err(EISDIR)
+    }
+
+    fn read_dir(
+        _file: &File<Self>,
+        inode: &Locked<&INode<Self>, inode::ReadSem>,
+        emitter: &mut file::DirEmitter,
+    ) -> Result {
+        let has_file_type = inode.super_block().data().has_file_type;
+
+        inode.for_each_page(emitter.pos(), Offset::MAX, |data| {
+            let mut offset = 0usize;
+            let mut acc: Offset = 0;
+            let limit = data.len().saturating_sub(size_of::<DirEntry>());
+            while offset < limit {
+                let dirent = DirEntry::from_bytes(data, offset).ok_or(EIO)?;
+                offset += size_of::<DirEntry>();
+
+                let name_len = usize::from(dirent.name_len);
+                if data.len() - offset < name_len {
+                    return Err(EIO);
+                }
+
+                let name = &data[offset..][..name_len];
+                let rec_len = rec_len(dirent);
+                offset = offset - size_of::<DirEntry>() + rec_len as usize;
+                if rec_len == 0 || offset > data.len() {
+                    return Err(EIO);
+                }
+
+                acc += Offset::from(rec_len);
+                let ino = dirent.inode.value();
+                if ino == 0 {
+                    continue;
+                }
+
+                let t = if !has_file_type {
+                    file::DirEntryType::Unknown
+                } else {
+                    match dirent.file_type {
+                        FT_REG_FILE => file::DirEntryType::Reg,
+                        FT_DIR => file::DirEntryType::Dir,
+                        FT_SYMLINK => file::DirEntryType::Lnk,
+                        FT_CHRDEV => file::DirEntryType::Chr,
+                        FT_BLKDEV => file::DirEntryType::Blk,
+                        FT_FIFO => file::DirEntryType::Fifo,
+                        FT_SOCK => file::DirEntryType::Sock,
+                        _ => continue,
+                    }
+                };
+
+                if !emitter.emit(acc, name, ino.into(), t) {
+                    return Ok(Some(()));
+                }
+                acc = 0;
+            }
+            Ok(None)
+        })?;
+        Ok(())
+    }
+}
+
+#[vtable]
+impl inode::Operations for Ext2Fs {
+    type FileSystem = Self;
+
+    fn lookup(
+        parent: &Locked<&INode<Self>, inode::ReadSem>,
+        dentry: dentry::Unhashed<'_, Self>,
+    ) -> Result<Option<ARef<DEntry<Self>>>> {
+        let inode = parent.for_each_page(0, Offset::MAX, |data| {
+            let mut offset = 0usize;
+            while data.len() - offset > size_of::<DirEntry>() {
+                let dirent = DirEntry::from_bytes(data, offset).ok_or(EIO)?;
+                offset += size_of::<DirEntry>();
+
+                let name_len = usize::from(dirent.name_len);
+                if data.len() - offset < name_len {
+                    return Err(EIO);
+                }
+
+                let name = &data[offset..][..name_len];
+
+                offset = offset - size_of::<DirEntry>() + usize::from(dirent.rec_len.value());
+                if offset > data.len() {
+                    return Err(EIO);
+                }
+
+                let ino = dirent.inode.value();
+                if ino != 0 && name == dentry.name() {
+                    return Ok(Some(Self::iget(parent.super_block(), ino)?));
+                }
+            }
+            Ok(None)
+        })?;
+
+        dentry.splice_alias(inode)
+    }
+}
+
+impl iomap::Operations for Ext2Fs {
+    type FileSystem = Self;
+
+    fn begin<'a>(
+        inode: &'a INode<Self>,
+        pos: Offset,
+        length: Offset,
+        _flags: u32,
+        map: &mut iomap::Map<'a>,
+        _srcmap: &mut iomap::Map<'a>,
+    ) -> Result {
+        let size = inode.size();
+        if pos >= size {
+            map.set_offset(pos)
+                .set_length(length.try_into()?)
+                .set_flags(iomap::map_flags::MERGED)
+                .set_type(iomap::Type::Hole);
+            return Ok(());
+        }
+
+        let block_size = inode.super_block().data().block_size as Offset;
+        let block = pos / block_size;
+
+        let boffset = Self::offset_to_block(inode, block)?;
+        map.set_offset(block * block_size)
+            .set_length(block_size as u64)
+            .set_flags(iomap::map_flags::MERGED)
+            .set_type(iomap::Type::Mapped)
+            .set_bdev(Some(inode.super_block().bdev()))
+            .set_addr(boffset * block_size as u64);
+
+        Ok(())
+    }
+}
+
+fn decode_dev(block: &[LE<u32>]) -> (u32, u32) {
+    let v = block[0].value();
+    if v != 0 {
+        ((v >> 8) & 255, v & 255)
+    } else {
+        let v = block[1].value();
+        ((v & 0xfff00) >> 8, (v & 0xff) | ((v >> 12) & 0xfff00))
+    }
+}
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 445599d4bff6..732bc9939f7f 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -165,3 +165,6 @@  macro_rules! container_of {
         ptr.wrapping_sub(offset) as *const $type
     }}
 }
+
+/// The size in bytes of a page of memory.
+pub const PAGE_SIZE: usize = bindings::PAGE_SIZE;