
[v5,10/24] iomap: integrate fs-verity verification into iomap's read path

Message ID: 20240304191046.157464-12-aalbersh@redhat.com (mailing list archive)
State: Superseded, archived
Series: fs-verity support for XFS

Commit Message

Andrey Albershteyn March 4, 2024, 7:10 p.m. UTC
This patch adds fs-verity verification into iomap's read path. After a
BIO's I/O operation completes, the data is verified against fs-verity's
Merkle tree. The verification work is done in a separate workqueue.

The read path ioends (iomap_read_ioend) are stored side by side with
the BIOs when FS_VERITY is enabled.

Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/iomap/buffered-io.c | 92 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 83 insertions(+), 9 deletions(-)

Comments

Eric Biggers March 4, 2024, 11:39 p.m. UTC | #1
On Mon, Mar 04, 2024 at 08:10:33PM +0100, Andrey Albershteyn wrote:
> +#ifdef CONFIG_FS_VERITY
> +struct iomap_fsverity_bio {
> +	struct work_struct	work;
> +	struct bio		bio;
> +};

Maybe leave a comment above that mentions that bio must be the last field.
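
Something along these lines, perhaps (wording is just a suggestion):

/*
 * The bio must be the last field: the bioset's front_pad places this
 * structure in front of the bio, and the bio's inline bvecs are stored
 * directly after it in the same allocation.
 */
struct iomap_fsverity_bio {
	struct work_struct	work;
	struct bio		bio;	/* must be last */
};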

> @@ -471,6 +529,7 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
>   * iomap_readahead - Attempt to read pages from a file.
>   * @rac: Describes the pages to be read.
>   * @ops: The operations vector for the filesystem.
> + * @wq: Workqueue for post-I/O processing (only need for fsverity)

This should not be here.

> +#define IOMAP_POOL_SIZE		(4 * (PAGE_SIZE / SECTOR_SIZE))
> +
>  static int __init iomap_init(void)
>  {
> -	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
> -			   offsetof(struct iomap_ioend, io_inline_bio),
> -			   BIOSET_NEED_BVECS);
> +	int error;
> +
> +	error = bioset_init(&iomap_ioend_bioset, IOMAP_POOL_SIZE,
> +			    offsetof(struct iomap_ioend, io_inline_bio),
> +			    BIOSET_NEED_BVECS);
> +#ifdef CONFIG_FS_VERITY
> +	if (error)
> +		return error;
> +
> +	error = bioset_init(&iomap_fsverity_bioset, IOMAP_POOL_SIZE,
> +			    offsetof(struct iomap_fsverity_bio, bio),
> +			    BIOSET_NEED_BVECS);
> +	if (error)
> +		bioset_exit(&iomap_ioend_bioset);
> +#endif
> +	return error;
>  }
>  fs_initcall(iomap_init);

This makes all kernels with CONFIG_FS_VERITY enabled start preallocating memory
for these bios, regardless of whether they end up being used or not.  When
PAGE_SIZE==4096 it comes out to about 134 KiB of memory total (32 bios at just
over 4 KiB per bio, most of which is used for the BIO_MAX_VECS bvecs), and it
scales up with PAGE_SIZE such that with PAGE_SIZE==65536 it's about 2144 KiB.
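
For reference, the rough arithmetic behind those numbers (assuming a
64-bit build, BIO_MAX_VECS == 256 and 16-byte bio_vecs):

  pool entries  = 4 * (PAGE_SIZE / SECTOR_SIZE) = 32 (4K pages) or 512 (64K pages)
  per entry    ~= front_pad + sizeof(struct bio) + BIO_MAX_VECS * 16 ~= 4.2 KiB
  total        ~= 32 * 4.2 KiB ~= 134 KiB, or 512 * 4.2 KiB ~= 2144 KiB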

How about allocating the pool when it's known it's actually going to be used,
similar to what fs/crypto/ does for fscrypt_bounce_page_pool?  For example,
there could be a flag in struct fsverity_operations that says whether filesystem
wants the iomap fsverity bioset, and when fs/verity/ sets up the fsverity_info
for any file for the first time since boot, it could call into fs/iomap/ to
initialize the iomap fsverity bioset if needed.
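
A very rough sketch of how that opt-in could be wired up (the flag and
the iomap helper below are hypothetical, just to show the shape of it):

/* hypothetical new member in struct fsverity_operations: */
	unsigned int		uses_iomap_read_path:1;

/* and in fs/verity/, when an fsverity_info is set up: */
	if (inode->i_sb->s_vop->uses_iomap_read_path) {
		/* hypothetical helper; allocates the bioset on first use,
		 * no-op on later calls */
		err = iomap_fsverity_init_bioset();
		if (err)
			return err;
	}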

BTW, errors from builtin initcalls such as iomap_init() get ignored.  So the
error handling logic above does not really work as may have been intended.

- Eric
Darrick J. Wong March 7, 2024, 10:06 p.m. UTC | #2
On Mon, Mar 04, 2024 at 03:39:27PM -0800, Eric Biggers wrote:
> On Mon, Mar 04, 2024 at 08:10:33PM +0100, Andrey Albershteyn wrote:
> > +#ifdef CONFIG_FS_VERITY
> > +struct iomap_fsverity_bio {
> > +	struct work_struct	work;
> > +	struct bio		bio;
> > +};
> 
> Maybe leave a comment above that mentions that bio must be the last field.
> 
> > @@ -471,6 +529,7 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
> >   * iomap_readahead - Attempt to read pages from a file.
> >   * @rac: Describes the pages to be read.
> >   * @ops: The operations vector for the filesystem.
> > + * @wq: Workqueue for post-I/O processing (only need for fsverity)
> 
> This should not be here.
> 
> > +#define IOMAP_POOL_SIZE		(4 * (PAGE_SIZE / SECTOR_SIZE))
> > +
> >  static int __init iomap_init(void)
> >  {
> > -	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
> > -			   offsetof(struct iomap_ioend, io_inline_bio),
> > -			   BIOSET_NEED_BVECS);
> > +	int error;
> > +
> > +	error = bioset_init(&iomap_ioend_bioset, IOMAP_POOL_SIZE,
> > +			    offsetof(struct iomap_ioend, io_inline_bio),
> > +			    BIOSET_NEED_BVECS);
> > +#ifdef CONFIG_FS_VERITY
> > +	if (error)
> > +		return error;
> > +
> > +	error = bioset_init(&iomap_fsverity_bioset, IOMAP_POOL_SIZE,
> > +			    offsetof(struct iomap_fsverity_bio, bio),
> > +			    BIOSET_NEED_BVECS);
> > +	if (error)
> > +		bioset_exit(&iomap_ioend_bioset);
> > +#endif
> > +	return error;
> >  }
> >  fs_initcall(iomap_init);
> 
> This makes all kernels with CONFIG_FS_VERITY enabled start preallocating memory
> for these bios, regardless of whether they end up being used or not.  When
> PAGE_SIZE==4096 it comes out to about 134 KiB of memory total (32 bios at just
> over 4 KiB per bio, most of which is used for the BIO_MAX_VECS bvecs), and it
> scales up with PAGE_SIZE such that with PAGE_SIZE==65536 it's about 2144 KiB.
> 
> How about allocating the pool when it's known it's actually going to be used,
> similar to what fs/crypto/ does for fscrypt_bounce_page_pool?  For example,
> there could be a flag in struct fsverity_operations that says whether filesystem
> wants the iomap fsverity bioset, and when fs/verity/ sets up the fsverity_info
> for any file for the first time since boot, it could call into fs/iomap/ to
> initialize the iomap fsverity bioset if needed.

I was thinking the same thing.

Though I wondered if you want to have a CONFIG_IOMAP_FS_VERITY as well?
But that might be tinykernel tetrapyloctomy.

--D

> BTW, errors from builtin initcalls such as iomap_init() get ignored.  So the
> error handling logic above does not really work as may have been intended.
> 
> - Eric
>
Eric Biggers March 7, 2024, 10:19 p.m. UTC | #3
On Thu, Mar 07, 2024 at 02:06:08PM -0800, Darrick J. Wong wrote:
> On Mon, Mar 04, 2024 at 03:39:27PM -0800, Eric Biggers wrote:
> > On Mon, Mar 04, 2024 at 08:10:33PM +0100, Andrey Albershteyn wrote:
> > > +#ifdef CONFIG_FS_VERITY
> > > +struct iomap_fsverity_bio {
> > > +	struct work_struct	work;
> > > +	struct bio		bio;
> > > +};
> > 
> > Maybe leave a comment above that mentions that bio must be the last field.
> > 
> > > @@ -471,6 +529,7 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
> > >   * iomap_readahead - Attempt to read pages from a file.
> > >   * @rac: Describes the pages to be read.
> > >   * @ops: The operations vector for the filesystem.
> > > + * @wq: Workqueue for post-I/O processing (only need for fsverity)
> > 
> > This should not be here.
> > 
> > > +#define IOMAP_POOL_SIZE		(4 * (PAGE_SIZE / SECTOR_SIZE))
> > > +
> > >  static int __init iomap_init(void)
> > >  {
> > > -	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
> > > -			   offsetof(struct iomap_ioend, io_inline_bio),
> > > -			   BIOSET_NEED_BVECS);
> > > +	int error;
> > > +
> > > +	error = bioset_init(&iomap_ioend_bioset, IOMAP_POOL_SIZE,
> > > +			    offsetof(struct iomap_ioend, io_inline_bio),
> > > +			    BIOSET_NEED_BVECS);
> > > +#ifdef CONFIG_FS_VERITY
> > > +	if (error)
> > > +		return error;
> > > +
> > > +	error = bioset_init(&iomap_fsverity_bioset, IOMAP_POOL_SIZE,
> > > +			    offsetof(struct iomap_fsverity_bio, bio),
> > > +			    BIOSET_NEED_BVECS);
> > > +	if (error)
> > > +		bioset_exit(&iomap_ioend_bioset);
> > > +#endif
> > > +	return error;
> > >  }
> > >  fs_initcall(iomap_init);
> > 
> > This makes all kernels with CONFIG_FS_VERITY enabled start preallocating memory
> > for these bios, regardless of whether they end up being used or not.  When
> > PAGE_SIZE==4096 it comes out to about 134 KiB of memory total (32 bios at just
> > over 4 KiB per bio, most of which is used for the BIO_MAX_VECS bvecs), and it
> > scales up with PAGE_SIZE such that with PAGE_SIZE==65536 it's about 2144 KiB.
> > 
> > How about allocating the pool when it's known it's actually going to be used,
> > similar to what fs/crypto/ does for fscrypt_bounce_page_pool?  For example,
> > there could be a flag in struct fsverity_operations that says whether filesystem
> > wants the iomap fsverity bioset, and when fs/verity/ sets up the fsverity_info
> > for any file for the first time since boot, it could call into fs/iomap/ to
> > initialize the iomap fsverity bioset if needed.
> 
> I was thinking the same thing.
> 
> Though I wondered if you want to have a CONFIG_IOMAP_FS_VERITY as well?
> But that might be tinykernel tetrapyloctomy.
> 

I'd prefer just doing the alloc-on-first-use optimization, and not do
CONFIG_IOMAP_FS_VERITY.  CONFIG_IOMAP_FS_VERITY would serve a similar purpose,
but only if XFS is not built into the kernel, and only before the other
filesystems that support fsverity convert their buffered reads to use iomap.

For tiny kernels there's always the option of CONFIG_FS_VERITY=n.

- Eric
Dave Chinner March 7, 2024, 11:38 p.m. UTC | #4
On Mon, Mar 04, 2024 at 03:39:27PM -0800, Eric Biggers wrote:
> On Mon, Mar 04, 2024 at 08:10:33PM +0100, Andrey Albershteyn wrote:
> > +#ifdef CONFIG_FS_VERITY
> > +struct iomap_fsverity_bio {
> > +	struct work_struct	work;
> > +	struct bio		bio;
> > +};
> 
> Maybe leave a comment above that mentions that bio must be the last field.
> 
> > @@ -471,6 +529,7 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
> >   * iomap_readahead - Attempt to read pages from a file.
> >   * @rac: Describes the pages to be read.
> >   * @ops: The operations vector for the filesystem.
> > + * @wq: Workqueue for post-I/O processing (only need for fsverity)
> 
> This should not be here.
> 
> > +#define IOMAP_POOL_SIZE		(4 * (PAGE_SIZE / SECTOR_SIZE))
> > +
> >  static int __init iomap_init(void)
> >  {
> > -	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
> > -			   offsetof(struct iomap_ioend, io_inline_bio),
> > -			   BIOSET_NEED_BVECS);
> > +	int error;
> > +
> > +	error = bioset_init(&iomap_ioend_bioset, IOMAP_POOL_SIZE,
> > +			    offsetof(struct iomap_ioend, io_inline_bio),
> > +			    BIOSET_NEED_BVECS);
> > +#ifdef CONFIG_FS_VERITY
> > +	if (error)
> > +		return error;
> > +
> > +	error = bioset_init(&iomap_fsverity_bioset, IOMAP_POOL_SIZE,
> > +			    offsetof(struct iomap_fsverity_bio, bio),
> > +			    BIOSET_NEED_BVECS);
> > +	if (error)
> > +		bioset_exit(&iomap_ioend_bioset);
> > +#endif
> > +	return error;
> >  }
> >  fs_initcall(iomap_init);
> 
> This makes all kernels with CONFIG_FS_VERITY enabled start preallocating memory
> for these bios, regardless of whether they end up being used or not.  When
> PAGE_SIZE==4096 it comes out to about 134 KiB of memory total (32 bios at just
> over 4 KiB per bio, most of which is used for the BIO_MAX_VECS bvecs), and it
> scales up with PAGE_SIZE such that with PAGE_SIZE==65536 it's about 2144 KiB.

Honestly: I don't think we care about this.

Indeed, if a system is configured with iomap and does not use XFS,
GFS2 or zonefs, it's not going to be using the iomap_ioend_bioset at
all, either. So by your definition that's just wasted memory, too, on
systems that don't use any of these three filesystems. But we
aren't going to make that one conditional, because the complexity
and overhead of checks that never trigger after the first IO doesn't
actually provide any return for the cost of ongoing maintenance.

Similarly, once XFS has fsverity enabled, it's going to get used all
over the place in the container and VM world. So we are *always*
going to want this bioset to be initialised on these production
systems, so it falls into the same category as the
iomap_ioend_bioset. That is, if you don't want that overhead, turn
the functionality off via CONFIG file options.

> How about allocating the pool when it's known it's actually going to be used,
> similar to what fs/crypto/ does for fscrypt_bounce_page_pool?  For example,
> there could be a flag in struct fsverity_operations that says whether filesystem
> wants the iomap fsverity bioset, and when fs/verity/ sets up the fsverity_info
> for any file for the first time since boot, it could call into fs/iomap/ to
> initialize the iomap fsverity bioset if needed.
> 
> BTW, errors from builtin initcalls such as iomap_init() get ignored.  So the
> error handling logic above does not really work as may have been intended.

That's not an iomap problem - lots of fs_initcall() functions return
errors because they failed things like memory allocation. If this is
actually a problem, then fix the core init infrastructure to handle
errors properly, eh?

-Dave.
Darrick J. Wong March 7, 2024, 11:45 p.m. UTC | #5
On Fri, Mar 08, 2024 at 10:38:06AM +1100, Dave Chinner wrote:
> On Mon, Mar 04, 2024 at 03:39:27PM -0800, Eric Biggers wrote:
> > On Mon, Mar 04, 2024 at 08:10:33PM +0100, Andrey Albershteyn wrote:
> > > +#ifdef CONFIG_FS_VERITY
> > > +struct iomap_fsverity_bio {
> > > +	struct work_struct	work;
> > > +	struct bio		bio;
> > > +};
> > 
> > Maybe leave a comment above that mentions that bio must be the last field.
> > 
> > > @@ -471,6 +529,7 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
> > >   * iomap_readahead - Attempt to read pages from a file.
> > >   * @rac: Describes the pages to be read.
> > >   * @ops: The operations vector for the filesystem.
> > > + * @wq: Workqueue for post-I/O processing (only need for fsverity)
> > 
> > This should not be here.
> > 
> > > +#define IOMAP_POOL_SIZE		(4 * (PAGE_SIZE / SECTOR_SIZE))
> > > +
> > >  static int __init iomap_init(void)
> > >  {
> > > -	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
> > > -			   offsetof(struct iomap_ioend, io_inline_bio),
> > > -			   BIOSET_NEED_BVECS);
> > > +	int error;
> > > +
> > > +	error = bioset_init(&iomap_ioend_bioset, IOMAP_POOL_SIZE,
> > > +			    offsetof(struct iomap_ioend, io_inline_bio),
> > > +			    BIOSET_NEED_BVECS);
> > > +#ifdef CONFIG_FS_VERITY
> > > +	if (error)
> > > +		return error;
> > > +
> > > +	error = bioset_init(&iomap_fsverity_bioset, IOMAP_POOL_SIZE,
> > > +			    offsetof(struct iomap_fsverity_bio, bio),
> > > +			    BIOSET_NEED_BVECS);
> > > +	if (error)
> > > +		bioset_exit(&iomap_ioend_bioset);
> > > +#endif
> > > +	return error;
> > >  }
> > >  fs_initcall(iomap_init);
> > 
> > This makes all kernels with CONFIG_FS_VERITY enabled start preallocating memory
> > for these bios, regardless of whether they end up being used or not.  When
> > PAGE_SIZE==4096 it comes out to about 134 KiB of memory total (32 bios at just
> > over 4 KiB per bio, most of which is used for the BIO_MAX_VECS bvecs), and it
> > scales up with PAGE_SIZE such that with PAGE_SIZE==65536 it's about 2144 KiB.
> 
> Honestly: I don't think we care about this.
> 
> Indeed, if a system is configured with iomap and does not use XFS,
> GFS2 or zonefs, it's not going to be using the iomap_ioend_bioset at
> all, either. So by you definition that's just wasted memory, too, on
> systems that don't use any of these three filesystems. But we
> aren't going to make that one conditional, because the complexity
> and overhead of checks that never trigger after the first IO doesn't
> actually provide any return for the cost of ongoing maintenance.

I've occasionally wondered if I shouldn't try harder to make iomap a
module so that nobody pays the cost of having it until they load an fs
driver that uses it.

> Similarly, once XFS has fsverity enabled, it's going to get used all
> over the place in the container and VM world. So we are *always*
> going to want this bioset to be initialised on these production
> systems, so it falls into the same category as the
> iomap_ioend_bioset. That is, if you don't want that overhead, turn
> the functionality off via CONFIG file options.

<nod> If someone really cares I guess we could employ that weird
cmpxchg trick that the dio workqueue setup function uses.  But... let's
let someone complain first, eh? ;)
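
(For the archives, a rough sketch of that cmpxchg trick, modelled
loosely on sb_init_dio_done_wq(); the iomap_fsverity_wq name is made up
here, and the same publish-once idea could be applied to the bioset:)

static struct workqueue_struct *iomap_fsverity_wq;	/* hypothetical */

static int iomap_fsverity_init_wq(void)
{
	struct workqueue_struct *old, *wq;

	if (READ_ONCE(iomap_fsverity_wq))
		return 0;

	wq = alloc_workqueue("iomap-fsverity", WQ_UNBOUND, 0);
	if (!wq)
		return -ENOMEM;

	/* racing callers may both allocate; only one pointer is published */
	old = cmpxchg(&iomap_fsverity_wq, NULL, wq);
	if (old)
		destroy_workqueue(wq);
	return 0;
}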

> > How about allocating the pool when it's known it's actually going to be used,
> > similar to what fs/crypto/ does for fscrypt_bounce_page_pool?  For example,
> > there could be a flag in struct fsverity_operations that says whether filesystem
> > wants the iomap fsverity bioset, and when fs/verity/ sets up the fsverity_info
> > for any file for the first time since boot, it could call into fs/iomap/ to
> > initialize the iomap fsverity bioset if needed.
> > 
> > BTW, errors from builtin initcalls such as iomap_init() get ignored.  So the
> > error handling logic above does not really work as may have been intended.
> 
> That's not an iomap problem - lots of fs_initcall() functions return
> errors because they failed things like memory allocation. If this is
> actually problem, then fix the core init infrastructure to handle
> errors properly, eh?

Soooo ... the kernel crashes if iomap cannot allocate its bioset?

I guess that's handling it, FSVO handle. :P

(Is that why I can't boot with mem=60M anymore?)

--D

> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
>
Eric Biggers March 7, 2024, 11:59 p.m. UTC | #6
On Fri, Mar 08, 2024 at 10:38:06AM +1100, Dave Chinner wrote:
> > This makes all kernels with CONFIG_FS_VERITY enabled start preallocating memory
> > for these bios, regardless of whether they end up being used or not.  When
> > PAGE_SIZE==4096 it comes out to about 134 KiB of memory total (32 bios at just
> > over 4 KiB per bio, most of which is used for the BIO_MAX_VECS bvecs), and it
> > scales up with PAGE_SIZE such that with PAGE_SIZE==65536 it's about 2144 KiB.
> 
> Honestly: I don't think we care about this.
> 
> Indeed, if a system is configured with iomap and does not use XFS,
> GFS2 or zonefs, it's not going to be using the iomap_ioend_bioset at
> all, either. So by you definition that's just wasted memory, too, on
> systems that don't use any of these three filesystems. But we
> aren't going to make that one conditional, because the complexity
> and overhead of checks that never trigger after the first IO doesn't
> actually provide any return for the cost of ongoing maintenance.
> 
> Similarly, once XFS has fsverity enabled, it's going to get used all
> over the place in the container and VM world. So we are *always*
> going to want this bioset to be initialised on these production
> systems, so it falls into the same category as the
> iomap_ioend_bioset. That is, if you don't want that overhead, turn
> the functionality off via CONFIG file options.

"We're already wasting memory, therefore it's fine to waste more" isn't a great
argument.

iomap_ioend_bioset is indeed also problematic, though it's also a bit different
in that it's needed for basic filesystem functionality, not an optional feature.
Yes, ext4 and f2fs don't actually use iomap for buffered reads, but IIUC at
least there's a plan to do that.  Meanwhile there's no plan for fsverity to be
used on every system that may be using a filesystem that supports it.

We should take care not to over-estimate how many users use optional features.
Linux distros turn on most kconfig options just in case, but often the common
case is that the option is on but the feature is not used.

> > How about allocating the pool when it's known it's actually going to be used,
> > similar to what fs/crypto/ does for fscrypt_bounce_page_pool?  For example,
> > there could be a flag in struct fsverity_operations that says whether filesystem
> > wants the iomap fsverity bioset, and when fs/verity/ sets up the fsverity_info
> > for any file for the first time since boot, it could call into fs/iomap/ to
> > initialize the iomap fsverity bioset if needed.
> > 
> > BTW, errors from builtin initcalls such as iomap_init() get ignored.  So the
> > error handling logic above does not really work as may have been intended.
> 
> That's not an iomap problem - lots of fs_initcall() functions return
> errors because they failed things like memory allocation. If this is
> actually problem, then fix the core init infrastructure to handle
> errors properly, eh?

What does "properly" mean?  Even if init/main.c did something with the returned
error code, individual subsystems still need to know what behavior they want and
act accordingly.  Do they want to panic the kernel, or do they want to fall back
gracefully with degraded functionality?  If subsystems go with the panic (like
fsverity_init() does these days), then there is no need for doing any cleanup on
errors like this patchset does.

- Eric
Dave Chinner March 8, 2024, 12:47 a.m. UTC | #7
On Thu, Mar 07, 2024 at 03:45:22PM -0800, Darrick J. Wong wrote:
> On Fri, Mar 08, 2024 at 10:38:06AM +1100, Dave Chinner wrote:
> > On Mon, Mar 04, 2024 at 03:39:27PM -0800, Eric Biggers wrote:
> > > On Mon, Mar 04, 2024 at 08:10:33PM +0100, Andrey Albershteyn wrote:
> > > > +#ifdef CONFIG_FS_VERITY
> > > > +struct iomap_fsverity_bio {
> > > > +	struct work_struct	work;
> > > > +	struct bio		bio;
> > > > +};
> > > 
> > > Maybe leave a comment above that mentions that bio must be the last field.
> > > 
> > > > @@ -471,6 +529,7 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
> > > >   * iomap_readahead - Attempt to read pages from a file.
> > > >   * @rac: Describes the pages to be read.
> > > >   * @ops: The operations vector for the filesystem.
> > > > + * @wq: Workqueue for post-I/O processing (only need for fsverity)
> > > 
> > > This should not be here.
> > > 
> > > > +#define IOMAP_POOL_SIZE		(4 * (PAGE_SIZE / SECTOR_SIZE))
> > > > +
> > > >  static int __init iomap_init(void)
> > > >  {
> > > > -	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
> > > > -			   offsetof(struct iomap_ioend, io_inline_bio),
> > > > -			   BIOSET_NEED_BVECS);
> > > > +	int error;
> > > > +
> > > > +	error = bioset_init(&iomap_ioend_bioset, IOMAP_POOL_SIZE,
> > > > +			    offsetof(struct iomap_ioend, io_inline_bio),
> > > > +			    BIOSET_NEED_BVECS);
> > > > +#ifdef CONFIG_FS_VERITY
> > > > +	if (error)
> > > > +		return error;
> > > > +
> > > > +	error = bioset_init(&iomap_fsverity_bioset, IOMAP_POOL_SIZE,
> > > > +			    offsetof(struct iomap_fsverity_bio, bio),
> > > > +			    BIOSET_NEED_BVECS);
> > > > +	if (error)
> > > > +		bioset_exit(&iomap_ioend_bioset);
> > > > +#endif
> > > > +	return error;
> > > >  }
> > > >  fs_initcall(iomap_init);
> > > 
> > > This makes all kernels with CONFIG_FS_VERITY enabled start preallocating memory
> > > for these bios, regardless of whether they end up being used or not.  When
> > > PAGE_SIZE==4096 it comes out to about 134 KiB of memory total (32 bios at just
> > > over 4 KiB per bio, most of which is used for the BIO_MAX_VECS bvecs), and it
> > > scales up with PAGE_SIZE such that with PAGE_SIZE==65536 it's about 2144 KiB.
> > 
> > Honestly: I don't think we care about this.
> > 
> > Indeed, if a system is configured with iomap and does not use XFS,
> > GFS2 or zonefs, it's not going to be using the iomap_ioend_bioset at
> > all, either. So by you definition that's just wasted memory, too, on
> > systems that don't use any of these three filesystems. But we
> > aren't going to make that one conditional, because the complexity
> > and overhead of checks that never trigger after the first IO doesn't
> > actually provide any return for the cost of ongoing maintenance.
> 
> I've occasionally wondered if I shouldn't try harder to make iomap a
> module so that nobody pays the cost of having it until they load an fs
> driver that uses it.

Not sure it is worth that effort. e.g. the block device code is
already using iomap (when bufferheads are turned off) so the future
reality is going to be that iomap will always be required when
CONFIG_BLOCK=y.

> > > How about allocating the pool when it's known it's actually going to be used,
> > > similar to what fs/crypto/ does for fscrypt_bounce_page_pool?  For example,
> > > there could be a flag in struct fsverity_operations that says whether filesystem
> > > wants the iomap fsverity bioset, and when fs/verity/ sets up the fsverity_info
> > > for any file for the first time since boot, it could call into fs/iomap/ to
> > > initialize the iomap fsverity bioset if needed.
> > > 
> > > BTW, errors from builtin initcalls such as iomap_init() get ignored.  So the
> > > error handling logic above does not really work as may have been intended.
> > 
> > That's not an iomap problem - lots of fs_initcall() functions return
> > errors because they failed things like memory allocation. If this is
> > actually problem, then fix the core init infrastructure to handle
> > errors properly, eh?
> 
> Soooo ... the kernel crashes if iomap cannot allocate its bioset?
>
> I guess that's handling it, FSVO handle. :P

Yup, the kernel will crash if any fs_initcall() fails to allocate
the memory it needs and that subsystem is then used. So the
workaround for that is that a bunch of these initcalls simply use
panic() calls for error handling. :(

> (Is that why I can't boot with mem=60M anymore?)

Something else is probably panic()ing on ENOMEM... :/

Cheers,

Dave.
Dave Chinner March 8, 2024, 1:20 a.m. UTC | #8
On Thu, Mar 07, 2024 at 03:59:07PM -0800, Eric Biggers wrote:
> On Fri, Mar 08, 2024 at 10:38:06AM +1100, Dave Chinner wrote:
> > > This makes all kernels with CONFIG_FS_VERITY enabled start preallocating memory
> > > for these bios, regardless of whether they end up being used or not.  When
> > > PAGE_SIZE==4096 it comes out to about 134 KiB of memory total (32 bios at just
> > > over 4 KiB per bio, most of which is used for the BIO_MAX_VECS bvecs), and it
> > > scales up with PAGE_SIZE such that with PAGE_SIZE==65536 it's about 2144 KiB.
> > 
> > Honestly: I don't think we care about this.
> > 
> > Indeed, if a system is configured with iomap and does not use XFS,
> > GFS2 or zonefs, it's not going to be using the iomap_ioend_bioset at
> > all, either. So by you definition that's just wasted memory, too, on
> > systems that don't use any of these three filesystems. But we
> > aren't going to make that one conditional, because the complexity
> > and overhead of checks that never trigger after the first IO doesn't
> > actually provide any return for the cost of ongoing maintenance.
> > 
> > Similarly, once XFS has fsverity enabled, it's going to get used all
> > over the place in the container and VM world. So we are *always*
> > going to want this bioset to be initialised on these production
> > systems, so it falls into the same category as the
> > iomap_ioend_bioset. That is, if you don't want that overhead, turn
> > the functionality off via CONFIG file options.
> 
> "We're already wasting memory, therefore it's fine to waste more" isn't a great
> argument.

Adding complexity just because -you- think the memory is wasted
isn't any better argument. I don't think the memory is wasted, and I
expect that fsverity will end up in -very- wide use across
enterprise production systems as container/vm image build
infrastructure moves towards composefs-like algorithms that have a
hard dependency on fsverity functionality being present in the host
filesystems.

> iomap_ioend_bioset is indeed also problematic, though it's also a bit different
> in that it's needed for basic filesystem functionality, not an optional feature.
> Yes, ext4 and f2fs don't actually use iomap for buffered reads, but IIUC at
> least there's a plan to do that.  Meanwhile there's no plan for fsverity to be
> used on every system that may be using a filesystem that supports it.

That's not the case I see coming for XFS - by the time we get this
merged and out of experimental support, the distro and application
support will already be there for endemic use of fsverity on XFS.
That's all being prototyped on ext4+fsverity right now, and you
probably don't see any of that happening.

> We should take care not to over-estimate how many users use optional features.
> Linux distros turn on most kconfig options just in case, but often the common
> case is that the option is on but the feature is not used.

I don't think that fsverity is going to be an optional feature in
future distros - we're already building core infrastructure based on
the data integrity guarantees that fsverity provides us with. IOWs,
I think fsverity support will soon become required core OS
functionality by more than one distro, and so we just don't care
about this overhead because the extra read IO bioset will always be
necessary....

> > > How about allocating the pool when it's known it's actually going to be used,
> > > similar to what fs/crypto/ does for fscrypt_bounce_page_pool?  For example,
> > > there could be a flag in struct fsverity_operations that says whether filesystem
> > > wants the iomap fsverity bioset, and when fs/verity/ sets up the fsverity_info
> > > for any file for the first time since boot, it could call into fs/iomap/ to
> > > initialize the iomap fsverity bioset if needed.
> > > 
> > > BTW, errors from builtin initcalls such as iomap_init() get ignored.  So the
> > > error handling logic above does not really work as may have been intended.
> > 
> > That's not an iomap problem - lots of fs_initcall() functions return
> > errors because they failed things like memory allocation. If this is
> > actually problem, then fix the core init infrastructure to handle
> > errors properly, eh?
> 
> What does "properly" mean?

Having documented requirements and behaviour and then enforcing
them. There is no documentation for initcalls - they return an int,
so by convention it is expected that errors should be returned to
the caller.

There's *nothing* that says initcalls should panic() instead of
returning errors if they have a fatal error. There's nothing that
says "errors are ignored" - having them be declared as void would be
a good hint that errors can't be returned or handled.

Expecting people to cargo-cult other implementations and magically
get it right is almost guaranteed to ensure that nobody actually
gets it right the first time...

> Even if init/main.c did something with the returned
> error code, individual subsystems still need to know what behavior they want and
> act accordingly.

"return an error only on fatal initialisation failure" and then the
core code can decide if the initcall level is high enough to warrant
panic()ing the machine.

> Do they want to panic the kernel, or do they want to fall back
> gracefully with degraded functionality.

If they can gracefully fall back to some other mechanism, then they
*haven't failed* and there's no need to return an error or panic.

> If subsystems go with the panic (like
> fsverity_init() does these days), then there is no need for doing any cleanup on
> errors like this patchset does.

s/panic/BUG()/ and read that back.

Because that's essentially what it is - error handling via BUG()
calls. We get told off for using BUG() instead of correct error
handling, yet here we are with code that does correct error handling
and we're being told to replace it with BUG() because the errors
aren't handled by the infrastructure calling our code. Gross, nasty
and really, really needs to be documented.

-Dave.
Eric Biggers March 8, 2024, 3:16 a.m. UTC | #9
On Fri, Mar 08, 2024 at 12:20:31PM +1100, Dave Chinner wrote:
> On Thu, Mar 07, 2024 at 03:59:07PM -0800, Eric Biggers wrote:
> > On Fri, Mar 08, 2024 at 10:38:06AM +1100, Dave Chinner wrote:
> > > > This makes all kernels with CONFIG_FS_VERITY enabled start preallocating memory
> > > > for these bios, regardless of whether they end up being used or not.  When
> > > > PAGE_SIZE==4096 it comes out to about 134 KiB of memory total (32 bios at just
> > > > over 4 KiB per bio, most of which is used for the BIO_MAX_VECS bvecs), and it
> > > > scales up with PAGE_SIZE such that with PAGE_SIZE==65536 it's about 2144 KiB.
> > > 
> > > Honestly: I don't think we care about this.
> > > 
> > > Indeed, if a system is configured with iomap and does not use XFS,
> > > GFS2 or zonefs, it's not going to be using the iomap_ioend_bioset at
> > > all, either. So by you definition that's just wasted memory, too, on
> > > systems that don't use any of these three filesystems. But we
> > > aren't going to make that one conditional, because the complexity
> > > and overhead of checks that never trigger after the first IO doesn't
> > > actually provide any return for the cost of ongoing maintenance.
> > > 
> > > Similarly, once XFS has fsverity enabled, it's going to get used all
> > > over the place in the container and VM world. So we are *always*
> > > going to want this bioset to be initialised on these production
> > > systems, so it falls into the same category as the
> > > iomap_ioend_bioset. That is, if you don't want that overhead, turn
> > > the functionality off via CONFIG file options.
> > 
> > "We're already wasting memory, therefore it's fine to waste more" isn't a great
> > argument.
> 
> Adding complexity just because -you- think the memory is wasted
> isn't any better argument. I don't think the memory is wasted, and I
> expect that fsverity will end up in -very- wide use across
> enterprise production systems as container/vm image build
> infrastructure moves towards composefs-like algorithms that have a
> hard dependency on fsverity functionality being present in the host
> filesytsems.
> 
> > iomap_ioend_bioset is indeed also problematic, though it's also a bit different
> > in that it's needed for basic filesystem functionality, not an optional feature.
> > Yes, ext4 and f2fs don't actually use iomap for buffered reads, but IIUC at
> > least there's a plan to do that.  Meanwhile there's no plan for fsverity to be
> > used on every system that may be using a filesystem that supports it.
> 
> That's not the case I see coming for XFS - by the time we get this
> merged and out of experimental support, the distro and application
> support will already be there for endemic use of fsverity on XFS.
> That's all being prototyped on ext4+fsverity right now, and you
> probably don't see any of that happening.
> 
> > We should take care not to over-estimate how many users use optional features.
> > Linux distros turn on most kconfig options just in case, but often the common
> > case is that the option is on but the feature is not used.
> 
> I don't think that fsverity is going to be an optional feature in
> future distros - we're already building core infrastructure based on
> the data integrity guarantees that fsverity provides us with. IOWs,
> I think fsverity support will soon become required core OS
> functionality by more than one distro, and so we just don't care
> about this overhead because the extra read IO bioset will always be
> necessary....

That's great that you're finding fsverity to be useful!

At the same time, please do keep in mind that there's a huge variety of Linux
systems besides the "enterprise" systems using XFS that you may have in mind.

The memory allocated for iomap_fsverity_bioset, as proposed, is indeed wasted on
any system that isn't using both XFS *and* fsverity (as long as
CONFIG_FS_VERITY=y, and either CONFIG_EXT4_FS, CONFIG_F2FS_FS, or
CONFIG_BTRFS_FS is enabled too).  The amount is also quadrupled on systems that
use a 16K page size, which will become increasingly common in the future.

It may be that part of your intention for pushing back against this optimization
is to encourage filesystems to convert their buffered reads to iomap.  I'm not
sure it will actually have that effect.  First, it's not clear that it will be
technically feasible for the filesystems that support compression, btrfs and
f2fs, to convert their buffered reads to iomap.  Second, this sort of thing
reinforces an impression that iomap is targeted at enterprise systems using XFS,
and that changes that would be helpful on other types of systems are rejected.

- Eric
Darrick J. Wong March 8, 2024, 3:22 a.m. UTC | #10
On Fri, Mar 08, 2024 at 12:20:31PM +1100, Dave Chinner wrote:
> On Thu, Mar 07, 2024 at 03:59:07PM -0800, Eric Biggers wrote:
> > On Fri, Mar 08, 2024 at 10:38:06AM +1100, Dave Chinner wrote:
> > > > This makes all kernels with CONFIG_FS_VERITY enabled start preallocating memory
> > > > for these bios, regardless of whether they end up being used or not.  When
> > > > PAGE_SIZE==4096 it comes out to about 134 KiB of memory total (32 bios at just
> > > > over 4 KiB per bio, most of which is used for the BIO_MAX_VECS bvecs), and it
> > > > scales up with PAGE_SIZE such that with PAGE_SIZE==65536 it's about 2144 KiB.
> > > 
> > > Honestly: I don't think we care about this.
> > > 
> > > Indeed, if a system is configured with iomap and does not use XFS,
> > > GFS2 or zonefs, it's not going to be using the iomap_ioend_bioset at
> > > all, either. So by you definition that's just wasted memory, too, on
> > > systems that don't use any of these three filesystems. But we
> > > aren't going to make that one conditional, because the complexity
> > > and overhead of checks that never trigger after the first IO doesn't
> > > actually provide any return for the cost of ongoing maintenance.
> > > 
> > > Similarly, once XFS has fsverity enabled, it's going to get used all
> > > over the place in the container and VM world. So we are *always*
> > > going to want this bioset to be initialised on these production
> > > systems, so it falls into the same category as the
> > > iomap_ioend_bioset. That is, if you don't want that overhead, turn
> > > the functionality off via CONFIG file options.
> > 
> > "We're already wasting memory, therefore it's fine to waste more" isn't a great
> > argument.
> 
> Adding complexity just because -you- think the memory is wasted
> isn't any better argument. I don't think the memory is wasted, and I
> expect that fsverity will end up in -very- wide use across
> enterprise production systems as container/vm image build
> infrastructure moves towards composefs-like algorithms that have a
> hard dependency on fsverity functionality being present in the host
> filesytsems.

FWIW I would like to evaluate verity for certain things, such as bitrot
detection on backup disks and the like.  My employers are probably much
more interested in the same sealed rootfs blahdyblah that everyone else
has spilt much ink over. :)

> > iomap_ioend_bioset is indeed also problematic, though it's also a bit different
> > in that it's needed for basic filesystem functionality, not an optional feature.
> > Yes, ext4 and f2fs don't actually use iomap for buffered reads, but IIUC at
> > least there's a plan to do that.  Meanwhile there's no plan for fsverity to be
> > used on every system that may be using a filesystem that supports it.
> 
> That's not the case I see coming for XFS - by the time we get this
> merged and out of experimental support, the distro and application
> support will already be there for endemic use of fsverity on XFS.
> That's all being prototyped on ext4+fsverity right now, and you
> probably don't see any of that happening.
> 
> > We should take care not to over-estimate how many users use optional features.
> > Linux distros turn on most kconfig options just in case, but often the common
> > case is that the option is on but the feature is not used.
> 
> I don't think that fsverity is going to be an optional feature in
> future distros - we're already building core infrastructure based on
> the data integrity guarantees that fsverity provides us with. IOWs,
> I think fsverity support will soon become required core OS
> functionality by more than one distro, and so we just don't care
> about this overhead because the extra read IO bioset will always be
> necessary....

Same here.

> > > > How about allocating the pool when it's known it's actually going to be used,
> > > > similar to what fs/crypto/ does for fscrypt_bounce_page_pool?  For example,
> > > > there could be a flag in struct fsverity_operations that says whether filesystem
> > > > wants the iomap fsverity bioset, and when fs/verity/ sets up the fsverity_info
> > > > for any file for the first time since boot, it could call into fs/iomap/ to
> > > > initialize the iomap fsverity bioset if needed.
> > > > 
> > > > BTW, errors from builtin initcalls such as iomap_init() get ignored.  So the
> > > > error handling logic above does not really work as may have been intended.
> > > 
> > > That's not an iomap problem - lots of fs_initcall() functions return
> > > errors because they failed things like memory allocation. If this is
> > > actually problem, then fix the core init infrastructure to handle
> > > errors properly, eh?
> > 
> > What does "properly" mean?
> 
> Having documented requirements and behaviour and then enforcing
> them. There is no documentation for initcalls - they return an int,
> so by convention it is expected that errors should be returned to
> the caller.

Agree.  I hated that weird thing that sync_filesystem did where it
mostly ignored the return values.

> There's *nothing* that says initcalls should panic() instead of
> returning errors if they have a fatal error. There's nothing that
> says "errors are ignored" - having them be declared as void would be
> a good hint that errors can't be returned or handled.
> 
> Expecting people to cargo-cult other implementations and magically
> get it right is almost guaranteed to ensure that nobody actually
> gets it right the first time...

Hrmm.  Should we have an iomap_init_succeeded() function that init_xfs_fs
and the like can call to find out if the initcall failed and refuse to
load?

Or just make it a module and then the insmod(iomap) can fail.
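
For the first option, the shape might be something like this (entirely
hypothetical, just to illustrate):

/* in fs/iomap/buffered-io.c; iomap_initialized would be set true at the
 * end of a successful iomap_init() */
bool iomap_init_succeeded(void)
{
	return iomap_initialized;
}

/* hypothetical caller, e.g. in init_xfs_fs() */
	if (!iomap_init_succeeded())
		return -ENOMEM;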

> > Even if init/main.c did something with the returned
> > error code, individual subsystems still need to know what behavior they want and
> > act accordingly.
> 
> "return an error only on fatal initialisation failure" and then the
> core code can decide if the initcall level is high enough to warrant
> panic()ing the machine.
> 
> > Do they want to panic the kernel, or do they want to fall back
> > gracefully with degraded functionality.
> 
> If they can gracefully fall back to some other mechanism, then they
> *haven't failed* and there's no need to return an error or panic.

I'm not sure if insmod(xfs) bailing out is all that graceful, but I
suppose it's less rude than a kernel panic :P

> > If subsystems go with the panic (like
> > fsverity_init() does these days), then there is no need for doing any cleanup on
> > errors like this patchset does.
> 
> s/panic/BUG()/ and read that back.
> 
> Because that's essentially what it is - error handling via BUG()
> calls. We get told off for using BUG() instead of correct error
> handling, yet here we are with code that does correct error handling
> and we're being told to replace it with BUG() because the errors
> aren't handled by the infrastructure calling our code. Gross, nasty
> and really, really needs to be documented.

Log an error and invoke the autorepair to see if you can really let the
smoke out? ;)

--D

> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
>
Darrick J. Wong March 8, 2024, 3:57 a.m. UTC | #11
On Thu, Mar 07, 2024 at 07:16:29PM -0800, Eric Biggers wrote:
> On Fri, Mar 08, 2024 at 12:20:31PM +1100, Dave Chinner wrote:
> > On Thu, Mar 07, 2024 at 03:59:07PM -0800, Eric Biggers wrote:
> > > On Fri, Mar 08, 2024 at 10:38:06AM +1100, Dave Chinner wrote:
> > > > > This makes all kernels with CONFIG_FS_VERITY enabled start preallocating memory
> > > > > for these bios, regardless of whether they end up being used or not.  When
> > > > > PAGE_SIZE==4096 it comes out to about 134 KiB of memory total (32 bios at just
> > > > > over 4 KiB per bio, most of which is used for the BIO_MAX_VECS bvecs), and it
> > > > > scales up with PAGE_SIZE such that with PAGE_SIZE==65536 it's about 2144 KiB.
> > > > 
> > > > Honestly: I don't think we care about this.
> > > > 
> > > > Indeed, if a system is configured with iomap and does not use XFS,
> > > > GFS2 or zonefs, it's not going to be using the iomap_ioend_bioset at
> > > > all, either. So by you definition that's just wasted memory, too, on
> > > > systems that don't use any of these three filesystems. But we
> > > > aren't going to make that one conditional, because the complexity
> > > > and overhead of checks that never trigger after the first IO doesn't
> > > > actually provide any return for the cost of ongoing maintenance.
> > > > 
> > > > Similarly, once XFS has fsverity enabled, it's going to get used all
> > > > over the place in the container and VM world. So we are *always*
> > > > going to want this bioset to be initialised on these production
> > > > systems, so it falls into the same category as the
> > > > iomap_ioend_bioset. That is, if you don't want that overhead, turn
> > > > the functionality off via CONFIG file options.
> > > 
> > > "We're already wasting memory, therefore it's fine to waste more" isn't a great
> > > argument.
> > 
> > Adding complexity just because -you- think the memory is wasted
> > isn't any better argument. I don't think the memory is wasted, and I
> > expect that fsverity will end up in -very- wide use across
> > enterprise production systems as container/vm image build
> > infrastructure moves towards composefs-like algorithms that have a
> > hard dependency on fsverity functionality being present in the host
> > filesytsems.
> > 
> > > iomap_ioend_bioset is indeed also problematic, though it's also a bit different
> > > in that it's needed for basic filesystem functionality, not an optional feature.
> > > Yes, ext4 and f2fs don't actually use iomap for buffered reads, but IIUC at
> > > least there's a plan to do that.  Meanwhile there's no plan for fsverity to be
> > > used on every system that may be using a filesystem that supports it.
> > 
> > That's not the case I see coming for XFS - by the time we get this
> > merged and out of experimental support, the distro and application
> > support will already be there for endemic use of fsverity on XFS.
> > That's all being prototyped on ext4+fsverity right now, and you
> > probably don't see any of that happening.
> > 
> > > We should take care not to over-estimate how many users use optional features.
> > > Linux distros turn on most kconfig options just in case, but often the common
> > > case is that the option is on but the feature is not used.
> > 
> > I don't think that fsverity is going to be an optional feature in
> > future distros - we're already building core infrastructure based on
> > the data integrity guarantees that fsverity provides us with. IOWs,
> > I think fsverity support will soon become required core OS
> > functionality by more than one distro, and so we just don't care
> > about this overhead because the extra read IO bioset will always be
> > necessary....
> 
> That's great that you're finding fsverity to be useful!
> 
> At the same time, please do keep in mind that there's a huge variety of Linux
> systems besides the "enterprise" systems using XFS that you may have in mind.
> 
> The memory allocated for iomap_fsverity_bioset, as proposed, is indeed wasted on
> any system that isn't using both XFS *and* fsverity (as long as
> CONFIG_FS_VERITY=y, and either CONFIG_EXT4_FS, CONFIG_F2FS_FS, or
> CONFIG_BTRFS_FS is enabled too).  The amount is also quadrupled on systems that
> use a 16K page size, which will become increasingly common in the future.
> 
> It may be that part of your intention for pushing back against this optimization
> is to encourage filesystems to convert their buffered reads to iomap.  I'm not
> sure it will actually have that effect.  First, it's not clear that it will be
> technically feasible for the filesystems that support compression, btrfs and
> f2fs, to convert their buffered reads to iomap.

I do hope more of that happens some day, even if xfs is too obsolete to
gain those features itself.

>                                                  Second, this sort of thing
> reinforces an impression that iomap is targeted at enterprise systems using XFS,
> and that changes that would be helpful on other types of systems are rejected.

I wouldn't be opposed to iomap_prepare_{filemap,verity}() functions that
client modules could call from their init methods to ensure that
static(ish) resources like the biosets have been activated.  You'd still
waste space in the bss section, but that would be a bit more svelte.
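
A minimal sketch of what the verity side of that could look like (the
iomap_prepare_verity() name comes from the suggestion above; the
once-guarded bioset_init() is only an assumed implementation):

/*
 * Hypothetical opt-in helper: a filesystem that wants fsverity-capable
 * iomap reads would call this from its module init (or first mount).
 * Safe to call more than once; only the first call allocates the pool.
 */
int iomap_prepare_verity(void)
{
	static DEFINE_MUTEX(lock);
	static bool done;
	int error = 0;

	mutex_lock(&lock);
	if (!done) {
		error = bioset_init(&iomap_fsverity_bioset,
				    4 * (PAGE_SIZE / SECTOR_SIZE),
				    offsetof(struct iomap_fsverity_bio, bio),
				    BIOSET_NEED_BVECS);
		if (!error)
			done = true;
	}
	mutex_unlock(&lock);
	return error;
}
EXPORT_SYMBOL_GPL(iomap_prepare_verity);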

(Says me, who <cough> might some day end up with 64k page kernels
again.)

--D

> - Eric
>

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 093c4515b22a..e27a8af4b351 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -6,6 +6,7 @@ 
 #include <linux/module.h>
 #include <linux/compiler.h>
 #include <linux/fs.h>
+#include <linux/fsverity.h>
 #include <linux/iomap.h>
 #include <linux/pagemap.h>
 #include <linux/uio.h>
@@ -330,6 +331,56 @@  static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter,
 		pos >= i_size_read(iter->inode);
 }
 
+#ifdef CONFIG_FS_VERITY
+struct iomap_fsverity_bio {
+	struct work_struct	work;
+	struct bio		bio;
+};
+static struct bio_set iomap_fsverity_bioset;
+
+static void
+iomap_read_fsverify_end_io_work(struct work_struct *work)
+{
+	struct iomap_fsverity_bio *fbio =
+		container_of(work, struct iomap_fsverity_bio, work);
+
+	fsverity_verify_bio(&fbio->bio);
+	iomap_read_end_io(&fbio->bio);
+}
+
+static void
+iomap_read_fsverity_end_io(struct bio *bio)
+{
+	struct iomap_fsverity_bio *fbio =
+		container_of(bio, struct iomap_fsverity_bio, bio);
+
+	INIT_WORK(&fbio->work, iomap_read_fsverify_end_io_work);
+	queue_work(bio->bi_private, &fbio->work);
+}
+#endif /* CONFIG_FS_VERITY */
+
+static struct bio *iomap_read_bio_alloc(struct inode *inode,
+		struct block_device *bdev, int nr_vecs, gfp_t gfp)
+{
+	struct bio *bio;
+
+#ifdef CONFIG_FS_VERITY
+	if (fsverity_active(inode)) {
+		bio = bio_alloc_bioset(bdev, nr_vecs, REQ_OP_READ, gfp,
+					&iomap_fsverity_bioset);
+		if (bio) {
+			bio->bi_private = inode->i_sb->s_read_done_wq;
+			bio->bi_end_io = iomap_read_fsverity_end_io;
+		}
+		return bio;
+	}
+#endif
+	bio = bio_alloc(bdev, nr_vecs, REQ_OP_READ, gfp);
+	if (bio)
+		bio->bi_end_io = iomap_read_end_io;
+	return bio;
+}
+
 static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
 		struct iomap_readpage_ctx *ctx, loff_t offset)
 {
@@ -353,6 +404,12 @@  static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
 
 	if (iomap_block_needs_zeroing(iter, pos)) {
 		folio_zero_range(folio, poff, plen);
+		if (fsverity_active(iter->inode) &&
+		    !fsverity_verify_blocks(folio, plen, poff)) {
+			folio_set_error(folio);
+			goto done;
+		}
+
 		iomap_set_range_uptodate(folio, poff, plen);
 		goto done;
 	}
@@ -370,28 +427,29 @@  static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
 	    !bio_add_folio(ctx->bio, folio, plen, poff)) {
 		gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);
 		gfp_t orig_gfp = gfp;
-		unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);
 
 		if (ctx->bio)
 			submit_bio(ctx->bio);
 
 		if (ctx->rac) /* same as readahead_gfp_mask */
 			gfp |= __GFP_NORETRY | __GFP_NOWARN;
-		ctx->bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs),
-				     REQ_OP_READ, gfp);
+
+		ctx->bio = iomap_read_bio_alloc(iter->inode, iomap->bdev,
+				bio_max_segs(DIV_ROUND_UP(length, PAGE_SIZE)),
+				gfp);
+
 		/*
 		 * If the bio_alloc fails, try it again for a single page to
 		 * avoid having to deal with partial page reads.  This emulates
 		 * what do_mpage_read_folio does.
 		 */
 		if (!ctx->bio) {
-			ctx->bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ,
-					     orig_gfp);
+			ctx->bio = iomap_read_bio_alloc(iter->inode,
+					iomap->bdev, 1, orig_gfp);
 		}
 		if (ctx->rac)
 			ctx->bio->bi_opf |= REQ_RAHEAD;
 		ctx->bio->bi_iter.bi_sector = sector;
-		ctx->bio->bi_end_io = iomap_read_end_io;
 		bio_add_folio_nofail(ctx->bio, folio, plen, poff);
 	}
 
@@ -471,6 +529,7 @@  static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
  * iomap_readahead - Attempt to read pages from a file.
  * @rac: Describes the pages to be read.
  * @ops: The operations vector for the filesystem.
+ * @wq: Workqueue for post-I/O processing (only need for fsverity)
  *
  * This function is for filesystems to call to implement their readahead
  * address_space operation.
@@ -1996,10 +2055,25 @@  iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
 }
 EXPORT_SYMBOL_GPL(iomap_writepages);
 
+#define IOMAP_POOL_SIZE		(4 * (PAGE_SIZE / SECTOR_SIZE))
+
 static int __init iomap_init(void)
 {
-	return bioset_init(&iomap_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
-			   offsetof(struct iomap_ioend, io_inline_bio),
-			   BIOSET_NEED_BVECS);
+	int error;
+
+	error = bioset_init(&iomap_ioend_bioset, IOMAP_POOL_SIZE,
+			    offsetof(struct iomap_ioend, io_inline_bio),
+			    BIOSET_NEED_BVECS);
+#ifdef CONFIG_FS_VERITY
+	if (error)
+		return error;
+
+	error = bioset_init(&iomap_fsverity_bioset, IOMAP_POOL_SIZE,
+			    offsetof(struct iomap_fsverity_bio, bio),
+			    BIOSET_NEED_BVECS);
+	if (error)
+		bioset_exit(&iomap_ioend_bioset);
+#endif
+	return error;
 }
 fs_initcall(iomap_init);