diff mbox series

[10/13] fsverity: pass the zero-hash value to the implementation

Message ID 171175868031.1987804.13138670908694064691.stgit@frogsfrogsfrogs (mailing list archive)
State New, archived
Headers show
Series [01/13] fs: add FS_XFLAG_VERITY for verity files | expand

Commit Message

Darrick J. Wong March 30, 2024, 12:35 a.m. UTC
From: Darrick J. Wong <djwong@kernel.org>

Compute the hash of a data block full of zeros, and then supply this to
the merkle tree read and write methods.  A subsequent xfs patch will use
this to reduce the size of the merkle tree when dealing with sparse gold
master disk images and the like.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/verity/enable.c           |    2 ++
 fs/verity/fsverity_private.h |    2 ++
 fs/verity/open.c             |    7 +++++++
 fs/verity/verify.c           |    2 ++
 include/linux/fsverity.h     |    8 ++++++++
 5 files changed, 21 insertions(+)

Comments

Eric Biggers April 5, 2024, 2:57 a.m. UTC | #1
On Fri, Mar 29, 2024 at 05:35:17PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> Compute the hash of a data block full of zeros, and then supply this to
> the merkle tree read and write methods.  A subsequent xfs patch will use

This should say "hash of a block", not "hash of a data block".  What you
actually care about is the hash of a Merkle tree block, not the hash of a data
block.  Yet, there is no difference in how the hashes are calculated for the two
types of blocks, so we should simply write "hash of a block".

> diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
> index de8798f141d4a..195a92f203bba 100644
> --- a/fs/verity/fsverity_private.h
> +++ b/fs/verity/fsverity_private.h
> @@ -47,6 +47,8 @@ struct merkle_tree_params {
>  	u64 tree_size;			/* Merkle tree size in bytes */
>  	unsigned long tree_pages;	/* Merkle tree size in pages */
>  
> +	u8 zero_digest[FS_VERITY_MAX_DIGEST_SIZE]; /* hash of zeroed data block */

Similarly, "block" instead of "data block".

> diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
> index 5dacd30d65353..761a0b76eefec 100644
> --- a/include/linux/fsverity.h
> +++ b/include/linux/fsverity.h
> @@ -66,6 +66,8 @@ struct fsverity_blockbuf {
>   *		if the page at @block->offset isn't already cached.
>   *		Implementations may ignore this argument; it's only a
>   *		performance optimization.
> + * @zero_digest: the hash for a data block of zeroes

Likewise.

>  /**
> @@ -81,12 +85,16 @@ struct fsverity_readmerkle {
>   * @level: level of the block; level 0 are the leaves
>   * @num_levels: number of levels in the tree total
>   * @log_blocksize: log2 of the size of the block
> + * @zero_digest: the hash for a data block of zeroes
> + * @digest_size: size of zero_digest

Likewise.

- Eric
Darrick J. Wong April 24, 2024, 7:02 p.m. UTC | #2
On Thu, Apr 04, 2024 at 10:57:50PM -0400, Eric Biggers wrote:
> On Fri, Mar 29, 2024 at 05:35:17PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Compute the hash of a data block full of zeros, and then supply this to
> > the merkle tree read and write methods.  A subsequent xfs patch will use
> 
> This should say "hash of a block", not "hash of a data block".  What you
> actually care about is the hash of a Merkle tree block, not the hash of a data
> block.  Yet, there is no difference in how the hashes are calculated for the two
> types of blocks, so we should simply write "hash of a block".

I think I could go further with the precision of the description --

"Compute the hash of one filesystem block's worth of zeroes.  Any merkle
tree block containing only this hash can be elided at write time, and
its contents synthesized at read time."

I don't think this is going to happen very often above the leaf levels
of the merkle tree, but as written there's nothing to prevent the
elision of internal nodes.  Also note that the elision can happen for
internal nodes even when merkle tree blocksize != i_blocksize.

> > diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
> > index de8798f141d4a..195a92f203bba 100644
> > --- a/fs/verity/fsverity_private.h
> > +++ b/fs/verity/fsverity_private.h
> > @@ -47,6 +47,8 @@ struct merkle_tree_params {
> >  	u64 tree_size;			/* Merkle tree size in bytes */
> >  	unsigned long tree_pages;	/* Merkle tree size in pages */
> >  
> > +	u8 zero_digest[FS_VERITY_MAX_DIGEST_SIZE]; /* hash of zeroed data block */
> 
> Similarly, "block" instead of "data block".

How about "the hash of an i_blocksize-sized buffer of zeroes" for all
three?

--D

> > diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
> > index 5dacd30d65353..761a0b76eefec 100644
> > --- a/include/linux/fsverity.h
> > +++ b/include/linux/fsverity.h
> > @@ -66,6 +66,8 @@ struct fsverity_blockbuf {
> >   *		if the page at @block->offset isn't already cached.
> >   *		Implementations may ignore this argument; it's only a
> >   *		performance optimization.
> > + * @zero_digest: the hash for a data block of zeroes
> 
> Likewise.
> 
> >  /**
> > @@ -81,12 +85,16 @@ struct fsverity_readmerkle {
> >   * @level: level of the block; level 0 are the leaves
> >   * @num_levels: number of levels in the tree total
> >   * @log_blocksize: log2 of the size of the block
> > + * @zero_digest: the hash for a data block of zeroes
> > + * @digest_size: size of zero_digest
> 
> Likewise.
> 
> - Eric
>
Eric Biggers April 24, 2024, 7:19 p.m. UTC | #3
On Wed, Apr 24, 2024 at 12:02:46PM -0700, Darrick J. Wong wrote:
> On Thu, Apr 04, 2024 at 10:57:50PM -0400, Eric Biggers wrote:
> > On Fri, Mar 29, 2024 at 05:35:17PM -0700, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <djwong@kernel.org>
> > > 
> > > Compute the hash of a data block full of zeros, and then supply this to
> > > the merkle tree read and write methods.  A subsequent xfs patch will use
> > 
> > This should say "hash of a block", not "hash of a data block".  What you
> > actually care about is the hash of a Merkle tree block, not the hash of a data
> > block.  Yet, there is no difference in how the hashes are calculated for the two
> > types of blocks, so we should simply write "hash of a block".
> 
> I think I could go further with the precision of the description --
> 
> "Compute the hash of one filesystem block's worth of zeroes.  Any merkle
> tree block containing only this hash can be elided at write time, and
> its contents synthesized at read time."
> 
> I don't think this is going to happen very often above the leaf levels
> of the merkle tree, but as written there's nothing to prevent the
> elision of internal nodes.  Also note that the elision can happen for
> internal nodes even when merkle tree blocksize != i_blocksize.
> 
> > > diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
> > > index de8798f141d4a..195a92f203bba 100644
> > > --- a/fs/verity/fsverity_private.h
> > > +++ b/fs/verity/fsverity_private.h
> > > @@ -47,6 +47,8 @@ struct merkle_tree_params {
> > >  	u64 tree_size;			/* Merkle tree size in bytes */
> > >  	unsigned long tree_pages;	/* Merkle tree size in pages */
> > >  
> > > +	u8 zero_digest[FS_VERITY_MAX_DIGEST_SIZE]; /* hash of zeroed data block */
> > 
> > Similarly, "block" instead of "data block".
> 
> How about "the hash of an i_blocksize-sized buffer of zeroes" for all
> three?

It's the Merkle tree block size, not the filesystem block size.  Or did you
actually intend for this to use the filesystem block size?

In struct merkle_tree_params, the "block size" is always the Merkle tree block
size, so the type of block size seems clear in that context.  My complaint was
just that it used the term "data block" to mean a block that is not necessarily
a file contents block (which is what "data block" means elsewhere).

- Eric
Darrick J. Wong April 24, 2024, 8:23 p.m. UTC | #4
On Wed, Apr 24, 2024 at 07:19:50PM +0000, Eric Biggers wrote:
> On Wed, Apr 24, 2024 at 12:02:46PM -0700, Darrick J. Wong wrote:
> > On Thu, Apr 04, 2024 at 10:57:50PM -0400, Eric Biggers wrote:
> > > On Fri, Mar 29, 2024 at 05:35:17PM -0700, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <djwong@kernel.org>
> > > > 
> > > > Compute the hash of a data block full of zeros, and then supply this to
> > > > the merkle tree read and write methods.  A subsequent xfs patch will use
> > > 
> > > This should say "hash of a block", not "hash of a data block".  What you
> > > actually care about is the hash of a Merkle tree block, not the hash of a data
> > > block.  Yet, there is no difference in how the hashes are calculated for the two
> > > types of blocks, so we should simply write "hash of a block".
> > 
> > I think I could go further with the precision of the description --
> > 
> > "Compute the hash of one filesystem block's worth of zeroes.  Any merkle
> > tree block containing only this hash can be elided at write time, and
> > its contents synthesized at read time."
> > 
> > I don't think this is going to happen very often above the leaf levels
> > of the merkle tree, but as written there's nothing to prevent the
> > elision of internal nodes.  Also note that the elision can happen for
> > internal nodes even when merkle tree blocksize != i_blocksize.
> > 
> > > > diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
> > > > index de8798f141d4a..195a92f203bba 100644
> > > > --- a/fs/verity/fsverity_private.h
> > > > +++ b/fs/verity/fsverity_private.h
> > > > @@ -47,6 +47,8 @@ struct merkle_tree_params {
> > > >  	u64 tree_size;			/* Merkle tree size in bytes */
> > > >  	unsigned long tree_pages;	/* Merkle tree size in pages */
> > > >  
> > > > +	u8 zero_digest[FS_VERITY_MAX_DIGEST_SIZE]; /* hash of zeroed data block */
> > > 
> > > Similarly, "block" instead of "data block".
> > 
> > How about "the hash of an i_blocksize-sized buffer of zeroes" for all
> > three?
> 
> It's the Merkle tree block size, not the filesystem block size.  Or did you
> actually intend for this to use the filesystem block size?

I actually did intend for this to be the fs block size, not the merkle
tree block size.  It's the bottom level that I care about shrinking.
Let's say that data[0-B] are the data blocks:

root
 +-internal0
 |   +-leaf0
 |   |   +-data0
 |   |   +-data1
 |   |   `-data2
 |   `-leaf1
 |       +-data3
 |       +-data4
 |       `-data5
 `-internal1
     +-leaf2
     |   +-data6
     |   +-data7
     |   `-data8
     `-leaf3
         +-data9
         +-dataA
         `-dataB

(thanks to https://arthursonzogni.com/Diagon/#Tree )

If data[3-5] are completely zeroes (unwritten blocks, sparse holes,
etc.) then I want to skip writing leaf1 of the merkle tree to disk.

If it happens that the hashes of leaf[0-1] match hash(data3) then it's
frosting on top (as it were) that we can also skip internal0.  However,
the merkle tree has a high fanout factor (4096/32==128 in the common
case), so I care /much/ less about eliding those levels.

> In struct merkle_tree_params, the "block size" is always the Merkle tree block
> size, so the type of block size seems clear in that context.  My complaint was
> just that it used the term "data block" to mean a block that is not necessarily
> a file contents block (which is what "data block" means elsewhere).

Hm.  Given the confusion, would it help if I said that zero_digest
should only be used to elide leaf nodes of the merkle tree that hash the
contents of file content blocks?  Or is "the hash of an
i_blocksize-sized buffer of zeroes" sufficient?

What do you think of the commit message saying:

"Compute the hash of one filesystem block's worth of zeroes.  Any merkle
tree leaf block containing only this hash can be elided at write time,
and its contents synthesized at read time.

"Let's pretend that there's a file containing six data blocks and whose
merkle tree looks roughly like this:

root
 +--leaf0
 |   +--data0
 |   +--data1
 |   `--data2
 `--leaf1
     +--data3
     +--data4
     `--data5

"If data[0-2] are sparse holes, then leaf0 will contain a repeating
sequence of @zero_digest.  Therefore, leaf0 need not be written to disk
because its contents can be synthesized."

--D

> 
> - Eric
>
Eric Biggers April 24, 2024, 8:59 p.m. UTC | #5
On Wed, Apr 24, 2024 at 01:23:48PM -0700, Darrick J. Wong wrote:
> > > How about "the hash of an i_blocksize-sized buffer of zeroes" for all
> > > three?
> > 
> > It's the Merkle tree block size, not the filesystem block size.  Or did you
> > actually intend for this to use the filesystem block size?
> 
> I actually did intend for this to be the fs block size, not the merkle
> tree block size.  It's the bottom level that I care about shrinking.
> Let's say that data[0-B] are the data blocks:
> 
> root
>  +-internal0
>  |   +-leaf0
>  |   |   +-data0
>  |   |   +-data1
>  |   |   `-data2
>  |   `-leaf1
>  |       +-data3
>  |       +-data4
>  |       `-data5
>  `-internal1
>      +-leaf2
>      |   +-data6
>      |   +-data7
>      |   `-data8
>      `-leaf3
>          +-data9
>          +-dataA
>          `-dataB
> 
> (thanks to https://arthursonzogni.com/Diagon/#Tree )
> 
> If data[3-5] are completely zeroes (unwritten blocks, sparse holes,
> etc.) then I want to skip writing leaf1 of the merkle tree to disk.
> 
> If it happens that the hashes of leaf[0-1] match hash(data3) then it's
> frosting on top (as it were) that we can also skip internal0.  However,
> the merkle tree has a high fanout factor (4096/32==128 in the common
> case), so I care /much/ less about eliding those levels.
> 
> > In struct merkle_tree_params, the "block size" is always the Merkle tree block
> > size, so the type of block size seems clear in that context.  My complaint was
> > just that it used the term "data block" to mean a block that is not necessarily
> > a file contents block (which is what "data block" means elsewhere).
> 
> Hm.  Given the confusion, would it help if I said that zero_digest
> should only be used to elide leaf nodes of the merkle tree that hash the
> contents of file content blocks?  Or is "the hash of an
> i_blocksize-sized buffer of zeroes" sufficient?
> 
> What do you think of the commit message saying:
> 
> "Compute the hash of one filesystem block's worth of zeroes.  Any merkle
> tree leaf block containing only this hash can be elided at write time,
> and its contents synthesized at read time.
> 
> "Let's pretend that there's a file containing six data blocks and whose
> merkle tree looks roughly like this:
> 
> root
>  +--leaf0
>  |   +--data0
>  |   +--data1
>  |   `--data2
>  `--leaf1
>      +--data3
>      +--data4
>      `--data5
> 
> "If data[0-2] are sparse holes, then leaf0 will contain a repeating
> sequence of @zero_digest.  Therefore, leaf0 need not be written to disk
> because its contents can be synthesized."

It sounds like you're assuming that the file data is always hashed in filesystem
block sized units.  That's not how it works.  The block size that's selected for
fsverity (which is a power of 2 between 1024 and min(fs_block_size, PAGE_SIZE),
inclusively) is used for both the data blocks and the Merkle tree blocks.

This is intentional, so that people can e.g. calculate the fsverity digest of a
file using a 4K block size, and deploy the file to both filesystems that use a
4K filesystem block size and filesystems that use a 16K filesystem block size,
and get the same fsverity file digest each time.

I've considered offering the ability to configure the data block size separately
from the Merkle tree block size, like what dm-verity does.  This hasn't seemed
useful, though.  And in any case, it should not be tied to the FS block size.

A better way to think about things might be that the Merkle tree actually
*includes* the data, as opposed to being separate from it.  In this respect,
it's natural that the Merkle tree parameters including block size, hash
algorithm, and salt apply both to blocks that contain file data and to blocks
that contain hashes.  In general, all the fsverity code and documentation
probably needs to be clearer about whether, when referring to the Merkle tree,
it means just the hash blocks, or if it means the conceptual full tree that
includes the file's data.

- Eric
Darrick J. Wong April 24, 2024, 9:43 p.m. UTC | #6
On Wed, Apr 24, 2024 at 08:59:41PM +0000, Eric Biggers wrote:
> On Wed, Apr 24, 2024 at 01:23:48PM -0700, Darrick J. Wong wrote:
> > > > How about "the hash of an i_blocksize-sized buffer of zeroes" for all
> > > > three?
> > > 
> > > It's the Merkle tree block size, not the filesystem block size.  Or did you
> > > actually intend for this to use the filesystem block size?
> > 
> > I actually did intend for this to be the fs block size, not the merkle
> > tree block size.  It's the bottom level that I care about shrinking.
> > Let's say that data[0-B] are the data blocks:
> > 
> > root
> >  +-internal0
> >  |   +-leaf0
> >  |   |   +-data0
> >  |   |   +-data1
> >  |   |   `-data2
> >  |   `-leaf1
> >  |       +-data3
> >  |       +-data4
> >  |       `-data5
> >  `-internal1
> >      +-leaf2
> >      |   +-data6
> >      |   +-data7
> >      |   `-data8
> >      `-leaf3
> >          +-data9
> >          +-dataA
> >          `-dataB
> > 
> > (thanks to https://arthursonzogni.com/Diagon/#Tree )
> > 
> > If data[3-5] are completely zeroes (unwritten blocks, sparse holes,
> > etc.) then I want to skip writing leaf1 of the merkle tree to disk.
> > 
> > If it happens that the hashes of leaf[0-1] match hash(data3) then it's
> > frosting on top (as it were) that we can also skip internal0.  However,
> > the merkle tree has a high fanout factor (4096/32==128 in the common
> > case), so I care /much/ less about eliding those levels.
> > 
> > > In struct merkle_tree_params, the "block size" is always the Merkle tree block
> > > size, so the type of block size seems clear in that context.  My complaint was
> > > just that it used the term "data block" to mean a block that is not necessarily
> > > a file contents block (which is what "data block" means elsewhere).
> > 
> > Hm.  Given the confusion, would it help if I said that zero_digest
> > should only be used to elide leaf nodes of the merkle tree that hash the
> > contents of file content blocks?  Or is "the hash of an
> > i_blocksize-sized buffer of zeroes" sufficient?
> > 
> > What do you think of the commit message saying:
> > 
> > "Compute the hash of one filesystem block's worth of zeroes.  Any merkle
> > tree leaf block containing only this hash can be elided at write time,
> > and its contents synthesized at read time.
> > 
> > "Let's pretend that there's a file containing six data blocks and whose
> > merkle tree looks roughly like this:
> > 
> > root
> >  +--leaf0
> >  |   +--data0
> >  |   +--data1
> >  |   `--data2
> >  `--leaf1
> >      +--data3
> >      +--data4
> >      `--data5
> > 
> > "If data[0-2] are sparse holes, then leaf0 will contain a repeating
> > sequence of @zero_digest.  Therefore, leaf0 need not be written to disk
> > because its contents can be synthesized."
> 
> It sounds like you're assuming that the file data is always hashed in filesystem
> block sized units.

Ohh!  Yes, I was making that assumption, and now I double-checked
enable.c and see this:

	/* Hash each data block, also hashing the tree blocks as they fill up */
	for (offset = 0; offset < data_size; offset += params->block_size) {
		ssize_t bytes_read;
		loff_t pos = offset;

		buffers[-1].filled = min_t(u64, params->block_size,
					   data_size - offset);
		bytes_read = __kernel_read(filp, buffers[-1].data,
					   buffers[-1].filled, &pos);

So yes, you're right, @zero_digest is the hash of a *merkle tree
block-sized* buffer of zeroes.  And if ->write_merkle_tree_block sees
that the block is a repeating sequence of @zero_digest, it can skip
writing that block to disk, no matter where that block happens to be in
the tree.

> block sized units.  That's not how it works.  The block size that's selected for
> fsverity (which is a power of 2 between 1024 and min(fs_block_size, PAGE_SIZE),
> inclusively) is used for both the data blocks and the Merkle tree blocks.
> 
> This is intentional, so that people can e.g. calculate the fsverity digest of a
> file using a 4K block size, and deploy the file to both filesystems that use a
> 4K filesystem block size and filesystems that use a 16K filesystem block size,
> and get the same fsverity file digest each time.

Aha, yes, that makes more sense.  I had wondered if people actually
copied merkle tree data between filesystems.

> I've considered offering the ability to configure the data block size separately
> from the Merkle tree block size, like what dm-verity does.  This hasn't seemed
> useful, though.  And in any case, it should not be tied to the FS block size.
> 
> A better way to think about things might be that the Merkle tree actually
> *includes* the data, as opposed to being separate from it.  In this respect,
> it's natural that the Merkle tree parameters including block size, hash
> algorithm, and salt apply both to blocks that contain file data and to blocks
> that contain hashes.  In general, all the fsverity code and documentation
> probably needs to be clearer about whether, when referring to the Merkle tree,
> it means just the hash blocks, or if it means the conceptual full tree that
> includes the file's data.

Yes, that clears things right up.  Thank you for correcting me. :)

--D

> - Eric
>
diff mbox series

Patch

diff --git a/fs/verity/enable.c b/fs/verity/enable.c
index 233b20fb12ff5..8c6fe4b72b14e 100644
--- a/fs/verity/enable.c
+++ b/fs/verity/enable.c
@@ -52,6 +52,8 @@  static int write_merkle_tree_block(struct inode *inode, const u8 *buf,
 {
 	struct fsverity_writemerkle req = {
 		.inode = inode,
+		.zero_digest = params->zero_digest,
+		.digest_size = params->digest_size,
 	};
 	u64 pos = (u64)index << params->log_blocksize;
 	int err;
diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
index de8798f141d4a..195a92f203bba 100644
--- a/fs/verity/fsverity_private.h
+++ b/fs/verity/fsverity_private.h
@@ -47,6 +47,8 @@  struct merkle_tree_params {
 	u64 tree_size;			/* Merkle tree size in bytes */
 	unsigned long tree_pages;	/* Merkle tree size in pages */
 
+	u8 zero_digest[FS_VERITY_MAX_DIGEST_SIZE]; /* hash of zeroed data block */
+
 	/*
 	 * Starting block index for each tree level, ordered from leaf level (0)
 	 * to root level ('num_levels - 1')
diff --git a/fs/verity/open.c b/fs/verity/open.c
index 7a86407732c41..cdf694a261605 100644
--- a/fs/verity/open.c
+++ b/fs/verity/open.c
@@ -144,6 +144,13 @@  int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
 		goto out_err;
 	}
 
+	err = fsverity_hash_block(params, inode, page_address(ZERO_PAGE(0)),
+				   params->zero_digest);
+	if (err) {
+		fsverity_err(inode, "Error %d computing zero digest", err);
+		goto out_err;
+	}
+
 	params->tree_size = offset << log_blocksize;
 	params->tree_pages = PAGE_ALIGN(params->tree_size) >> PAGE_SHIFT;
 	return 0;
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
index c4ebf85ba2c79..99b1529bbb50b 100644
--- a/fs/verity/verify.c
+++ b/fs/verity/verify.c
@@ -432,6 +432,8 @@  int fsverity_read_merkle_tree_block(struct inode *inode,
 			.num_levels = params->num_levels,
 			.log_blocksize = params->log_blocksize,
 			.ra_bytes = ra_bytes,
+			.zero_digest = params->zero_digest,
+			.digest_size = params->digest_size,
 		};
 
 		block->verified = false;
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
index 5dacd30d65353..761a0b76eefec 100644
--- a/include/linux/fsverity.h
+++ b/include/linux/fsverity.h
@@ -66,6 +66,8 @@  struct fsverity_blockbuf {
  *		if the page at @block->offset isn't already cached.
  *		Implementations may ignore this argument; it's only a
  *		performance optimization.
+ * @zero_digest: the hash for a data block of zeroes
+ * @digest_size: size of zero_digest
  */
 struct fsverity_readmerkle {
 	struct inode *inode;
@@ -73,6 +75,8 @@  struct fsverity_readmerkle {
 	int level;
 	int num_levels;
 	u8 log_blocksize;
+	const u8 *zero_digest;
+	unsigned int digest_size;
 };
 
 /**
@@ -81,12 +85,16 @@  struct fsverity_readmerkle {
  * @level: level of the block; level 0 are the leaves
  * @num_levels: number of levels in the tree total
  * @log_blocksize: log2 of the size of the block
+ * @zero_digest: the hash for a data block of zeroes
+ * @digest_size: size of zero_digest
  */
 struct fsverity_writemerkle {
 	struct inode *inode;
 	int level;
 	int num_levels;
 	u8 log_blocksize;
+	const u8 *zero_digest;
+	unsigned int digest_size;
 };
 
 /* Verity operations for filesystems */