@@ -154,4 +154,12 @@ static inline void fsverity_init_signature(void)
void __init fsverity_init_workqueue(void);
+/*
+ * Drop 'block' obtained with ->read_merkle_tree_block(). Calls out back to
+ * filesystem if ->drop_block() is set, otherwise, drop the reference in the
+ * block->context.
+ */
+void fsverity_drop_block(struct inode *inode,
+ struct fsverity_blockbuf *block);
+
#endif /* _FSVERITY_PRIVATE_H */
@@ -213,7 +213,13 @@ struct fsverity_info *fsverity_create_info(const struct inode *inode,
if (err)
goto fail;
- if (vi->tree_params.block_size != PAGE_SIZE) {
+ /*
+ * If fs passes Merkle tree blocks to fs-verity (e.g. XFS), then
+ * fs-verity should use hash_block_verified bitmap as there's no page
+ * to mark it with PG_checked.
+ */
+ if (vi->tree_params.block_size != PAGE_SIZE ||
+ inode->i_sb->s_vop->read_merkle_tree_block) {
/*
* When the Merkle tree block size and page size differ, we use
* a bitmap to keep track of which hash blocks have been
@@ -18,50 +18,68 @@ static int fsverity_read_merkle_tree(struct inode *inode,
{
const struct fsverity_operations *vops = inode->i_sb->s_vop;
u64 end_offset;
- unsigned int offs_in_page;
+ unsigned int offs_in_block;
pgoff_t index, last_index;
int retval = 0;
int err = 0;
+ const unsigned int block_size = vi->tree_params.block_size;
+ const u8 log_blocksize = vi->tree_params.log_blocksize;
end_offset = min(offset + length, vi->tree_params.tree_size);
if (offset >= end_offset)
return 0;
- offs_in_page = offset_in_page(offset);
- last_index = (end_offset - 1) >> PAGE_SHIFT;
+ offs_in_block = offset & (block_size - 1);
+ last_index = (end_offset - 1) >> log_blocksize;
/*
- * Iterate through each Merkle tree page in the requested range and copy
- * the requested portion to userspace. Note that the Merkle tree block
- * size isn't important here, as we are returning a byte stream; i.e.,
- * we can just work with pages even if the tree block size != PAGE_SIZE.
+ * Iterate through each Merkle tree block in the requested range and
+ * copy the requested portion to userspace. Note that we are returning
+ * a byte stream.
*/
- for (index = offset >> PAGE_SHIFT; index <= last_index; index++) {
+ for (index = offset >> log_blocksize; index <= last_index; index++) {
unsigned long num_ra_pages =
min_t(unsigned long, last_index - index + 1,
inode->i_sb->s_bdi->io_pages);
unsigned int bytes_to_copy = min_t(u64, end_offset - offset,
- PAGE_SIZE - offs_in_page);
- struct page *page;
- const void *virt;
+ block_size - offs_in_block);
+ struct fsverity_blockbuf block = {
+ .size = block_size,
+ };
- page = vops->read_merkle_tree_page(inode, index, num_ra_pages);
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
+ if (!vops->read_merkle_tree_block) {
+ unsigned int blocks_per_page =
+ vi->tree_params.blocks_per_page;
+ unsigned long page_idx =
+ round_down(index, blocks_per_page);
+ struct page *page = vops->read_merkle_tree_page(inode,
+ page_idx, num_ra_pages);
+
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ } else {
+ block.kaddr = kmap_local_page(page) +
+ ((index - page_idx) << log_blocksize);
+ block.context = page;
+ }
+ } else {
+ err = vops->read_merkle_tree_block(inode,
+ index << log_blocksize,
+ &block, log_blocksize, num_ra_pages);
+ }
+
+ if (err) {
fsverity_err(inode,
- "Error %d reading Merkle tree page %lu",
- err, index);
+ "Error %d reading Merkle tree block %lu",
+ err, index << log_blocksize);
break;
}
- virt = kmap_local_page(page);
- if (copy_to_user(buf, virt + offs_in_page, bytes_to_copy)) {
- kunmap_local(virt);
- put_page(page);
+ if (copy_to_user(buf, block.kaddr + offs_in_block, bytes_to_copy)) {
+ fsverity_drop_block(inode, &block);
err = -EFAULT;
break;
}
- kunmap_local(virt);
- put_page(page);
+ fsverity_drop_block(inode, &block);
retval += bytes_to_copy;
buf += bytes_to_copy;
@@ -72,7 +90,7 @@ static int fsverity_read_merkle_tree(struct inode *inode,
break;
}
cond_resched();
- offs_in_page = 0;
+ offs_in_block = 0;
}
return retval ? retval : err;
}
@@ -13,14 +13,17 @@
static struct workqueue_struct *fsverity_read_workqueue;
/*
- * Returns true if the hash block with index @hblock_idx in the tree, located in
- * @hpage, has already been verified.
+ * Returns true if the hash block with index @hblock_idx in the tree has
+ * already been verified.
*/
-static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
+static bool is_hash_block_verified(struct inode *inode,
+ struct fsverity_blockbuf *block,
unsigned long hblock_idx)
{
unsigned int blocks_per_page;
unsigned int i;
+ struct fsverity_info *vi = inode->i_verity_info;
+ struct page *hpage = (struct page *)block->context;
/*
* When the Merkle tree block size and page size are the same, then the
@@ -34,6 +37,12 @@ static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
if (!vi->hash_block_verified)
return PageChecked(hpage);
+ /*
+ * Filesystems which use block based caching (e.g. XFS) always use
+ * bitmap.
+ */
+ if (inode->i_sb->s_vop->read_merkle_tree_block)
+ return test_bit(hblock_idx, vi->hash_block_verified);
/*
* When the Merkle tree block size and page size differ, we use a bitmap
* to indicate whether each hash block has been verified.
@@ -95,15 +104,15 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi,
const struct merkle_tree_params *params = &vi->tree_params;
const unsigned int hsize = params->digest_size;
int level;
+ int err = 0;
+ int num_ra_pages;
u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
const u8 *want_hash;
u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
/* The hash blocks that are traversed, indexed by level */
struct {
- /* Page containing the hash block */
- struct page *page;
- /* Mapped address of the hash block (will be within @page) */
- const void *addr;
+ /* Buffer containing the hash block */
+ struct fsverity_blockbuf block;
/* Index of the hash block in the tree overall */
unsigned long index;
/* Byte offset of the wanted hash relative to @addr */
@@ -144,10 +153,11 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi,
unsigned long next_hidx;
unsigned long hblock_idx;
pgoff_t hpage_idx;
+ u64 hblock_pos;
unsigned int hblock_offset_in_page;
unsigned int hoffset;
struct page *hpage;
- const void *haddr;
+ struct fsverity_blockbuf *block = &hblocks[level].block;
/*
* The index of the block in the current level; also the index
@@ -165,29 +175,49 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi,
hblock_offset_in_page =
(hblock_idx << params->log_blocksize) & ~PAGE_MASK;
+ /* Offset of the Merkle tree block into the tree */
+ hblock_pos = hblock_idx << params->log_blocksize;
+
/* Byte offset of the hash within the block */
hoffset = (hidx << params->log_digestsize) &
(params->block_size - 1);
- hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
- hpage_idx, level == 0 ? min(max_ra_pages,
- params->tree_pages - hpage_idx) : 0);
- if (IS_ERR(hpage)) {
+ num_ra_pages = level == 0 ?
+ min(max_ra_pages, params->tree_pages - hpage_idx) : 0;
+
+ if (inode->i_sb->s_vop->read_merkle_tree_block) {
+ err = inode->i_sb->s_vop->read_merkle_tree_block(
+ inode, hblock_pos, block, params->log_blocksize,
+ num_ra_pages);
+ } else {
+ unsigned int blocks_per_page =
+ vi->tree_params.blocks_per_page;
+ hblock_idx = round_down(hblock_idx, blocks_per_page);
+ hpage = inode->i_sb->s_vop->read_merkle_tree_page(
+ inode, hpage_idx, (num_ra_pages << PAGE_SHIFT));
+
+ if (IS_ERR(hpage)) {
+ err = PTR_ERR(hpage);
+ } else {
+ block->kaddr = kmap_local_page(hpage) +
+ hblock_offset_in_page;
+ block->context = hpage;
+ }
+ }
+
+ if (err) {
fsverity_err(inode,
- "Error %ld reading Merkle tree page %lu",
- PTR_ERR(hpage), hpage_idx);
+ "Error %d reading Merkle tree block %lu",
+ err, hblock_idx);
goto error;
}
- haddr = kmap_local_page(hpage) + hblock_offset_in_page;
- if (is_hash_block_verified(vi, hpage, hblock_idx)) {
- memcpy(_want_hash, haddr + hoffset, hsize);
+
+ if (is_hash_block_verified(inode, block, hblock_idx)) {
+ memcpy(_want_hash, block->kaddr + hoffset, hsize);
want_hash = _want_hash;
- kunmap_local(haddr);
- put_page(hpage);
+ fsverity_drop_block(inode, block);
goto descend;
}
- hblocks[level].page = hpage;
- hblocks[level].addr = haddr;
hblocks[level].index = hblock_idx;
hblocks[level].hoffset = hoffset;
hidx = next_hidx;
@@ -197,10 +227,11 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi,
descend:
/* Descend the tree verifying hash blocks. */
for (; level > 0; level--) {
- struct page *hpage = hblocks[level - 1].page;
- const void *haddr = hblocks[level - 1].addr;
+ struct fsverity_blockbuf *block = &hblocks[level - 1].block;
+ const void *haddr = block->kaddr;
unsigned long hblock_idx = hblocks[level - 1].index;
unsigned int hoffset = hblocks[level - 1].hoffset;
+ struct page *hpage = (struct page *)block->context;
if (fsverity_hash_block(params, inode, haddr, real_hash) != 0)
goto error;
@@ -217,8 +248,7 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi,
SetPageChecked(hpage);
memcpy(_want_hash, haddr + hoffset, hsize);
want_hash = _want_hash;
- kunmap_local(haddr);
- put_page(hpage);
+ fsverity_drop_block(inode, block);
}
/* Finally, verify the data block. */
@@ -235,10 +265,8 @@ verify_data_block(struct inode *inode, struct fsverity_info *vi,
params->hash_alg->name, hsize, want_hash,
params->hash_alg->name, hsize, real_hash);
error:
- for (; level > 0; level--) {
- kunmap_local(hblocks[level - 1].addr);
- put_page(hblocks[level - 1].page);
- }
+ for (; level > 0; level--)
+ fsverity_drop_block(inode, &hblocks[level - 1].block);
return false;
}
@@ -362,3 +390,42 @@ void __init fsverity_init_workqueue(void)
if (!fsverity_read_workqueue)
panic("failed to allocate fsverity_read_queue");
}
+
+/**
+ * fsverity_invalidate_block() - invalidate Merkle tree block
+ * @inode: inode to which this Merkle tree blocks belong
+ * @block: block to be invalidated
+ *
+ * This function invalidates/clears "verified" state of Merkle tree block
+ * in the fs-verity bitmap. The block needs to have ->offset set.
+ */
+void fsverity_invalidate_block(struct inode *inode,
+ struct fsverity_blockbuf *block)
+{
+ struct fsverity_info *vi = inode->i_verity_info;
+ const unsigned int log_blocksize = vi->tree_params.log_blocksize;
+
+ if (block->offset > vi->tree_params.tree_size) {
+ fsverity_err(inode,
+"Trying to invalidate beyond Merkle tree (tree %lld, offset %lld)",
+ vi->tree_params.tree_size, block->offset);
+ return;
+ }
+
+ clear_bit(block->offset >> log_blocksize, vi->hash_block_verified);
+}
+EXPORT_SYMBOL_GPL(fsverity_invalidate_block);
+
+void fsverity_drop_block(struct inode *inode,
+ struct fsverity_blockbuf *block)
+{
+ if (inode->i_sb->s_vop->drop_block)
+ inode->i_sb->s_vop->drop_block(block);
+ else {
+ struct page *page = (struct page *)block->context;
+
+ kunmap_local(block->kaddr);
+ put_page(page);
+ }
+ block->kaddr = NULL;
+}
@@ -26,6 +26,33 @@
/* Arbitrary limit to bound the kmalloc() size. Can be changed. */
#define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384
+/**
+ * struct fsverity_blockbuf - Merkle Tree block buffer
+ * @kaddr: virtual address of the block's data
+ * @offset: block's offset into Merkle tree
+ * @size: the Merkle tree block size
+ * @context: filesystem private context
+ *
+ * Buffer containing single Merkle Tree block. These buffers are passed
+ * - to filesystem, when fs-verity is building merkel tree,
+ * - from filesystem, when fs-verity is reading merkle tree from a disk.
+ * Filesystems sets kaddr together with size to point to a memory which contains
+ * Merkle tree block. Same is done by fs-verity when Merkle tree is need to be
+ * written down to disk.
+ *
+ * While reading the tree, fs-verity calls ->read_merkle_tree_block followed by
+ * ->drop_block to let filesystem know that memory can be freed.
+ *
+ * The context is optional. This field can be used by filesystem to passthrough
+ * state from ->read_merkle_tree_block to ->drop_block.
+ */
+struct fsverity_blockbuf {
+ void *kaddr;
+ u64 offset;
+ unsigned int size;
+ void *context;
+};
+
/* Verity operations for filesystems */
struct fsverity_operations {
@@ -107,6 +134,32 @@ struct fsverity_operations {
pgoff_t index,
unsigned long num_ra_pages);
+ /**
+ * Read a Merkle tree block of the given inode.
+ * @inode: the inode
+ * @pos: byte offset of the block within the Merkle tree
+ * @block: block buffer for filesystem to point it to the block
+ * @log_blocksize: log2 of the size of the expected block
+ * @ra_bytes: The number of bytes that should be
+ * prefetched starting at @pos if the page at @pos
+ * isn't already cached. Implementations may ignore this
+ * argument; it's only a performance optimization.
+ *
+ * This can be called at any time on an open verity file. It may be
+ * called by multiple processes concurrently.
+ *
+ * In case that block was evicted from the memory filesystem has to use
+ * fsverity_invalidate_block() to let fsverity know that block's
+ * verification state is not valid anymore.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+ int (*read_merkle_tree_block)(struct inode *inode,
+ u64 pos,
+ struct fsverity_blockbuf *block,
+ unsigned int log_blocksize,
+ u64 ra_bytes);
+
/**
* Write a Merkle tree block to the given inode.
*
@@ -122,6 +175,16 @@ struct fsverity_operations {
*/
int (*write_merkle_tree_block)(struct inode *inode, const void *buf,
u64 pos, unsigned int size);
+
+ /**
+ * Release the reference to a Merkle tree block
+ *
+ * @block: the block to release
+ *
+ * This is called when fs-verity is done with a block obtained with
+ * ->read_merkle_tree_block().
+ */
+ void (*drop_block)(struct fsverity_blockbuf *block);
};
#ifdef CONFIG_FS_VERITY
@@ -175,6 +238,8 @@ int fsverity_ioctl_read_metadata(struct file *filp, const void __user *uarg);
bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset);
void fsverity_verify_bio(struct bio *bio);
void fsverity_enqueue_verify_work(struct work_struct *work);
+void fsverity_invalidate_block(struct inode *inode,
+ struct fsverity_blockbuf *block);
#else /* !CONFIG_FS_VERITY */