@@ -57,6 +57,7 @@ xfs-y += $(addprefix libxfs/, \
xfs_trans_resv.o \
xfs_trans_space.o \
xfs_types.o \
+ xfs_verity.o \
)
# xfs_rtbitmap is shared with libxfs
xfs-$(CONFIG_XFS_RT) += $(addprefix libxfs/, \
@@ -142,6 +143,7 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o
+xfs-$(CONFIG_FS_VERITY) += xfs_fsverity.o
# notify failure
ifeq ($(CONFIG_MEMORY_FAILURE),y)
@@ -123,6 +123,12 @@ struct xfs_perag {
/* Hook to feed rmapbt updates to an active online repair. */
struct xfs_hooks pag_rmap_update_hooks;
+
+# ifdef CONFIG_FS_VERITY
+ /* per-inode merkle tree caches */
+ spinlock_t pagi_merkle_lock;
+ struct rhashtable pagi_merkle_blobs;
+# endif /* CONFIG_FS_VERITY */
#endif /* __KERNEL__ */
};
@@ -135,6 +141,7 @@ struct xfs_perag {
#define XFS_AGSTATE_ALLOWS_INODES 3
#define XFS_AGSTATE_AGFL_NEEDS_RESET 4
#define XFS_AGSTATE_NOALLOC 5
+#define XFS_AGSTATE_MERKLE 6
#define __XFS_AG_OPSTATE(name, NAME) \
static inline bool xfs_perag_ ## name (struct xfs_perag *pag) \
@@ -148,6 +155,7 @@ __XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA)
__XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES)
__XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET)
__XFS_AG_OPSTATE(prohibits_alloc, NOALLOC)
+__XFS_AG_OPSTATE(caches_merkle, MERKLE)
void xfs_free_unused_perag_range(struct xfs_mount *mp, xfs_agnumber_t agstart,
xfs_agnumber_t agend);
@@ -27,6 +27,7 @@
#include "xfs_attr_item.h"
#include "xfs_xattr.h"
#include "xfs_parent.h"
+#include "xfs_verity.h"
struct kmem_cache *xfs_attr_intent_cache;
@@ -1619,6 +1620,9 @@ xfs_attr_namecheck(
if (!xfs_attr_check_namespace(attr_flags))
return false;
+ if (attr_flags & XFS_ATTR_VERITY)
+ return xfs_verity_namecheck(attr_flags, name, length);
+
/*
* MAXNAMELEN includes the trailing null, but (name/length) leave it
* out, so use >= for the length check.
@@ -929,4 +929,18 @@ struct xfs_parent_rec {
__be32 p_gen;
} __packed;
+/*
+ * fs-verity attribute name format
+ *
+ * Merkle tree blocks are stored under extended attributes of the inode. The
+ * name of the attributes are byte positions into the merkle data.
+ */
+struct xfs_merkle_key {
+ __be64 mk_pos;
+};
+
+/* ondisk xattr name used for the fsverity descriptor */
+#define XFS_VERITY_DESCRIPTOR_NAME "vdesc"
+#define XFS_VERITY_DESCRIPTOR_NAME_LEN (sizeof(XFS_VERITY_DESCRIPTOR_NAME) - 1)
+
#endif /* __XFS_DA_FORMAT_H__ */
@@ -209,6 +209,9 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MAX << XFS_DQ_BIGTIME_SHIFT,
16299260424LL);
+ /* fs-verity xattrs */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_merkle_key, 8);
+ XFS_CHECK_VALUE(sizeof(XFS_VERITY_DESCRIPTOR_NAME), 6);
}
#endif /* __XFS_ONDISK_H */
new file mode 100644
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Red Hat, Inc.
+ */
+#include "xfs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_log_format.h"
+#include "xfs_attr.h"
+#include "xfs_verity.h"
+
+/* Set a merkle tree pos in preparation for setting merkle tree attrs. */
+void
+xfs_merkle_key_to_disk(
+ struct xfs_merkle_key *key,
+ uint64_t pos)
+{
+ key->mk_pos = cpu_to_be64(pos);
+}
+
+/* Retrieve the merkle tree pos from the attr data. */
+uint64_t
+xfs_merkle_key_from_disk(
+ const void *attr_name,
+ int namelen)
+{
+ const struct xfs_merkle_key *key = attr_name;
+
+ ASSERT(namelen == sizeof(struct xfs_merkle_key));
+
+ return be64_to_cpu(key->mk_pos);
+}
+
+/* Return true if verity attr name is valid. */
+bool
+xfs_verity_namecheck(
+ unsigned int attr_flags,
+ const void *name,
+ int namelen)
+{
+ if (!(attr_flags & XFS_ATTR_VERITY))
+ return false;
+
+ /*
+ * Merkle tree pages are stored under u64 indexes; verity descriptor
+ * blocks are held in a named attribute.
+ */
+ if (namelen != sizeof(struct xfs_merkle_key) &&
+ namelen != XFS_VERITY_DESCRIPTOR_NAME_LEN)
+ return false;
+
+ return true;
+}
new file mode 100644
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Red Hat, Inc.
+ */
+#ifndef __XFS_VERITY_H__
+#define __XFS_VERITY_H__
+
+void xfs_merkle_key_to_disk(struct xfs_merkle_key *key, uint64_t pos);
+uint64_t xfs_merkle_key_from_disk(const void *attr_name, int namelen);
+bool xfs_verity_namecheck(unsigned int attr_flags, const void *name,
+ int namelen);
+
+#endif /* __XFS_VERITY_H__ */
@@ -25,6 +25,7 @@
#include "xfs_rtalloc.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_rtrefcount_btree.h"
+#include "xfs_fsverity.h"
/*
* Write new AG headers to disk. Non-transactional, but need to be
@@ -155,6 +156,11 @@ xfs_growfs_data_private(
error = xfs_initialize_perag(mp, nagcount, nb, &nagimax);
if (error)
return error;
+ error = xfs_fsverity_growfs(mp, oagcount, nagcount);
+ if (error) {
+ xfs_free_unused_perag_range(mp, oagcount, nagcount);
+ return error;
+ }
} else if (nagcount < oagcount) {
/* TODO: shrinking the entire AGs hasn't yet completed */
return -EINVAL;
new file mode 100644
@@ -0,0 +1,758 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Red Hat, Inc.
+ */
+#include "xfs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_log_format.h"
+#include "xfs_attr.h"
+#include "xfs_verity.h"
+#include "xfs_bmap_util.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_trace.h"
+#include "xfs_quota.h"
+#include "xfs_ag.h"
+#include "xfs_fsverity.h"
+#include <linux/fsverity.h>
+
+/*
+ * Merkle Tree Block Cache
+ * =======================
+ *
+ * fsverity requires that the filesystem implement caching of ondisk merkle
+ * tree blocks. XFS stores merkle tree blocks in the extended attribute data,
+ * which makes it important to keep copies in memory for as long as possible.
+ * This is performed by allocating the data blob structure defined below,
+ * passing the data portion of the blob to xfs_attr_get, and later caching the
+ * data blob via a per-ag hashtable.
+ *
+ * The cache structure indexes merkle tree blocks by the pos given to us by
+ * fsverity, which drastically reduces lookups. First, it eliminating the need
+ * to walk the xattr structure to find the remote block containing the merkle
+ * tree block. Second, access to each block in the xattr structure requires a
+ * lookup in the incore extent btree.
+ */
+struct xfs_merkle_blob {
+ struct rhash_head rhash;
+ struct rcu_head rcu;
+
+ struct xfs_merkle_bkey key;
+
+ /* refcount of this item; the cache holds its own ref */
+ refcount_t refcount;
+
+ unsigned long flags;
+
+ /* Pointer to the merkle tree block, which is power-of-2 sized */
+ void *data;
+};
+
+#define XFS_MERKLE_BLOB_VERIFIED_BIT (0) /* fsverity validated this */
+
+static const struct rhashtable_params xfs_fsverity_merkle_hash_params = {
+ .key_len = sizeof(struct xfs_merkle_bkey),
+ .key_offset = offsetof(struct xfs_merkle_blob, key),
+ .head_offset = offsetof(struct xfs_merkle_blob, rhash),
+ .automatic_shrinking = true,
+};
+
+/*
+ * Allocate a merkle tree blob object to prepare for reading a merkle tree
+ * object from disk.
+ */
+static inline struct xfs_merkle_blob *
+xfs_merkle_blob_alloc(
+ struct xfs_inode *ip,
+ u64 pos,
+ unsigned int blocksize)
+{
+ struct xfs_merkle_blob *mk;
+
+ mk = kmalloc(sizeof(struct xfs_merkle_blob), GFP_KERNEL);
+ if (!mk)
+ return NULL;
+
+ mk->data = kvzalloc(blocksize, GFP_KERNEL);
+ if (!mk->data) {
+ kfree(mk);
+ return NULL;
+ }
+
+ /* Caller owns this refcount. */
+ refcount_set(&mk->refcount, 1);
+ mk->flags = 0;
+ mk->key.ino = ip->i_ino;
+ mk->key.pos = pos;
+ return mk;
+}
+
+/* Actually free this blob. */
+static void
+xfs_merkle_blob_free(
+ struct callback_head *cb)
+{
+ struct xfs_merkle_blob *mk =
+ container_of(cb, struct xfs_merkle_blob, rcu);
+
+ kvfree(mk->data);
+ kfree(mk);
+}
+
+/* Free a merkle tree blob. */
+static inline void
+xfs_merkle_blob_rele(
+ struct xfs_merkle_blob *mk)
+{
+ if (refcount_dec_and_test(&mk->refcount))
+ call_rcu(&mk->rcu, xfs_merkle_blob_free);
+}
+
+/*
+ * Drop this merkle tree blob from the cache. Caller must have a reference to
+ * the blob, which will be dropped at the end.
+ */
+static inline void
+xfs_merkle_blob_drop(
+ struct xfs_perag *pag,
+ struct xfs_merkle_blob *mk)
+{
+ /*
+ * Remove the blob from the hash table and drop the cache's
+ * ref to the blob handle.
+ */
+ spin_lock(&pag->pagi_merkle_lock);
+ rhashtable_remove_fast(&pag->pagi_merkle_blobs, &mk->rhash,
+ xfs_fsverity_merkle_hash_params);
+ xfs_merkle_blob_rele(mk);
+ spin_unlock(&pag->pagi_merkle_lock);
+
+ /* Drop the reference we obtained above. */
+ xfs_merkle_blob_rele(mk);
+}
+
+/* Drop all the merkle tree blocks from this part of the cache. */
+STATIC void
+xfs_fsverity_drop_cache(
+ struct xfs_inode *ip,
+ u64 tree_size,
+ unsigned int block_size)
+{
+ struct xfs_merkle_bkey key = {
+ .ino = ip->i_ino,
+ .pos = 0,
+ };
+ struct xfs_perag *pag;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_merkle_blob *mk;
+ s64 freed = 0;
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ if (!pag)
+ return;
+
+ for (key.pos = 0; key.pos < tree_size; key.pos += block_size) {
+ /*
+ * Try to grab the blob from the hash table and get our own
+ * reference to the object. If there's a blob handle but it
+ * has zero refcount then we're racing with reclaim and can
+ * move on.
+ */
+ rcu_read_lock();
+ mk = rhashtable_lookup(&pag->pagi_merkle_blobs, &key,
+ xfs_fsverity_merkle_hash_params);
+ if (mk && !refcount_inc_not_zero(&mk->refcount))
+ mk = NULL;
+ rcu_read_unlock();
+
+ if (!mk)
+ continue;
+
+ trace_xfs_fsverity_cache_drop(mp, &mk->key, _RET_IP_);
+
+ xfs_merkle_blob_drop(pag, mk);
+ freed++;
+ }
+
+ xfs_perag_put(pag);
+}
+
+/*
+ * Drop all the merkle tree blocks out of the cache. Caller must ensure that
+ * there are no active references to cache items.
+ */
+void
+xfs_fsverity_destroy_inode(
+ struct xfs_inode *ip)
+{
+ u64 tree_size;
+ unsigned int block_size;
+ int error;
+
+ error = fsverity_merkle_tree_geometry(VFS_I(ip), &block_size,
+ &tree_size);
+ if (error)
+ return;
+
+ xfs_fsverity_drop_cache(ip, tree_size, block_size);
+}
+
+/* Return a cached merkle tree block, or NULL. */
+static struct xfs_merkle_blob *
+xfs_fsverity_cache_load(
+ struct xfs_inode *ip,
+ u64 pos)
+{
+ struct xfs_merkle_bkey key = {
+ .ino = ip->i_ino,
+ .pos = pos,
+ };
+ struct xfs_perag *pag;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_merkle_blob *mk;
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ if (!pag)
+ return NULL;
+
+ rcu_read_lock();
+ mk = rhashtable_lookup(&pag->pagi_merkle_blobs, &key,
+ xfs_fsverity_merkle_hash_params);
+ if (mk && !refcount_inc_not_zero(&mk->refcount))
+ mk = NULL;
+ rcu_read_unlock();
+ xfs_perag_put(pag);
+
+ if (!mk) {
+ trace_xfs_fsverity_cache_miss(mp, &key, _RET_IP_);
+ return NULL;
+ }
+
+ trace_xfs_fsverity_cache_hit(mp, &mk->key, _RET_IP_);
+ return mk;
+}
+
+/*
+ * Try to store a merkle tree block in the cache with the given key.
+ *
+ * If the merkle tree block is not already in the cache, the given block @mk
+ * will be added to the cache and returned. The caller retains its active
+ * reference to @mk.
+ *
+ * If there was already a merkle block in the cache, it will be returned to
+ * the caller with an active reference. @mk will be untouched.
+ */
+static struct xfs_merkle_blob *
+xfs_fsverity_cache_store(
+ struct xfs_inode *ip,
+ struct xfs_merkle_blob *mk)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_merkle_blob *old;
+ struct xfs_perag *pag;
+
+ ASSERT(ip->i_ino == mk->key.ino);
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ if (!pag) {
+ ASSERT(pag);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
+
+ spin_lock(&pag->pagi_merkle_lock);
+ old = rhashtable_lookup_get_insert_fast(&pag->pagi_merkle_blobs,
+ &mk->rhash, xfs_fsverity_merkle_hash_params);
+ if (IS_ERR(old)) {
+ spin_unlock(&pag->pagi_merkle_lock);
+ xfs_perag_put(pag);
+ return old;
+ }
+ if (!old) {
+ /*
+ * There was no previous value. @mk is now live in the cache.
+ * Bump the active refcount to transfer ownership to the cache
+ * and return @mk to the caller.
+ */
+ refcount_inc(&mk->refcount);
+ spin_unlock(&pag->pagi_merkle_lock);
+ xfs_perag_put(pag);
+
+ trace_xfs_fsverity_cache_store(mp, &mk->key, _RET_IP_);
+ return mk;
+ }
+
+ /*
+ * We obtained an active reference to a previous value in the cache.
+ * Return it to the caller.
+ */
+ refcount_inc(&old->refcount);
+ spin_unlock(&pag->pagi_merkle_lock);
+ xfs_perag_put(pag);
+
+ trace_xfs_fsverity_cache_reuse(mp, &old->key, _RET_IP_);
+ return old;
+}
+
+/* Set up fsverity for this mount. */
+int
+xfs_fsverity_mount(
+ struct xfs_mount *mp)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+ int error;
+
+ if (!xfs_has_verity(mp))
+ return 0;
+
+ for_each_perag(mp, agno, pag) {
+ spin_lock_init(&pag->pagi_merkle_lock);
+ error = rhashtable_init(&pag->pagi_merkle_blobs,
+ &xfs_fsverity_merkle_hash_params);
+ if (error) {
+ xfs_perag_put(pag);
+ goto out_perag;
+ }
+ set_bit(XFS_AGSTATE_MERKLE, &pag->pag_opstate);
+ }
+
+ return 0;
+out_perag:
+ for_each_perag(mp, agno, pag) {
+ if (test_and_clear_bit(XFS_AGSTATE_MERKLE, &pag->pag_opstate))
+ rhashtable_destroy(&pag->pagi_merkle_blobs);
+ }
+
+ return error;
+}
+
+/* Set up new merkle tree caches for new AGs. */
+int
+xfs_fsverity_growfs(
+ struct xfs_mount *mp,
+ xfs_agnumber_t old_agcount,
+ xfs_agnumber_t new_agcount)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+ int error;
+
+ if (!xfs_has_verity(mp))
+ return 0;
+
+ agno = old_agcount;
+ for_each_perag_range(mp, agno, new_agcount - 1, pag) {
+ spin_lock_init(&pag->pagi_merkle_lock);
+ error = rhashtable_init(&pag->pagi_merkle_blobs,
+ &xfs_fsverity_merkle_hash_params);
+ if (error) {
+ xfs_perag_put(pag);
+ goto out_perag;
+ }
+ set_bit(XFS_AGSTATE_MERKLE, &pag->pag_opstate);
+ }
+
+ return 0;
+out_perag:
+ agno = old_agcount;
+ for_each_perag_range(mp, agno, new_agcount - 1, pag) {
+ if (test_and_clear_bit(XFS_AGSTATE_MERKLE, &pag->pag_opstate))
+ rhashtable_destroy(&pag->pagi_merkle_blobs);
+ }
+
+ return error;
+}
+
+struct xfs_fsverity_umount {
+ struct xfs_mount *mp;
+ s64 freed;
+};
+
+/* Destroy this blob that's still left over in the cache. */
+static void
+xfs_merkle_blob_destroy(
+ void *ptr,
+ void *arg)
+{
+ struct xfs_fsverity_umount *fu = arg;
+ struct xfs_merkle_blob *mk = ptr;
+
+ trace_xfs_fsverity_cache_unmount(fu->mp, &mk->key, _RET_IP_);
+
+ xfs_merkle_blob_rele(ptr);
+ fu->freed++;
+}
+
+/* Tear down fsverity from this mount. */
+void
+xfs_fsverity_unmount(
+ struct xfs_mount *mp)
+{
+ struct xfs_fsverity_umount fu = {
+ .mp = mp,
+ .freed = 0,
+ };
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+
+ if (!xfs_has_verity(mp))
+ return;
+
+ for_each_perag(mp, agno, pag) {
+ if (test_and_clear_bit(XFS_AGSTATE_MERKLE, &pag->pag_opstate))
+ rhashtable_free_and_destroy(&pag->pagi_merkle_blobs,
+ xfs_merkle_blob_destroy, &fu);
+ }
+}
+
+/*
+ * Initialize an args structure to load or store the fsverity descriptor.
+ * Caller must ensure @args is zeroed except for value and valuelen.
+ */
+static inline void
+xfs_fsverity_init_vdesc_args(
+ struct xfs_inode *ip,
+ struct xfs_da_args *args)
+{
+ args->geo = ip->i_mount->m_attr_geo;
+ args->whichfork = XFS_ATTR_FORK,
+ args->attr_filter = XFS_ATTR_VERITY;
+ args->op_flags = XFS_DA_OP_OKNOENT;
+ args->dp = ip;
+ args->owner = ip->i_ino;
+ args->name = XFS_VERITY_DESCRIPTOR_NAME;
+ args->namelen = XFS_VERITY_DESCRIPTOR_NAME_LEN;
+ xfs_attr_sethash(args);
+}
+
+/*
+ * Initialize an args structure to load or store a merkle tree block.
+ * Caller must ensure @args is zeroed except for value and valuelen.
+ */
+static inline void
+xfs_fsverity_init_merkle_args(
+ struct xfs_inode *ip,
+ struct xfs_merkle_key *key,
+ uint64_t merkleoff,
+ struct xfs_da_args *args)
+{
+ xfs_merkle_key_to_disk(key, merkleoff);
+ args->geo = ip->i_mount->m_attr_geo;
+ args->whichfork = XFS_ATTR_FORK,
+ args->attr_filter = XFS_ATTR_VERITY;
+ args->op_flags = XFS_DA_OP_OKNOENT;
+ args->dp = ip;
+ args->owner = ip->i_ino;
+ args->name = (const uint8_t *)key;
+ args->namelen = sizeof(struct xfs_merkle_key);
+ xfs_attr_sethash(args);
+}
+
+/* Delete the verity descriptor. */
+static int
+xfs_fsverity_delete_descriptor(
+ struct xfs_inode *ip)
+{
+ struct xfs_da_args args = { };
+
+ xfs_fsverity_init_vdesc_args(ip, &args);
+ return xfs_attr_set(&args, XFS_ATTRUPDATE_REMOVE, false);
+}
+
+/* Delete a merkle tree block. */
+static int
+xfs_fsverity_delete_merkle_block(
+ struct xfs_inode *ip,
+ u64 pos)
+{
+ struct xfs_merkle_key name;
+ struct xfs_da_args args = { };
+
+ xfs_fsverity_init_merkle_args(ip, &name, pos, &args);
+ return xfs_attr_set(&args, XFS_ATTRUPDATE_REMOVE, false);
+}
+
+/* Retrieve the verity descriptor. */
+static int
+xfs_fsverity_get_descriptor(
+ struct inode *inode,
+ void *buf,
+ size_t buf_size)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_da_args args = {
+ .value = buf,
+ .valuelen = buf_size,
+ };
+ int error = 0;
+
+ /*
+ * The fact that (returned attribute size) == (provided buf_size) is
+ * checked by xfs_attr_copy_value() (returns -ERANGE). No descriptor
+ * is treated as a short read so that common fsverity code will
+ * complain.
+ */
+ xfs_fsverity_init_vdesc_args(ip, &args);
+ error = xfs_attr_get(&args);
+ if (error == -ENOATTR)
+ return 0;
+ if (error)
+ return error;
+
+ return args.valuelen;
+}
+
+/*
+ * Clear out old fsverity metadata before we start building a new one. This
+ * could happen if, say, we crashed while building fsverity data.
+ */
+static int
+xfs_fsverity_delete_stale_metadata(
+ struct xfs_inode *ip,
+ u64 new_tree_size,
+ unsigned int tree_blocksize)
+{
+ u64 pos;
+ int error = 0;
+
+ /*
+ * Delete as many merkle tree blocks in increasing blkno order until we
+ * don't find any more. That ought to be good enough for avoiding
+ * dead bloat without excessive runtime.
+ */
+ for (pos = new_tree_size; !error; pos += tree_blocksize) {
+ if (fatal_signal_pending(current))
+ return -EINTR;
+ error = xfs_fsverity_delete_merkle_block(ip, pos);
+ if (error)
+ break;
+ }
+
+ return error != -ENOATTR ? error : 0;
+}
+
+/* Prepare to enable fsverity by clearing old metadata. */
+static int
+xfs_fsverity_begin_enable(
+ struct file *filp,
+ u64 merkle_tree_size,
+ unsigned int tree_blocksize)
+{
+ struct inode *inode = file_inode(filp);
+ struct xfs_inode *ip = XFS_I(inode);
+ int error;
+
+ xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
+
+ if (IS_DAX(inode))
+ return -EINVAL;
+
+ if (xfs_iflags_test_and_set(ip, XFS_VERITY_CONSTRUCTION))
+ return -EBUSY;
+
+ error = xfs_qm_dqattach(ip);
+ if (error)
+ return error;
+
+ return xfs_fsverity_delete_stale_metadata(ip, merkle_tree_size,
+ tree_blocksize);
+}
+
+/* Try to remove all the fsverity metadata after a failed enablement. */
+static int
+xfs_fsverity_delete_metadata(
+ struct xfs_inode *ip,
+ u64 merkle_tree_size,
+ unsigned int tree_blocksize)
+{
+ u64 pos;
+ int error;
+
+ if (!merkle_tree_size)
+ return 0;
+
+ for (pos = 0; pos < merkle_tree_size; pos += tree_blocksize) {
+ if (fatal_signal_pending(current))
+ return -EINTR;
+ error = xfs_fsverity_delete_merkle_block(ip, pos);
+ if (error == -ENOATTR)
+ error = 0;
+ if (error)
+ return error;
+ }
+
+ error = xfs_fsverity_delete_descriptor(ip);
+ return error != -ENOATTR ? error : 0;
+}
+
+/* Complete (or fail) the process of enabling fsverity. */
+static int
+xfs_fsverity_end_enable(
+ struct file *filp,
+ const void *desc,
+ size_t desc_size,
+ u64 merkle_tree_size,
+ unsigned int tree_blocksize)
+{
+ struct xfs_da_args args = {
+ .value = (void *)desc,
+ .valuelen = desc_size,
+ };
+ struct inode *inode = file_inode(filp);
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ int error = 0;
+
+ xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
+
+ /* fs-verity failed, just cleanup */
+ if (desc == NULL)
+ goto out;
+
+ xfs_fsverity_init_vdesc_args(ip, &args);
+ error = xfs_attr_set(&args, XFS_ATTRUPDATE_UPSERT, false);
+ if (error)
+ goto out;
+
+ /* Set fsverity inode flag */
+ error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange,
+ 0, 0, false, &tp);
+ if (error)
+ goto out;
+
+ /*
+ * Ensure that we've persisted the verity information before we enable
+ * it on the inode and tell the caller we have sealed the inode.
+ */
+ ip->i_diflags2 |= XFS_DIFLAG2_VERITY;
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ xfs_trans_set_sync(tp);
+
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ if (!error)
+ inode->i_flags |= S_VERITY;
+
+out:
+ if (error) {
+ int error2;
+
+ error2 = xfs_fsverity_delete_metadata(ip,
+ merkle_tree_size, tree_blocksize);
+ if (error2)
+ xfs_alert(ip->i_mount,
+ "ino 0x%llx failed to clean up new fsverity metadata, err %d",
+ ip->i_ino, error2);
+ }
+
+ xfs_iflags_clear(ip, XFS_VERITY_CONSTRUCTION);
+ return error;
+}
+
+/* Retrieve a merkle tree block. */
+static int
+xfs_fsverity_read_merkle(
+ const struct fsverity_readmerkle *req,
+ struct fsverity_blockbuf *block)
+{
+ struct xfs_inode *ip = XFS_I(req->inode);
+ struct xfs_merkle_key name;
+ struct xfs_da_args args = {
+ .valuelen = block->size,
+ };
+ struct xfs_merkle_blob *mk, *new_mk;
+ int error;
+
+ /* Is the block already cached? */
+ mk = xfs_fsverity_cache_load(ip, block->pos);
+ if (mk)
+ goto out_hit;
+
+ new_mk = xfs_merkle_blob_alloc(ip, block->pos, block->size);
+ if (!new_mk)
+ return -ENOMEM;
+ args.value = new_mk->data;
+
+ /* Read the block in from disk and try to store it in the cache. */
+ xfs_fsverity_init_merkle_args(ip, &name, block->pos, &args);
+ error = xfs_attr_get(&args);
+ if (error)
+ goto out_new_mk;
+
+ mk = xfs_fsverity_cache_store(ip, new_mk);
+ if (IS_ERR(mk)) {
+ xfs_merkle_blob_rele(new_mk);
+ return PTR_ERR(mk);
+ }
+ if (mk != new_mk) {
+ /*
+ * We raced with another thread to populate the cache and lost.
+ * Free the new cache blob and continue with the existing one.
+ */
+ xfs_merkle_blob_rele(new_mk);
+ }
+
+out_hit:
+ block->kaddr = (void *)mk->data;
+ block->context = mk;
+ block->verified = test_bit(XFS_MERKLE_BLOB_VERIFIED_BIT, &mk->flags);
+
+ return 0;
+
+out_new_mk:
+ xfs_merkle_blob_rele(new_mk);
+ return error;
+}
+
+/* Write a merkle tree block. */
+static int
+xfs_fsverity_write_merkle(
+ const struct fsverity_writemerkle *req,
+ const void *buf,
+ u64 pos,
+ unsigned int size)
+{
+ struct inode *inode = req->inode;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_merkle_key name;
+ struct xfs_da_args args = {
+ .value = (void *)buf,
+ .valuelen = size,
+ };
+
+ xfs_fsverity_init_merkle_args(ip, &name, pos, &args);
+ return xfs_attr_set(&args, XFS_ATTRUPDATE_UPSERT, false);
+}
+
+/* Drop a cached merkle tree block.. */
+static void
+xfs_fsverity_drop_merkle(
+ struct fsverity_blockbuf *block)
+{
+ struct xfs_merkle_blob *mk = block->context;
+
+ if (block->verified)
+ set_bit(XFS_MERKLE_BLOB_VERIFIED_BIT, &mk->flags);
+ xfs_merkle_blob_rele(mk);
+ block->kaddr = NULL;
+ block->context = NULL;
+}
+
+const struct fsverity_operations xfs_fsverity_ops = {
+ .begin_enable_verity = xfs_fsverity_begin_enable,
+ .end_enable_verity = xfs_fsverity_end_enable,
+ .get_verity_descriptor = xfs_fsverity_get_descriptor,
+ .read_merkle_tree_block = xfs_fsverity_read_merkle,
+ .write_merkle_tree_block = xfs_fsverity_write_merkle,
+ .drop_merkle_tree_block = xfs_fsverity_drop_merkle,
+};
new file mode 100644
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Red Hat, Inc.
+ */
+#ifndef __XFS_FSVERITY_H__
+#define __XFS_FSVERITY_H__
+
+#ifdef CONFIG_FS_VERITY
+struct xfs_merkle_bkey {
+ /* inumber of the file */
+ xfs_ino_t ino;
+
+ /* the position of the block in the Merkle tree (in bytes) */
+ u64 pos;
+};
+
+void xfs_fsverity_destroy_inode(struct xfs_inode *ip);
+
+int xfs_fsverity_mount(struct xfs_mount *mp);
+void xfs_fsverity_unmount(struct xfs_mount *mp);
+int xfs_fsverity_growfs(struct xfs_mount *mp, xfs_agnumber_t old_agcount,
+ xfs_agnumber_t new_agcount);
+
+extern const struct fsverity_operations xfs_fsverity_ops;
+#else
+# define xfs_fsverity_destroy_inode(ip) ((void)0)
+# define xfs_fsverity_mount(mp) (0)
+# define xfs_fsverity_unmount(mp) ((void)0)
+# define xfs_fsverity_growfs(mp, o, n) (0)
+#endif /* CONFIG_FS_VERITY */
+
+#endif /* __XFS_FSVERITY_H__ */
@@ -391,6 +391,8 @@ static inline bool xfs_inode_needs_cow_around(struct xfs_inode *ip)
*/
#define XFS_IREMAPPING (1U << 15)
+#define XFS_VERITY_CONSTRUCTION (1U << 16) /* merkle tree construction */
+
/* All inode state flags related to inode reclaim. */
#define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \
XFS_IRECLAIM | \
@@ -38,6 +38,7 @@
#include "xfs_rtgroup.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_rtrefcount_btree.h"
+#include "xfs_fsverity.h"
#include "scrub/stats.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex);
@@ -881,6 +882,10 @@ xfs_mountfs(
if (error)
goto out_fail_wait;
+ error = xfs_fsverity_mount(mp);
+ if (error)
+ goto out_inodegc_shrinker;
+
/*
* Log's mount-time initialization. The first part of recovery can place
* some items on the AIL, to be handled when recovery is finished or
@@ -891,7 +896,7 @@ xfs_mountfs(
XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
if (error) {
xfs_warn(mp, "log mount failed");
- goto out_inodegc_shrinker;
+ goto out_fsverity;
}
/*
@@ -1103,6 +1108,8 @@ xfs_mountfs(
*/
xfs_unmount_flush_inodes(mp);
xfs_log_mount_cancel(mp);
+ out_fsverity:
+ xfs_fsverity_unmount(mp);
out_inodegc_shrinker:
shrinker_free(mp->m_inodegc_shrinker);
out_fail_wait:
@@ -1194,6 +1201,7 @@ xfs_unmountfs(
#if defined(DEBUG)
xfs_errortag_clearall(mp);
#endif
+ xfs_fsverity_unmount(mp);
shrinker_free(mp->m_inodegc_shrinker);
xfs_free_rtgroups(mp);
xfs_free_perag(mp);
@@ -30,6 +30,7 @@
#include "xfs_filestream.h"
#include "xfs_quota.h"
#include "xfs_sysfs.h"
+#include "xfs_fsverity.h"
#include "xfs_ondisk.h"
#include "xfs_rmap_item.h"
#include "xfs_refcount_item.h"
@@ -53,6 +54,7 @@
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/fsverity.h>
+#include <linux/iomap.h>
static const struct super_operations xfs_super_operations;
@@ -668,6 +670,8 @@ xfs_fs_destroy_inode(
ASSERT(!rwsem_is_locked(&inode->i_rwsem));
XFS_STATS_INC(ip->i_mount, vn_rele);
XFS_STATS_INC(ip->i_mount, vn_remove);
+ if (fsverity_active(inode))
+ xfs_fsverity_destroy_inode(ip);
fsverity_cleanup_inode(inode);
xfs_inode_mark_reclaimable(ip);
}
@@ -1524,6 +1528,9 @@ xfs_fs_fill_super(
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
sb->s_op = &xfs_super_operations;
+#ifdef CONFIG_FS_VERITY
+ sb->s_vop = &xfs_fsverity_ops;
+#endif
/*
* Delay mount work if the debug hook is set. This is debug
@@ -1769,10 +1776,25 @@ xfs_fs_fill_super(
xfs_warn(mp,
"EXPERIMENTAL parent pointer feature enabled. Use at your own risk!");
+ if (xfs_has_verity(mp))
+ xfs_warn(mp,
+ "EXPERIMENTAL fsverity feature in use. Use at your own risk!");
+
error = xfs_mountfs(mp);
if (error)
goto out_filestream_unmount;
+#ifdef CONFIG_FS_VERITY
+ /*
+ * Don't use a high priority workqueue like the other fsverity
+ * implementations because that will lead to conflicts with the xfs log
+ * workqueue.
+ */
+ error = iomap_init_fsverity(mp->m_super, 0, 0);
+ if (error)
+ goto out_unmount;
+#endif
+
root = igrab(VFS_I(mp->m_rootip));
if (!root) {
error = -ENOENT;
@@ -47,6 +47,7 @@
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "xfs_fsrefs.h"
+#include "xfs_fsverity.h"
static inline void
xfs_rmapbt_crack_agno_opdev(
@@ -102,6 +102,7 @@ struct xfs_extent_free_item;
struct xfs_rmap_intent;
struct xfs_refcount_intent;
struct xfs_fsrefs;
+struct xfs_merkle_bkey;
#define XFS_ATTR_FILTER_FLAGS \
{ XFS_ATTR_ROOT, "ROOT" }, \
@@ -5922,6 +5923,44 @@ TRACE_EVENT(xfs_growfs_check_rtgeom,
);
#endif /* CONFIG_XFS_RT */
+#ifdef CONFIG_FS_VERITY
+DECLARE_EVENT_CLASS(xfs_fsverity_cache_class,
+ TP_PROTO(struct xfs_mount *mp, const struct xfs_merkle_bkey *key,
+ unsigned long caller_ip),
+ TP_ARGS(mp, key, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(u64, pos)
+ __field(void *, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->ino = key->ino;
+ __entry->pos = key->pos;
+ __entry->caller_ip = (void *)caller_ip;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx pos 0x%llx caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->pos,
+ __entry->caller_ip)
+)
+
+#define DEFINE_XFS_FSVERITY_CACHE_EVENT(name) \
+DEFINE_EVENT(xfs_fsverity_cache_class, name, \
+ TP_PROTO(struct xfs_mount *mp, const struct xfs_merkle_bkey *key, \
+ unsigned long caller_ip), \
+ TP_ARGS(mp, key, caller_ip))
+DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_miss);
+DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_hit);
+DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_reuse);
+DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_store);
+DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_drop);
+DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_unmount);
+DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_reclaim);
+#endif /* CONFIG_XFS_VERITY */
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH