@@ -1273,6 +1273,9 @@ struct btrfs_block_group_cache {
/* For delayed block group creation */
struct list_head new_bg_list;
+
+ /* For locking reference modifications */
+ struct extent_io_tree ref_lock;
};
/* delayed seq elem */
@@ -3319,6 +3322,14 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int __get_raid_index(u64 flags);
+int lock_ref(struct btrfs_fs_info *fs_info, u64 root_objectid, u64 bytenr,
+ u64 num_bytes, int for_cow,
+ struct btrfs_block_group_cache **block_group,
+ struct extent_state **cached_state);
+int unlock_ref(struct btrfs_fs_info *fs_info, u64 root_objectid, u64 bytenr,
+ u64 num_bytes, int for_cow,
+ struct btrfs_block_group_cache *block_group,
+ struct extent_state **cached_state);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@@ -680,6 +680,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
+ ref->for_cow = for_cow;
if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
@@ -739,6 +740,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
+ ref->for_cow = for_cow;
if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
@@ -52,6 +52,7 @@ struct btrfs_delayed_ref_node {
unsigned int action:8;
unsigned int type:8;
+ unsigned int for_cow:1;
/* is this node still in the rbtree? */
unsigned int is_head:1;
unsigned int in_tree:1;
@@ -672,6 +672,79 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
return cache;
}
+
+/* This is used to lock modifications to an extent's references. This only
+ * does something if the reference is for an fs tree.
+ *
+ * @fs_info: the fs_info for this filesystem.
+ * @root_objectid: the objectid of the root whose reference we are modifying.
+ * @bytenr: the bytenr of the extent whose reference we are modifying.
+ * @num_bytes: the number of bytes we are locking.
+ * @for_cow: if this operation is for cow then we don't need to lock.
+ * @block_group: the block group we looked up is stored here so that the
+ * unlock doesn't have to do another search.
+ * @cached_state: caches our location in the ref_lock tree so the unlock
+ * doesn't have to do a tree search.
+ *
+ * This can return -ENOMEM if we cannot allocate our extent state.
+ */
+int lock_ref(struct btrfs_fs_info *fs_info, u64 root_objectid, u64 bytenr,
+ u64 num_bytes, int for_cow,
+ struct btrfs_block_group_cache **block_group,
+ struct extent_state **cached_state)
+{
+ struct btrfs_block_group_cache *cache;
+ int ret;
+
+ if (!fs_info->quota_enabled || !need_ref_seq(for_cow, root_objectid))
+ return 0;
+
+ cache = btrfs_lookup_block_group(fs_info, bytenr);
+ ASSERT(cache);
+ ASSERT(cache->key.objectid <= bytenr &&
+ (cache->key.objectid + cache->key.offset >=
+ bytenr + num_bytes));
+ ret = lock_extent_bits(&cache->ref_lock, bytenr,
+ bytenr + num_bytes - 1, 0, cached_state);
+ if (!ret)
+ *block_group = cache;
+ else
+ btrfs_put_block_group(cache);
+ return ret;
+}
+
+/*
+ * Unlock the extent ref. This only does something if the reference is for an
+ * fs tree.
+ *
+ * @fs_info: the fs_info for this filesystem.
+ * @root_objectid: the objectid of the root whose reference we are modifying.
+ * @bytenr: the bytenr of the extent whose reference we are modifying.
+ * @num_bytes: the number of bytes we are unlocking.
+ * @for_cow: if this ref update is for cow we didn't take the lock.
+ * @block_group: the block_group we got from lock_ref.
+ * @cached_state: the cached extent state from lock_ref, so we don't have to
+ * do a tree search.
+ *
+ * This can return -ENOMEM if we fail to allocate an extent state.
+ */
+int unlock_ref(struct btrfs_fs_info *fs_info, u64 root_objectid, u64 bytenr,
+ u64 num_bytes, int for_cow,
+ struct btrfs_block_group_cache *block_group,
+ struct extent_state **cached_state)
+{
+ int ret;
+
+ if (!fs_info->quota_enabled || !need_ref_seq(for_cow, root_objectid))
+ return 0;
+
+ ret = unlock_extent_cached(&block_group->ref_lock, bytenr,
+ bytenr + num_bytes - 1, cached_state,
+ GFP_NOFS);
+ btrfs_put_block_group(block_group);
+ return ret;
+}
+
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
u64 flags)
{
@@ -2024,10 +2097,13 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
{
int ret = 0;
struct btrfs_delayed_data_ref *ref;
+ struct btrfs_block_group_cache *block_group = NULL;
+ struct extent_state *cached_state = NULL;
struct btrfs_key ins;
u64 parent = 0;
u64 ref_root = 0;
u64 flags = 0;
+ int err;
ins.objectid = node->bytenr;
ins.offset = node->num_bytes;
@@ -2041,6 +2117,10 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
else
ref_root = ref->root;
+ ret = lock_ref(root->fs_info, ref->root, node->bytenr, node->num_bytes,
+ node->for_cow, &block_group, &cached_state);
+ if (ret)
+ return ret;
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
if (extent_op)
flags |= extent_op->flags_to_set;
@@ -2063,7 +2143,10 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
} else {
BUG();
}
- return ret;
+ err = unlock_ref(root->fs_info, ref->root, node->bytenr,
+ node->num_bytes, node->for_cow, block_group,
+ &cached_state);
+ return ret ? ret : err;
}
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
@@ -2185,9 +2268,12 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
{
int ret = 0;
struct btrfs_delayed_tree_ref *ref;
+ struct btrfs_block_group_cache *block_group = NULL;
+ struct extent_state *cached_state = NULL;
struct btrfs_key ins;
u64 parent = 0;
u64 ref_root = 0;
+ int err;
bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
SKINNY_METADATA);
@@ -2208,6 +2294,10 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
ins.type = BTRFS_EXTENT_ITEM_KEY;
}
+ ret = lock_ref(root->fs_info, ref->root, node->bytenr, node->num_bytes,
+ node->for_cow, &block_group, &cached_state);
+ if (ret)
+ return ret;
BUG_ON(node->ref_mod != 1);
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
BUG_ON(!extent_op || !extent_op->update_flags);
@@ -2227,7 +2317,10 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
} else {
BUG();
}
- return ret;
+ err = unlock_ref(root->fs_info, ref->root, node->bytenr,
+ node->num_bytes, node->for_cow, block_group,
+ &cached_state);
+ return ret ? ret : err;
}
/* helper function to actually process a single delayed ref entry */
@@ -8490,7 +8583,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
cache->fs_info = info;
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
-
+ extent_io_tree_init(&cache->ref_lock,
+ info->btree_inode->i_mapping);
if (need_clear) {
/*
* When we mount with old space cache, we need to
@@ -8689,6 +8783,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
INIT_LIST_HEAD(&cache->new_bg_list);
+ extent_io_tree_init(&cache->ref_lock,
+ root->fs_info->btree_inode->i_mapping);
btrfs_init_free_space_ctl(cache);
qgroups need to have a consistent view of the references for a particular
extent record. Currently they do this through sequence numbers on delayed
refs, but this is no longer acceptable. So instead introduce
lock_ref/unlock_ref. This will provide the qgroup code with a consistent view
of the reference while it does its accounting calculations without interfering
with the delayed ref code. Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 fs/btrfs/ctree.h       |  11 +++++
 fs/btrfs/delayed-ref.c |   2 +
 fs/btrfs/delayed-ref.h |   1 +
 fs/btrfs/extent-tree.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 113 insertions(+), 3 deletions(-)
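
For reference, below is a minimal sketch (not part of the patch) of how a
caller such as the qgroup accounting code is expected to pair these helpers.
The function qgroup_account_extent_refs and the accounting placeholder in the
middle are hypothetical; only the lock/unlock discipline mirrors what
run_delayed_data_ref()/run_delayed_tree_ref() do above.

/*
 * Hypothetical caller illustrating the intended lock_ref()/unlock_ref()
 * pairing.  Everything except the two helpers is a placeholder.
 */
static int qgroup_account_extent_refs(struct btrfs_fs_info *fs_info,
				      u64 root_objectid, u64 bytenr,
				      u64 num_bytes, int for_cow)
{
	struct btrfs_block_group_cache *block_group = NULL;
	struct extent_state *cached_state = NULL;
	int ret, err;

	/* No-op (returns 0) unless quotas are on and this is an fs tree ref. */
	ret = lock_ref(fs_info, root_objectid, bytenr, num_bytes, for_cow,
		       &block_group, &cached_state);
	if (ret)
		return ret;

	/*
	 * ... do the qgroup accounting for this extent here, storing any
	 * failure in ret.  Delayed ref updates for this extent range are
	 * excluded until we call unlock_ref().
	 */

	err = unlock_ref(fs_info, root_objectid, bytenr, num_bytes, for_cow,
			 block_group, &cached_state);
	return ret ? ret : err;
}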