@@ -281,10 +281,10 @@ u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *target = NULL;
/*
- * If we are the delayed_rsv then push to the global rsv, otherwise dump
- * into the delayed rsv if it is not full.
+ * If we are a delayed block reserve then push to the global rsv,
+ * otherwise dump into the global delayed reserve if it is not full.
*/
- if (block_rsv == delayed_rsv)
+ if (block_rsv->type == BTRFS_BLOCK_RSV_DELOPS)
target = global_rsv;
else if (block_rsv != global_rsv && !btrfs_block_rsv_full(delayed_rsv))
target = delayed_rsv;
@@ -89,7 +89,9 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
+ struct btrfs_block_rsv *local_rsv = &trans->delayed_rsv;
u64 num_bytes;
+ u64 reserved_bytes;
num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, trans->delayed_ref_updates);
num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info,
@@ -98,9 +100,26 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
if (num_bytes == 0)
return;
+ /*
+ * Try to take num_bytes from the transaction's local delayed reserve.
+ * If not possible, try to take as much as is available. If the local
+ * reserve doesn't have enough reserved space, the delayed refs reserve
+ * will be refilled next time btrfs_delayed_refs_rsv_refill() is called
+ * by someone or, if a transaction commit is triggered before that, the
+ * global block reserve will be used. We want to minimize using the
+ * global block reserve for cases we can account for in advance, to
+ * avoid exhausting it and reaching -ENOSPC during a transaction commit.
+ */
+ spin_lock(&local_rsv->lock);
+ reserved_bytes = min(num_bytes, local_rsv->reserved);
+ local_rsv->reserved -= reserved_bytes;
+ local_rsv->full = (local_rsv->reserved >= local_rsv->size);
+ spin_unlock(&local_rsv->lock);
+
spin_lock(&delayed_rsv->lock);
delayed_rsv->size += num_bytes;
- delayed_rsv->full = false;
+ delayed_rsv->reserved += reserved_bytes;
+ delayed_rsv->full = (delayed_rsv->reserved >= delayed_rsv->size);
spin_unlock(&delayed_rsv->lock);
trans->delayed_ref_updates = 0;
trans->delayed_ref_csum_deletions = 0;
@@ -555,6 +555,69 @@ static inline bool need_reserve_reloc_root(struct btrfs_root *root)
return true;
}
+static int btrfs_reserve_trans_metadata(struct btrfs_fs_info *fs_info,
+ enum btrfs_reserve_flush_enum flush,
+ u64 num_bytes,
+ u64 *delayed_refs_bytes)
+{
+ struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+ struct btrfs_space_info *si = fs_info->trans_block_rsv.space_info;
+ u64 extra_delayed_refs_bytes = 0;
+ u64 bytes;
+ int ret;
+
+ /*
+ * If there's a gap between the size of the delayed refs reserve and
+ * its reserved space, then some tasks have added delayed refs or bumped
+ * its size otherwise (due to block group creation or removal, or block
+ * group item update). Also try to allocate that gap in order to prevent
+ * using (and possibly abusing) the global reserve when committing the
+ * transaction.
+ *
+ * The gap is only computed for BTRFS_RESERVE_FLUSH_ALL; for any other
+ * flush mode extra_delayed_refs_bytes stays 0. The size and reserved
+ * fields are read under the reserve's lock.
+ */
+ if (flush == BTRFS_RESERVE_FLUSH_ALL &&
+ !btrfs_block_rsv_full(delayed_refs_rsv)) {
+ spin_lock(&delayed_refs_rsv->lock);
+ if (delayed_refs_rsv->size > delayed_refs_rsv->reserved)
+ extra_delayed_refs_bytes = delayed_refs_rsv->size -
+ delayed_refs_rsv->reserved;
+ spin_unlock(&delayed_refs_rsv->lock);
+ }
+
+ bytes = num_bytes + *delayed_refs_bytes + extra_delayed_refs_bytes;
+
+ /*
+ * We want to reserve all the bytes we may need all at once, so we only
+ * do 1 enospc flushing cycle per transaction start.
+ *
+ * On success only the extra gap bytes are migrated to the delayed refs
+ * reserve here; this function does nothing further with num_bytes and
+ * *delayed_refs_bytes and simply returns 0.
+ */
+ ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
+ if (ret == 0) {
+ if (extra_delayed_refs_bytes > 0)
+ btrfs_migrate_to_delayed_refs_rsv(fs_info,
+ extra_delayed_refs_bytes);
+ return 0;
+ }
+
+ if (extra_delayed_refs_bytes > 0) {
+ bytes -= extra_delayed_refs_bytes; /* retry without the gap */
+ ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
+ if (ret == 0)
+ return 0;
+ }
+
+ /*
+ * If we are an emergency flush, which can steal from the global block
+ * reserve, then attempt to not reserve space for the delayed refs, as
+ * we will consume space for them from the global block reserve.
+ *
+ * Note that *delayed_refs_bytes is set to 0 before this last attempt,
+ * so the caller sees 0 whether or not the attempt succeeds.
+ */
+ if (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
+ bytes -= *delayed_refs_bytes;
+ *delayed_refs_bytes = 0;
+ ret = btrfs_reserve_metadata_bytes(fs_info, si, bytes, flush);
+ }
+
+ return ret;
+}
+
static struct btrfs_trans_handle *
start_transaction(struct btrfs_root *root, unsigned int num_items,
unsigned int type, enum btrfs_reserve_flush_enum flush,
@@ -562,10 +625,12 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
+ struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv;
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
u64 num_bytes = 0;
u64 qgroup_reserved = 0;
+ u64 delayed_refs_bytes = 0;
bool reloc_reserved = false;
bool do_chunk_alloc = false;
int ret;
@@ -588,9 +653,6 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
* the appropriate flushing if need be.
*/
if (num_items && root != fs_info->chunk_root) {
- struct btrfs_block_rsv *rsv = &fs_info->trans_block_rsv;
- u64 delayed_refs_bytes = 0;
-
qgroup_reserved = num_items * fs_info->nodesize;
/*
* Use prealloc for now, as there might be a currently running
@@ -602,20 +664,16 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
if (ret)
return ERR_PTR(ret);
+ num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
/*
- * We want to reserve all the bytes we may need all at once, so
- * we only do 1 enospc flushing cycle per transaction start. We
- * accomplish this by simply assuming we'll do num_items worth
- * of delayed refs updates in this trans handle, and refill that
- * amount for whatever is missing in the reserve.
+ * If we plan to insert/update/delete "num_items" from a btree,
+ * we will also generate delayed refs for extent buffers in the
+ * respective btree paths, so reserve space for the delayed refs
+ * that will be generated by the caller as it modifies btrees.
+ * Try to reserve them to avoid excessive use of the global
+ * block reserve.
*/
- num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
- if (flush == BTRFS_RESERVE_FLUSH_ALL &&
- !btrfs_block_rsv_full(delayed_refs_rsv)) {
- delayed_refs_bytes = btrfs_calc_delayed_ref_bytes(fs_info,
- num_items);
- num_bytes += delayed_refs_bytes;
- }
+ delayed_refs_bytes = btrfs_calc_delayed_ref_bytes(fs_info, num_items);
/*
* Do the reservation for the relocation root creation
@@ -625,18 +683,14 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
reloc_reserved = true;
}
- ret = btrfs_reserve_metadata_bytes(fs_info, rsv->space_info,
- num_bytes, flush);
+ ret = btrfs_reserve_trans_metadata(fs_info, flush, num_bytes,
+ &delayed_refs_bytes);
if (ret)
goto reserve_fail;
- if (delayed_refs_bytes) {
- btrfs_migrate_to_delayed_refs_rsv(fs_info,
- delayed_refs_bytes);
- num_bytes -= delayed_refs_bytes;
- }
- btrfs_block_rsv_add_bytes(rsv, num_bytes, true);
- if (rsv->space_info->force_alloc)
+ btrfs_block_rsv_add_bytes(trans_rsv, num_bytes, true);
+
+ if (trans_rsv->space_info->force_alloc)
do_chunk_alloc = true;
} else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL &&
!btrfs_block_rsv_full(delayed_refs_rsv)) {
@@ -696,6 +750,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
h->type = type;
INIT_LIST_HEAD(&h->new_bgs);
+ btrfs_init_metadata_block_rsv(fs_info, &h->delayed_rsv, BTRFS_BLOCK_RSV_DELOPS);
smp_mb();
if (cur_trans->state >= TRANS_STATE_COMMIT_START &&
@@ -708,8 +763,17 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
if (num_bytes) {
trace_btrfs_space_reservation(fs_info, "transaction",
h->transid, num_bytes, 1);
- h->block_rsv = &fs_info->trans_block_rsv;
+ h->block_rsv = trans_rsv;
h->bytes_reserved = num_bytes;
+ if (delayed_refs_bytes > 0) {
+ trace_btrfs_space_reservation(fs_info,
+ "local_delayed_refs_rsv",
+ h->transid,
+ delayed_refs_bytes, 1);
+ h->delayed_refs_bytes_reserved = delayed_refs_bytes;
+ btrfs_block_rsv_add_bytes(&h->delayed_rsv, delayed_refs_bytes, true);
+ delayed_refs_bytes = 0;
+ }
h->reloc_reserved = reloc_reserved;
}
@@ -765,8 +829,10 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
kmem_cache_free(btrfs_trans_handle_cachep, h);
alloc_fail:
if (num_bytes)
- btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
- num_bytes, NULL);
+ btrfs_block_rsv_release(fs_info, trans_rsv, num_bytes, NULL);
+ if (delayed_refs_bytes)
+ btrfs_space_info_free_bytes_may_use(fs_info, trans_rsv->space_info,
+ delayed_refs_bytes);
reserve_fail:
btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
return ERR_PTR(ret);
@@ -987,11 +1053,14 @@ static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans)
if (!trans->block_rsv) {
ASSERT(!trans->bytes_reserved);
+ ASSERT(!trans->delayed_refs_bytes_reserved);
return;
}
- if (!trans->bytes_reserved)
+ if (!trans->bytes_reserved) {
+ ASSERT(!trans->delayed_refs_bytes_reserved);
return;
+ }
ASSERT(trans->block_rsv == &fs_info->trans_block_rsv);
trace_btrfs_space_reservation(fs_info, "transaction",
@@ -999,6 +1068,16 @@ static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans)
btrfs_block_rsv_release(fs_info, trans->block_rsv,
trans->bytes_reserved, NULL);
trans->bytes_reserved = 0;
+
+ if (!trans->delayed_refs_bytes_reserved)
+ return;
+
+ trace_btrfs_space_reservation(fs_info, "local_delayed_refs_rsv",
+ trans->transid,
+ trans->delayed_refs_bytes_reserved, 0);
+ btrfs_block_rsv_release(fs_info, &trans->delayed_rsv,
+ trans->delayed_refs_bytes_reserved, NULL);
+ trans->delayed_refs_bytes_reserved = 0;
}
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
@@ -117,6 +117,7 @@ enum {
struct btrfs_trans_handle {
u64 transid;
u64 bytes_reserved;
+ u64 delayed_refs_bytes_reserved;
u64 chunk_bytes_reserved;
unsigned long delayed_ref_updates;
unsigned long delayed_ref_csum_deletions;
@@ -139,6 +140,7 @@ struct btrfs_trans_handle {
bool in_fsync;
struct btrfs_fs_info *fs_info;
struct list_head new_bgs;
+ struct btrfs_block_rsv delayed_rsv;
};
/*