@@ -137,8 +137,8 @@ struct btrfs_inode {
* of extent items we've reserved metadata for.
*/
spinlock_t accounting_lock;
+ atomic_t outstanding_extents;
int reserved_extents;
- int outstanding_extents;
/*
* ordered_data_close is set by truncate when a file that used
@@ -2073,19 +2073,8 @@ int btrfs_remove_block_group(struct btrf
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
-
-int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
- struct inode *inode, int num_items);
-int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
- struct inode *inode, int num_items);
-int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
- u64 bytes);
-void btrfs_free_reserved_data_space(struct btrfs_root *root,
- struct inode *inode, u64 bytes);
-void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
- u64 bytes);
-void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
- u64 bytes);
+int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
+void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int num_items, int *retries);
@@ -2093,6 +2082,10 @@ void btrfs_trans_release_metadata(struct
struct btrfs_root *root);
int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
+int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
+void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
+int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes);
+void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
void btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
@@ -336,21 +336,18 @@ static int merge_state(struct extent_io_
}
static int set_state_cb(struct extent_io_tree *tree,
- struct extent_state *state,
- unsigned long bits)
+ struct extent_state *state, int *bits)
{
if (tree->ops && tree->ops->set_bit_hook) {
return tree->ops->set_bit_hook(tree->mapping->host,
- state->start, state->end,
- state->state, bits);
+ state, bits);
}
return 0;
}
static void clear_state_cb(struct extent_io_tree *tree,
- struct extent_state *state,
- unsigned long bits)
+ struct extent_state *state, int *bits)
{
if (tree->ops && tree->ops->clear_bit_hook)
tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
@@ -368,9 +365,10 @@ static void clear_state_cb(struct extent
*/
static int insert_state(struct extent_io_tree *tree,
struct extent_state *state, u64 start, u64 end,
- int bits)
+ int *bits)
{
struct rb_node *node;
+ int bits_to_set = *bits & ~EXTENT_CTLBITS;
int ret;
if (end < start) {
@@ -385,9 +383,9 @@ static int insert_state(struct extent_io
if (ret)
return ret;
- if (bits & EXTENT_DIRTY)
+ if (bits_to_set & EXTENT_DIRTY)
tree->dirty_bytes += end - start + 1;
- state->state |= bits;
+ state->state |= bits_to_set;
node = tree_insert(&tree->state, end, &state->rb_node);
if (node) {
struct extent_state *found;
@@ -457,13 +455,13 @@ static int split_state(struct extent_io_
* struct is freed and removed from the tree
*/
static int clear_state_bit(struct extent_io_tree *tree,
- struct extent_state *state, int bits, int wake,
- int delete)
+ struct extent_state *state,
+ int *bits, int wake)
{
- int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING;
+ int bits_to_clear = *bits & ~EXTENT_CTLBITS;
int ret = state->state & bits_to_clear;
- if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
+ if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
WARN_ON(range > tree->dirty_bytes);
tree->dirty_bytes -= range;
@@ -472,9 +470,8 @@ static int clear_state_bit(struct extent
state->state &= ~bits_to_clear;
if (wake)
wake_up(&state->wq);
- if (delete || state->state == 0) {
+ if (state->state == 0) {
if (state->tree) {
- clear_state_cb(tree, state, state->state);
rb_erase(&state->rb_node, &tree->state);
state->tree = NULL;
free_extent_state(state);
@@ -515,6 +512,10 @@ int clear_extent_bit(struct extent_io_tr
int set = 0;
int clear = 0;
+ if (delete)
+ bits |= ~EXTENT_CTLBITS;
+ bits |= EXTENT_FIRST_DELALLOC;
+
if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
clear = 1;
again:
@@ -581,8 +582,7 @@ hit_next:
if (err)
goto out;
if (state->end <= end) {
- set |= clear_state_bit(tree, state, bits, wake,
- delete);
+ set |= clear_state_bit(tree, state, &bits, wake);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
@@ -603,7 +603,7 @@ hit_next:
if (wake)
wake_up(&state->wq);
- set |= clear_state_bit(tree, prealloc, bits, wake, delete);
+ set |= clear_state_bit(tree, prealloc, &bits, wake);
prealloc = NULL;
goto out;
@@ -614,7 +614,7 @@ hit_next:
else
next_node = NULL;
- set |= clear_state_bit(tree, state, bits, wake, delete);
+ set |= clear_state_bit(tree, state, &bits, wake);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
@@ -707,19 +707,19 @@ out:
static int set_state_bits(struct extent_io_tree *tree,
struct extent_state *state,
- int bits)
+ int *bits)
{
int ret;
+ int bits_to_set = *bits & ~EXTENT_CTLBITS;
ret = set_state_cb(tree, state, bits);
if (ret)
return ret;
-
- if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
+ if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
tree->dirty_bytes += range;
}
- state->state |= bits;
+ state->state |= bits_to_set;
return 0;
}
@@ -758,6 +758,7 @@ static int set_extent_bit(struct extent_
u64 last_start;
u64 last_end;
+ bits |= EXTENT_FIRST_DELALLOC;
again:
if (!prealloc && (mask & __GFP_WAIT)) {
prealloc = alloc_extent_state(mask);
@@ -779,7 +780,7 @@ again:
*/
node = tree_search(tree, start);
if (!node) {
- err = insert_state(tree, prealloc, start, end, bits);
+ err = insert_state(tree, prealloc, start, end, &bits);
prealloc = NULL;
BUG_ON(err == -EEXIST);
goto out;
@@ -803,7 +804,7 @@ hit_next:
goto out;
}
- err = set_state_bits(tree, state, bits);
+ err = set_state_bits(tree, state, &bits);
if (err)
goto out;
@@ -853,7 +854,7 @@ hit_next:
if (err)
goto out;
if (state->end <= end) {
- err = set_state_bits(tree, state, bits);
+ err = set_state_bits(tree, state, &bits);
if (err)
goto out;
cache_state(state, cached_state);
@@ -878,7 +879,7 @@ hit_next:
else
this_end = last_start - 1;
err = insert_state(tree, prealloc, start, this_end,
- bits);
+ &bits);
BUG_ON(err == -EEXIST);
if (err) {
prealloc = NULL;
@@ -904,7 +905,7 @@ hit_next:
err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST);
- err = set_state_bits(tree, prealloc, bits);
+ err = set_state_bits(tree, prealloc, &bits);
if (err) {
prealloc = NULL;
goto out;
@@ -967,8 +968,7 @@ int clear_extent_dirty(struct extent_io_
{
return clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, 0, 0,
- NULL, mask);
+ EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
}
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
@@ -1436,9 +1436,6 @@ int extent_clear_unlock_delalloc(struct
if (op & EXTENT_CLEAR_DELALLOC)
clear_bits |= EXTENT_DELALLOC;
- if (op & EXTENT_CLEAR_ACCOUNTING)
- clear_bits |= EXTENT_DO_ACCOUNTING;
-
clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
@@ -16,7 +16,9 @@
#define EXTENT_BOUNDARY (1 << 9)
#define EXTENT_NODATASUM (1 << 10)
#define EXTENT_DO_ACCOUNTING (1 << 11)
+#define EXTENT_FIRST_DELALLOC (1 << 12)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
+#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
/* flags for bio submission */
#define EXTENT_BIO_COMPRESSED 1
@@ -69,10 +71,10 @@ struct extent_io_ops {
struct extent_state *state);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
- int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
- unsigned long old, unsigned long bits);
+ int (*set_bit_hook)(struct inode *inode, struct extent_state *state,
+ int *bits);
int (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
- unsigned long bits);
+ int *bits);
int (*merge_extent_hook)(struct inode *inode,
struct extent_state *new,
struct extent_state *other);
@@ -63,12 +63,6 @@ static int find_next_key(struct btrfs_pa
struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
-static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_space_info *sinfo, u64 num_bytes);
-static int shrink_delalloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_space_info *sinfo, u64 to_reclaim);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -2772,189 +2766,14 @@ void btrfs_set_inode_space_info(struct b
BTRFS_BLOCK_GROUP_DATA);
}
-static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
-{
- u64 num_bytes;
- int level;
-
- level = BTRFS_MAX_LEVEL - 2;
- /*
- * NOTE: these calculations are absolutely the worst possible case.
- * This assumes that _every_ item we insert will require a new leaf, and
- * that the tree has grown to its maximum level size.
- */
-
- /*
- * for every item we insert we could insert both an extent item and a
- * extent ref item. Then for ever item we insert, we will need to cow
- * both the original leaf, plus the leaf to the left and right of it.
- *
- * Unless we are talking about the extent root, then we just want the
- * number of items * 2, since we just need the extent item plus its ref.
- */
- if (root == root->fs_info->extent_root)
- num_bytes = num_items * 2;
- else
- num_bytes = (num_items + (2 * num_items)) * 3;
-
- /*
- * num_bytes is total number of leaves we could need times the leaf
- * size, and then for every leaf we could end up cow'ing 2 nodes per
- * level, down to the leaf level.
- */
- num_bytes = (num_bytes * root->leafsize) +
- (num_bytes * (level * 2)) * root->nodesize;
-
- return num_bytes;
-}
-
-/*
- * Unreserve metadata space for delalloc. If we have less reserved credits than
- * we have extents, this function does nothing.
- */
-int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
- struct inode *inode, int num_items)
-{
- struct btrfs_fs_info *info = root->fs_info;
- struct btrfs_space_info *meta_sinfo;
- u64 num_bytes;
- u64 alloc_target;
- bool bug = false;
-
- /* get the space info for where the metadata will live */
- alloc_target = btrfs_get_alloc_profile(root, 0);
- meta_sinfo = __find_space_info(info, alloc_target);
-
- num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
- num_items);
-
- spin_lock(&meta_sinfo->lock);
- spin_lock(&BTRFS_I(inode)->accounting_lock);
- if (BTRFS_I(inode)->reserved_extents <=
- BTRFS_I(inode)->outstanding_extents) {
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
- spin_unlock(&meta_sinfo->lock);
- return 0;
- }
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
- BTRFS_I(inode)->reserved_extents -= num_items;
- BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
-
- if (meta_sinfo->bytes_delalloc < num_bytes) {
- bug = true;
- meta_sinfo->bytes_delalloc = 0;
- } else {
- meta_sinfo->bytes_delalloc -= num_bytes;
- }
- spin_unlock(&meta_sinfo->lock);
-
- BUG_ON(bug);
-
- return 0;
-}
-
-static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
-{
- u64 thresh;
-
- thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
- meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
- meta_sinfo->bytes_super + meta_sinfo->bytes_root +
- meta_sinfo->bytes_may_use;
-
- thresh = meta_sinfo->total_bytes - thresh;
- thresh *= 80;
- do_div(thresh, 100);
- if (thresh <= meta_sinfo->bytes_delalloc)
- meta_sinfo->force_delalloc = 1;
- else
- meta_sinfo->force_delalloc = 0;
-}
-
-/*
- * Reserve metadata space for delalloc.
- */
-int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
- struct inode *inode, int num_items)
-{
- struct btrfs_fs_info *info = root->fs_info;
- struct btrfs_space_info *meta_sinfo;
- u64 num_bytes;
- u64 used;
- u64 alloc_target;
- int flushed = 0;
- int force_delalloc;
-
- /* get the space info for where the metadata will live */
- alloc_target = btrfs_get_alloc_profile(root, 0);
- meta_sinfo = __find_space_info(info, alloc_target);
-
- num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
- num_items);
-again:
- spin_lock(&meta_sinfo->lock);
-
- force_delalloc = meta_sinfo->force_delalloc;
-
- if (unlikely(!meta_sinfo->bytes_root))
- meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
-
- if (!flushed)
- meta_sinfo->bytes_delalloc += num_bytes;
-
- used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
- meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
- meta_sinfo->bytes_super + meta_sinfo->bytes_root +
- meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
-
- if (used > meta_sinfo->total_bytes) {
- flushed++;
-
- if (flushed == 1) {
- if (maybe_allocate_chunk(NULL, root, meta_sinfo,
- num_bytes))
- goto again;
- flushed++;
- } else {
- spin_unlock(&meta_sinfo->lock);
- }
-
- if (flushed == 2) {
- filemap_flush(inode->i_mapping);
- goto again;
- } else if (flushed == 3) {
- shrink_delalloc(NULL, root, meta_sinfo, num_bytes);
- goto again;
- }
- spin_lock(&meta_sinfo->lock);
- meta_sinfo->bytes_delalloc -= num_bytes;
- spin_unlock(&meta_sinfo->lock);
- printk(KERN_ERR "enospc, has %d, reserved %d\n",
- BTRFS_I(inode)->outstanding_extents,
- BTRFS_I(inode)->reserved_extents);
- dump_space_info(meta_sinfo, 0, 0);
- return -ENOSPC;
- }
-
- BTRFS_I(inode)->reserved_extents += num_items;
- check_force_delalloc(meta_sinfo);
- spin_unlock(&meta_sinfo->lock);
-
- if (!flushed && force_delalloc)
- filemap_flush(inode->i_mapping);
-
- return 0;
-}
-
/*
* This will check the space that the inode allocates from to make sure we have
* enough space for bytes.
*/
-int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
- u64 bytes)
+int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
{
struct btrfs_space_info *data_sinfo;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
u64 used;
int ret = 0, committed = 0;
@@ -3039,12 +2858,13 @@ alloc:
}
/*
- * if there was an error for whatever reason after calling
- * btrfs_check_data_free_space, call this so we can cleanup the counters.
+ * called when we are clearing an delalloc extent from the
+ * inode's io_tree or there was an error for whatever reason
+ * after calling btrfs_check_data_free_space
*/
-void btrfs_free_reserved_data_space(struct btrfs_root *root,
- struct inode *inode, u64 bytes)
+void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_space_info *data_sinfo;
/* make sure bytes are sectorsize aligned */
@@ -3057,48 +2877,6 @@ void btrfs_free_reserved_data_space(stru
spin_unlock(&data_sinfo->lock);
}
-/* called when we are adding a delalloc extent to the inode's io_tree */
-void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
- u64 bytes)
-{
- struct btrfs_space_info *data_sinfo;
-
- /* get the space info for where this inode will be storing its data */
- data_sinfo = BTRFS_I(inode)->space_info;
-
- /* make sure we have enough space to handle the data first */
- spin_lock(&data_sinfo->lock);
- data_sinfo->bytes_delalloc += bytes;
-
- /*
- * we are adding a delalloc extent without calling
- * btrfs_check_data_free_space first. This happens on a weird
- * writepage condition, but shouldn't hurt our accounting
- */
- if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
- data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
- BTRFS_I(inode)->reserved_bytes = 0;
- } else {
- data_sinfo->bytes_may_use -= bytes;
- BTRFS_I(inode)->reserved_bytes -= bytes;
- }
-
- spin_unlock(&data_sinfo->lock);
-}
-
-/* called when we are clearing an delalloc extent from the inode's io_tree */
-void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
- u64 bytes)
-{
- struct btrfs_space_info *info;
-
- info = BTRFS_I(inode)->space_info;
-
- spin_lock(&info->lock);
- info->bytes_delalloc -= bytes;
- spin_unlock(&info->lock);
-}
-
static void force_metadata_allocation(struct btrfs_fs_info *info)
{
struct list_head *head = &info->space_info;
@@ -3223,18 +3001,19 @@ static int maybe_allocate_chunk(struct b
* shrink metadata reservation for delalloc
*/
static int shrink_delalloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_space_info *sinfo, u64 to_reclaim)
+ struct btrfs_root *root, u64 to_reclaim)
{
+ struct btrfs_block_rsv *block_rsv;
u64 reserved;
u64 max_reclaim;
u64 reclaimed = 0;
int pause = 1;
int ret;
- spin_lock(&sinfo->lock);
- reserved = sinfo->bytes_delalloc;
- spin_unlock(&sinfo->lock);
+ block_rsv = &root->fs_info->delalloc_block_rsv;
+ spin_lock(&block_rsv->lock);
+ reserved = block_rsv->reserved;
+ spin_unlock(&block_rsv->lock);
if (reserved == 0)
return 0;
@@ -3253,11 +3032,11 @@ static int shrink_delalloc(struct btrfs_
pause = 1;
}
- spin_lock(&sinfo->lock);
- if (reserved > sinfo->bytes_delalloc)
- reclaimed = reserved - sinfo->bytes_delalloc;
- reserved = sinfo->bytes_delalloc;
- spin_unlock(&sinfo->lock);
+ spin_lock(&block_rsv->lock);
+ if (reserved > block_rsv->reserved)
+ reclaimed = reserved - block_rsv->reserved;
+ reserved = block_rsv->reserved;
+ spin_unlock(&block_rsv->lock);
if (reserved == 0 || reclaimed >= max_reclaim)
break;
@@ -3286,7 +3065,7 @@ static int should_retry_reserve(struct b
if (trans && trans->transaction->in_commit)
return -ENOSPC;
- ret = shrink_delalloc(trans, root, space_info, num_bytes);
+ ret = shrink_delalloc(trans, root, num_bytes);
if (ret)
return ret;
@@ -3625,6 +3404,105 @@ int btrfs_snap_reserve_metadata(struct b
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
+static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
+{
+ return num_bytes >>= 3;
+}
+
+int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
+ u64 to_reserve;
+ int nr_extents;
+ int retries = 0;
+ int ret;
+
+ if (btrfs_transaction_in_commit(root->fs_info))
+ schedule_timeout(1);
+
+ num_bytes = ALIGN(num_bytes, root->sectorsize);
+again:
+ spin_lock(&BTRFS_I(inode)->accounting_lock);
+ nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
+ if (nr_extents > BTRFS_I(inode)->reserved_extents) {
+ nr_extents -= BTRFS_I(inode)->reserved_extents;
+ to_reserve = calc_trans_metadata_size(root, nr_extents);
+ } else {
+ nr_extents = 0;
+ to_reserve = 0;
+ }
+
+ to_reserve += calc_csum_metadata_size(inode, num_bytes);
+ ret = reserve_metadata_bytes(block_rsv, to_reserve);
+ if (ret) {
+ spin_unlock(&BTRFS_I(inode)->accounting_lock);
+ ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
+ &retries);
+ if (ret > 0)
+ goto again;
+ return ret;
+ }
+
+ BTRFS_I(inode)->reserved_extents += nr_extents;
+ atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+ spin_unlock(&BTRFS_I(inode)->accounting_lock);
+
+ if (block_rsv_add_bytes(block_rsv, to_reserve, 1) > 512 * 1024 * 1024)
+ shrink_delalloc(NULL, root, to_reserve);
+
+ return 0;
+}
+
+void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ u64 to_free;
+ int nr_extents;
+
+ num_bytes = ALIGN(num_bytes, root->sectorsize);
+ atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+
+ spin_lock(&BTRFS_I(inode)->accounting_lock);
+ nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
+ if (nr_extents < BTRFS_I(inode)->reserved_extents) {
+ nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents;
+ BTRFS_I(inode)->reserved_extents -= nr_extents;
+ } else {
+ nr_extents = 0;
+ }
+ spin_unlock(&BTRFS_I(inode)->accounting_lock);
+
+ to_free = calc_csum_metadata_size(inode, num_bytes);
+ if (nr_extents > 0)
+ to_free += calc_trans_metadata_size(root, nr_extents);
+
+ block_rsv_release_bytes(&root->fs_info->delalloc_block_rsv, to_free);
+}
+
+int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
+{
+ int ret;
+
+ ret = btrfs_check_data_free_space(inode, num_bytes);
+ if (ret)
+ return ret;
+
+ ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
+ if (ret) {
+ btrfs_free_reserved_data_space(inode, num_bytes);
+ return ret;
+ }
+
+ return 0;
+}
+
+void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
+{
+ btrfs_delalloc_release_metadata(inode, num_bytes);
+ btrfs_free_reserved_data_space(inode, num_bytes);
+}
+
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc)
@@ -852,13 +852,6 @@ static ssize_t btrfs_file_write(struct f
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
- /* do the reserve before the mutex lock in case we have to do some
- * flushing. We wouldn't deadlock, but this is more polite.
- */
- err = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
- if (err)
- goto out_nolock;
-
mutex_lock(&inode->i_mutex);
current->backing_dev_info = inode->i_mapping->backing_dev_info;
@@ -921,7 +914,7 @@ static ssize_t btrfs_file_write(struct f
WARN_ON(num_pages > nrptrs);
memset(pages, 0, sizeof(struct page *) * nrptrs);
- ret = btrfs_check_data_free_space(root, inode, write_bytes);
+ ret = btrfs_delalloc_reserve_space(inode, write_bytes);
if (ret)
goto out;
@@ -929,26 +922,20 @@ static ssize_t btrfs_file_write(struct f
pos, first_index, last_index,
write_bytes);
if (ret) {
- btrfs_free_reserved_data_space(root, inode,
- write_bytes);
+ btrfs_delalloc_release_space(inode, write_bytes);
goto out;
}
ret = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, buf);
- if (ret) {
- btrfs_free_reserved_data_space(root, inode,
- write_bytes);
- btrfs_drop_pages(pages, num_pages);
- goto out;
+ if (ret == 0) {
+ dirty_and_release_pages(NULL, root, file, pages,
+ num_pages, pos, write_bytes);
}
- ret = dirty_and_release_pages(NULL, root, file, pages,
- num_pages, pos, write_bytes);
btrfs_drop_pages(pages, num_pages);
if (ret) {
- btrfs_free_reserved_data_space(root, inode,
- write_bytes);
+ btrfs_delalloc_release_space(inode, write_bytes);
goto out;
}
@@ -975,9 +962,7 @@ out:
mutex_unlock(&inode->i_mutex);
if (ret)
err = ret;
- btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
-out_nolock:
kfree(pages);
if (pinned[0])
page_cache_release(pinned[0]);
@@ -251,6 +251,7 @@ static noinline int cow_file_range_inlin
inline_len, compressed_size,
compressed_pages);
BUG_ON(ret);
+ btrfs_delalloc_release_metadata(inode, end + 1 - start);
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
return 0;
}
@@ -413,6 +414,7 @@ again:
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
/* lets try to make an inline extent */
if (ret || total_in < (actual_end - start)) {
@@ -438,7 +440,6 @@ again:
start, end, NULL,
EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_ACCOUNTING |
EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
btrfs_end_transaction(trans, root);
@@ -733,6 +734,7 @@ static noinline int cow_file_range(struc
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
actual_end = min_t(u64, isize, end + 1);
@@ -752,7 +754,6 @@ static noinline int cow_file_range(struc
EXTENT_CLEAR_UNLOCK_PAGE |
EXTENT_CLEAR_UNLOCK |
EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_ACCOUNTING |
EXTENT_CLEAR_DIRTY |
EXTENT_SET_WRITEBACK |
EXTENT_END_WRITEBACK);
@@ -1225,15 +1226,13 @@ static int run_delalloc_range(struct ino
}
static int btrfs_split_extent_hook(struct inode *inode,
- struct extent_state *orig, u64 split)
+ struct extent_state *orig, u64 split)
{
+ /* not delalloc, ignore it */
if (!(orig->state & EXTENT_DELALLOC))
return 0;
- spin_lock(&BTRFS_I(inode)->accounting_lock);
- BTRFS_I(inode)->outstanding_extents++;
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
+ atomic_inc(&BTRFS_I(inode)->outstanding_extents);
return 0;
}
@@ -1251,10 +1250,7 @@ static int btrfs_merge_extent_hook(struc
if (!(other->state & EXTENT_DELALLOC))
return 0;
- spin_lock(&BTRFS_I(inode)->accounting_lock);
- BTRFS_I(inode)->outstanding_extents--;
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
+ atomic_dec(&BTRFS_I(inode)->outstanding_extents);
return 0;
}
@@ -1263,8 +1259,8 @@ static int btrfs_merge_extent_hook(struc
* bytes in this file, and to maintain the list of inodes that
* have pending delalloc work to be done.
*/
-static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
- unsigned long old, unsigned long bits)
+static int btrfs_set_bit_hook(struct inode *inode,
+ struct extent_state *state, int *bits)
{
/*
@@ -1272,17 +1268,18 @@ static int btrfs_set_bit_hook(struct ino
* but in this case, we are only testeing for the DELALLOC
* bit, which is only set or cleared with irqs on
*/
- if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
+ if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
+ u64 len = state->end + 1 - state->start;
- spin_lock(&BTRFS_I(inode)->accounting_lock);
- BTRFS_I(inode)->outstanding_extents++;
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
- btrfs_delalloc_reserve_space(root, inode, end - start + 1);
+ if (*bits & EXTENT_FIRST_DELALLOC)
+ *bits &= ~EXTENT_FIRST_DELALLOC;
+ else
+ atomic_inc(&BTRFS_I(inode)->outstanding_extents);
spin_lock(&root->fs_info->delalloc_lock);
- BTRFS_I(inode)->delalloc_bytes += end - start + 1;
- root->fs_info->delalloc_bytes += end - start + 1;
+ BTRFS_I(inode)->delalloc_bytes += len;
+ root->fs_info->delalloc_bytes += len;
if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
&root->fs_info->delalloc_inodes);
@@ -1296,45 +1293,32 @@ static int btrfs_set_bit_hook(struct ino
* extent_io.c clear_bit_hook, see set_bit_hook for why
*/
static int btrfs_clear_bit_hook(struct inode *inode,
- struct extent_state *state, unsigned long bits)
+ struct extent_state *state, int *bits)
{
/*
* set_bit and clear bit hooks normally require _irqsave/restore
* but in this case, we are only testeing for the DELALLOC
* bit, which is only set or cleared with irqs on
*/
- if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
+ if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
+ u64 len = state->end + 1 - state->start;
- if (bits & EXTENT_DO_ACCOUNTING) {
- spin_lock(&BTRFS_I(inode)->accounting_lock);
- WARN_ON(!BTRFS_I(inode)->outstanding_extents);
- BTRFS_I(inode)->outstanding_extents--;
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
- btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
- }
+ if (*bits & EXTENT_FIRST_DELALLOC)
+ *bits &= ~EXTENT_FIRST_DELALLOC;
+ else if (!(*bits & EXTENT_DO_ACCOUNTING))
+ atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+
+ if (*bits & EXTENT_DO_ACCOUNTING)
+ btrfs_delalloc_release_metadata(inode, len);
+
+ if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+ btrfs_free_reserved_data_space(inode, len);
spin_lock(&root->fs_info->delalloc_lock);
- if (state->end - state->start + 1 >
- root->fs_info->delalloc_bytes) {
- printk(KERN_INFO "btrfs warning: delalloc account "
- "%llu %llu\n",
- (unsigned long long)
- state->end - state->start + 1,
- (unsigned long long)
- root->fs_info->delalloc_bytes);
- btrfs_delalloc_free_space(root, inode, (u64)-1);
- root->fs_info->delalloc_bytes = 0;
- BTRFS_I(inode)->delalloc_bytes = 0;
- } else {
- btrfs_delalloc_free_space(root, inode,
- state->end -
- state->start + 1);
- root->fs_info->delalloc_bytes -= state->end -
- state->start + 1;
- BTRFS_I(inode)->delalloc_bytes -= state->end -
- state->start + 1;
- }
+ root->fs_info->delalloc_bytes -= len;
+ BTRFS_I(inode)->delalloc_bytes -= len;
+
if (BTRFS_I(inode)->delalloc_bytes == 0 &&
!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_del_init(&BTRFS_I(inode)->delalloc_inodes);
@@ -1519,6 +1503,7 @@ again:
goto again;
}
+ BUG();
btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
ClearPageChecked(page);
out:
@@ -1649,7 +1634,7 @@ static int insert_reserved_file_extent(s
static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
+ struct btrfs_trans_handle *trans = NULL;
struct btrfs_ordered_extent *ordered_extent = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
@@ -1667,9 +1652,10 @@ static int btrfs_finish_ordered_io(struc
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (!ret) {
trans = btrfs_join_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
- btrfs_end_transaction(trans, root);
}
goto out;
}
@@ -1679,6 +1665,8 @@ static int btrfs_finish_ordered_io(struc
0, &cached_state, GFP_NOFS);
trans = btrfs_join_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
compressed = 1;
@@ -1710,12 +1698,13 @@ static int btrfs_finish_ordered_io(struc
add_pending_csums(trans, inode, ordered_extent->file_offset,
&ordered_extent->list);
- /* this also removes the ordered extent from the tree */
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
- btrfs_end_transaction(trans, root);
out:
+ btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+ if (trans)
+ btrfs_end_transaction(trans, root);
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
@@ -3044,11 +3033,7 @@ static int btrfs_truncate_page(struct ad
if ((offset & (blocksize - 1)) == 0)
goto out;
- ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
- if (ret)
- goto out;
-
- ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
+ ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
if (ret)
goto out;
@@ -3056,8 +3041,7 @@ static int btrfs_truncate_page(struct ad
again:
page = grab_cache_page(mapping, index);
if (!page) {
- btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
- btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+ btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
goto out;
}
@@ -3120,8 +3104,7 @@ again:
out_unlock:
if (ret)
- btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
- btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+ btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
unlock_page(page);
page_cache_release(page);
out:
@@ -4703,6 +4686,7 @@ again:
}
flush_dcache_page(page);
} else if (create && PageUptodate(page)) {
+ WARN_ON(1);
if (!trans) {
kunmap(page);
free_extent_map(em);
@@ -4967,7 +4951,7 @@ int btrfs_page_mkwrite(struct vm_area_st
u64 page_start;
u64 page_end;
- ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
+ ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
if (ret) {
if (ret == -ENOMEM)
ret = VM_FAULT_OOM;
@@ -4976,13 +4960,6 @@ int btrfs_page_mkwrite(struct vm_area_st
goto out;
}
- ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
- if (ret) {
- btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
- ret = VM_FAULT_SIGBUS;
- goto out;
- }
-
ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
again:
lock_page(page);
@@ -4992,7 +4969,6 @@ again:
if ((page->mapping != inode->i_mapping) ||
(page_start >= size)) {
- btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
/* page got truncated out from underneath us */
goto out_unlock;
}
@@ -5033,7 +5009,6 @@ again:
unlock_extent_cached(io_tree, page_start, page_end,
&cached_state, GFP_NOFS);
ret = VM_FAULT_SIGBUS;
- btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
goto out_unlock;
}
ret = 0;
@@ -5060,10 +5035,10 @@ again:
unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
out_unlock:
- btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
if (!ret)
return VM_FAULT_LOCKED;
unlock_page(page);
+ btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
out:
return ret;
}
@@ -5208,7 +5183,7 @@ struct inode *btrfs_alloc_inode(struct s
ei->last_unlink_trans = 0;
spin_lock_init(&ei->accounting_lock);
- ei->outstanding_extents = 0;
+ atomic_set(&ei->outstanding_extents, 0);
ei->reserved_extents = 0;
ei->ordered_data_close = 0;
@@ -5236,6 +5211,8 @@ void btrfs_destroy_inode(struct inode *i
WARN_ON(!list_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages);
+ WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
+ WARN_ON(BTRFS_I(inode)->reserved_extents);
/*
* This can happen where we create an inode, but somebody else also
@@ -5795,8 +5772,7 @@ static long btrfs_fallocate(struct inode
goto out;
}
- ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
- alloc_end - alloc_start);
+ ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
if (ret)
goto out;
@@ -5862,8 +5838,7 @@ static long btrfs_fallocate(struct inode
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
&cached_state, GFP_NOFS);
- btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
- alloc_end - alloc_start);
+ btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
out:
mutex_unlock(&inode->i_mutex);
return ret;
@@ -586,19 +586,9 @@ static int btrfs_defrag_file(struct file
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
BTRFS_I(inode)->force_compress = 1;
- ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
- if (ret) {
- ret = -ENOSPC;
- break;
- }
-
- ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
- if (ret) {
- btrfs_free_reserved_data_space(root, inode,
- PAGE_CACHE_SIZE);
- ret = -ENOSPC;
- break;
- }
+ ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+ if (ret)
+ goto err_unlock;
again:
if (inode->i_size == 0 ||
i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
@@ -607,8 +597,10 @@ again:
}
page = grab_cache_page(inode->i_mapping, i);
- if (!page)
+ if (!page) {
+ ret = -ENOMEM;
goto err_reservations;
+ }
if (!PageUptodate(page)) {
btrfs_readpage(NULL, page);
@@ -616,6 +608,7 @@ again:
if (!PageUptodate(page)) {
unlock_page(page);
page_cache_release(page);
+ ret = -EIO;
goto err_reservations;
}
}
@@ -629,8 +622,7 @@ again:
wait_on_page_writeback(page);
if (PageDirty(page)) {
- btrfs_free_reserved_data_space(root, inode,
- PAGE_CACHE_SIZE);
+ btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
goto loop_unlock;
}
@@ -668,7 +660,6 @@ loop_unlock:
page_cache_release(page);
mutex_unlock(&inode->i_mutex);
- btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
i++;
}
@@ -698,9 +689,9 @@ loop_unlock:
return 0;
err_reservations:
+ btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+err_unlock:
mutex_unlock(&inode->i_mutex);
- btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
- btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
return ret;
}
@@ -312,13 +312,6 @@ static int __btrfs_remove_ordered_extent
tree->last = NULL;
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
- spin_lock(&BTRFS_I(inode)->accounting_lock);
- WARN_ON(!BTRFS_I(inode)->outstanding_extents);
- BTRFS_I(inode)->outstanding_extents--;
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
- btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
- inode, 1);
-
spin_lock(&root->fs_info->ordered_extent_lock);
list_del_init(&entry->root_extent_list);