diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -238,7 +238,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
else
btrfs_node_key(buf, &disk_key, 0);
- cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
+ cow = btrfs_alloc_free_block(trans, root, buf->len, NULL, 0,
new_root_objectid, &disk_key, level,
buf->start, 0);
if (IS_ERR(cow))
@@ -444,9 +444,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
} else
parent_start = 0;
- cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
- root->root_key.objectid, &disk_key,
- level, search_start, empty_size);
+ cow = btrfs_alloc_free_block(trans, root, buf->len, parent,
+ parent_start, root->root_key.objectid,
+ &disk_key, level, search_start,
+ empty_size);
if (IS_ERR(cow))
return PTR_ERR(cow);
@@ -467,6 +468,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
(unsigned long)btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);
+ WARN_ON(cow->cluster);
+ cow->cluster = buf->cluster;
+ buf->cluster = NULL;
+
update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (root->ref_cows)
@@ -2070,7 +2075,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
else
btrfs_node_key(lower, &lower_key, 0);
- c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
+ c = btrfs_alloc_free_block(trans, root, root->nodesize, NULL, 0,
root->root_key.objectid, &lower_key,
level, root->node->start, 0);
if (IS_ERR(c))
@@ -2170,6 +2175,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
{
struct extent_buffer *c;
struct extent_buffer *split;
+ struct extent_buffer *parent = NULL;
struct btrfs_disk_key disk_key;
int mid;
int ret;
@@ -2197,9 +2203,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
mid = (c_nritems + 1) / 2;
btrfs_node_key(c, &disk_key, mid);
- split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
- root->root_key.objectid,
- &disk_key, level, c->start, 0);
+ if (level + 1 < BTRFS_MAX_LEVEL)
+ parent = path->nodes[level + 1];
+
+ split = btrfs_alloc_free_block(trans, root, root->nodesize, parent, 0,
+ root->root_key.objectid,
+ &disk_key, level, c->start, 0);
if (IS_ERR(split))
return PTR_ERR(split);
@@ -2951,7 +2960,8 @@ again:
else
btrfs_item_key(l, &disk_key, mid);
- right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
+ right = btrfs_alloc_free_block(trans, root, root->leafsize,
+ path->nodes[1], 0,
root->root_key.objectid,
&disk_key, 0, l->start, 0);
if (IS_ERR(right))
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -789,14 +789,9 @@ struct btrfs_block_rsv {
*/
struct btrfs_free_cluster {
spinlock_t lock;
- spinlock_t refill_lock;
- struct rb_root root;
+ struct mutex refill_lock;
- /* largest extent in this cluster */
- u64 max_size;
-
- /* first extent starting offset */
- u64 window_start;
+ struct btrfs_free_space *info;
struct btrfs_block_group_cache *block_group;
/*
@@ -2156,7 +2151,9 @@ u64 btrfs_find_block_group(struct btrfs_root *root,
u64 search_start, u64 search_hint, int owner);
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
- u64 parent, u64 root_objectid,
+ struct extent_buffer *parent,
+ u64 parent_start,
+ u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
@@ -2175,12 +2172,37 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 root_objectid, u64 owner, u64 offset,
struct btrfs_key *ins);
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 num_bytes, u64 min_alloc_size,
- u64 empty_size, u64 hint_byte,
- u64 search_end, struct btrfs_key *ins,
- u64 data);
+int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 min_alloc_size,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end, struct btrfs_key *ins,
+ u64 data, struct btrfs_free_cluster *cluster);
+static inline int btrfs_reserve_extent_data(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 min_alloc_size,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end,
+ struct btrfs_key *ins)
+{
+ return __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
+ empty_size, hint_byte, search_end, ins,
+ 1, NULL);
+}
+
+static inline int btrfs_reserve_extent_mdata(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 min_alloc_size,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end,
+ struct btrfs_key *ins,
+ struct btrfs_free_cluster *cluster)
+{
+ return __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
+ empty_size, hint_byte, search_end, ins,
+ 0, cluster);
+}
+
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1171,7 +1171,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
*/
root->ref_cows = 0;
- leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
+ leaf = btrfs_alloc_free_block(trans, root, root->leafsize, NULL, 0,
BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
kfree(root);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4672,6 +4672,8 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *cache = NULL;
int ret;
+ btrfs_free_extent_cluster(buf);
+
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
parent, root->root_key.objectid,
@@ -4853,8 +4855,10 @@ enum btrfs_loop_type {
LOOP_FIND_IDEAL = 0,
LOOP_CACHING_NOWAIT = 1,
LOOP_CACHING_WAIT = 2,
- LOOP_ALLOC_CHUNK = 3,
- LOOP_NO_EMPTY_SIZE = 4,
+ LOOP_RECLAIM_CLUSTERS = 3,
+ LOOP_RECLAIM_ALL_CLUSTERS = 4,
+ LOOP_ALLOC_CHUNK = 5,
+ LOOP_NO_EMPTY_SIZE = 6,
};
/*
@@ -4870,7 +4874,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
u64 num_bytes, u64 empty_size,
u64 search_start, u64 search_end,
u64 hint_byte, struct btrfs_key *ins,
- u64 data)
+ u64 data,
+ struct btrfs_free_cluster *cluster)
{
int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -4912,9 +4917,12 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
allowed_chunk_alloc = 1;
if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
- last_ptr = &root->fs_info->meta_alloc_cluster;
+ if (cluster)
+ last_ptr = cluster;
+ else
+ last_ptr = &root->fs_info->meta_alloc_cluster;
if (!btrfs_test_opt(root, SSD))
- empty_cluster = 64 * 1024;
+ empty_cluster = 256 * 1024;
}
if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
@@ -4925,7 +4933,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
if (last_ptr) {
spin_lock(&last_ptr->lock);
if (last_ptr->block_group)
- hint_byte = last_ptr->window_start;
+ hint_byte = last_ptr->info->offset;
spin_unlock(&last_ptr->lock);
}
@@ -5043,6 +5051,13 @@ have_block_group:
if (unlikely(block_group->ro))
goto loop;
+
+ if (loop == LOOP_RECLAIM_CLUSTERS) {
+ btrfs_reclaim_extent_clusters(block_group,
+ empty_cluster * 2);
+ } else if (loop == LOOP_RECLAIM_ALL_CLUSTERS)
+ btrfs_reclaim_extent_clusters(block_group, 0);
+
spin_lock(&block_group->free_space_ctl->tree_lock);
if (cached &&
block_group->free_space_ctl->free_space <
@@ -5066,7 +5081,7 @@ have_block_group:
* the refill lock keeps out other
* people trying to start a new cluster
*/
- spin_lock(&last_ptr->refill_lock);
+ mutex_lock(&last_ptr->refill_lock);
if (last_ptr->block_group &&
(last_ptr->block_group->ro ||
!block_group_bits(last_ptr->block_group, data))) {
@@ -5078,7 +5093,7 @@ have_block_group:
num_bytes, search_start);
if (offset) {
/* we have a block, we're done */
- spin_unlock(&last_ptr->refill_lock);
+ mutex_unlock(&last_ptr->refill_lock);
goto checks;
}
@@ -5097,7 +5112,7 @@ have_block_group:
block_group = last_ptr->block_group;
btrfs_get_block_group(block_group);
spin_unlock(&last_ptr->lock);
- spin_unlock(&last_ptr->refill_lock);
+ mutex_unlock(&last_ptr->refill_lock);
last_ptr_loop = 1;
search_start = block_group->key.objectid;
@@ -5123,7 +5138,7 @@ refill_cluster:
/* allocate a cluster in this block group */
ret = btrfs_find_space_cluster(trans, root,
block_group, last_ptr,
- offset, num_bytes,
+ search_start, num_bytes,
empty_cluster + empty_size);
if (ret == 0) {
/*
@@ -5135,12 +5150,12 @@ refill_cluster:
search_start);
if (offset) {
/* we found one, proceed */
- spin_unlock(&last_ptr->refill_lock);
+ mutex_unlock(&last_ptr->refill_lock);
goto checks;
}
} else if (!cached && loop > LOOP_CACHING_NOWAIT
&& !failed_cluster_refill) {
- spin_unlock(&last_ptr->refill_lock);
+ mutex_unlock(&last_ptr->refill_lock);
failed_cluster_refill = true;
wait_block_group_cache_progress(block_group,
@@ -5155,7 +5170,7 @@ refill_cluster:
* to use, and go to the next block group
*/
btrfs_return_cluster_to_free_space(NULL, last_ptr);
- spin_unlock(&last_ptr->refill_lock);
+ mutex_unlock(&last_ptr->refill_lock);
goto loop;
}
@@ -5197,9 +5212,11 @@ checks:
ins->objectid = search_start;
ins->offset = num_bytes;
- if (offset < search_start)
+ if (offset < search_start) {
btrfs_add_free_space(block_group, offset,
search_start - offset);
+ offset = search_start;
+ }
BUG_ON(offset > search_start);
ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1,
@@ -5210,13 +5227,6 @@ checks:
}
/* we are all good, lets return */
- ins->objectid = search_start;
- ins->offset = num_bytes;
-
- if (offset < search_start)
- btrfs_add_free_space(block_group, offset,
- search_start - offset);
- BUG_ON(offset > search_start);
btrfs_put_block_group(block_group);
break;
loop:
@@ -5236,6 +5246,12 @@ loop:
* LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
* caching kthreads as we move along
* LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
+ * LOOP_RECLAIM_CLUSTERS, reclaim free space from some clusters; this
+ * way we may find enough contiguous space to
+ * fill the cluster, and then search the free
+ * space again.
+ * LOOP_RECLAIM_ALL_CLUSTERS, reclaim free space from all the clusters,
+ * and then search again.
* LOOP_ALLOC_CHUNK, force a chunk allocation and try again
* LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
* again
@@ -5362,12 +5378,12 @@ again:
up_read(&info->groups_sem);
}
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 num_bytes, u64 min_alloc_size,
- u64 empty_size, u64 hint_byte,
- u64 search_end, struct btrfs_key *ins,
- u64 data)
+int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 min_alloc_size,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end, struct btrfs_key *ins,
+ u64 data, struct btrfs_free_cluster *cluster)
{
int ret;
u64 search_start = 0;
@@ -5386,7 +5402,7 @@ again:
WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(trans, root, num_bytes, empty_size,
search_start, search_end, hint_byte,
- ins, data);
+ ins, data, cluster);
if (ret == -ENOSPC && num_bytes > min_alloc_size) {
num_bytes = num_bytes >> 1;
@@ -5741,13 +5757,15 @@ static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize)
*/
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
- u64 parent, u64 root_objectid,
+ struct extent_buffer *parent,
+ u64 parent_start, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size)
{
struct btrfs_key ins;
struct btrfs_block_rsv *block_rsv;
struct extent_buffer *buf;
+ struct btrfs_free_cluster *cluster = NULL;
u64 flags = 0;
int ret;
@@ -5756,8 +5774,19 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
if (IS_ERR(block_rsv))
return ERR_CAST(block_rsv);
- ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
- empty_size, hint, (u64)-1, &ins, 0);
+ /*
+ * We needn't worry about allocation failure here: if it fails,
+ * we simply fall back to the default metadata cluster in fs_info.
+ */
+ if (parent && !parent->cluster)
+ parent->cluster = btrfs_alloc_free_cluster();
+
+ if (parent)
+ cluster = parent->cluster;
+
+ ret = btrfs_reserve_extent_mdata(trans, root, blocksize, blocksize,
+ empty_size, hint, (u64)-1, &ins,
+ cluster);
if (ret) {
unuse_block_rsv(block_rsv, blocksize);
return ERR_PTR(ret);
@@ -5768,11 +5797,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
BUG_ON(IS_ERR(buf));
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
- if (parent == 0)
- parent = ins.objectid;
+ if (parent_start == 0)
+ parent_start = ins.objectid;
flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
} else
- BUG_ON(parent > 0);
+ BUG_ON(parent_start > 0);
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_delayed_extent_op *extent_op;
@@ -5788,7 +5817,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
extent_op->is_data = 0;
ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
- ins.offset, parent, root_objectid,
+ ins.offset, parent_start, root_objectid,
level, BTRFS_ADD_DELAYED_EXTENT,
extent_op);
BUG_ON(ret);
@@ -7231,18 +7260,18 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
/* make sure this block group isn't part of an allocation cluster */
cluster = &root->fs_info->data_alloc_cluster;
- spin_lock(&cluster->refill_lock);
+ mutex_lock(&cluster->refill_lock);
btrfs_return_cluster_to_free_space(block_group, cluster);
- spin_unlock(&cluster->refill_lock);
+ mutex_unlock(&cluster->refill_lock);
/*
* make sure this block group isn't part of a metadata
* allocation cluster
*/
cluster = &root->fs_info->meta_alloc_cluster;
- spin_lock(&cluster->refill_lock);
+ mutex_lock(&cluster->refill_lock);
btrfs_return_cluster_to_free_space(block_group, cluster);
- spin_unlock(&cluster->refill_lock);
+ mutex_unlock(&cluster->refill_lock);
path = btrfs_alloc_path();
if (!path) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -17,6 +17,7 @@
#include "compat.h"
#include "ctree.h"
#include "btrfs_inode.h"
+#include "free-space-cache.h"
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@@ -2988,6 +2989,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
spin_unlock_irqrestore(&leak_lock, flags);
#endif
atomic_set(&eb->refs, 1);
+ eb->cluster = NULL;
return eb;
}
@@ -3827,7 +3829,10 @@ out:
spin_unlock(&tree->buffer_lock);
/* at this point we can safely release the extent buffer */
- if (atomic_read(&eb->refs) == 0)
+ if (atomic_read(&eb->refs) == 0) {
+ btrfs_free_extent_cluster(eb);
call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
+ }
return ret;
}
+
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -146,6 +146,9 @@ struct extent_buffer {
* to unlock
*/
wait_queue_head_t read_lock_wq;
+
+ /* Used by the node to allocate space for its children */
+ struct btrfs_free_cluster *cluster;
};
static inline void extent_set_compress_type(unsigned long *bio_flags,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -31,9 +31,15 @@
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
static struct kmem_cache *btrfs_free_space_cachep;
+static struct kmem_cache *btrfs_free_cluster_cachep;
static int link_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
+static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_space *info,
+ bool update_stat);
+static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_space *info, bool update_stat);
int __init free_space_cache_init(void)
{
@@ -43,6 +49,14 @@ int __init free_space_cache_init(void)
if (!btrfs_free_space_cachep)
return -ENOMEM;
+ btrfs_free_cluster_cachep = kmem_cache_create("extent_clusters",
+ sizeof(struct btrfs_free_cluster), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+ if (!btrfs_free_cluster_cachep) {
+ kmem_cache_destroy(btrfs_free_space_cachep);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -50,6 +64,8 @@ void free_space_cache_exit(void)
{
if (btrfs_free_space_cachep)
kmem_cache_destroy(btrfs_free_space_cachep);
+ if (btrfs_free_cluster_cachep)
+ kmem_cache_destroy(btrfs_free_cluster_cachep);
}
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
@@ -397,11 +413,32 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
spin_lock(&ctl->tree_lock);
+ if (try_merge_free_space(ctl, e, true))
+ goto link;
+
+ ret = insert_into_bitmap(ctl, e, true);
+ if (ret < 0) {
+ spin_unlock(&ctl->tree_lock);
+ printk(KERN_ERR "Inserting into bitmap "
+ "failed, dumping\n");
+ kmem_cache_free(btrfs_free_space_cachep,
+ e);
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ goto free_cache;
+ } else if (ret) {
+ ret = 0;
+ goto next_entry;
+ }
+link:
ret = link_free_space(ctl, e);
spin_unlock(&ctl->tree_lock);
if (ret) {
printk(KERN_ERR "Duplicate entries in "
"free space cache, dumping\n");
+ kmem_cache_free(btrfs_free_space_cachep,
+ e);
kunmap(page);
unlock_page(page);
page_cache_release(page);
@@ -425,6 +462,9 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
if (ret) {
printk(KERN_ERR "Duplicate entries in "
"free space cache, dumping\n");
+ kfree(e->bitmap);
+ kmem_cache_free(
+ btrfs_free_space_cachep, e);
kunmap(page);
unlock_page(page);
page_cache_release(page);
@@ -432,7 +472,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
}
list_add_tail(&e->list, &bitmaps);
}
-
+next_entry:
num_entries--;
offset += sizeof(struct btrfs_free_space_entry);
if (offset + sizeof(struct btrfs_free_space_entry) >=
@@ -676,7 +716,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
while (node && !next_page) {
struct btrfs_free_space *e;
- e = rb_entry(node, struct btrfs_free_space, offset_index);
+ e = rb_entry(node, struct btrfs_free_space,
+ offset_index);
entries++;
entry->offset = cpu_to_le64(e->offset);
@@ -689,10 +730,39 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
entry->type = BTRFS_FREE_SPACE_EXTENT;
}
node = rb_next(node);
- if (!node && cluster) {
- node = rb_first(&cluster->root);
+ offset += sizeof(struct btrfs_free_space_entry);
+ if (offset + sizeof(struct btrfs_free_space_entry) >=
+ PAGE_CACHE_SIZE)
+ next_page = true;
+ entry++;
+ }
+
+ /*
+ * We want to write the extent in the cluster to our free space
+ * cache.
+ */
+ while (cluster && !next_page) {
+ struct btrfs_free_space *e;
+
+ e = cluster->info;
+ if (!e || !e->bytes)
+ break;
+
+ entries++;
+
+ entry->offset = cpu_to_le64(e->offset);
+ entry->bytes = cpu_to_le64(e->bytes);
+ entry->type = BTRFS_FREE_SPACE_EXTENT;
+
+ if (list_is_last(&cluster->block_group_list,
+ &block_group->cluster_list))
cluster = NULL;
- }
+ else
+ cluster = list_entry(
+ cluster->block_group_list.next,
+ struct btrfs_free_cluster,
+ block_group_list);
+
offset += sizeof(struct btrfs_free_space_entry);
if (offset + sizeof(struct btrfs_free_space_entry) >=
PAGE_CACHE_SIZE)
@@ -1125,19 +1195,30 @@ static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
ctl->free_space -= info->bytes;
}
+static int __link_free_space(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_space *info)
+{
+ int ret;
+
+ ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
+ &info->offset_index, (info->bitmap != NULL));
+ if (ret)
+ return ret;
+ ctl->free_extents++;
+ return 0;
+}
+
static int link_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
- int ret = 0;
+ int ret;
BUG_ON(!info->bitmap && !info->bytes);
- ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
- &info->offset_index, (info->bitmap != NULL));
+ ret = __link_free_space(ctl, info);
if (ret)
return ret;
ctl->free_space += info->bytes;
- ctl->free_extents++;
return ret;
}
@@ -1212,7 +1293,7 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info, u64 offset,
- u64 bytes)
+ u64 bytes, bool update_stat)
{
unsigned long start, count;
@@ -1223,7 +1304,8 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
bitmap_set(info->bitmap, start, count);
info->bytes += bytes;
- ctl->free_space += bytes;
+ if (update_stat)
+ ctl->free_space += bytes;
}
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
@@ -1394,7 +1476,7 @@ again:
static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info, u64 offset,
- u64 bytes)
+ u64 bytes, bool update_stat)
{
u64 bytes_to_set = 0;
u64 end;
@@ -1403,7 +1485,7 @@ static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
bytes_to_set = min(end - offset, bytes);
- bitmap_set_bits(ctl, info, offset, bytes_to_set);
+ bitmap_set_bits(ctl, info, offset, bytes_to_set, update_stat);
return bytes_to_set;
@@ -1451,10 +1533,9 @@ static struct btrfs_free_space_op free_space_op = {
};
static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *info)
+ struct btrfs_free_space *info, bool update_stat)
{
struct btrfs_free_space *bitmap_info;
- struct btrfs_block_group_cache *block_group = NULL;
int added = 0;
u64 bytes, offset, bytes_added;
int ret;
@@ -1465,49 +1546,7 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
if (!ctl->op->use_bitmap(ctl, info))
return 0;
- if (ctl->op == &free_space_op)
- block_group = ctl->private;
again:
- /*
- * Since we link bitmaps right into the cluster we need to see if we
- * have a cluster here, and if so and it has our bitmap we need to add
- * the free space to that bitmap.
- */
- if (block_group && !list_empty(&block_group->cluster_list)) {
- struct btrfs_free_cluster *cluster;
- struct rb_node *node;
- struct btrfs_free_space *entry;
-
- cluster = list_entry(block_group->cluster_list.next,
- struct btrfs_free_cluster,
- block_group_list);
- spin_lock(&cluster->lock);
- node = rb_first(&cluster->root);
- if (!node) {
- spin_unlock(&cluster->lock);
- goto no_cluster_bitmap;
- }
-
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
- if (!entry->bitmap) {
- spin_unlock(&cluster->lock);
- goto no_cluster_bitmap;
- }
-
- if (entry->offset == offset_to_bitmap(ctl, offset)) {
- bytes_added = add_bytes_to_bitmap(ctl, entry,
- offset, bytes);
- bytes -= bytes_added;
- offset += bytes_added;
- }
- spin_unlock(&cluster->lock);
- if (!bytes) {
- ret = 1;
- goto out;
- }
- }
-
-no_cluster_bitmap:
bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1, 0);
if (!bitmap_info) {
@@ -1515,7 +1554,8 @@ no_cluster_bitmap:
goto new_bitmap;
}
- bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
+ bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
+ update_stat);
bytes -= bytes_added;
offset += bytes_added;
added = 0;
@@ -1533,6 +1573,12 @@ new_bitmap:
info = NULL;
goto again;
} else {
+ if (current->flags & PF_MEMALLOC) {
+ printk(KERN_INFO "Skipping bitmap allocation "
+ "during memory reclaim\n");
+ return 0;
+ }
+
spin_unlock(&ctl->tree_lock);
/* no pre-allocated info, allocate a new one */
@@ -1635,7 +1681,7 @@ int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
* extent then we know we're going to have to allocate a new extent, so
* before we do that see if we need to drop this into a bitmap
*/
- ret = insert_into_bitmap(ctl, info);
+ ret = insert_into_bitmap(ctl, info, true);
if (ret < 0) {
goto out;
} else if (ret) {
@@ -1829,36 +1875,50 @@ __btrfs_return_cluster_to_free_space(
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry;
- struct rb_node *node;
+ int ret = 0;
spin_lock(&cluster->lock);
- if (cluster->block_group != block_group)
+ if (cluster->block_group != block_group) {
+ spin_unlock(&cluster->lock);
goto out;
+ }
cluster->block_group = NULL;
- cluster->window_start = 0;
+
+ entry = cluster->info;
+ cluster->info = NULL;
+
list_del_init(&cluster->block_group_list);
+ spin_unlock(&cluster->lock);
- node = rb_first(&cluster->root);
- while (node) {
- bool bitmap;
+ if (!entry)
+ goto out;
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
- node = rb_next(&entry->offset_index);
- rb_erase(&entry->offset_index, &cluster->root);
-
- bitmap = (entry->bitmap != NULL);
- if (!bitmap)
- try_merge_free_space(ctl, entry, false);
- tree_insert_offset(&ctl->free_space_offset,
- entry->offset, &entry->offset_index, bitmap);
+ if (!entry->bytes) {
+ kmem_cache_free(btrfs_free_space_cachep, entry);
+ goto out;
}
- cluster->root = RB_ROOT;
+ if (try_merge_free_space(ctl, entry, false))
+ goto link;
+
+ ret = insert_into_bitmap(ctl, entry, false);
+ if (ret < 0)
+ goto out;
+ else if (ret) {
+ ret = 0;
+ goto out;
+ }
+link:
+ ret = __link_free_space(ctl, entry);
+ if (ret) {
+ kmem_cache_free(btrfs_free_space_cachep, entry);
+ printk(KERN_ERR "Duplicate entries in free space cache, "
+ "dumping\n");
+ }
out:
- spin_unlock(&cluster->lock);
btrfs_put_block_group(block_group);
- return 0;
+ return ret;
}
void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
@@ -1889,29 +1949,58 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
spin_unlock(&ctl->tree_lock);
}
-void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
+static void __btrfs_reclaim_extent_clusters(
+ struct btrfs_block_group_cache *block_group,
+ u64 to_reclaim)
{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_cluster *cluster;
struct list_head *head;
+ u64 bytes;
+ bool reclaim_all = (to_reclaim == 0);
- spin_lock(&ctl->tree_lock);
while ((head = block_group->cluster_list.next) !=
&block_group->cluster_list) {
cluster = list_entry(head, struct btrfs_free_cluster,
block_group_list);
WARN_ON(cluster->block_group != block_group);
+
+ bytes = cluster->info->bytes;
__btrfs_return_cluster_to_free_space(block_group, cluster);
+
+ if (!reclaim_all) {
+ if (to_reclaim > bytes)
+ to_reclaim -= bytes;
+ else {
+ to_reclaim = 0;
+ break;
+ }
+ }
+
if (need_resched()) {
- spin_unlock(&ctl->tree_lock);
+ spin_unlock(&block_group->free_space_ctl->tree_lock);
cond_resched();
- spin_lock(&ctl->tree_lock);
+ spin_lock(&block_group->free_space_ctl->tree_lock);
}
}
+}
+
+void btrfs_reclaim_extent_clusters(struct btrfs_block_group_cache *block_group,
+ u64 to_reclaim)
+{
+ spin_lock(&block_group->free_space_ctl->tree_lock);
+ __btrfs_reclaim_extent_clusters(block_group, to_reclaim);
+ spin_unlock(&block_group->free_space_ctl->tree_lock);
+}
+
+void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
+{
+ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+
+ spin_lock(&ctl->tree_lock);
+ __btrfs_reclaim_extent_clusters(block_group, 0);
__btrfs_remove_free_space_cache_locked(ctl);
spin_unlock(&ctl->tree_lock);
-
}
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
@@ -1991,30 +2080,6 @@ int btrfs_return_cluster_to_free_space(
return ret;
}
-static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster,
- struct btrfs_free_space *entry,
- u64 bytes, u64 min_start)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- int err;
- u64 search_start = cluster->window_start;
- u64 search_bytes = bytes;
- u64 ret = 0;
-
- search_start = min_start;
- search_bytes = bytes;
-
- err = search_bitmap(ctl, entry, &search_start, &search_bytes);
- if (err)
- return 0;
-
- ret = search_start;
- __bitmap_clear_bits(ctl, entry, ret, bytes);
-
- return ret;
-}
-
/*
* given a cluster, try to allocate 'bytes' from it, returns 0
* if it couldn't find anything suitably large, or a logical disk offset
@@ -2025,56 +2090,26 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
u64 min_start)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *entry = NULL;
- struct rb_node *node;
u64 ret = 0;
spin_lock(&cluster->lock);
- if (bytes > cluster->max_size)
- goto out;
-
if (cluster->block_group != block_group)
goto out;
- node = rb_first(&cluster->root);
- if (!node)
+ /*
+ * If ->block_group is set, it means we have filled this cluster
+ * and ->info has been set as well, so we needn't check whether
+ * ->info is NULL here.
+ */
+ if (bytes > cluster->info->bytes)
goto out;
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
- while(1) {
- if (entry->bytes < bytes ||
- (!entry->bitmap && entry->offset < min_start)) {
- node = rb_next(&entry->offset_index);
- if (!node)
- break;
- entry = rb_entry(node, struct btrfs_free_space,
- offset_index);
- continue;
- }
-
- if (entry->bitmap) {
- ret = btrfs_alloc_from_bitmap(block_group,
- cluster, entry, bytes,
- min_start);
- if (ret == 0) {
- node = rb_next(&entry->offset_index);
- if (!node)
- break;
- entry = rb_entry(node, struct btrfs_free_space,
- offset_index);
- continue;
- }
- } else {
- ret = entry->offset;
-
- entry->offset += bytes;
- entry->bytes -= bytes;
- }
+ if (min_start >= cluster->info->offset + cluster->info->bytes)
+ goto out;
- if (entry->bytes == 0)
- rb_erase(&entry->offset_index, &cluster->root);
- break;
- }
+ ret = cluster->info->offset;
+ cluster->info->offset += bytes;
+ cluster->info->bytes -= bytes;
out:
spin_unlock(&cluster->lock);
@@ -2082,260 +2117,12 @@ out:
return 0;
spin_lock(&ctl->tree_lock);
-
ctl->free_space -= bytes;
- if (entry->bytes == 0) {
- ctl->free_extents--;
- if (entry->bitmap) {
- kfree(entry->bitmap);
- ctl->total_bitmaps--;
- ctl->op->recalc_thresholds(ctl);
- }
- kmem_cache_free(btrfs_free_space_cachep, entry);
- }
-
spin_unlock(&ctl->tree_lock);
return ret;
}
-static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_space *entry,
- struct btrfs_free_cluster *cluster,
- u64 offset, u64 bytes, u64 min_bytes)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- unsigned long next_zero;
- unsigned long i;
- unsigned long search_bits;
- unsigned long total_bits;
- unsigned long found_bits;
- unsigned long start = 0;
- unsigned long total_found = 0;
- int ret;
- bool found = false;
-
- i = offset_to_bit(entry->offset, block_group->sectorsize,
- max_t(u64, offset, entry->offset));
- search_bits = bytes_to_bits(bytes, block_group->sectorsize);
- total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
-
-again:
- found_bits = 0;
- for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
- i < BITS_PER_BITMAP;
- i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
- next_zero = find_next_zero_bit(entry->bitmap,
- BITS_PER_BITMAP, i);
- if (next_zero - i >= search_bits) {
- found_bits = next_zero - i;
- break;
- }
- i = next_zero;
- }
-
- if (!found_bits)
- return -ENOSPC;
-
- if (!found) {
- start = i;
- found = true;
- }
-
- total_found += found_bits;
-
- if (cluster->max_size < found_bits * block_group->sectorsize)
- cluster->max_size = found_bits * block_group->sectorsize;
-
- if (total_found < total_bits) {
- i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
- if (i - start > total_bits * 2) {
- total_found = 0;
- cluster->max_size = 0;
- found = false;
- }
- goto again;
- }
-
- cluster->window_start = start * block_group->sectorsize +
- entry->offset;
- rb_erase(&entry->offset_index, &ctl->free_space_offset);
- ret = tree_insert_offset(&cluster->root, entry->offset,
- &entry->offset_index, 1);
- BUG_ON(ret);
-
- return 0;
-}
-
-/*
- * This searches the block group for just extents to fill the cluster with.
- */
-static noinline int
-setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster,
- struct list_head *bitmaps, u64 offset, u64 bytes,
- u64 min_bytes)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *first = NULL;
- struct btrfs_free_space *entry = NULL;
- struct btrfs_free_space *prev = NULL;
- struct btrfs_free_space *last;
- struct rb_node *node;
- u64 window_start;
- u64 window_free;
- u64 max_extent;
- u64 max_gap = 128 * 1024;
-
- entry = tree_search_offset(ctl, offset, 0, 1);
- if (!entry)
- return -ENOSPC;
-
- /*
- * We don't want bitmaps, so just move along until we find a normal
- * extent entry.
- */
- while (entry->bitmap) {
- if (list_empty(&entry->list))
- list_add_tail(&entry->list, bitmaps);
- node = rb_next(&entry->offset_index);
- if (!node)
- return -ENOSPC;
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
- }
-
- window_start = entry->offset;
- window_free = entry->bytes;
- max_extent = entry->bytes;
- first = entry;
- last = entry;
- prev = entry;
-
- while (window_free <= min_bytes) {
- node = rb_next(&entry->offset_index);
- if (!node)
- return -ENOSPC;
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
-
- if (entry->bitmap) {
- if (list_empty(&entry->list))
- list_add_tail(&entry->list, bitmaps);
- continue;
- }
-
- /*
- * we haven't filled the empty size and the window is
- * very large. reset and try again
- */
- if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
- entry->offset - window_start > (min_bytes * 2)) {
- first = entry;
- window_start = entry->offset;
- window_free = entry->bytes;
- last = entry;
- max_extent = entry->bytes;
- } else {
- last = entry;
- window_free += entry->bytes;
- if (entry->bytes > max_extent)
- max_extent = entry->bytes;
- }
- prev = entry;
- }
-
- cluster->window_start = first->offset;
-
- node = &first->offset_index;
-
- /*
- * now we've found our entries, pull them out of the free space
- * cache and put them into the cluster rbtree
- */
- do {
- int ret;
-
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
- node = rb_next(&entry->offset_index);
- if (entry->bitmap)
- continue;
-
- rb_erase(&entry->offset_index, &ctl->free_space_offset);
- ret = tree_insert_offset(&cluster->root, entry->offset,
- &entry->offset_index, 0);
- BUG_ON(ret);
- } while (node && entry != last);
-
- cluster->max_size = max_extent;
-
- return 0;
-}
-
-/*
- * This specifically looks for bitmaps that may work in the cluster, we assume
- * that we have already failed to find extents that will work.
- */
-static noinline int
-setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
- struct btrfs_free_cluster *cluster,
- struct list_head *bitmaps, u64 offset, u64 bytes,
- u64 min_bytes)
-{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct btrfs_free_space *entry;
- struct rb_node *node;
- int ret = -ENOSPC;
-
- if (ctl->total_bitmaps == 0)
- return -ENOSPC;
-
- /*
- * First check our cached list of bitmaps and see if there is an entry
- * here that will work.
- */
- list_for_each_entry(entry, bitmaps, list) {
- if (entry->bytes < min_bytes)
- continue;
- ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
- bytes, min_bytes);
- if (!ret)
- return 0;
- }
-
- /*
- * If we do have entries on our list and we are here then we didn't find
- * anything, so go ahead and get the next entry after the last entry in
- * this list and start the search from there.
- */
- if (!list_empty(bitmaps)) {
- entry = list_entry(bitmaps->prev, struct btrfs_free_space,
- list);
- node = rb_next(&entry->offset_index);
- if (!node)
- return -ENOSPC;
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
- goto search;
- }
-
- entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
- if (!entry)
- return -ENOSPC;
-
-search:
- node = &entry->offset_index;
- do {
- entry = rb_entry(node, struct btrfs_free_space, offset_index);
- node = rb_next(&entry->offset_index);
- if (!entry->bitmap)
- continue;
- if (entry->bytes < min_bytes)
- continue;
- ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
- bytes, min_bytes);
- } while (ret && node);
-
- return ret;
-}
-
/*
* here we try to find a cluster of blocks in a block group. The goal
* is to find at least bytes free and up to empty_size + bytes free.
@@ -2351,11 +2138,12 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
u64 offset, u64 bytes, u64 empty_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
- struct list_head bitmaps;
- struct btrfs_free_space *entry, *tmp;
+ struct btrfs_free_space *entry, *info;
u64 min_bytes;
int ret;
+ BUG_ON(cluster->info);
+
/* for metadata, allow allocates with more holes */
if (btrfs_test_opt(root, SSD_SPREAD)) {
min_bytes = bytes + empty_size;
@@ -2369,9 +2157,16 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
min_bytes = max(bytes, (bytes + empty_size) >> 1);
else
min_bytes = max(bytes, (bytes + empty_size) >> 4);
+ min_bytes = max(min_bytes, (u64)256 * 1024);
} else
min_bytes = max(bytes, (bytes + empty_size) >> 2);
+ min_bytes = round_up(min_bytes, root->sectorsize);
+
+ info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
+ if (!info)
+ return -ENOMEM;
+
spin_lock(&ctl->tree_lock);
/*
@@ -2380,6 +2175,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
*/
if (ctl->free_space < min_bytes) {
spin_unlock(&ctl->tree_lock);
+ kmem_cache_free(btrfs_free_space_cachep, info);
return -ENOSPC;
}
@@ -2388,26 +2184,44 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
/* someone already found a cluster, hooray */
if (cluster->block_group) {
ret = 0;
+ kmem_cache_free(btrfs_free_space_cachep, info);
goto out;
}
- INIT_LIST_HEAD(&bitmaps);
- ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
- bytes, min_bytes);
- if (ret)
- ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
- offset, bytes, min_bytes);
+ bytes = min_bytes;
+ entry = find_free_space(ctl, &offset, &min_bytes);
+ if (!entry) {
+ ret = -ENOSPC;
+ kmem_cache_free(btrfs_free_space_cachep, info);
+ goto out;
+ }
- /* Clear our temporary list */
- list_for_each_entry_safe(entry, tmp, &bitmaps, list)
- list_del_init(&entry->list);
+ BUG_ON(min_bytes < bytes);
- if (!ret) {
- atomic_inc(&block_group->count);
- list_add_tail(&cluster->block_group_list,
- &block_group->cluster_list);
- cluster->block_group = block_group;
+ ret = 0;
+ if (entry->bitmap) {
+ __bitmap_clear_bits(ctl, entry, offset, bytes);
+ if (!entry->bytes)
+ free_bitmap(ctl, entry);
+ } else {
+ __unlink_free_space(ctl, entry);
+ entry->offset += bytes;
+ entry->bytes -= bytes;
+ if (!entry->bytes)
+ kmem_cache_free(btrfs_free_space_cachep, entry);
+ else
+ __link_free_space(ctl, entry);
}
+
+ info->offset = offset;
+ info->bytes = bytes;
+
+ cluster->info = info;
+
+ atomic_inc(&block_group->count);
+ list_add_tail(&cluster->block_group_list,
+ &block_group->cluster_list);
+ cluster->block_group = block_group;
out:
spin_unlock(&cluster->lock);
spin_unlock(&ctl->tree_lock);
@@ -2421,11 +2235,10 @@ out:
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
spin_lock_init(&cluster->lock);
- spin_lock_init(&cluster->refill_lock);
- cluster->root = RB_ROOT;
- cluster->max_size = 0;
+ mutex_init(&cluster->refill_lock);
INIT_LIST_HEAD(&cluster->block_group_list);
cluster->block_group = NULL;
+ cluster->info = NULL;
}
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
@@ -2665,3 +2478,27 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
iput(inode);
return ret;
}
+
+struct btrfs_free_cluster *btrfs_alloc_free_cluster(void)
+{
+ struct btrfs_free_cluster *cluster;
+
+ cluster = kmem_cache_zalloc(btrfs_free_cluster_cachep, GFP_NOFS);
+ if (!cluster)
+ return NULL;
+
+ btrfs_init_free_cluster(cluster);
+ return cluster;
+}
+
+void btrfs_release_free_cluster(struct btrfs_free_cluster *cluster)
+{
+ /* return the free space in the cluster to the space cache. */
+ mutex_lock(&cluster->refill_lock);
+ btrfs_return_cluster_to_free_space(NULL, cluster);
+ mutex_unlock(&cluster->refill_lock);
+
+ /* free the cluster. */
+ kmem_cache_free(btrfs_free_cluster_cachep, cluster);
+}
+
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -34,6 +34,7 @@ struct btrfs_free_space_ctl {
int extents_thresh;
int free_extents;
int total_bitmaps;
+ int total_clusters;
int unit;
u64 start;
struct btrfs_free_space_op *op;
@@ -113,4 +114,16 @@ int btrfs_return_cluster_to_free_space(
struct btrfs_free_cluster *cluster);
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen);
+void btrfs_reclaim_extent_clusters(struct btrfs_block_group_cache *block_group,
+ u64 to_reclaim);
+struct btrfs_free_cluster *btrfs_alloc_free_cluster(void);
+void btrfs_release_free_cluster(struct btrfs_free_cluster *cluster);
+static inline void btrfs_free_extent_cluster(struct extent_buffer *eb)
+{
+ if (!eb->cluster)
+ return;
+
+ btrfs_release_free_cluster(eb->cluster);
+ eb->cluster = NULL;
+}
#endif
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -457,6 +457,7 @@ again:
spin_unlock(&root->cache_lock);
spin_lock(&ctl->tree_lock);
+ WARN_ON(ctl->total_clusters);
prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -51,7 +51,6 @@
#include "tree-log.h"
#include "compression.h"
#include "locking.h"
-#include "free-space-cache.h"
#include "inode-map.h"
struct btrfs_iget_args {
@@ -623,11 +622,10 @@ retry:
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- ret = btrfs_reserve_extent(trans, root,
- async_extent->compressed_size,
- async_extent->compressed_size,
- 0, alloc_hint,
- (u64)-1, &ins, 1);
+ ret = btrfs_reserve_extent_data(trans, root,
+ async_extent->compressed_size,
+ async_extent->compressed_size,
+ 0, alloc_hint, (u64)-1, &ins);
btrfs_end_transaction(trans, root);
if (ret) {
@@ -828,9 +826,9 @@ static noinline int cow_file_range(struct inode *inode,
unsigned long op;
cur_alloc_size = disk_num_bytes;
- ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
- root->sectorsize, 0, alloc_hint,
- (u64)-1, &ins, 1);
+ ret = btrfs_reserve_extent_data(trans, root, cur_alloc_size,
+ root->sectorsize, 0, alloc_hint,
+ (u64)-1, &ins);
BUG_ON(ret);
em = alloc_extent_map();
@@ -5409,8 +5407,8 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
alloc_hint = get_extent_allocation_hint(inode, start, len);
- ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
- alloc_hint, (u64)-1, &ins, 1);
+ ret = btrfs_reserve_extent_data(trans, root, len, root->sectorsize, 0,
+ alloc_hint, (u64)-1, &ins);
if (ret) {
em = ERR_PTR(ret);
goto out;
@@ -7276,8 +7274,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
}
}
- ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
- 0, *alloc_hint, (u64)-1, &ins, 1);
+ ret = btrfs_reserve_extent_data(trans, root, num_bytes,
+ min_size, 0, *alloc_hint,
+ (u64)-1, &ins);
if (ret) {
if (own_trans)
btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -347,7 +347,7 @@ static noinline int create_subvol(struct btrfs_root *root,
if (IS_ERR(trans))
return PTR_ERR(trans);
- leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
+ leaf = btrfs_alloc_free_block(trans, root, root->leafsize, NULL,
0, objectid, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -25,6 +25,7 @@
#include "print-tree.h"
#include "compat.h"
#include "tree-log.h"
+#include "free-space-cache.h"
/* magic values for the inode_only field in btrfs_log_inode:
*
@@ -1763,6 +1764,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
ret = btrfs_free_reserved_extent(root,
bytenr, blocksize);
BUG_ON(ret);
+
+ btrfs_free_extent_cluster(next);
}
free_extent_buffer(next);
continue;
@@ -1832,6 +1835,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
path->nodes[*level]->start,
path->nodes[*level]->len);
BUG_ON(ret);
+
+ btrfs_free_extent_cluster(next);
}
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = NULL;
@@ -1900,6 +1905,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
ret = btrfs_free_reserved_extent(log, next->start,
next->len);
BUG_ON(ret);
+
+ btrfs_free_extent_cluster(next);
}
}
This patch introduces a free space cluster for each node in the b+tree, and
simplifies how the cluster is filled and how space is allocated from it:

- Allocate a free space cluster for each node.
- Allocate a free space extent (>= 256KB, by splitting a large extent or
  taking contiguous blocks from the bitmaps) from the block group cache and
  cache it in the cluster, instead of moving free space entries from the
  block group cache into the cluster directly. This way we only have to
  manage a single extent in the cluster.
- When allocating a tree block, we get the space simply by splitting the
  extent in the parent's cluster. This way we can allocate tree blocks
  concurrently and also store the child nodes/leaves in contiguous blocks.

Besides that, we modify the free space cache code to fit the above change.
When we write out the free space information, we also write out the extent
information held in the clusters. And when we load the free space
information, we first try to merge the free space fragments, because the
extents in the clusters are small and may merge into a large one.
(Before this patch, we built the free space tree from the cached
information directly. I think we needn't worry about compatibility: at
worst we may create lots of small extents that could be merged into a
large one, but that will not break the use of Btrfs.)

We ran some performance tests for this patch; it works well and improves
small file performance. Sequential write performance for small files:

  N_files   N_threads   File Size     Before        After
  10240     8           2KB(Inline)   2.2055MB/s    4.1779MB/s
  10240     8           4KB           1.001MB/s     1.1893MB/s

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
---
 fs/btrfs/ctree.c            |   28 ++-
 fs/btrfs/ctree.h            |   50 +++-
 fs/btrfs/disk-io.c          |    2 +-
 fs/btrfs/extent-tree.c      |  107 +++++---
 fs/btrfs/extent_io.c        |    7 +-
 fs/btrfs/extent_io.h        |    3 +
 fs/btrfs/free-space-cache.c |  667 ++++++++++++++++---------------------------
 fs/btrfs/free-space-cache.h |   13 +
 fs/btrfs/inode-map.c        |    1 +
 fs/btrfs/inode.c            |   25 +-
 fs/btrfs/ioctl.c            |    2 +-
 fs/btrfs/tree-log.c         |    7 +
 12 files changed, 419 insertions(+), 493 deletions(-)
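To make the allocation scheme easier to follow, below is a rough user-space
sketch of the per-node cluster idea described above. It is illustrative only:
the struct, helper names (refill_cluster, alloc_tree_block) and constants are
simplified stand-ins, not the btrfs_free_cluster code in the patch, and the
real code returns leftover space to the free space cache instead of dropping
it.

/*
 * Toy model: each b+tree node caches one contiguous extent (the cluster)
 * and its children are carved out of that extent front to back.
 */
#include <stdio.h>
#include <stdint.h>

#define CLUSTER_MIN_BYTES (256 * 1024)	/* minimum extent cached per cluster */
#define BLOCKSIZE 4096			/* tree block size */

struct cluster {			/* stands in for btrfs_free_cluster->info */
	uint64_t offset;
	uint64_t bytes;
};

/* Stand-in for the block group's free space: hand out space linearly. */
static uint64_t pool_next = 1ULL << 30;

/* Refill an empty cluster with one contiguous >= 256KB extent. */
static void refill_cluster(struct cluster *c)
{
	c->offset = pool_next;
	c->bytes = CLUSTER_MIN_BYTES;
	pool_next += CLUSTER_MIN_BYTES;
}

/* Allocate one tree block by splitting the front of the parent's cluster. */
static uint64_t alloc_tree_block(struct cluster *parent_cluster)
{
	uint64_t start;

	if (parent_cluster->bytes < BLOCKSIZE)
		refill_cluster(parent_cluster);	/* real code also keeps leftovers */

	start = parent_cluster->offset;
	parent_cluster->offset += BLOCKSIZE;
	parent_cluster->bytes -= BLOCKSIZE;
	return start;
}

int main(void)
{
	struct cluster node_cluster = { 0, 0 };
	int i;

	/* Children of the same node end up in one contiguous extent. */
	for (i = 0; i < 4; i++)
		printf("child %d at %llu\n", i,
		       (unsigned long long)alloc_tree_block(&node_cluster));
	return 0;
}

The point of keeping a single extent per cluster is visible here: an
allocation is just a pointer bump, so sibling blocks stay contiguous and no
rbtree of free space entries has to be maintained per cluster.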