@@ -68,8 +68,8 @@ struct btrfs_inode {
/* node for the red-black tree that links inodes in subvolume root */
struct rb_node rb_node;
- /* the space_info for where this inode's data allocations are done */
- struct btrfs_space_info *space_info;
+ /* the profile for where this inode's data allocations are done */
+ struct btrfs_profile *profile;
/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
@@ -99,10 +99,19 @@ struct btrfs_inode {
*/
u64 delalloc_bytes;
+ /* protects reserved_total and reserved_from */
+ spinlock_t reserved_lock;
+
/* total number of bytes that may be used for this inode for
* delalloc
*/
- u64 reserved_bytes;
+ u64 reserved_total;
+
+ /* where did we reserve the bytes from? indices correspond to the
+ * profile
+ */
+ u64 reserved_from[MAX_PROFILE_ENTRIES];
/*
* the size of the file stored in the metadata on disk. data=ordered
@@ -728,7 +728,8 @@ struct btrfs_space_info {
u64 disk_used; /* total bytes used on disk */
u64 disk_total; /* total bytes on disk, takes mirrors into
account */
-
+ int speed; /* seek_speed of the devices backing this
+ space_info, used to classify them for profiles */
int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
@@ -743,18 +744,39 @@ struct btrfs_space_info {
atomic_t caching_threads;
};
+#define MAX_PROFILE_ENTRIES 16
+#define MAX_PROFILE_NAME 64
+
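+/*
+ * a profile is an ordered list of device speed classes, fastest
+ * first. allocations try the space_info of each speed in turn and
+ * fall back to the next (slower) entry on ENOSPC.
+ */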
+struct btrfs_profile {
+ u8 speed[MAX_PROFILE_ENTRIES];
+ int nentries;
+ struct list_head profile_list;
+ char name[MAX_PROFILE_NAME];
+ struct btrfs_space_info *data_sinfo[MAX_PROFILE_ENTRIES];
+ struct btrfs_space_info *meta_sinfo[MAX_PROFILE_ENTRIES];
+};
+
struct btrfs_block_rsv {
- u64 size;
- u64 reserved;
- u64 freed[2];
- struct btrfs_space_info *space_info;
- struct list_head list;
+ u64 size; /* target size of the reserve */
+ u64 reserved_total; /* # of bytes reserved in the space_info, i.e.
+ the number of bytes we may spend */
+ u64 freed_total[2]; /* only for durable block_rsvs; freed bytes,
+ indexed by [transid & 1] */
+ struct list_head list; /* element of fs_info.durable_block_rsv_list */
spinlock_t lock;
- atomic_t usage;
- unsigned int priority:8;
- unsigned int durable:1;
- unsigned int refill_used:1;
- unsigned int full:1;
+ atomic_t usage; /* refcount */
+ unsigned int priority:8; /* unused for now */
+ unsigned int durable:1; /* spans transactions */
+ unsigned int refill_used:1; /* refill the reserve from the
+ space_info when it runs low */
+
+ unsigned int full:1; /* set when reserved_total >= size, i.e. we
+ have a full reserve to spend from */
+ /* track from which speeds we allocated space. indices into these
+ arrays correspond to indices into the profile */
+ u64 reserved_from[MAX_PROFILE_ENTRIES];
+ u64 freed_from[2][MAX_PROFILE_ENTRIES];
+ struct btrfs_profile *profile;
};
/*
@@ -820,6 +842,7 @@ struct btrfs_block_group_cache {
u64 bytes_super;
u64 flags;
u64 sectorsize;
+ int speed;
int extents_thresh;
int free_extents;
int total_bitmaps;
@@ -895,6 +918,12 @@ struct btrfs_fs_info {
struct btrfs_block_rsv chunk_block_rsv;
struct btrfs_block_rsv empty_block_rsv;
+ struct btrfs_block_rsv log_block_rsv;
+
+ struct btrfs_profile default_data_profile;
+ struct btrfs_profile default_meta_profile;
+ struct btrfs_profile default_system_profile;
+ struct btrfs_profile default_log_profile;
/* list of block reservations that cross multiple transactions */
struct list_head durable_block_rsv_list;
@@ -1136,6 +1165,12 @@ struct btrfs_root {
char *name;
int in_sysfs;
+ /* profiles to use for allocations for this tree */
+ struct btrfs_profile *data_profile;
+ struct btrfs_profile *meta_profile;
+ struct btrfs_profile *system_profile;
+ struct btrfs_profile *log_profile;
+
/* the dirty list is only used by non-reference counted roots */
struct list_head dirty_list;
@@ -2085,6 +2120,8 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
}
/* extent-tree.c */
+int btrfs_init_profile(struct btrfs_fs_info *fs_info,
+ struct btrfs_profile *profile, int is_system);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
@@ -2132,7 +2169,15 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
u64 search_end, struct btrfs_key *ins,
- u64 data);
+ u64 data, struct btrfs_profile *profile,
+ int pix);
+int btrfs_reserve_data_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode,
+ u64 num_bytes, u64 min_alloc_size,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end, struct btrfs_key *ins,
+ u64 data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -2170,7 +2215,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
+void btrfs_set_inode_profile(struct btrfs_root *root, struct inode *inode);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
@@ -2189,7 +2234,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes);
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
-struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
+struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
+ struct btrfs_profile *profile);
void btrfs_free_block_rsv(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
@@ -945,7 +945,11 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
u32 stripesize, struct btrfs_root *root,
struct btrfs_fs_info *fs_info,
- u64 objectid)
+ u64 objectid,
+ struct btrfs_profile *data_profile,
+ struct btrfs_profile *meta_profile,
+ struct btrfs_profile *system_profile,
+ struct btrfs_profile *log_profile)
{
root->node = NULL;
root->commit_root = NULL;
@@ -968,6 +972,10 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->inode_tree = RB_ROOT;
root->block_rsv = NULL;
root->orphan_block_rsv = NULL;
+ root->data_profile = data_profile;
+ root->system_profile = system_profile;
+ root->meta_profile = meta_profile;
+ root->log_profile = log_profile;
INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->orphan_list);
@@ -1018,7 +1026,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
__setup_root(tree_root->nodesize, tree_root->leafsize,
tree_root->sectorsize, tree_root->stripesize,
- root, fs_info, objectid);
+ root, fs_info, objectid, tree_root->data_profile,
+ tree_root->meta_profile, tree_root->system_profile,
+ tree_root->log_profile);
+
ret = btrfs_find_last_root(tree_root, objectid,
&root->root_item, &root->root_key);
if (ret > 0)
@@ -1050,7 +1061,9 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
__setup_root(tree_root->nodesize, tree_root->leafsize,
tree_root->sectorsize, tree_root->stripesize,
- root, fs_info, BTRFS_TREE_LOG_OBJECTID);
+ root, fs_info, BTRFS_TREE_LOG_OBJECTID,
+ tree_root->log_profile, tree_root->log_profile,
+ tree_root->system_profile, tree_root->log_profile);
root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -1153,7 +1166,9 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
__setup_root(tree_root->nodesize, tree_root->leafsize,
tree_root->sectorsize, tree_root->stripesize,
- root, fs_info, location->objectid);
+ root, fs_info, location->objectid,
+ tree_root->data_profile, tree_root->meta_profile,
+ tree_root->system_profile, tree_root->log_profile);
path = btrfs_alloc_path();
BUG_ON(!path);
@@ -1656,6 +1671,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_init_block_rsv(&fs_info->trans_block_rsv);
btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
btrfs_init_block_rsv(&fs_info->empty_block_rsv);
+ btrfs_init_block_rsv(&fs_info->log_block_rsv);
INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
mutex_init(&fs_info->durable_block_rsv_mutex);
atomic_set(&fs_info->nr_async_submits, 0);
@@ -1732,8 +1748,34 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->transaction_blocked_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
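+ /*
+ * set up hard-coded default profiles. speeds are ordered fastest
+ * first; metadata and system chunks prefer faster devices (45)
+ * than data (35), and the log prefers the fastest class (75).
+ * these are placeholders until profiles can be read from disk
+ * (see the FIXME below).
+ */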
+ fs_info->default_data_profile.nentries = 2;
+ fs_info->default_data_profile.speed[0] = 35;
+ fs_info->default_data_profile.speed[1] = 30;
+ ret = btrfs_init_profile(fs_info, &fs_info->default_data_profile, 0);
+ BUG_ON(ret);
+ fs_info->default_meta_profile.nentries = 2;
+ fs_info->default_meta_profile.speed[0] = 45;
+ fs_info->default_meta_profile.speed[1] = 30;
+ ret = btrfs_init_profile(fs_info, &fs_info->default_meta_profile, 0);
+ BUG_ON(ret);
+ fs_info->default_system_profile.nentries = 2;
+ fs_info->default_system_profile.speed[0] = 45;
+ fs_info->default_system_profile.speed[1] = 30;
+ ret = btrfs_init_profile(fs_info, &fs_info->default_system_profile, 1);
+ BUG_ON(ret);
+ fs_info->default_log_profile.nentries = 3;
+ fs_info->default_log_profile.speed[0] = 75;
+ fs_info->default_log_profile.speed[1] = 45;
+ fs_info->default_log_profile.speed[2] = 30;
+ ret = btrfs_init_profile(fs_info, &fs_info->default_log_profile, 0);
+ BUG_ON(ret);
+
__setup_root(4096, 4096, 4096, 4096, tree_root,
- fs_info, BTRFS_ROOT_TREE_OBJECTID);
+ fs_info, BTRFS_ROOT_TREE_OBJECTID,
+ &fs_info->default_data_profile,
+ &fs_info->default_meta_profile,
+ &fs_info->default_system_profile,
+ &fs_info->default_log_profile);
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
if (!bh) {
@@ -1891,7 +1933,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
generation = btrfs_super_chunk_root_generation(disk_super);
__setup_root(nodesize, leafsize, sectorsize, stripesize,
- chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
+ chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID,
+ tree_root->data_profile, tree_root->meta_profile,
+ tree_root->system_profile, tree_root->log_profile);
chunk_root->node = read_tree_block(chunk_root,
btrfs_super_chunk_root(disk_super),
@@ -1968,6 +2012,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
goto fail_block_groups;
}
+ /* FIXME read profiles from disk */
+
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
"btrfs-cleaner");
if (IS_ERR(fs_info->cleaner_kthread))
@@ -2009,7 +2055,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
}
__setup_root(nodesize, leafsize, sectorsize, stripesize,
- log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
+ log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID,
+ tree_root->data_profile, tree_root->meta_profile,
+ tree_root->system_profile, tree_root->log_profile);
log_tree_root->node = read_tree_block(tree_root, bytenr,
blocksize,
@@ -2285,7 +2333,63 @@ static int write_dev_supers(struct btrfs_device *device,
return errors < i ? 0 : -1;
}
-int write_all_supers(struct btrfs_root *root, int max_mirrors)
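+/*
+ * completion callback for the empty barrier bios submitted by
+ * write_dev_flush() below
+ */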
+static void btrfs_end_empty_barrier(struct bio *bio, int err)
+{
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ }
+ if (bio->bi_private)
+ complete(bio->bi_private);
+ bio_put(bio);
+}
+
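+/*
+ * submit (wait == 0) or complete (wait == 1) an empty barrier bio that
+ * flushes a device's volatile write cache without writing any data
+ */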
+static int write_dev_flush(struct btrfs_device *device, int wait)
+{
+ struct bio *bio;
+ int ret = 0;
+
+ if (!device->barriers)
+ return 0;
+
+ if (wait) {
+ bio = device->flush_bio;
+ wait_for_completion(&device->flush_wait);
+
+ if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
+ printk("btrfs: disabling barriers on dev %s\n",
+ device->name);
+ device->barriers = 0;
+ }
+ if (!bio_flagged(bio, BIO_UPTODATE)) {
+ ret = -EIO;
+ }
+
+ /* drop the reference from the wait == 0 run */
+ bio_put(bio);
+
+ return ret;
+ }
+
+ /*
+ * one reference for us, and we leave it for the
+ * caller
+ */
+ bio = bio_alloc(GFP_NOFS, 0);
+ bio->bi_end_io = btrfs_end_empty_barrier;
+ bio->bi_bdev = device->bdev;
+ init_completion(&device->flush_wait);
+ bio->bi_private = &device->flush_wait;
+ device->flush_bio = bio;
+
+ bio_get(bio);
+ submit_bio(WRITE_BARRIER, bio);
+
+ return 0;
+}
+
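+/*
+ * with all_devices == 0 and a log profile present, super blocks are
+ * written only to the fastest log devices and all other devices are
+ * merely flushed; all_devices != 0 writes the super to every device
+ */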
+int write_all_supers(struct btrfs_root *root, int max_mirrors, int all_devices)
{
struct list_head *head;
struct btrfs_device *dev;
@@ -2296,6 +2400,34 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
int max_errors;
int total_errors = 0;
u64 flags;
+ int log_pix = MAX_PROFILE_ENTRIES;
+ int pix;
+ struct btrfs_profile *log_profile = root->log_profile;
+
+ mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+
+ /* determine the fastest speed class of the log profile for which
+ * a device is present; log_pix ends up as its profile index */
+ if (!all_devices && log_profile) {
+ /* FIXME cache this somewhere */
+ log_pix = log_profile->nentries;
+ head = &root->fs_info->fs_devices->devices;
+ list_for_each_entry(dev, head, dev_list) {
+ if (!dev->bdev)
+ continue;
+ if (!dev->in_fs_metadata || !dev->writeable)
+ continue;
+
+ for (pix = 0; pix < log_pix; ++pix) {
+ int speed = log_profile->speed[pix];
+ if (speed == dev->seek_speed) {
+ log_pix = pix;
+ break;
+ }
+ }
+ if (log_pix == 0)
+ break;
+ }
+ }
max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
do_barriers = !btrfs_test_opt(root, NOBARRIER);
@@ -2303,7 +2435,6 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
sb = &root->fs_info->super_for_commit;
dev_item = &sb->dev_item;
- mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
head = &root->fs_info->fs_devices->devices;
list_for_each_entry(dev, head, dev_list) {
if (!dev->bdev) {
@@ -2313,6 +2444,23 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
if (!dev->in_fs_metadata || !dev->writeable)
continue;
+ if (!all_devices && log_profile) {
+ /*
+ * only write the super to the fastest log devices,
+ * all other devices only get flushed
+ * FIXME: this is only a temporary solution. The correct
+ * solution would be to track which devices received
+ * log blocks and which devices received sync extents.
+ * write supers to the former, flush the latter
+ */
+ if (log_profile->speed[log_pix] != dev->seek_speed) {
+ /* device not in profile, only sync */
+ ret = write_dev_flush(dev, 0);
+ if (ret)
+ total_errors++;
+ continue;
+ }
+ }
btrfs_set_stack_device_generation(dev_item, 0);
btrfs_set_stack_device_type(dev_item, dev->type);
btrfs_set_stack_device_id(dev_item, dev->devid);
@@ -2344,6 +2492,15 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
if (!dev->in_fs_metadata || !dev->writeable)
continue;
+ if (!all_devices && log_profile) {
+ if (log_profile->speed[log_pix] != dev->seek_speed) {
+ /* device not in profile, only sync */
+ ret = write_dev_flush(dev, 1);
+ if (ret)
+ total_errors++;
+ continue;
+ }
+ }
ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors);
if (ret)
total_errors++;
@@ -2358,11 +2515,11 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
}
int write_ctree_super(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int max_mirrors)
+ struct btrfs_root *root, int max_mirrors, int all_devices)
{
int ret;
- ret = write_all_supers(root, max_mirrors);
+ ret = write_all_supers(root, max_mirrors, all_devices);
return ret;
}
@@ -2472,7 +2629,7 @@ int btrfs_commit_super(struct btrfs_root *root)
ret = btrfs_write_and_wait_transaction(NULL, root);
BUG_ON(ret);
- ret = write_ctree_super(NULL, root, 0);
+ ret = write_ctree_super(NULL, root, 0, 1);
return ret;
}
@@ -2707,7 +2864,7 @@ int btrfs_error_commit_super(struct btrfs_root *root)
/* cleanup FS via transaction */
btrfs_cleanup_transaction(root);
- ret = write_ctree_super(NULL, root, 0);
+ ret = write_ctree_super(NULL, root, 0, 1);
return ret;
}
@@ -49,7 +49,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
char *options);
int close_ctree(struct btrfs_root *root);
int write_ctree_super(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int max_mirrors);
+ struct btrfs_root *root, int max_mirrors,
+ int all_devices);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_commit_super(struct btrfs_root *root);
int btrfs_error_commit_super(struct btrfs_root *root);
@@ -59,7 +59,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
int level, struct btrfs_key *ins);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
- u64 flags, int force);
+ u64 flags, int force, struct btrfs_profile *profile,
+ int pix, int in_logtree);
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
@@ -541,7 +542,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
}
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
- u64 flags)
+ u64 flags, int speed)
{
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
@@ -551,7 +552,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
- if (found->flags & flags) {
+ if (found->flags & flags && found->speed == speed) {
rcu_read_unlock();
return found;
}
@@ -2975,7 +2976,7 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
- struct btrfs_space_info **space_info)
+ int speed, struct btrfs_space_info **space_info)
{
struct btrfs_space_info *found;
int i;
@@ -2987,7 +2988,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
else
factor = 1;
- found = __find_space_info(info, flags);
+ found = __find_space_info(info, flags, speed);
if (found) {
spin_lock(&found->lock);
found->total_bytes += total_bytes;
@@ -3020,12 +3021,53 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_may_use = 0;
found->full = 0;
found->force_alloc = 0;
+ found->speed = speed;
*space_info = found;
list_add_rcu(&found->list, &info->space_info);
atomic_set(&found->caching_threads, 0);
return 0;
}
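+
+/*
+ * look up (or create) a metadata space_info and, unless is_system is
+ * set, a data space_info for each speed class of the profile, and
+ * cache the pointers in the profile's sinfo arrays
+ */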
+int btrfs_init_profile(struct btrfs_fs_info *fs_info,
+ struct btrfs_profile *profile, int is_system)
+{
+ int pix;
+ int ret;
+ u64 flags = BTRFS_BLOCK_GROUP_METADATA;
+
+ if (is_system)
+ flags = BTRFS_BLOCK_GROUP_SYSTEM;
+
+ for (pix = 0; pix < profile->nentries; ++pix) {
+ struct btrfs_space_info *sinfo;
+ sinfo = __find_space_info(fs_info, flags, profile->speed[pix]);
+ if (!sinfo) {
+ ret = update_space_info(fs_info, flags, 0, 0,
+ profile->speed[pix], &sinfo);
+ if (ret)
+ return ret;
+ }
+ BUG_ON(!sinfo);
+ profile->meta_sinfo[pix] = sinfo;
+
+ if (is_system)
+ continue;
+
+ sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA,
+ profile->speed[pix]);
+ if (!sinfo) {
+ ret = update_space_info(fs_info,
+ BTRFS_BLOCK_GROUP_DATA, 0,
+ 0, profile->speed[pix], &sinfo);
+ if (ret)
+ return ret;
+ }
+ BUG_ON(!sinfo);
+ profile->data_sinfo[pix] = sinfo;
+ }
+ return 0;
+}
+
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
@@ -3104,10 +3146,9 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
return get_alloc_profile(root, flags);
}
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
+void btrfs_set_inode_profile(struct btrfs_root *root, struct inode *inode)
{
- BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
- BTRFS_BLOCK_GROUP_DATA);
+ BTRFS_I(inode)->profile = root->data_profile;
}
/*
@@ -3119,7 +3160,11 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
struct btrfs_space_info *data_sinfo;
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 used;
+ u64 to_reserve;
int ret = 0, committed = 0, alloc_chunk = 1;
+ int pix = 0;
+ u64 from[MAX_PROFILE_ENTRIES] = {0};
+ struct btrfs_trans_handle *trans;
/* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
@@ -3129,20 +3174,18 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
committed = 1;
}
- data_sinfo = BTRFS_I(inode)->space_info;
- if (!data_sinfo)
- goto alloc;
-
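+ /*
+ * try the data space_info of each speed class in the inode's
+ * profile in turn. on shortage, try to allocate a chunk, take
+ * what is available and move on to the next (slower) class; as
+ * a last resort, commit the transaction and retry from the
+ * fastest class.
+ */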
again:
+ data_sinfo = BTRFS_I(inode)->profile->data_sinfo[pix];
+ BUG_ON(!data_sinfo);
+
/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
data_sinfo->bytes_may_use;
+ to_reserve = bytes;
if (used + bytes > data_sinfo->total_bytes) {
- struct btrfs_trans_handle *trans;
-
/*
* if we don't have enough free bytes in this space then we need
* to alloc a new chunk.
@@ -3152,42 +3195,37 @@ again:
data_sinfo->force_alloc = 1;
spin_unlock(&data_sinfo->lock);
-alloc:
alloc_target = btrfs_get_alloc_profile(root, 1);
trans = btrfs_join_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
-
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
bytes + 2 * 1024 * 1024,
- alloc_target, 0);
+ alloc_target, 0,
+ BTRFS_I(inode)->profile, pix, 0);
btrfs_end_transaction(trans, root);
- if (ret < 0) {
- if (ret != -ENOSPC)
- return ret;
- else
- goto commit_trans;
- }
- if (!data_sinfo) {
- btrfs_set_inode_space_info(root, inode);
- data_sinfo = BTRFS_I(inode)->space_info;
+ if (ret < 0 && ret != -ENOSPC)
+ return ret;
+
+ if (!ret)
+ goto again;
+
+ if (pix + 1 < BTRFS_I(inode)->profile->nentries) {
+ ++pix;
+ goto again;
}
- goto again;
+ spin_lock(&data_sinfo->lock);
}
- spin_unlock(&data_sinfo->lock);
- /* commit the current transaction and try again */
-commit_trans:
- if (!committed && !root->fs_info->open_ioctl_trans) {
- committed = 1;
- trans = btrfs_join_transaction(root, 1);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- ret = btrfs_commit_transaction(trans, root);
- if (ret)
- return ret;
- goto again;
+ /* reserve what we can get, taking the rest from the other
+ * space_infos if possible
+ */
+ if (used < data_sinfo->total_bytes) {
+ to_reserve = data_sinfo->total_bytes - used;
+ from[pix] += to_reserve;
+ } else {
+ to_reserve = 0;
}
#if 0 /* I hope we never need this code again, just in case */
@@ -3202,12 +3240,60 @@ commit_trans:
(unsigned long long)data_sinfo->bytes_may_use,
(unsigned long long)data_sinfo->total_bytes);
#endif
- return -ENOSPC;
}
- data_sinfo->bytes_may_use += bytes;
- BTRFS_I(inode)->reserved_bytes += bytes;
+
+ data_sinfo->bytes_may_use += to_reserve;
+
spin_unlock(&data_sinfo->lock);
+ if (to_reserve) {
+ spin_lock(&BTRFS_I(inode)->reserved_lock);
+ BTRFS_I(inode)->reserved_total += to_reserve;
+ BTRFS_I(inode)->reserved_from[pix] += to_reserve;
+ spin_unlock(&BTRFS_I(inode)->reserved_lock);
+
+ bytes -= to_reserve;
+ }
+
+ if (bytes && pix + 1 < BTRFS_I(inode)->profile->nentries) {
+ ++pix;
+ goto again;
+ }
+
+ /* commit the current transaction and try again */
+ if (bytes && !committed && !root->fs_info->open_ioctl_trans) {
+ committed = 1;
+ trans = btrfs_join_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ return ret;
+ pix = 0;
+ goto again;
+ }
+
+ if (bytes) {
+ /* we didn't succeed in reserving all requested space, so free
+ * what we already reserved
+ */
+ for (pix = 0; pix < BTRFS_I(inode)->profile->nentries; ++pix) {
+ data_sinfo = __find_space_info(root->fs_info,
+ BTRFS_BLOCK_GROUP_DATA,
+ BTRFS_I(inode)->profile->speed[pix]);
+
+ spin_lock(&BTRFS_I(inode)->reserved_lock);
+ BTRFS_I(inode)->reserved_total -= from[pix];
+ BTRFS_I(inode)->reserved_from[pix] -= from[pix];
+ spin_unlock(&BTRFS_I(inode)->reserved_lock);
+
+ spin_lock(&data_sinfo->lock);
+ data_sinfo->bytes_may_use -= from[pix];
+ spin_unlock(&data_sinfo->lock);
+ }
+ return -ENOSPC;
+ }
+
return 0;
}
@@ -3219,16 +3305,51 @@ commit_trans:
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_profile *profile = BTRFS_I(inode)->profile;
+ int pix;
struct btrfs_space_info *data_sinfo;
+ u64 to_free;
+ u64 sum = 0;
/* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
- data_sinfo = BTRFS_I(inode)->space_info;
- spin_lock(&data_sinfo->lock);
- data_sinfo->bytes_may_use -= bytes;
- BTRFS_I(inode)->reserved_bytes -= bytes;
- spin_unlock(&data_sinfo->lock);
+ spin_lock(&BTRFS_I(inode)->reserved_lock);
+
+ BTRFS_I(inode)->reserved_total -= bytes;
+
+ /*
+ * Freeing reservations takes place in two steps.
+ *
+ * reserved_from[] is decremented when the space actually gets
+ * allocated. reserved_total is decremented only here. If the sum of
+ * all reserved_from is bigger than reserved_total, some space has
+ * been freed (unreserved) without actually being allocated. In this
+ * case we return the surplus to its space_info, starting with the
+ * lowest-priority (slowest) entries.
+ */
+
+ for (pix = 0; pix < profile->nentries; ++pix) {
+ sum += BTRFS_I(inode)->reserved_from[pix];
+ }
+ for (pix = profile->nentries - 1;
+ sum > BTRFS_I(inode)->reserved_total; --pix) {
+ BUG_ON(pix < 0);
+ if (BTRFS_I(inode)->reserved_from[pix] == 0)
+ continue;
+
+ data_sinfo = __find_space_info(root->fs_info,
+ BTRFS_BLOCK_GROUP_DATA,
+ profile->speed[pix]);
+ to_free = min(BTRFS_I(inode)->reserved_from[pix],
+ sum - BTRFS_I(inode)->reserved_total);
+ spin_lock(&data_sinfo->lock);
+ data_sinfo->bytes_may_use -= to_free;
+ BTRFS_I(inode)->reserved_from[pix] -= to_free;
+ sum -= to_free;
+ spin_unlock(&data_sinfo->lock);
+ }
+ spin_unlock(&BTRFS_I(inode)->reserved_lock);
}
static void force_metadata_allocation(struct btrfs_fs_info *info)
@@ -3238,29 +3359,40 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
- if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
+ if (found->flags & BTRFS_BLOCK_GROUP_METADATA) {
found->force_alloc = 1;
+ break;
+ }
}
rcu_read_unlock();
}
static int should_alloc_chunk(struct btrfs_root *root,
- struct btrfs_space_info *sinfo, u64 alloc_bytes)
+ struct btrfs_space_info *sinfo, u64 alloc_bytes,
+ int in_logtree)
{
u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
u64 thresh;
+ u64 used;
+
+ used = sinfo->bytes_used + sinfo->bytes_reserved;
+ if (in_logtree)
+ used += sinfo->bytes_pinned;
- if (sinfo->bytes_used + sinfo->bytes_reserved +
- alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+ /* if at least 256 MB are free after this alloc, we have enough */
+ if (used + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
return 0;
- if (sinfo->bytes_used + sinfo->bytes_reserved +
- alloc_bytes < div_factor(num_bytes, 8))
+ /* if after this alloc we still use <80%, we have enough */
+ if (used + alloc_bytes < div_factor(num_bytes, 8))
return 0;
thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
+ /* if this space occupies more than 5% of the total space and has
+ * less than 30% in use, we have enough
+ */
if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
return 0;
@@ -3269,22 +3401,29 @@ static int should_alloc_chunk(struct btrfs_root *root,
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
- u64 flags, int force)
+ u64 flags, int force, struct btrfs_profile *profile,
+ int pix, int in_logtree)
{
struct btrfs_space_info *space_info;
struct btrfs_fs_info *fs_info = extent_root->fs_info;
int ret = 0;
+ int ix = pix;
+
+ if (pix == -1)
+ ix = 0; /* loop through all speeds */
+
+ if (profile->nentries == 0) {
+ WARN_ON(1);
+ return ret;
+ }
mutex_lock(&fs_info->chunk_mutex);
flags = btrfs_reduce_alloc_profile(extent_root, flags);
- space_info = __find_space_info(extent_root->fs_info, flags);
- if (!space_info) {
- ret = update_space_info(extent_root->fs_info, flags,
- 0, 0, &space_info);
- BUG_ON(ret);
- }
+again:
+ space_info = __find_space_info(extent_root->fs_info, flags,
+ profile->speed[ix]);
BUG_ON(!space_info);
spin_lock(&space_info->lock);
@@ -3292,11 +3431,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
force = 1;
if (space_info->full) {
spin_unlock(&space_info->lock);
- goto out;
+ goto loop;
}
if (!force && !should_alloc_chunk(extent_root, space_info,
- alloc_bytes)) {
+ alloc_bytes, in_logtree)) {
spin_unlock(&space_info->lock);
goto out;
}
@@ -3321,7 +3460,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
force_metadata_allocation(fs_info);
}
- ret = btrfs_alloc_chunk(trans, extent_root, flags);
+ ret = btrfs_alloc_chunk(trans, extent_root, flags, profile->speed[ix]);
spin_lock(&space_info->lock);
if (ret)
space_info->full = 1;
@@ -3329,6 +3468,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
ret = 1;
space_info->force_alloc = 0;
spin_unlock(&space_info->lock);
+loop:
+ if (ret <= 0 && pix == -1 && ix < profile->nentries - 1) {
+ ++ix;
+ ret = 0;
+ goto again;
+ }
+
out:
mutex_unlock(&extent_root->fs_info->chunk_mutex);
return ret;
@@ -3341,18 +3487,24 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 to_reclaim, int sync)
{
struct btrfs_block_rsv *block_rsv;
- struct btrfs_space_info *space_info;
+ struct btrfs_profile *profile;
u64 reserved;
u64 max_reclaim;
u64 reclaimed = 0;
int pause = 1;
int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
+ u64 sum;
+ int pix;
block_rsv = &root->fs_info->delalloc_block_rsv;
- space_info = block_rsv->space_info;
+ profile = block_rsv->profile;
smp_mb();
- reserved = space_info->bytes_reserved;
+ sum = 0;
+ for (pix = 0; pix < profile->nentries; ++pix)
+ sum += profile->meta_sinfo[pix]->bytes_reserved;
+
+ reserved = sum;
if (reserved == 0)
return 0;
@@ -3364,13 +3516,19 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
smp_mb();
nr_pages = min_t(unsigned long, nr_pages,
root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+ /*
+ * FIXME limit it to inodes that share at least one space_info
+ */
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
- spin_lock(&space_info->lock);
- if (reserved > space_info->bytes_reserved)
- reclaimed += reserved - space_info->bytes_reserved;
- reserved = space_info->bytes_reserved;
- spin_unlock(&space_info->lock);
+ sum = 0;
+ for (pix = 0; pix < profile->nentries; ++pix)
+ sum += profile->meta_sinfo[pix]->bytes_reserved;
+
+ if (reserved > sum)
+ reclaimed += reserved - sum;
+
+ reserved = sum;
if (reserved == 0 || reclaimed >= max_reclaim)
break;
@@ -3402,71 +3560,74 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
- u64 orig_bytes, int flush)
+ u64 orig_bytes, int flush, int *ppix)
{
- struct btrfs_space_info *space_info = block_rsv->space_info;
+ struct btrfs_space_info *space_info;
+ u64 used;
u64 unused;
u64 num_bytes = orig_bytes;
int retries = 0;
int ret = 0;
- bool reserved = false;
bool committed = false;
+ int pix;
+ u64 max_pinned;
again:
ret = -ENOSPC;
- if (reserved)
- num_bytes = 0;
- spin_lock(&space_info->lock);
- unused = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- space_info->bytes_may_use;
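+ /* walk the speed classes of the reservation's profile, fastest
+ * first, and take the reservation from the first space_info
+ * with enough room */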
+ for (pix = 0; pix < block_rsv->profile->nentries; ++pix) {
+ space_info = block_rsv->profile->meta_sinfo[pix];
- /*
- * The idea here is that we've not already over-reserved the block group
- * then we can go ahead and save our reservation first and then start
- * flushing if we need to. Otherwise if we've already overcommitted
- * lets start flushing stuff first and then come back and try to make
- * our reservation.
- */
- if (unused <= space_info->total_bytes) {
- unused = space_info->total_bytes - unused;
- if (unused >= num_bytes) {
- if (!reserved)
- space_info->bytes_reserved += orig_bytes;
- ret = 0;
- } else {
+ if (space_info->full)
+ continue;
+
+ spin_lock(&space_info->lock);
+
+ if (space_info->total_bytes == 0) {
/*
- * Ok set num_bytes to orig_bytes since we aren't
- * overocmmitted, this way we only try and reclaim what
- * we need.
+ * bootstrap: this space info does not have an initial
+ * chunk. try to allocate it here.
+ * FIXME: check under which conditions we are allowed
+ * to allocate a chunk. are we allowed to join a
+ * transaction?
*/
- num_bytes = orig_bytes;
+ int in_logtree = root->root_key.objectid ==
+ BTRFS_TREE_LOG_OBJECTID &&
+ !root->fs_info->log_root_recovering;
+ if (trans && (root->ref_cows || in_logtree)) {
+ spin_unlock(&space_info->lock);
+ ret = do_chunk_alloc(trans, root, num_bytes,
+ BTRFS_BLOCK_GROUP_METADATA,
+ 0, block_rsv->profile, -1,
+ in_logtree);
+ if (ret < 0)
+ return ret;
+ spin_lock(&space_info->lock);
+ }
}
- } else {
- /*
- * Ok we're over committed, set num_bytes to the overcommitted
- * amount plus the amount of bytes that we need for this
- * reservation.
- */
- num_bytes = unused - space_info->total_bytes +
- (orig_bytes * (retries + 1));
+ used = space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly +
+ space_info->bytes_may_use;
+
+ if (used <= space_info->total_bytes) {
+ unused = space_info->total_bytes - used;
+ if (unused >= orig_bytes) {
+ space_info->bytes_reserved += orig_bytes;
+ spin_unlock(&space_info->lock);
+ *ppix = pix;
+ return 0;
+ }
+ }
+ spin_unlock(&space_info->lock);
}
/*
- * Couldn't make our reservation, save our place so while we're trying
- * to reclaim space we can actually use it instead of somebody else
- * stealing it from us.
+ * There is a risk someone else is claiming the space we are freeing
+ * below. To mitigate this risk, we try to reclaim more than we actually
+ * need.
+ * FIXME try to reserve the space upfront, but in which space info?
*/
- if (ret && !reserved) {
- space_info->bytes_reserved += orig_bytes;
- reserved = true;
- }
-
- spin_unlock(&space_info->lock);
-
- if (!ret)
- return 0;
+ num_bytes = orig_bytes * (retries + 1);
if (!flush)
goto out;
@@ -3476,9 +3637,7 @@ again:
* metadata until after the IO is completed.
*/
ret = shrink_delalloc(trans, root, num_bytes, 1);
- if (ret > 0)
- return 0;
- else if (ret < 0)
+ if (ret < 0)
goto out;
/*
@@ -3486,21 +3645,27 @@ again:
* out enough space and we simply didn't have enough space to reclaim,
* so go back around and try again.
*/
- if (retries < 2) {
+ if (retries < 2 || ret > 0) {
retries++;
goto again;
}
- spin_lock(&space_info->lock);
+ max_pinned = 0;
+ for (pix = 0; pix < block_rsv->profile->nentries; ++pix) {
+ space_info = block_rsv->profile->meta_sinfo[pix];
+ spin_lock(&space_info->lock);
+ if (space_info->bytes_pinned > max_pinned)
+ max_pinned = space_info->bytes_pinned;
+ spin_unlock(&space_info->lock);
+ }
/*
* Not enough space to be reclaimed, don't bother committing the
* transaction.
*/
- if (space_info->bytes_pinned < orig_bytes)
+ if (max_pinned < orig_bytes) {
ret = -ENOSPC;
- spin_unlock(&space_info->lock);
- if (ret)
goto out;
+ }
ret = -EAGAIN;
if (trans || committed)
@@ -3518,17 +3683,11 @@ again:
}
out:
- if (reserved) {
- spin_lock(&space_info->lock);
- space_info->bytes_reserved -= orig_bytes;
- spin_unlock(&space_info->lock);
- }
-
return ret;
}
static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
+ struct btrfs_root *root)
{
struct btrfs_block_rsv *block_rsv;
if (root->ref_cows)
@@ -3536,35 +3695,47 @@ static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
else
block_rsv = root->block_rsv;
- if (!block_rsv)
- block_rsv = &root->fs_info->empty_block_rsv;
+ if (!block_rsv) {
+ if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
+ block_rsv = &root->fs_info->log_block_rsv;
+ else
+ block_rsv = &root->fs_info->empty_block_rsv;
+ }
return block_rsv;
}
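+/*
+ * take num_bytes from the first speed entry of the reserve that can
+ * satisfy the whole request; the entry's index is returned in *ppix
+ */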
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
- u64 num_bytes)
+ u64 num_bytes, int *ppix)
{
int ret = -ENOSPC;
+ int pix;
+ struct btrfs_profile *profile = block_rsv->profile;
spin_lock(&block_rsv->lock);
- if (block_rsv->reserved >= num_bytes) {
- block_rsv->reserved -= num_bytes;
- if (block_rsv->reserved < block_rsv->size)
- block_rsv->full = 0;
- ret = 0;
+ for (pix = 0; pix < profile->nentries; ++pix) {
+ if (block_rsv->reserved_from[pix] >= num_bytes) {
+ block_rsv->reserved_from[pix] -= num_bytes;
+ block_rsv->reserved_total -= num_bytes;
+ if (block_rsv->reserved_total < block_rsv->size)
+ block_rsv->full = 0;
+ ret = 0;
+ *ppix = pix;
+ break;
+ }
}
spin_unlock(&block_rsv->lock);
return ret;
}
static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
- u64 num_bytes, int update_size)
+ u64 num_bytes, int update_size, int pix)
{
spin_lock(&block_rsv->lock);
- block_rsv->reserved += num_bytes;
+ block_rsv->reserved_total += num_bytes;
+ block_rsv->reserved_from[pix] += num_bytes;
if (update_size)
block_rsv->size += num_bytes;
- else if (block_rsv->reserved >= block_rsv->size)
+ else if (block_rsv->reserved_total >= block_rsv->size)
block_rsv->full = 1;
spin_unlock(&block_rsv->lock);
}
@@ -3572,42 +3743,90 @@ static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
struct btrfs_block_rsv *dest, u64 num_bytes)
{
- struct btrfs_space_info *space_info = block_rsv->space_info;
+ struct btrfs_space_info *space_info;
+ int pix;
+
+ if (dest) {
+ BUG_ON(block_rsv->profile != dest->profile);
+ }
spin_lock(&block_rsv->lock);
if (num_bytes == (u64)-1)
num_bytes = block_rsv->size;
block_rsv->size -= num_bytes;
- if (block_rsv->reserved >= block_rsv->size) {
- num_bytes = block_rsv->reserved - block_rsv->size;
- block_rsv->reserved = block_rsv->size;
+ if (block_rsv->reserved_total >= block_rsv->size) {
+ num_bytes = block_rsv->reserved_total - block_rsv->size;
+ block_rsv->reserved_total = block_rsv->size;
block_rsv->full = 1;
} else {
num_bytes = 0;
}
spin_unlock(&block_rsv->lock);
- if (num_bytes > 0) {
+ pix = block_rsv->profile->nentries - 1;
+ BUG_ON(pix < 0);
+ while (num_bytes > 0 && pix >= 0) {
+ u64 n;
+
+ spin_lock(&block_rsv->lock);
+ n = min(num_bytes, block_rsv->reserved_from[pix]);
+ block_rsv->reserved_from[pix] -= n;
+ spin_unlock(&block_rsv->lock);
+
+ space_info = block_rsv->profile->meta_sinfo[pix];
if (dest) {
- block_rsv_add_bytes(dest, num_bytes, 0);
+ block_rsv_add_bytes(dest, n, 0, pix);
} else {
spin_lock(&space_info->lock);
- space_info->bytes_reserved -= num_bytes;
+ space_info->bytes_reserved -= n;
+ WARN_ON((s64)space_info->bytes_reserved < 0);
spin_unlock(&space_info->lock);
}
+ num_bytes -= n;
+ --pix;
}
+ BUG_ON(num_bytes);
}
static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
- struct btrfs_block_rsv *dst, u64 num_bytes)
+ struct btrfs_block_rsv *dst,
+ u64 num_bytes)
{
- int ret;
+ int pix;
+ u64 n;
+ struct btrfs_profile *profile;
- ret = block_rsv_use_bytes(src, num_bytes);
- if (ret)
- return ret;
+ BUG_ON(src == dst);
+
+ spin_lock(&src->lock);
+
+ profile = src->profile;
+ BUG_ON(profile != dst->profile);
+
+ if (num_bytes > src->reserved_total) {
+ spin_unlock(&src->lock);
+ return -ENOSPC;
+ }
+
+ for (pix = 0; pix < profile->nentries && num_bytes; ++pix) {
+ n = min(num_bytes, src->reserved_from[pix]);
+ if (n == 0)
+ continue;
+ src->reserved_from[pix] -= n;
+ src->reserved_total -= n;
+ spin_unlock(&src->lock);
+
+ block_rsv_add_bytes(dst, n, 1, pix);
+
+ num_bytes -= n;
+
+ spin_lock(&src->lock);
+ }
+ if (src->reserved_total < src->size)
+ src->full = 0;
+ spin_unlock(&src->lock);
- block_rsv_add_bytes(dst, num_bytes, 1);
return 0;
}
@@ -3620,18 +3839,18 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
INIT_LIST_HEAD(&rsv->list);
}
-struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
+struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
+ struct btrfs_profile *profile)
{
struct btrfs_block_rsv *block_rsv;
- struct btrfs_fs_info *fs_info = root->fs_info;
block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
if (!block_rsv)
return NULL;
btrfs_init_block_rsv(block_rsv);
- block_rsv->space_info = __find_space_info(fs_info,
- BTRFS_BLOCK_GROUP_METADATA);
+ block_rsv->profile = profile;
+
return block_rsv;
}
@@ -3665,13 +3884,15 @@ int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
u64 num_bytes)
{
int ret;
+ int pix;
if (num_bytes == 0)
return 0;
- ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
+ ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1,
+ &pix);
if (!ret) {
- block_rsv_add_bytes(block_rsv, num_bytes, 1);
+ block_rsv_add_bytes(block_rsv, num_bytes, 1, pix);
return 0;
}
@@ -3686,6 +3907,7 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
u64 num_bytes = 0;
int commit_trans = 0;
int ret = -ENOSPC;
+ int pix;
if (!block_rsv)
return 0;
@@ -3696,12 +3918,13 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
if (min_reserved > num_bytes)
num_bytes = min_reserved;
- if (block_rsv->reserved >= num_bytes) {
+ if (block_rsv->reserved_total >= num_bytes) {
ret = 0;
} else {
- num_bytes -= block_rsv->reserved;
+ num_bytes -= block_rsv->reserved_total;
if (block_rsv->durable &&
- block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
+ block_rsv->freed_total[0] + block_rsv->freed_total[1]
+ >= num_bytes)
commit_trans = 1;
}
spin_unlock(&block_rsv->lock);
@@ -3709,10 +3932,13 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
return 0;
if (block_rsv->refill_used) {
+ /* FIXME should we loop here? or be content with a partial
+ * re-fill? currently we do all-or-nothing here
+ */
ret = reserve_metadata_bytes(trans, root, block_rsv,
- num_bytes, 0);
+ num_bytes, 0, &pix);
if (!ret) {
- block_rsv_add_bytes(block_rsv, num_bytes, 0);
+ block_rsv_add_bytes(block_rsv, num_bytes, 0, pix);
return 0;
}
}
@@ -3743,7 +3969,7 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
{
struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
if (global_rsv->full || global_rsv == block_rsv ||
- block_rsv->space_info != global_rsv->space_info)
+ block_rsv->profile != global_rsv->profile)
global_rsv = NULL;
block_rsv_release_bytes(block_rsv, global_rsv, num_bytes);
}
@@ -3756,9 +3982,10 @@ void btrfs_block_rsv_release(struct btrfs_root *root,
static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
{
struct btrfs_space_info *sinfo;
+ struct list_head *head;
u64 num_bytes;
- u64 meta_used;
- u64 data_used;
+ u64 meta_used = 0;
+ u64 data_used = 0;
int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
#if 0
/*
@@ -3777,17 +4004,18 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
num_bytes += btrfs_root_used(&fs_info->tree_root->root_item);
spin_unlock(&fs_info->tree_root->accounting_lock);
#endif
- sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
- spin_lock(&sinfo->lock);
- data_used = sinfo->bytes_used;
- spin_unlock(&sinfo->lock);
-
- sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
- spin_lock(&sinfo->lock);
- if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
- data_used = 0;
- meta_used = sinfo->bytes_used;
- spin_unlock(&sinfo->lock);
+ head = &fs_info->space_info;
+ rcu_read_lock();
+ list_for_each_entry_rcu(sinfo, head, list) {
+ spin_lock(&sinfo->lock);
+ if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA) {
+ meta_used += sinfo->bytes_used;
+ } else if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) {
+ data_used += sinfo->bytes_used;
+ }
+ spin_unlock(&sinfo->lock);
+ }
+ rcu_read_unlock();
num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
csum_size * 2;
@@ -3802,56 +4030,76 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
{
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
- struct btrfs_space_info *sinfo = block_rsv->space_info;
+ struct btrfs_space_info *sinfo;
+ struct btrfs_profile *profile;
u64 num_bytes;
+ int pix;
num_bytes = calc_global_metadata_size(fs_info);
spin_lock(&block_rsv->lock);
- spin_lock(&sinfo->lock);
+
+ profile = block_rsv->profile;
block_rsv->size = num_bytes;
- num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
- sinfo->bytes_reserved + sinfo->bytes_readonly +
- sinfo->bytes_may_use;
+ for (pix = 0; pix < profile->nentries; ++pix) {
+ sinfo = profile->meta_sinfo[pix];
+ BUG_ON(!sinfo);
+ spin_lock(&sinfo->lock);
+ num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
+ sinfo->bytes_reserved + sinfo->bytes_readonly +
+ sinfo->bytes_may_use;
- if (sinfo->total_bytes > num_bytes) {
- num_bytes = sinfo->total_bytes - num_bytes;
- block_rsv->reserved += num_bytes;
- sinfo->bytes_reserved += num_bytes;
+ if (sinfo->total_bytes > num_bytes) {
+ num_bytes = sinfo->total_bytes - num_bytes;
+ block_rsv->reserved_total += num_bytes;
+ block_rsv->reserved_from[pix] += num_bytes;
+ sinfo->bytes_reserved += num_bytes;
+ }
+ spin_unlock(&sinfo->lock);
}
+ for (pix = profile->nentries - 1; pix >= 0; --pix) {
+ sinfo = profile->meta_sinfo[pix];
- if (block_rsv->reserved >= block_rsv->size) {
- num_bytes = block_rsv->reserved - block_rsv->size;
+ if (block_rsv->reserved_total <= block_rsv->size)
+ break;
+
+ spin_lock(&sinfo->lock);
+ num_bytes = block_rsv->reserved_total - block_rsv->size;
+ num_bytes = min(num_bytes,
+ block_rsv->reserved_from[pix]);
sinfo->bytes_reserved -= num_bytes;
- block_rsv->reserved = block_rsv->size;
- block_rsv->full = 1;
+ block_rsv->reserved_total -= num_bytes;
+ block_rsv->reserved_from[pix] -= num_bytes;
+ spin_unlock(&sinfo->lock);
}
+ if (block_rsv->size == block_rsv->reserved_total)
+ block_rsv->full = 1;
+
#if 0
printk(KERN_INFO"global block rsv size %llu reserved %llu\n",
- block_rsv->size, block_rsv->reserved);
+ block_rsv->size, block_rsv->reserved_total);
#endif
- spin_unlock(&sinfo->lock);
spin_unlock(&block_rsv->lock);
}
-static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
+static int init_global_block_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_profile *log_profile,
+ struct btrfs_profile *meta_profile,
+ struct btrfs_profile *system_profile)
{
- struct btrfs_space_info *space_info;
-
- space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
- fs_info->chunk_block_rsv.space_info = space_info;
+ fs_info->chunk_block_rsv.profile = system_profile;
fs_info->chunk_block_rsv.priority = 10;
-
- space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
- fs_info->global_block_rsv.space_info = space_info;
+ fs_info->global_block_rsv.profile = meta_profile;
fs_info->global_block_rsv.priority = 10;
fs_info->global_block_rsv.refill_used = 1;
- fs_info->delalloc_block_rsv.space_info = space_info;
- fs_info->trans_block_rsv.space_info = space_info;
- fs_info->empty_block_rsv.space_info = space_info;
+ fs_info->delalloc_block_rsv.profile = meta_profile;
+ fs_info->trans_block_rsv.profile = meta_profile;
+ fs_info->empty_block_rsv.profile = meta_profile;
fs_info->empty_block_rsv.priority = 10;
+ fs_info->log_block_rsv.profile = log_profile;
+ fs_info->log_block_rsv.priority = 10;
fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
@@ -3864,17 +4112,19 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv);
update_global_block_rsv(fs_info);
+
+ return 0;
}
static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1);
WARN_ON(fs_info->delalloc_block_rsv.size > 0);
- WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
+ WARN_ON(fs_info->delalloc_block_rsv.reserved_total > 0);
WARN_ON(fs_info->trans_block_rsv.size > 0);
- WARN_ON(fs_info->trans_block_rsv.reserved > 0);
+ WARN_ON(fs_info->trans_block_rsv.reserved_total > 0);
WARN_ON(fs_info->chunk_block_rsv.size > 0);
- WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
+ WARN_ON(fs_info->chunk_block_rsv.reserved_total > 0);
}
static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
@@ -3954,7 +4204,6 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
* and one for root of the snapshot.
*/
u64 num_bytes = calc_trans_metadata_size(root, 5);
- dst_rsv->space_info = src_rsv->space_info;
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
@@ -3970,6 +4219,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
u64 to_reserve;
int nr_extents;
int ret;
+ int pix;
if (btrfs_transaction_in_commit(root->fs_info))
schedule_timeout(1);
@@ -3988,7 +4238,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
spin_unlock(&BTRFS_I(inode)->accounting_lock);
to_reserve += calc_csum_metadata_size(inode, num_bytes);
- ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
+ ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1,
+ &pix);
if (ret)
return ret;
@@ -3997,7 +4248,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
atomic_inc(&BTRFS_I(inode)->outstanding_extents);
spin_unlock(&BTRFS_I(inode)->accounting_lock);
- block_rsv_add_bytes(block_rsv, to_reserve, 1);
+ block_rsv_add_bytes(block_rsv, to_reserve, 1, pix);
if (block_rsv->size > 512 * 1024 * 1024)
shrink_delalloc(NULL, root, to_reserve, 0);
@@ -4320,6 +4571,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
u64 start;
u64 end;
int idx;
+ int pix;
int ret;
if (fs_info->pinned_extents == &fs_info->freed_extents[0])
@@ -4345,16 +4597,20 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
&fs_info->durable_block_rsv_list, list) {
idx = trans->transid & 0x1;
- if (block_rsv->freed[idx] > 0) {
- block_rsv_add_bytes(block_rsv,
- block_rsv->freed[idx], 0);
- block_rsv->freed[idx] = 0;
+ if (block_rsv->freed_total[idx] > 0) {
+ for (pix = 0; pix < block_rsv->profile->nentries; ++pix) {
+ block_rsv_add_bytes(block_rsv,
+ block_rsv->freed_from[idx][pix], 0,
+ pix);
+ block_rsv->freed_from[idx][pix] = 0;
+ }
+ block_rsv->freed_total[idx] = 0;
}
if (atomic_read(&block_rsv->usage) == 0) {
btrfs_block_rsv_release(root, block_rsv, (u64)-1);
- if (block_rsv->freed[0] == 0 &&
- block_rsv->freed[1] == 0) {
+ if (block_rsv->freed_total[0] == 0 &&
+ block_rsv->freed_total[1] == 0) {
list_del_init(&block_rsv->list);
kfree(block_rsv);
}
@@ -4642,6 +4898,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_group_cache *cache = NULL;
int ret;
+ int pix;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
@@ -4656,7 +4913,15 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
block_rsv = get_block_rsv(trans, root);
cache = btrfs_lookup_block_group(root->fs_info, buf->start);
- if (block_rsv->space_info != cache->space_info)
+
+ ret = -1;
+ for (pix = 0; pix < block_rsv->profile->nentries; ++pix) {
+ if (block_rsv->profile->meta_sinfo[pix] == cache->space_info) {
+ ret = 0;
+ break;
+ }
+ }
+ if (ret)
goto out;
if (btrfs_header_generation(buf) == trans->transid) {
@@ -4683,8 +4948,9 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
ret = 1;
spin_lock(&block_rsv->lock);
- if (block_rsv->reserved < block_rsv->size) {
- block_rsv->reserved += buf->len;
+ if (block_rsv->reserved_total < block_rsv->size) {
+ block_rsv->reserved_total += buf->len;
+ block_rsv->reserved_from[pix] += buf->len;
ret = 0;
}
spin_unlock(&block_rsv->lock);
@@ -4707,8 +4973,10 @@ pin:
spin_unlock(&cache->lock);
if (ret) {
+ int index = trans->transid & 0x1;
spin_lock(&block_rsv->lock);
- block_rsv->freed[trans->transid & 0x1] += buf->len;
+ block_rsv->freed_total[index] += buf->len;
+ block_rsv->freed_from[index][pix] += buf->len;
spin_unlock(&block_rsv->lock);
}
}
@@ -4835,7 +5103,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
u64 num_bytes, u64 empty_size,
u64 search_start, u64 search_end,
u64 hint_byte, struct btrfs_key *ins,
- int data)
+ int data,
+ struct btrfs_space_info *space_info)
{
int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -4844,7 +5113,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
int empty_cluster = 2 * 1024 * 1024;
int allowed_chunk_alloc = 0;
int done_chunk_alloc = 0;
- struct btrfs_space_info *space_info;
int last_ptr_loop = 0;
int loop = 0;
int index = 0;
@@ -4860,12 +5128,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
ins->objectid = 0;
ins->offset = 0;
- space_info = __find_space_info(root->fs_info, data);
- if (!space_info) {
- printk(KERN_ERR "No space info for %d\n", data);
- return -ENOSPC;
- }
-
/*
* If the space info is for both data and metadata it means we have a
* small filesystem and we can't use the clustering stuff.
@@ -4884,11 +5146,23 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
btrfs_test_opt(root, SSD)) {
+ /* FIXME do we need last_ptr per speed? */
last_ptr = &root->fs_info->data_alloc_cluster;
}
if (last_ptr) {
spin_lock(&last_ptr->lock);
+ if (last_ptr->block_group &&
+ last_ptr->block_group->speed != space_info->speed) {
+ spin_unlock(&last_ptr->lock);
+ last_ptr = NULL;
+ } else {
+ spin_unlock(&last_ptr->lock);
+ }
+ }
+
+ if (last_ptr) {
+ spin_lock(&last_ptr->lock);
if (last_ptr->block_group)
hint_byte = last_ptr->window_start;
spin_unlock(&last_ptr->lock);
@@ -4912,6 +5186,7 @@ ideal_cache:
* picked out then we don't care that the block group is cached.
*/
if (block_group && block_group_bits(block_group, data) &&
+ block_group->speed == space_info->speed &&
(block_group->cached != BTRFS_CACHE_NO ||
search_start == ideal_cache_offset)) {
down_read(&space_info->groups_sem);
@@ -4963,6 +5238,7 @@ search:
}
have_block_group:
+ BUG_ON(block_group->speed != space_info->speed);
if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
u64 free_percent;
@@ -5250,8 +5526,13 @@ loop:
}
if (allowed_chunk_alloc) {
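+ /* build a one-entry profile so the chunk is allocated in
+ * this space_info's speed class only */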
+ struct btrfs_profile profile;
+ memset(&profile, 0, sizeof(profile));
+ profile.nentries = 1;
+ profile.speed[0] = space_info->speed;
ret = do_chunk_alloc(trans, root, num_bytes +
- 2 * 1024 * 1024, data, 1);
+ 2 * 1024 * 1024, data, 1,
+ &profile, 0, 0);
allowed_chunk_alloc = 0;
done_chunk_alloc = 1;
} else if (!done_chunk_alloc) {
@@ -5286,7 +5567,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int index = 0;
spin_lock(&info->lock);
- printk(KERN_INFO "space_info has %llu free, is %sfull\n",
+ printk(KERN_INFO "space_info 0x%llx has %llu free, is %sfull\n",
+ info->flags,
(unsigned long long)(info->total_bytes - info->bytes_used -
info->bytes_pinned - info->bytes_reserved -
info->bytes_readonly),
@@ -5323,15 +5605,90 @@ again:
up_read(&info->groups_sem);
}
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
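+/*
+ * reserve a data extent, drawing on the per-speed reservations made
+ * for the inode by btrfs_check_data_free_space(). if no single speed
+ * class can satisfy num_bytes, fall back to the class holding the
+ * largest reservation, provided it covers min_alloc_size.
+ */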
+int btrfs_reserve_data_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
+ struct inode *inode,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
u64 search_end, struct btrfs_key *ins,
u64 data)
{
+ u64 max_size = 0;
+ int max_pix = 0;
+ int pix;
int ret;
+ struct btrfs_profile *profile = BTRFS_I(inode)->profile;
+ struct btrfs_inode *bino = BTRFS_I(inode);
+
+ spin_lock(&BTRFS_I(inode)->reserved_lock);
+
+ BUG_ON(BTRFS_I(inode)->reserved_total < min_alloc_size);
+
+ for (pix = 0; pix < profile->nentries; ++pix) {
+ if (bino->reserved_from[pix] >= num_bytes)
+ break;
+ if (bino->reserved_from[pix] > max_size) {
+ max_size = bino->reserved_from[pix];
+ max_pix = pix;
+ }
+ }
+ if (pix == profile->nentries) {
+ if (max_size >= min_alloc_size) {
+ pix = max_pix;
+ num_bytes = max_size;
+ }
+ }
+ if (pix == profile->nentries) {
+ spin_unlock(&BTRFS_I(inode)->reserved_lock);
+ return -ENOSPC;
+ }
+ bino->reserved_from[pix] -= num_bytes;
+ spin_unlock(&BTRFS_I(inode)->reserved_lock);
+
+ ret = btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
+ empty_size, hint_byte, search_end, ins,
+ data, profile, pix);
+ if (ret == 0) {
+ struct btrfs_space_info *sinfo;
+
+ spin_lock(&BTRFS_I(inode)->reserved_lock);
+ bino->reserved_from[pix] += num_bytes;
+ bino->reserved_from[pix] -= ins->offset;
+ spin_unlock(&BTRFS_I(inode)->reserved_lock);
+
+		sinfo = __find_space_info(root->fs_info,
+					  BTRFS_BLOCK_GROUP_DATA,
+					  profile->speed[pix]);
+ BUG_ON(!sinfo);
+ spin_lock(&sinfo->lock);
+ sinfo->bytes_may_use -= ins->offset;
+ spin_unlock(&sinfo->lock);
+ } else {
+		spin_lock(&bino->reserved_lock);
+		bino->reserved_from[pix] += num_bytes;
+		spin_unlock(&bino->reserved_lock);
+ }
+ return ret;
+}
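
The index selection above is a two-pass policy: take the first profile entry
that can satisfy num_bytes in full, otherwise fall back to the largest entry
as long as it still covers min_alloc_size. A standalone sketch of just that
policy (hypothetical name, locking and the success/failure re-crediting
elided):

static int pick_reservation_index(const u64 *reserved_from, int nentries,
				  u64 *num_bytes, u64 min_alloc_size)
{
	u64 max_size = 0;
	int max_pix = 0;
	int pix;

	for (pix = 0; pix < nentries; ++pix) {
		if (reserved_from[pix] >= *num_bytes)
			return pix;	/* first entry that fits in full */
		if (reserved_from[pix] > max_size) {
			max_size = reserved_from[pix];
			max_pix = pix;
		}
	}
	/* nothing fits in full: shrink to the largest entry if that still
	 * satisfies the minimum allocation size */
	if (max_size >= min_alloc_size) {
		*num_bytes = max_size;
		return max_pix;
	}
	return -1;	/* caller maps this to -ENOSPC */
}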
+
+/*
+ * pix is the index into the profile, selecting the speed the extent should
+ * be allocated from. pix == -1 means any speed from the profile is
+ * acceptable.
+ */
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 min_alloc_size,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end, struct btrfs_key *ins,
+ u64 data, struct btrfs_profile *profile, int pix)
+{
+ int ret = -ENOSPC;
u64 search_start = 0;
+ struct btrfs_space_info *sinfo;
+ int ix;
+ int p_start, p_end;
+ int nospc;
+ int in_logtree = root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID;
data = btrfs_get_alloc_profile(root, data);
again:
@@ -5339,31 +5696,54 @@ again:
* the only place that sets empty_size is btrfs_realloc_node, which
* is not called recursively on allocations
*/
- if (empty_size || root->ref_cows)
+ if (empty_size || root->ref_cows ||
+ (in_logtree && !root->fs_info->log_root_recovering)) {
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- num_bytes + 2 * 1024 * 1024, data, 0);
+ num_bytes + 2 * 1024 * 1024, data, 0,
+ profile, pix, in_logtree);
+ }
WARN_ON(num_bytes < root->sectorsize);
- ret = find_free_extent(trans, root, num_bytes, empty_size,
- search_start, search_end, hint_byte,
- ins, data);
- if (ret == -ENOSPC && num_bytes > min_alloc_size) {
+ if (pix == -1) {
+ p_start = 0;
+ p_end = profile->nentries - 1;
+ } else {
+ p_start = pix;
+ p_end = pix;
+ }
+ nospc = 0;
+ for (ix = p_start; ix <= p_end; ++ix) {
+ sinfo = __find_space_info(root->fs_info, data,
+ profile->speed[ix]);
+ ret = find_free_extent(trans, root, num_bytes, empty_size,
+ search_start, search_end, hint_byte,
+ ins, data, sinfo);
+		if (ret == 0)
+			return 0;
+ if (ret == -ENOSPC)
+ ++nospc;
+ }
+
+ if (nospc && num_bytes > min_alloc_size) {
num_bytes = num_bytes >> 1;
num_bytes = num_bytes & ~(root->sectorsize - 1);
num_bytes = max(num_bytes, min_alloc_size);
do_chunk_alloc(trans, root->fs_info->extent_root,
- num_bytes, data, 1);
+ num_bytes, data, 1, profile, pix, 0);
goto again;
}
- if (ret == -ENOSPC) {
- struct btrfs_space_info *sinfo;
-
- sinfo = __find_space_info(root->fs_info, data);
- printk(KERN_ERR "btrfs allocation failed flags %llu, "
- "wanted %llu\n", (unsigned long long)data,
- (unsigned long long)num_bytes);
- dump_space_info(sinfo, num_bytes, 1);
+ if (nospc) {
+ for (ix = p_start; ix <= p_end; ++ix) {
+ sinfo = __find_space_info(root->fs_info, data,
+ profile->speed[ix]);
+ printk(KERN_ERR "btrfs allocation failed flags %llu, "
+ "wanted %llu\n", (unsigned long long)data,
+ (unsigned long long)num_bytes);
+ dump_space_info(sinfo, num_bytes, 1);
+ }
}
return ret;
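
For reference, a condensed (illustrative, arguments as in the call sites
below) view of the two entry points after this change: data allocations go
through btrfs_reserve_data_extent(), which derives the profile index from the
inode's per-speed reservations, while metadata callers pass an explicit
profile and index, with pix == -1 trying every speed in the profile:

	/* data path: index chosen from BTRFS_I(inode)->reserved_from[] */
	ret = btrfs_reserve_data_extent(trans, root, inode, len,
					root->sectorsize, 0, alloc_hint,
					(u64)-1, &ins, 1);

	/* metadata path: pix comes from the block reserve bookkeeping */
	ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
				   empty_size, hint, (u64)-1, &ins, 0,
				   block_rsv->profile, pix);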
@@ -5631,31 +6011,34 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
static struct btrfs_block_rsv *
use_block_rsv(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u32 blocksize)
+ struct btrfs_root *root, u32 blocksize, int *ppix)
{
struct btrfs_block_rsv *block_rsv;
int ret;
+ BUG_ON(!ppix);
+
block_rsv = get_block_rsv(trans, root);
if (block_rsv->size == 0) {
ret = reserve_metadata_bytes(trans, root, block_rsv,
- blocksize, 0);
+ blocksize, 0, ppix);
if (ret)
return ERR_PTR(ret);
return block_rsv;
}
- ret = block_rsv_use_bytes(block_rsv, blocksize);
+ ret = block_rsv_use_bytes(block_rsv, blocksize, ppix);
if (!ret)
return block_rsv;
return ERR_PTR(-ENOSPC);
}
-static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize)
+static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize,
+ int pix)
{
- block_rsv_add_bytes(block_rsv, blocksize, 0);
+ block_rsv_add_bytes(block_rsv, blocksize, 0, pix);
block_rsv_release_bytes(block_rsv, NULL, 0);
}
@@ -5677,16 +6060,18 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct extent_buffer *buf;
u64 flags = 0;
int ret;
+ int pix;
-
- block_rsv = use_block_rsv(trans, root, blocksize);
- if (IS_ERR(block_rsv))
+ block_rsv = use_block_rsv(trans, root, blocksize, &pix);
+	if (IS_ERR(block_rsv))
		return ERR_CAST(block_rsv);
ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
- empty_size, hint, (u64)-1, &ins, 0);
+ empty_size, hint, (u64)-1, &ins, 0,
+ block_rsv->profile, pix);
if (ret) {
- unuse_block_rsv(block_rsv, blocksize);
+ unuse_block_rsv(block_rsv, blocksize, pix);
return ERR_PTR(ret);
}
@@ -7991,6 +8376,13 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
struct btrfs_trans_handle *trans;
u64 alloc_flags;
int ret;
+ struct btrfs_profile profile;
+
+ memset(&profile, 0, sizeof(profile));
+ profile.nentries = 1;
+ profile.speed[0] = cache->speed;
+ btrfs_init_profile(root->fs_info, &profile,
+ !!(cache->flags & BTRFS_BLOCK_GROUP_SYSTEM));
BUG_ON(cache->ro);
@@ -7999,13 +8391,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
alloc_flags = update_block_group_flags(root, cache->flags);
if (alloc_flags != cache->flags)
- do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+ do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1,
+ &profile, 0, 0);
ret = set_block_group_ro(cache);
if (!ret)
goto out;
alloc_flags = get_alloc_profile(root, cache->space_info->flags);
- ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+ ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1,
+ &profile, 0, 0);
if (ret < 0)
goto out;
ret = set_block_group_ro(cache);
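
The on-stack single-entry profile built here also appears in
find_free_extent() above; a hypothetical helper (not part of the patch) could
factor the pattern out:

static void btrfs_profile_init_single(struct btrfs_profile *profile,
				      int speed)
{
	memset(profile, 0, sizeof(*profile));
	profile->nentries = 1;
	profile->speed[0] = speed;
}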
@@ -8384,6 +8778,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
btrfs_release_path(root, path);
cache->flags = btrfs_block_group_flags(&cache->item);
cache->sectorsize = root->sectorsize;
+ cache->speed = btrfs_chunk_seek_speed(root, found_key.objectid);
/*
* check for two cases, either we are full, and therefore
@@ -8410,7 +8805,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
ret = update_space_info(info, cache->flags, found_key.offset,
btrfs_block_group_used(&cache->item),
- &space_info);
+ cache->speed, &space_info);
BUG_ON(ret);
cache->space_info = space_info;
spin_lock(&cache->space_info->lock);
@@ -8443,8 +8838,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
set_block_group_ro(cache);
}
- init_global_block_rsv(info);
- ret = 0;
+ ret = init_global_block_rsv(info, root->log_profile, root->meta_profile,
+ root->system_profile);
error:
btrfs_free_path(path);
return ret;
@@ -8500,8 +8895,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
free_excluded_extents(root, cache);
+ cache->speed = btrfs_chunk_seek_speed(root, chunk_offset);
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
- &cache->space_info);
+ cache->speed, &cache->space_info);
BUG_ON(ret);
spin_lock(&cache->space_info->lock);
@@ -2535,7 +2535,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
struct writeback_control *wbc)
{
int ret;
- struct address_space *mapping = page->mapping;
struct extent_page_data epd = {
.bio = NULL,
.tree = tree,
@@ -2543,6 +2542,8 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
+#if 0
+ struct address_space *mapping = page->mapping;
struct writeback_control wbc_writepages = {
.sync_mode = wbc->sync_mode,
.older_than_this = NULL,
@@ -2550,11 +2551,16 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.range_start = page_offset(page) + PAGE_CACHE_SIZE,
.range_end = (loff_t)-1,
};
+#endif
ret = __extent_writepage(page, wbc, &epd);
+#if 0 /* FIXME: this code is disabled for the moment as it might trigger
+       * writes from different space_infos, which hurts log tree writes
+       * badly */
extent_write_cache_pages(tree, mapping, &wbc_writepages,
__extent_writepage, &epd, flush_write_bio);
+#endif
flush_epd_write_bio(&epd);
return ret;
}
@@ -612,11 +612,11 @@ retry:
GFP_NOFS);
trans = btrfs_join_transaction(root, 1);
- ret = btrfs_reserve_extent(trans, root,
- async_extent->compressed_size,
- async_extent->compressed_size,
- 0, alloc_hint,
- (u64)-1, &ins, 1);
+ ret = btrfs_reserve_data_extent(trans, root, inode,
+ async_extent->compressed_size,
+ async_extent->compressed_size,
+ 0, alloc_hint,
+ (u64)-1, &ins, 1);
btrfs_end_transaction(trans, root);
if (ret) {
@@ -813,9 +813,10 @@ static noinline int cow_file_range(struct inode *inode,
unsigned long op;
cur_alloc_size = disk_num_bytes;
- ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
- root->sectorsize, 0, alloc_hint,
- (u64)-1, &ins, 1);
+ ret = btrfs_reserve_data_extent(trans, root, inode,
+ cur_alloc_size,
+ root->sectorsize, 0, alloc_hint,
+ (u64)-1, &ins, 1);
BUG_ON(ret);
em = alloc_extent_map(GFP_NOFS);
@@ -2072,9 +2073,11 @@ void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
* reserved space.
*/
index = trans->transid & 0x1;
- if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+ if (block_rsv->reserved_total + block_rsv->freed_total[index]
+ < block_rsv->size) {
num_bytes += block_rsv->size -
- (block_rsv->reserved + block_rsv->freed[index]);
+ (block_rsv->reserved_total +
+ block_rsv->freed_total[index]);
}
*bytes_to_reserve += num_bytes;
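
As a worked example of the top-up: with block_rsv->size == 6M,
reserved_total == 3M and freed_total[index] == 1M, the reserve is short by
6M - (3M + 1M) = 2M, so 2M is added to *bytes_to_reserve.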
@@ -2096,9 +2099,11 @@ void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
/* refill source subvolume's orphan block reservation */
block_rsv = root->orphan_block_rsv;
index = trans->transid & 0x1;
- if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+ if (block_rsv->reserved_total + block_rsv->freed_total[index]
+ < block_rsv->size) {
num_bytes = block_rsv->size -
- (block_rsv->reserved + block_rsv->freed[index]);
+ (block_rsv->reserved_total +
+ block_rsv->freed_total[index]);
ret = btrfs_block_rsv_migrate(&pending->block_rsv,
root->orphan_block_rsv,
num_bytes);
@@ -2106,7 +2111,7 @@ void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
}
/* setup orphan block reservation for the snapshot */
- block_rsv = btrfs_alloc_block_rsv(snap);
+ block_rsv = btrfs_alloc_block_rsv(snap, root->meta_profile);
BUG_ON(!block_rsv);
btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
@@ -2177,7 +2182,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
int ret;
if (!root->orphan_block_rsv) {
- block_rsv = btrfs_alloc_block_rsv(root);
+ block_rsv = btrfs_alloc_block_rsv(root, root->meta_profile);
BUG_ON(!block_rsv);
}
@@ -4020,7 +4025,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
struct btrfs_iget_args *args = p;
inode->i_ino = args->ino;
BTRFS_I(inode)->root = args->root;
- btrfs_set_inode_space_info(args->root, inode);
+ btrfs_set_inode_profile(args->root, inode);
return 0;
}
@@ -4521,7 +4526,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->root = root;
BTRFS_I(inode)->generation = trans->transid;
inode->i_generation = BTRFS_I(inode)->generation;
- btrfs_set_inode_space_info(root, inode);
+ btrfs_set_inode_profile(root, inode);
if (mode & S_IFDIR)
owner = 0;
@@ -5288,8 +5293,9 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
alloc_hint = get_extent_allocation_hint(inode, start, len);
- ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
- alloc_hint, (u64)-1, &ins, 1);
+ ret = btrfs_reserve_data_extent(trans, root, inode,
+ len, root->sectorsize, 0,
+ alloc_hint, (u64)-1, &ins, 1);
if (ret) {
em = ERR_PTR(ret);
goto out;
@@ -6483,19 +6489,21 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
return NULL;
ei->root = NULL;
- ei->space_info = NULL;
+ ei->profile = NULL;
ei->generation = 0;
ei->sequence = 0;
ei->last_trans = 0;
ei->last_sub_trans = 0;
ei->logged_trans = 0;
ei->delalloc_bytes = 0;
- ei->reserved_bytes = 0;
+ ei->reserved_total = 0;
+ memset(&ei->reserved_from, 0, sizeof(ei->reserved_from));
ei->disk_i_size = 0;
ei->flags = 0;
ei->index_cnt = (u64)-1;
ei->last_unlink_trans = 0;
+ spin_lock_init(&ei->reserved_lock);
spin_lock_init(&ei->accounting_lock);
atomic_set(&ei->outstanding_extents, 0);
ei->reserved_extents = 0;
@@ -7056,8 +7064,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
}
}
- ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
- 0, *alloc_hint, (u64)-1, &ins, 1);
+ ret = btrfs_reserve_data_extent(trans, root, inode,
+ num_bytes, min_size, 0,
+ *alloc_hint, (u64)-1, &ins, 1);
if (ret) {
if (own_trans)
btrfs_end_transaction(trans, root);
@@ -1683,7 +1683,26 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
return PTR_ERR(vol_args);
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- ret = btrfs_init_new_device(root, vol_args->name);
+	/* the v1 ioctl carries no seek_speed; fall back to the default of 30 */
+	ret = btrfs_init_new_device(root, vol_args->name, 30);
+
+ kfree(vol_args);
+ return ret;
+}
+
+static long btrfs_ioctl_add_dev_v2(struct btrfs_root *root, void __user *arg)
+{
+ struct btrfs_ioctl_vol_args_v2 *vol_args;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ vol_args = memdup_user(arg, sizeof(*vol_args));
+ if (IS_ERR(vol_args))
+ return PTR_ERR(vol_args);
+
+ vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+ ret = btrfs_init_new_device(root, vol_args->name, vol_args->seek_speed);
kfree(vol_args);
return ret;
@@ -2392,6 +2411,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_resize(root, argp);
case BTRFS_IOC_ADD_DEV:
return btrfs_ioctl_add_dev(root, argp);
+ case BTRFS_IOC_ADD_DEV_V2:
+ return btrfs_ioctl_add_dev_v2(root, argp);
case BTRFS_IOC_RM_DEV:
return btrfs_ioctl_rm_dev(root, argp);
case BTRFS_IOC_BALANCE:
@@ -38,8 +38,10 @@ struct btrfs_ioctl_vol_args_v2 {
__s64 fd;
__u64 transid;
__u64 flags;
- __u64 unused[4];
- char name[BTRFS_SUBVOL_NAME_MAX + 1];
+ __u8 seek_speed;
+	__u8 unused_u8[7];	/* explicit padding keeps unused_u64 8-byte
+				 * aligned and the layout identical on 32-
+				 * and 64-bit ABIs */
+ __u64 unused_u64[3];
+ char name[BTRFS_PATH_NAME_MAX + 1];
};
#define BTRFS_INO_LOOKUP_PATH_MAX 4080
@@ -203,4 +205,6 @@ struct btrfs_ioctl_space_args {
struct btrfs_ioctl_vol_args_v2)
#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
+#define BTRFS_IOC_ADD_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 27, \
+ struct btrfs_ioctl_vol_args_v2)
#endif
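
A minimal userspace sketch (illustrative only, error handling mostly elided)
of adding a device with an explicit seek speed through the new v2 ioctl:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "ioctl.h"	/* the btrfs ioctl definitions shown above */

int add_device_with_speed(const char *mnt, const char *dev, __u8 speed)
{
	struct btrfs_ioctl_vol_args_v2 args;
	int fd = open(mnt, O_RDONLY);
	int ret;

	if (fd < 0)
		return -1;
	memset(&args, 0, sizeof(args));
	strncpy(args.name, dev, BTRFS_PATH_NAME_MAX);
	args.seek_speed = speed;	/* e.g. 30, the default class */
	ret = ioctl(fd, BTRFS_IOC_ADD_DEV_V2, &args);
	close(fd);
	return ret;
}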
@@ -141,7 +141,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
u64 file_offset)
{
struct rb_root *root = &tree->tree;
- struct rb_node *prev;
+ struct rb_node *prev = NULL;
struct rb_node *ret;
struct btrfs_ordered_extent *entry;
@@ -3601,7 +3601,8 @@ int prepare_to_relocate(struct reloc_control *rc)
struct btrfs_trans_handle *trans;
int ret;
- rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root);
+ rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root,
+ rc->extent_root->meta_profile);
if (!rc->block_rsv)
return -ENOMEM;
@@ -510,11 +510,13 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
u64 end;
unsigned long index;
+ start = 0;
while (1) {
ret = find_first_extent_bit(dirty_pages, start, &start, &end,
mark);
if (ret)
break;
+
while (start <= end) {
cond_resched();
@@ -530,7 +532,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
page_cache_release(page);
continue;
}
-
if (PageWriteback(page)) {
if (PageDirty(page))
wait_on_page_writeback(page);
@@ -1363,7 +1364,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
mutex_unlock(&root->fs_info->trans_mutex);
ret = btrfs_write_and_wait_transaction(trans, root);
BUG_ON(ret);
- write_ctree_super(trans, root, 0);
+ write_ctree_super(trans, root, 0, 1);
/*
* the super is written, we can safely allow the tree-loggers
@@ -1960,7 +1960,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
while (1) {
unsigned long batch = root->log_batch;
- if (root->log_multiple_pids) {
+		/* FIXME: multi-pid log batching temporarily disabled */
+		if (0 && root->log_multiple_pids) {
mutex_unlock(&root->log_mutex);
schedule_timeout_uninterruptible(1);
mutex_lock(&root->log_mutex);
@@ -2078,7 +2078,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* the running transaction open, so a full commit can't hop
* in and cause problems either.
*/
- write_ctree_super(trans, root->fs_info->tree_root, 1);
+ write_ctree_super(trans, log, 1, 0);
ret = 0;
mutex_lock(&root->log_mutex);
@@ -1181,7 +1181,7 @@ int btrfs_add_device(struct btrfs_trans_handle *trans,
btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
btrfs_set_device_group(leaf, dev_item, 0);
- btrfs_set_device_seek_speed(leaf, dev_item, 0);
+ btrfs_set_device_seek_speed(leaf, dev_item, device->seek_speed);
btrfs_set_device_bandwidth(leaf, dev_item, 0);
btrfs_set_device_start_offset(leaf, dev_item, 0);
@@ -1544,7 +1544,7 @@ error:
return ret;
}
-int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
+int btrfs_init_new_device(struct btrfs_root *root, char *device_path, int speed)
{
struct btrfs_trans_handle *trans;
struct btrfs_device *device;
@@ -1621,7 +1621,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->bdev = bdev;
device->in_fs_metadata = 1;
device->mode = 0;
+ device->seek_speed = speed;
set_blocksize(device->bdev, 4096);
+ device->flush_bio = NULL;
if (seeding_dev) {
sb->s_flags &= ~MS_RDONLY;
@@ -2280,15 +2282,33 @@ int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
}
static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type,
- int *num_stripes, int *min_stripes,
+ int speed, int *num_stripes, int *min_stripes,
int *sub_stripes)
{
+ struct btrfs_device *device = NULL;
+ int ndevs = 0;
+ struct list_head *cur;
+
*num_stripes = 1;
*min_stripes = 1;
*sub_stripes = 0;
+	/*
+	 * count devices with this speed. FIXME: this number could be cached
+	 */
+ cur = fs_devices->alloc_list.next;
+	while (1) {
+		device = list_entry(cur, struct btrfs_device, dev_alloc_list);
+ BUG_ON(!device->writeable);
+ if (device->in_fs_metadata && device->seek_speed == speed)
+ ++ndevs;
+ cur = cur->next;
+ if (cur == &fs_devices->alloc_list)
+ break;
+ }
+
if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
- *num_stripes = fs_devices->rw_devices;
+ *num_stripes = ndevs;
*min_stripes = 2;
}
if (type & (BTRFS_BLOCK_GROUP_DUP)) {
@@ -2296,13 +2316,13 @@ static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type,
*min_stripes = 2;
}
if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
- if (fs_devices->rw_devices < 2)
+ if (ndevs < 2)
return -ENOSPC;
*num_stripes = 2;
*min_stripes = 2;
}
if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
- *num_stripes = fs_devices->rw_devices;
+ *num_stripes = ndevs;
if (*num_stripes < 4)
return -ENOSPC;
*num_stripes &= ~(u32)1;
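
The FIXME above suggests caching the per-speed device count. Short of that,
the counting loop could use the standard list iterator; an illustrative,
behavior-preserving form:

static int count_devices_with_speed(struct btrfs_fs_devices *fs_devices,
				    int speed)
{
	struct btrfs_device *device;
	int ndevs = 0;

	list_for_each_entry(device, &fs_devices->alloc_list,
			    dev_alloc_list) {
		BUG_ON(!device->writeable);
		if (device->in_fs_metadata && device->seek_speed == speed)
			++ndevs;
	}
	return ndevs;
}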
@@ -2484,7 +2504,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root,
struct map_lookup **map_ret,
u64 *num_bytes, u64 *stripe_size,
- u64 start, u64 type)
+ u64 start, u64 type, int speed)
{
struct btrfs_fs_info *info = extent_root->fs_info;
struct btrfs_device *device = NULL;
@@ -2515,7 +2535,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (list_empty(&fs_devices->alloc_list))
return -ENOSPC;
- ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes,
+ ret = __btrfs_calc_nstripes(fs_devices, type, speed, &num_stripes,
&min_stripes, &sub_stripes);
if (ret)
return ret;
@@ -2557,6 +2577,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
avail = 0;
cur = cur->next;
+ if (device->seek_speed != speed)
+ goto next;
+
if (device->in_fs_metadata && avail >= min_free) {
ret = find_free_dev_extent(trans, device, min_free,
&devices_info[i].dev_offset,
@@ -2586,7 +2609,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
devices_info[i].max_avail = avail;
i++;
}
-
+next:
if (cur == &fs_devices->alloc_list)
break;
}
@@ -2745,7 +2768,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
* bootstrap process of adding storage to a seed btrfs.
*/
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root, u64 type)
+ struct btrfs_root *extent_root, u64 type, int speed)
{
u64 chunk_offset;
u64 chunk_size;
@@ -2760,7 +2783,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
return ret;
ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
- &stripe_size, chunk_offset, type);
+ &stripe_size, chunk_offset, type, speed);
if (ret)
return ret;
@@ -2797,7 +2820,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
- &stripe_size, chunk_offset, alloc_profile);
+ &stripe_size, chunk_offset, alloc_profile,
+ device->seek_speed);
BUG_ON(ret);
sys_chunk_offset = chunk_offset + chunk_size;
@@ -2809,7 +2833,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
&sys_chunk_size, &sys_stripe_size,
- sys_chunk_offset, alloc_profile);
+ sys_chunk_offset, alloc_profile,
+ device->seek_speed);
BUG_ON(ret);
ret = btrfs_add_device(trans, fs_info->chunk_root, device);
@@ -2862,6 +2887,33 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
return readonly;
}
+int btrfs_chunk_seek_speed(struct btrfs_root *root, u64 chunk_offset)
+{
+ struct extent_map *em;
+ struct map_lookup *map;
+ struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+ int seek_speed = 256;
+ int i;
+
+ read_lock(&map_tree->map_tree.lock);
+ em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
+ read_unlock(&map_tree->map_tree.lock);
+ if (!em)
+ return 0;
+
+ map = (struct map_lookup *)em->bdev;
+ for (i = 0; i < map->num_stripes; i++) {
+		if (map->stripes[i].dev->seek_speed < seek_speed)
+			seek_speed = map->stripes[i].dev->seek_speed;
+ }
+ free_extent_map(em);
+
+ WARN_ON(seek_speed == 256);
+
+ return seek_speed;
+}
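
A chunk's speed class is thus the minimum seek_speed among its stripe
devices: a RAID1 chunk mirrored over devices rated 10 and 30, for example, is
classified as speed 10, and a chunk whose mapping cannot be looked up falls
back to speed 0.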
+
void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
{
extent_map_tree_init(&tree->map_tree, GFP_NOFS);
@@ -3494,6 +3546,16 @@ static int fill_device_from_item(struct extent_buffer *leaf,
device->io_align = btrfs_device_io_align(leaf, dev_item);
device->io_width = btrfs_device_io_width(leaf, dev_item);
device->sector_size = btrfs_device_sector_size(leaf, dev_item);
+ device->seek_speed = btrfs_device_seek_speed(leaf, dev_item);
+ if (device->seek_speed <= 1) {
+		/* this is necessary because older versions of mkfs.btrfs
+		 * initialized seek_speed to 1 for the first device and 0 for
+		 * the following ones. 30 is the default for data + metadata.
+		 */
+ device->seek_speed = 30;
+ }
+ printk(KERN_DEBUG "btrfs: device %llu has speed %d\n", device->devid,
+ device->seek_speed);
ptr = (unsigned long)btrfs_device_uuid(dev_item);
read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
@@ -83,10 +83,17 @@ struct btrfs_device {
/* type and info about this device */
u64 type;
+ /* the speed is used to determine if the device should be a preferred
+ * log device */
+ u8 seek_speed;
+
/* physical drive uuid (or lvm uuid) */
u8 uuid[BTRFS_UUID_SIZE];
struct btrfs_work work;
+
+ struct bio *flush_bio;
+ struct completion flush_wait;
};
struct btrfs_fs_devices {
@@ -180,7 +187,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
int btrfs_read_sys_array(struct btrfs_root *root);
int btrfs_read_chunk_tree(struct btrfs_root *root);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root, u64 type);
+ struct btrfs_root *extent_root, u64 type, int speed);
void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
@@ -205,7 +212,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
u8 *uuid, u8 *fsid);
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
-int btrfs_init_new_device(struct btrfs_root *root, char *path);
+int btrfs_init_new_device(struct btrfs_root *root, char *path, int speed);
int btrfs_balance(struct btrfs_root *dev_root);
void btrfs_unlock_volumes(void);
void btrfs_lock_volumes(void);
@@ -213,4 +220,6 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *max_avail);
+int btrfs_chunk_seek_speed(struct btrfs_root *root, u64 chunk_offset);
+
#endif