@@ -8049,7 +8049,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
mutex_lock(&root->fs_info->chunk_mutex);
list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
u64 min_free = btrfs_block_group_used(&block_group->item);
- u64 dev_offset, max_avail;
+ u64 dev_offset;
/*
* check to make sure we can actually find a chunk with enough
@@ -8057,7 +8057,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
*/
if (device->total_bytes > device->bytes_used + min_free) {
ret = find_free_dev_extent(NULL, device, min_free,
- &dev_offset, &max_avail);
+ &dev_offset, NULL);
if (!ret)
break;
ret = -1;
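
The NULL passed above works because find_free_dev_extent() now treats max_avail as optional. A minimal userspace sketch of that convention (names and hole sizes invented, not the btrfs code): a caller that only wants a yes/no answer passes NULL, and the callee guards every store behind a NULL check.

#include <stdio.h>

/* the "out" parameter max_avail may be NULL when the caller only
 * wants a yes/no answer, so every store is guarded */
static int find_hole(const unsigned long long *holes, int n,
		     unsigned long long needed, int *slot,
		     unsigned long long *max_avail)
{
	int i;

	for (i = 0; i < n; i++) {
		if (max_avail && holes[i] > *max_avail)
			*max_avail = holes[i];	/* track the largest hole */
		if (holes[i] >= needed) {
			*slot = i;		/* first hole that fits */
			return 0;
		}
	}
	return -1;				/* caller maps this to -ENOSPC */
}

int main(void)
{
	unsigned long long holes[3] = { 4, 9, 2 };
	unsigned long long biggest = 0;
	int slot;

	/* relocation-style caller: yes/no only */
	if (!find_hole(holes, 3, 8, &slot, NULL))
		printf("fits in hole %d\n", slot);	/* hole 1 */

	/* allocator-style caller: wants the best-effort size too */
	if (find_hole(holes, 3, 100, &slot, &biggest))
		printf("no fit, largest hole %llu\n", biggest);	/* 9 */
	return 0;
}
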
@@ -738,29 +738,33 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_dev_extent *dev_extent = NULL;
struct btrfs_path *path;
u64 hole_size = 0;
- u64 last_byte = 0;
- u64 search_start = 0;
+ u64 hole_start;
+ u64 hole_end;
+ u64 search_start;
u64 search_end = device->total_bytes;
int ret;
int slot = 0;
- int start_found;
struct extent_buffer *l;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->reada = 2;
- start_found = 0;
/* FIXME use last free of some kind */
/* we don't want to overwrite the superblock on the drive,
* so we make sure to start at an offset of at least 1MB
*/
- search_start = max((u64)1024 * 1024, search_start);
+ search_start = max((u64)1024 * 1024, root->fs_info->alloc_start);
- if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
- search_start = max(root->fs_info->alloc_start, search_start);
+ if (search_start >= search_end) {
+ ret = -ENOSPC;
+ goto error;
+ }
+
+ hole_start = search_start;
+ hole_end = search_end;
key.objectid = device->devid;
key.offset = search_start;
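
A worked example of the rewritten prologue, with invented sizes: the old code honoured fs_info->alloc_start only when alloc_start + num_bytes still fit on the device and silently fell back to the 1MB superblock floor otherwise, whereas the new code always honours it and fails fast.

#include <stdio.h>

int main(void)
{
	unsigned long long search_end  = 8ULL << 20;	/* device->total_bytes */
	unsigned long long alloc_start = 12ULL << 20;	/* fs_info->alloc_start */
	unsigned long long floor       = 1ULL << 20;	/* superblock guard */
	unsigned long long search_start =
		alloc_start > floor ? alloc_start : floor;

	if (search_start >= search_end)
		printf("-ENOSPC: search window [%llu, %llu) is empty\n",
		       search_start, search_end);
	return 0;
}
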
@@ -772,8 +776,6 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
ret = btrfs_previous_item(root, path, key.objectid, key.type);
if (ret < 0)
goto error;
- if (ret > 0)
- start_found = 1;
}
l = path->nodes[0];
btrfs_item_key_to_cpu(l, &key, path->slots[0]);
@@ -786,23 +788,8 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
continue;
if (ret < 0)
goto error;
-no_more_items:
- if (!start_found) {
- if (search_start >= search_end) {
- ret = -ENOSPC;
- goto error;
- }
- *start = search_start;
- start_found = 1;
- goto check_pending;
- }
- *start = last_byte > search_start ?
- last_byte : search_start;
- if (search_end <= *start) {
- ret = -ENOSPC;
- goto error;
- }
- goto check_pending;
+
+ break;
}
btrfs_item_key_to_cpu(l, &key, slot);
@@ -810,43 +797,48 @@ no_more_items:
goto next;
if (key.objectid > device->devid)
- goto no_more_items;
+ break;
- if (key.offset >= search_start && key.offset > last_byte &&
- start_found) {
- if (last_byte < search_start)
- last_byte = search_start;
- hole_size = key.offset - last_byte;
+ if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
+ goto next;
- if (hole_size > *max_avail)
- *max_avail = hole_size;
+ if (key.offset > hole_start) {
+ hole_size = key.offset - hole_start;
- if (key.offset > last_byte &&
- hole_size >= num_bytes) {
- *start = last_byte;
- goto check_pending;
+ if (hole_size >= num_bytes) {
+ hole_end = key.offset;
+ break;
}
+
+ if (max_avail && hole_size > *max_avail)
+ *max_avail = hole_size;
}
- if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
- goto next;
- start_found = 1;
dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
- last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
+ hole_start = key.offset + btrfs_dev_extent_length(l, dev_extent);
+ if (hole_start < search_start)
+ hole_start = search_start;
next:
path->slots[0]++;
cond_resched();
}
-check_pending:
/* we have to make sure we didn't find an extent that has already
* been allocated by the map tree or the original allocation
*/
- BUG_ON(*start < search_start);
+ BUG_ON(hole_start < search_start);
+
+ hole_size = hole_end - hole_start;
- if (*start + num_bytes > search_end) {
+ if (max_avail && hole_size > *max_avail)
+ *max_avail = hole_size;
+
+ if (hole_start + num_bytes > search_end) {
ret = -ENOSPC;
goto error;
}
+
+ *start = hole_start;
+
/* check for pending inserts here */
ret = 0;
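
Taken together, the hunks above reduce the search to a single forward walk. A self-contained userspace sketch of that walk, assuming a plain array in place of the btrfs tree iteration: hole_start chases the end of the last extent seen, each gap before the next extent is a candidate, and the trailing gap up to search_end falls through to the same common exit as a hole found mid-walk.

#include <stdio.h>

struct extent { unsigned long long start, len; };

/*
 * Userspace sketch of the restructured search (extents sorted by
 * start, as the btrfs tree walk guarantees; -1 stands in for -ENOSPC).
 */
static int find_free(const struct extent *ext, int n,
		     unsigned long long search_start,
		     unsigned long long search_end,
		     unsigned long long num_bytes,
		     unsigned long long *start,
		     unsigned long long *max_avail)
{
	unsigned long long hole_start = search_start;
	unsigned long long hole_end = search_end;
	unsigned long long hole_size;
	int i;

	for (i = 0; i < n; i++) {
		if (ext[i].start > hole_start) {
			hole_size = ext[i].start - hole_start;
			if (hole_size >= num_bytes) {
				hole_end = ext[i].start; /* big enough, stop */
				break;
			}
			if (max_avail && hole_size > *max_avail)
				*max_avail = hole_size;	/* best effort */
		}
		hole_start = ext[i].start + ext[i].len;
		if (hole_start < search_start)
			hole_start = search_start;
	}

	/* common exit: the hole in hand is [hole_start, hole_end) */
	hole_size = hole_end - hole_start;
	if (max_avail && hole_size > *max_avail)
		*max_avail = hole_size;
	if (hole_start + num_bytes > search_end)
		return -1;
	*start = hole_start;
	return 0;
}

int main(void)
{
	struct extent ext[] = { { 1 << 20, 1 << 20 }, { 5 << 20, 1 << 20 } };
	unsigned long long start, biggest = 0;

	/* the 2MB..5MB gap satisfies a 3MB request */
	if (!find_free(ext, 2, 1 << 20, 8 << 20, 3 << 20, &start, &biggest))
		printf("3MB fits at %llu, largest hole %llu\n", start, biggest);
	return 0;
}
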
@@ -2154,6 +2146,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *info = extent_root->fs_info;
struct btrfs_device *device = NULL;
+ struct btrfs_device *smallest_dev;
struct btrfs_fs_devices *fs_devices = info->fs_devices;
struct list_head *cur;
struct map_lookup *map = NULL;
@@ -2165,7 +2158,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
u64 max_chunk_size = calc_size;
u64 min_free;
u64 avail;
- u64 max_avail = 0;
+ u64 min_max_avail = 0;
u64 dev_offset;
int num_stripes = 1;
int min_stripes = 1;
@@ -2223,7 +2216,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
max_chunk_size);
again:
- max_avail = 0;
if (!map || map->num_stripes != num_stripes) {
kfree(map);
map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
@@ -2260,15 +2252,9 @@ again:
else
min_free = calc_size;
- /*
- * we add 1MB because we never use the first 1MB of the device, unless
- * we've looped, then we are likely allocating the maximum amount of
- * space left already
- */
- if (!looped)
- min_free += 1024 * 1024;
-
INIT_LIST_HEAD(&private_devs);
+ min_max_avail = 0;
+ smallest_dev = NULL;
while (index < num_stripes) {
device = list_entry(cur, struct btrfs_device, dev_alloc_list);
BUG_ON(!device->writeable);
@@ -2278,10 +2264,20 @@ again:
avail = 0;
cur = cur->next;
- if (device->in_fs_metadata && avail >= min_free) {
- ret = find_free_dev_extent(trans, device,
- min_free, &dev_offset,
- &max_avail);
+ if (device->in_fs_metadata) {
+ u64 max_avail = 0;
+ ret = find_free_dev_extent(trans, device, min_free,
+ &dev_offset, &max_avail);
+ /*
+ * track the smallest max_avail across all the devices, but don't
+ * count holes smaller than four stripe lengths
+ */
+ if (max_avail >= stripe_len * 4 &&
+ (max_avail < min_max_avail || min_max_avail == 0)) {
+ min_max_avail = max_avail;
+ smallest_dev = device;
+ }
+
if (ret == 0) {
list_move_tail(&device->dev_alloc_list,
&private_devs);
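
The min-of-max bookkeeping above, shown with invented sizes: each device reports its largest hole through max_avail, and the loop keeps the smallest value that still clears four stripe lengths, which is the largest stripe size every remaining device could honour on a retry.

#include <stdio.h>

int main(void)
{
	/* largest hole per device, in bytes (made-up numbers) */
	unsigned long long max_avail[3] = { 900, 300, 40 };
	unsigned long long stripe_len = 16;
	unsigned long long min_max_avail = 0;
	int i;

	for (i = 0; i < 3; i++) {
		if (max_avail[i] >= stripe_len * 4 &&	/* 40 is skipped */
		    (max_avail[i] < min_max_avail || min_max_avail == 0))
			min_max_avail = max_avail[i];
	}
	printf("fallback stripe size = %llu\n", min_max_avail);	/* 300 */
	return 0;
}
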
@@ -2295,12 +2291,16 @@ again:
index++;
}
}
- } else if (device->in_fs_metadata && avail > max_avail)
- max_avail = avail;
+ }
if (cur == &fs_devices->alloc_list)
break;
}
- list_splice(&private_devs, &fs_devices->alloc_list);
+ /*
+ * splice the list of used devices to the end. This achieves
+ * a basic round-robin scheme which helps balance the devices
+ * better.
+ */
+ list_splice_tail(&private_devs, &fs_devices->alloc_list);
if (index < num_stripes) {
if (index >= min_stripes) {
num_stripes = index;
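
The switch to list_splice_tail() is what makes alloc_list behave like a rotor. A toy userspace model of the effect (device names invented, a flat array standing in for the kernel list): the devices consumed by one chunk move to the back, so the next chunk starts from the devices that were skipped.

#include <stdio.h>

int main(void)
{
	const char *alloc_list[4] = { "devA", "devB", "devC", "devD" };
	const char *used[2] = { alloc_list[0], alloc_list[1] };
	int i;

	/* a two-stripe chunk consumed devA and devB; splice them to the tail */
	alloc_list[0] = alloc_list[2];
	alloc_list[1] = alloc_list[3];
	alloc_list[2] = used[0];
	alloc_list[3] = used[1];

	for (i = 0; i < 4; i++)
		printf("%s ", alloc_list[i]);	/* devC devD devA devB */
	printf("\n");
	return 0;
}
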
@@ -2311,9 +2311,16 @@ again:
looped = 1;
goto again;
}
- if (!looped && max_avail > 0) {
+ if (!looped && min_max_avail > 0) {
looped = 1;
- calc_size = max_avail;
+ calc_size = min_max_avail;
+ /*
+ * move the device with the smallest usable hole to the head of
+ * the list to make sure it gets included in the chunk.
+ */
+ BUG_ON(!smallest_dev);
+ list_move(&smallest_dev->dev_alloc_list,
+ &fs_devices->alloc_list);
goto again;
}
kfree(map);
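
Finally, the retry path end to end, modeled in userspace with invented sizes; the flat array sidesteps list order, so the "move the smallest device to the head" step above has no analogue here: if some device cannot fit calc_size but every device still has a usable hole, calc_size shrinks to the smallest such hole and the loop runs once more.

#include <stdio.h>

int main(void)
{
	/* largest usable hole per device (made-up numbers) */
	unsigned long long holes[3] = { 200, 500, 800 };
	unsigned long long calc_size = 400, min_max_avail;
	int num_stripes = 3, looped = 0;

	for (;;) {
		int found = 0, i;

		min_max_avail = 0;
		for (i = 0; i < 3; i++) {
			if (holes[i] >= calc_size)
				found++;
			if (!min_max_avail || holes[i] < min_max_avail)
				min_max_avail = holes[i];
		}
		if (found == num_stripes) {
			printf("chunk allocated, stripe size %llu\n",
			       calc_size);	/* 200 on the retry */
			return 0;
		}
		if (looped || !min_max_avail)
			return 1;		/* -ENOSPC */
		looped = 1;
		calc_size = min_max_avail;	/* shrink to the worst device */
	}
}
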