@@ -26,6 +26,8 @@
#include "common/messages.h"
#include "mkfs/common.h"
#include "common/hmzoned.h"
+#include "volumes.h"
+#include "disk-io.h"
#define BTRFS_REPORT_NR_ZONES 8192
@@ -272,3 +274,75 @@ int zero_zone_blocks(int fd, struct btrfs_zone_info *zinfo, off_t start,
return 0;
}
+
+static inline bool btrfs_dev_is_empty_zone(struct btrfs_device *device, u64 pos)
+{
+	struct btrfs_zone_info *zi = &device->zone_info;
+	unsigned int idx;
+
+	/* Only sequential zones are checked; any other zone counts as empty. */
+	if (!zone_is_sequential(zi, pos))
+		return true;
+	idx = pos / zi->zone_size;
+	return zi->zones[idx].cond == BLK_ZONE_COND_EMPTY;
+}
+
+
+/*
+ * btrfs_check_allocatable_zones - check if a region is suitable for allocation
+ * @device:    the device to allocate a region
+ * @pos:       the position of the region
+ * @num_bytes: the size of the region
+ *
+ * Any region is suitable on a non-ZONED device. On a ZONED device, check if
+ * 1) every sequential zone in the region is empty,
+ * 2) all zones in the region have the same zone type,
+ * 3) it does not overlap any super block location, if the zones are
+ *    sequential.
+ *
+ * Return: true if the region is suitable for allocation, false otherwise.
+ */
+bool btrfs_check_allocatable_zones(struct btrfs_device *device, u64 pos,
+				   u64 num_bytes)
+{
+	struct btrfs_zone_info *zinfo = &device->zone_info;
+	u64 nzones, begin, end, sb_pos;
+	bool is_sequential;
+	int i;
+
+	if (zinfo->model == ZONED_NONE)
+		return true;
+
+	nzones = num_bytes / zinfo->zone_size;
+	begin = pos / zinfo->zone_size;
+	end = begin + nzones;
+
+	ASSERT(IS_ALIGNED(pos, zinfo->zone_size));
+	ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size));
+
+	if (end > zinfo->nr_zones)
+		return false;
+
+	is_sequential = btrfs_dev_is_sequential(device, pos);
+	if (is_sequential) {
+		/* The byte range must not overlap any super block copy. */
+		for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+			sb_pos = btrfs_sb_offset(i);
+			if (!(sb_pos + BTRFS_SUPER_INFO_SIZE <= pos ||
+			      pos + num_bytes <= sb_pos))
+				return false;
+		}
+	}
+
+	while (num_bytes) {
+		if (!btrfs_dev_is_empty_zone(device, pos))
+			return false;
+		if (is_sequential != btrfs_dev_is_sequential(device, pos))
+			return false;
+
+		pos += zinfo->zone_size;
+		num_bytes -= zinfo->zone_size;
+	}
+
+	return true;
+}
@@ -53,6 +53,8 @@ enum btrfs_zoned_model zoned_model(const char *file);
size_t zone_size(const char *file);
int btrfs_get_zone_info(int fd, const char *file, bool hmzoned,
struct btrfs_zone_info *zinfo);
+bool btrfs_check_allocatable_zones(struct btrfs_device *device, u64 pos,
+ u64 num_bytes);
#ifdef BTRFS_ZONED
bool zone_is_sequential(struct btrfs_zone_info *zinfo, u64 bytenr);
@@ -28,6 +28,7 @@
#include <assert.h>
#include <stddef.h>
#include <linux/types.h>
+#include <linux/kernel.h>
#include <stdint.h>
#include <features.h>
@@ -345,6 +346,7 @@ static inline void assert_trace(const char *assertion, const char *filename,
/* Alignment check */
#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
+#define ALIGN(x, a) __ALIGN_KERNEL((x), (a))
static inline int is_power_of_2(unsigned long n)
{
@@ -465,6 +465,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
int slot;
struct extent_buffer *l;
u64 min_search_start;
+ u64 zone_size;
/*
* We don't want to overwrite the superblock on the drive nor any area
@@ -473,6 +474,13 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
*/
min_search_start = max(root->fs_info->alloc_start, (u64)SZ_1M);
search_start = max(search_start, min_search_start);
+ /*
+ * For a zoned block device, skip the first zone of the device
+ * entirely.
+ */
+ zone_size = device->zone_info.zone_size;
+ search_start = max_t(u64, search_start, zone_size);
+ search_start = btrfs_zone_align(device, search_start);
path = btrfs_alloc_path();
if (!path)
@@ -481,6 +489,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
max_hole_start = search_start;
max_hole_size = 0;
+again:
if (search_start >= search_end) {
ret = -ENOSPC;
goto out;
@@ -525,6 +534,13 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
goto next;
if (key.offset > search_start) {
+ if (!btrfs_check_allocatable_zones(device, search_start,
+ num_bytes)) {
+ search_start += zone_size;
+ btrfs_release_path(path);
+ goto again;
+ }
+
hole_size = key.offset - search_start;
/*
@@ -567,6 +583,13 @@ next:
* search_end may be smaller than search_start.
*/
if (search_end > search_start) {
+ if (!btrfs_check_allocatable_zones(device, search_start,
+ num_bytes)) {
+ search_start += zone_size;
+ btrfs_release_path(path);
+ goto again;
+ }
+
hole_size = search_end - search_start;
if (hole_size > max_hole_size) {
@@ -610,6 +633,10 @@ int btrfs_insert_dev_extent(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_key key;
+ /* Check alignment to zone for a zoned block device */
+ ASSERT(device->zone_info.model != ZONED_HOST_MANAGED ||
+ IS_ALIGNED(start, device->zone_info.zone_size));
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -1012,17 +1039,13 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
int max_stripes = 0;
int min_stripes = 1;
int sub_stripes; /* sub_stripes info for map */
- int dev_stripes __attribute__((unused));
- /* stripes per dev */
+ int dev_stripes; /* stripes per dev */
int devs_max; /* max devs to use */
- int devs_min __attribute__((unused));
- /* min devs needed */
+ int devs_min; /* min devs needed */
int devs_increment __attribute__((unused));
/* ndevs has to be a multiple of this */
- int ncopies __attribute__((unused));
- /* how many copies to data has */
- int nparity __attribute__((unused));
- /* number of stripes worth of bytes to
+ int ncopies; /* how many copies to data has */
+ int nparity; /* number of stripes worth of bytes to
store parity information */
int looped = 0;
int ret;
@@ -1030,6 +1053,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
int stripe_len = BTRFS_STRIPE_LEN;
struct btrfs_key key;
u64 offset;
+ bool hmzoned = info->fs_devices->hmzoned;
+ u64 zone_size = info->fs_devices->zone_size;
if (list_empty(dev_list)) {
return -ENOSPC;
@@ -1116,13 +1141,39 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
btrfs_super_stripesize(info->super_copy));
}
+ if (hmzoned) {
+ calc_size = zone_size;
+ max_chunk_size = round_down(max_chunk_size, zone_size);
+ }
+
/* we don't want a chunk larger than 10% of the FS */
percent_max = div_factor(btrfs_super_total_bytes(info->super_copy), 1);
max_chunk_size = min(percent_max, max_chunk_size);
+ if (hmzoned) {
+ int min_num_stripes = devs_min * dev_stripes;
+ int min_data_stripes = (min_num_stripes - nparity) / ncopies;
+ u64 min_chunk_size = min_data_stripes * zone_size;
+
+ max_chunk_size = max(round_down(max_chunk_size,
+ zone_size),
+ min_chunk_size);
+ }
+
again:
if (chunk_bytes_by_type(type, calc_size, num_stripes, sub_stripes) >
max_chunk_size) {
+ if (hmzoned) {
+ /*
+ * calc_size is fixed in HMZONED. Reduce
+ * num_stripes instead.
+ */
+ num_stripes = max_chunk_size / calc_size;
+ if (num_stripes < min_stripes)
+ return -ENOSPC;
+ goto again;
+ }
+
calc_size = max_chunk_size;
calc_size /= num_stripes;
calc_size /= stripe_len;
@@ -1133,6 +1184,9 @@ again:
calc_size /= stripe_len;
calc_size *= stripe_len;
+
+ ASSERT(!hmzoned || calc_size == zone_size);
+
INIT_LIST_HEAD(&private_devs);
cur = dev_list->next;
index = 0;
@@ -1214,6 +1268,8 @@ again:
if (ret < 0)
goto out_chunk_map;
+ ASSERT(!zone_size || IS_ALIGNED(dev_offset, zone_size));
+
device->bytes_used += calc_size;
ret = btrfs_update_device(trans, device);
if (ret < 0)
@@ -323,4 +323,11 @@ static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
{
return zone_is_sequential(&device->zone_info, pos);
}
+static inline u64 btrfs_zone_align(struct btrfs_device *device, u64 pos)
+{
+	struct btrfs_zone_info *zi = &device->zone_info;
+
+	/* Round @pos up to a zone boundary, except on a ZONED_NONE device. */
+	return zi->model == ZONED_NONE ? pos : ALIGN(pos, zi->zone_size);
+}
#endif
In HMZONED mode, align the device extents to zone boundaries so that a zone reset affects only the device extent and does not change the state of blocks in the neighboring device extents. Also, check that a region allocation always covers empty zones of the same type and does not overlap any super block copy location.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 common/hmzoned.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
 common/hmzoned.h | 2 ++
 kerncompat.h | 2 ++
 volumes.c | 72 ++++++++++++++++++++++++++++++++++++++++------
 volumes.h | 7 +++++
 5 files changed, 149 insertions(+), 8 deletions(-)