@@ -882,6 +882,8 @@ static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio,
static int check_async_write(struct btrfs_fs_info *fs_info,
struct btrfs_inode *bi)
{
+ if (btrfs_fs_incompat(fs_info, HMZONED))
+ return 0;
if (atomic_read(&bi->sync_writers))
return 0;
if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
@@ -2166,7 +2166,8 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
blk_status_t ret = 0;
int skip_sum;
- int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
+ int async = !atomic_read(&BTRFS_I(inode)->sync_writers) &&
+ !btrfs_fs_incompat(fs_info, HMZONED);
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
@@ -8457,7 +8458,8 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
/* Check btrfs_submit_bio_hook() for rules about async submit. */
if (async_submit)
- async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
+ async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers) &&
+ !btrfs_fs_incompat(fs_info, HMZONED);
if (!write) {
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
@@ -8522,7 +8524,8 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
}
/* async crcs make it difficult to collect full stripe writes. */
- if (btrfs_data_alloc_profile(fs_info) & BTRFS_BLOCK_GROUP_RAID56_MASK)
+ if (btrfs_data_alloc_profile(fs_info) & BTRFS_BLOCK_GROUP_RAID56_MASK ||
+ btrfs_fs_incompat(fs_info, HMZONED))
async_submit = 0;
else
async_submit = 1;
In HMZONED mode, btrfs uses a per-block-group zone_io_lock to serialize data write IOs and a per-FS hmzoned_meta_io_lock to serialize metadata write IOs. Even with this serialization, write bios sent from {btree,btrfs}_write_cache_pages can be reordered by the async checksum workers, because these workers are per-CPU and not per-zone. To preserve write bio ordering, disable async checksum on HMZONED. This does not lower performance on HDDs, as a single CPU core is fast enough to checksum a single zone write stream at the maximum possible bandwidth of the device. If multiple zones are being written simultaneously, HDD seek overhead lowers the achievable maximum bandwidth, so per-zone checksum serialization again does not affect performance. Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> --- fs/btrfs/disk-io.c | 2 ++ fs/btrfs/inode.c | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-)