@@ -247,6 +247,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
unsigned short max_vecs, blk_opf_t opf)
{
bio->bi_next = NULL;
+ bio->atomic_write_unit = 0;
bio->bi_bdev = bdev;
bio->bi_opf = opf;
bio->bi_flags = 0;
@@ -815,6 +816,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_iter = bio_src->bi_iter;
+ bio->atomic_write_unit = bio_src->atomic_write_unit;
if (bio->bi_bdev) {
if (bio->bi_bdev == bio_src->bi_bdev &&
bio_flagged(bio_src, BIO_REMAPPED))
@@ -1273,7 +1275,10 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
nr_pages = DIV_ROUND_UP(offset + size, PAGE_SIZE);
- trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
+ if (bio->atomic_write_unit)
+ trim = size & (bio->atomic_write_unit - 1);
+ else
+ trim = size & (bdev_logical_block_size(bio->bi_bdev) - 1);
iov_iter_revert(iter, trim);
size -= trim;
@@ -171,7 +171,17 @@ static inline unsigned get_max_io_size(struct bio *bio,
{
unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
- unsigned max_sectors = lim->max_sectors, start, end;
+ unsigned max_sectors, start, end;
+
+ /*
+ * We ignore lim->max_sectors for atomic writes simply because
+ * it may be less than bio->atomic_write_unit, which we cannot
+ * tolerate.
+ */
+ if (bio->bi_opf & REQ_ATOMIC)
+ max_sectors = lim->atomic_write_max_bytes >> SECTOR_SHIFT;
+ else
+ max_sectors = lim->max_sectors;
if (lim->chunk_sectors) {
max_sectors = min(max_sectors,
@@ -256,6 +266,22 @@ static bool bvec_split_segs(const struct queue_limits *lim,
return len > 0 || bv->bv_len > max_len;
}
+/*
+ * Report whether the byte range [start, start + bytes) crosses a multiple of
+ * @boundary, where start is @bio's current starting sector converted to
+ * bytes. A range that merely ends exactly on a boundary does not straddle it.
+ *
+ * @bio:      bio whose bi_iter.bi_sector gives the start of the range
+ * @bytes:    length of the range in bytes
+ * @boundary: boundary size in bytes; zero means there is no boundary
+ *
+ * Returns true if the range crosses a boundary, false otherwise.
+ */
+static bool bio_straddles_boundary(struct bio *bio, unsigned int bytes,
+				   unsigned int boundary)
+{
+	loff_t start = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+	loff_t start_mod;
+
+	/* No boundary configured: nothing to straddle, and avoid a modulo by 0 */
+	if (!boundary)
+		return false;
+
+	/*
+	 * NOTE(review): a plain 64-bit '%' may need a div64 helper on 32-bit
+	 * builds - confirm.
+	 */
+	start_mod = start % boundary;
+
+	/*
+	 * The range crosses a boundary exactly when it extends beyond the end
+	 * of the boundary-sized window that contains start. This also covers
+	 * bytes > boundary, and the case of an unaligned start with
+	 * bytes == boundary, where the start and end offsets within the
+	 * window are equal and a plain comparison of the two would miss it.
+	 */
+	return start_mod + bytes > boundary;
+}
+
/**
* bio_split_rw - split a bio in two bios
* @bio: [in] bio to be split
@@ -276,10 +302,15 @@ static bool bvec_split_segs(const struct queue_limits *lim,
* responsible for ensuring that @bs is only destroyed after processing of the
* split bio has finished.
*/
+
+
struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
unsigned *segs, struct bio_set *bs, unsigned max_bytes)
{
+ unsigned int atomic_write_boundary = lim->atomic_write_boundary;
+ bool atomic_write = bio->bi_opf & REQ_ATOMIC;
struct bio_vec bv, bvprv, *bvprvp = NULL;
+ bool straddles_boundary = false;
struct bvec_iter iter;
unsigned nsegs = 0, bytes = 0;
@@ -291,14 +322,31 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
goto split;
+ if (atomic_write && atomic_write_boundary) {
+ straddles_boundary = bio_straddles_boundary(bio,
+ bytes + bv.bv_len, atomic_write_boundary);
+ }
if (nsegs < lim->max_segments &&
bytes + bv.bv_len <= max_bytes &&
- bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
+ bv.bv_offset + bv.bv_len <= PAGE_SIZE &&
+ !straddles_boundary) {
nsegs++;
bytes += bv.bv_len;
} else {
- if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
- lim->max_segments, max_bytes))
+ bool split_the_segs =
+ bvec_split_segs(lim, &bv, &nsegs, &bytes,
+ lim->max_segments, max_bytes);
+
+ /*
+ * We may not actually straddle the boundary as we may
+ * have added fewer bytes than anticipated
+ */
+ if (straddles_boundary) {
+ straddles_boundary = bio_straddles_boundary(bio,
+ bytes, atomic_write_boundary);
+ }
+
+ if (split_the_segs || straddles_boundary)
goto split;
}
@@ -321,12 +369,25 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
*segs = nsegs;
- /*
- * Individual bvecs might not be logical block aligned. Round down the
- * split size so that each bio is properly block size aligned, even if
- * we do not use the full hardware limits.
- */
- bytes = ALIGN_DOWN(bytes, lim->logical_block_size);
+ if (straddles_boundary) {
+ loff_t new_end = (bio->bi_iter.bi_sector << SECTOR_SHIFT) + bytes;
+ unsigned int trim = new_end & (atomic_write_boundary - 1);
+ bytes -= trim;
+ new_end = (bio->bi_iter.bi_sector << SECTOR_SHIFT) + bytes;
+ BUG_ON(new_end % atomic_write_boundary);
+ } else if (bio->atomic_write_unit) {
+ unsigned int atomic_write_unit = bio->atomic_write_unit;
+ unsigned int trim = bytes % atomic_write_unit;
+
+ bytes -= trim;
+ } else {
+ /*
+ * Individual bvecs might not be logical block aligned. Round down the
+ * split size so that each bio is properly block size aligned, even if
+ * we do not use the full hardware limits.
+ */
+ bytes = ALIGN_DOWN(bytes, lim->logical_block_size);
+ }
/*
* Bio splitting may cause subtle trouble such as hang when doing sync
@@ -355,7 +416,8 @@ struct bio *__bio_split_to_limits(struct bio *bio,
const struct queue_limits *lim,
unsigned int *nr_segs)
{
- struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split;
+ struct block_device *bi_bdev = bio->bi_bdev;
+ struct bio_set *bs = &bi_bdev->bd_disk->bio_split;
struct bio *split;
switch (bio_op(bio)) {
@@ -303,6 +303,8 @@ struct bio {
struct bio_set *bi_pool;
+ unsigned int atomic_write_unit;
+
/*
* We can inline a number of vecs at the end of the bio, to avoid
* double allocations for a small number of bio_vecs. This member
Add bio.atomic_write_unit, which is the minimum size into which we can split a bio. Any bio needs to be split in a multiple of this size and also aligned to this size. In __bio_iov_iter_get_pages(), use atomic_write_unit to trim a bio to be a multiple of atomic_write_unit. In bio_split_rw(), we need to consider splitting as follows: - For a regular split which does not cross an atomic write boundary, same as in __bio_iov_iter_get_pages(), trim to be a multiple of atomic_write_unit - We also need to check for when a bio straddles an atomic write boundary. In this case, split to be start/end-aligned with the boundary. We need to ignore lim->max_sectors since it may be less than bio->atomic_write_unit, which we cannot tolerate. Signed-off-by: John Garry <john.g.garry@oracle.com> --- block/bio.c | 7 +++- block/blk-merge.c | 84 ++++++++++++++++++++++++++++++++++----- include/linux/blk_types.h | 2 + 3 files changed, 81 insertions(+), 12 deletions(-)