@@ -431,6 +431,7 @@ struct bio {
struct bio *bi_next; /* request queue link */
struct block_device *bi_bdev; /* target device */
unsigned long bi_flags; /* status, command, etc */
+ unsigned short bi_rw_hint; /* bio read/write hint */
unsigned long bi_opf; /* low bits: r/w, high: priority */
unsigned int bi_vcnt; /* how may bio_vec's */
@@ -465,6 +466,12 @@ With this multipage bio design:
(e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE)
[TBD: Should preferably also have a bi_voffset and bi_vlen to avoid modifying
bi_offset an len fields]
+- bi_rw_hint is an in/out parameter. A filesystem can set bi_rw_hint on a
+ bio passed to submit_bio() to force a read from a specific mirror/copy.
+ Zero is a special value meaning the filesystem does not care which
+ mirror/copy is read. A value N >= 1 means the read must be served from
+ mirror 'N-1'. On completion, bi_rw_hint is set to indicate which mirror
+ the I/O actually happened on.
(*) unrelated merges -- a request ends up containing two or more bios that
didn't originate from the same place.
@@ -605,7 +605,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
if (bio_flagged(bio_src, BIO_THROTTLED))
bio_set_flag(bio, BIO_THROTTLED);
bio->bi_opf = bio_src->bi_opf;
- bio->bi_write_hint = bio_src->bi_write_hint;
+ bio->bi_rw_hint = bio_src->bi_rw_hint;
bio->bi_iter = bio_src->bi_iter;
bio->bi_io_vec = bio_src->bi_io_vec;
@@ -1980,7 +1980,7 @@ void blk_init_request_from_bio(struct request *req, struct bio *bio)
req->ioprio = ioc->ioprio;
else
req->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);
- req->write_hint = bio->bi_write_hint;
+ req->rw_hint = bio->bi_rw_hint;
blk_rq_bio_prep(req->q, req, bio);
}
EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
@@ -2314,6 +2314,14 @@ generic_make_request_checks(struct bio *bio)
if (!q->limits.max_write_zeroes_sectors)
goto not_supported;
break;
+ /*
+ * Zero is a special value meaning the upper layer (e.g. the fs) does
+ * not care which mirror is read.
+ * A value N >= 1 means the read must be served from mirror 'N-1'.
+ */
+ case REQ_OP_READ:
+ if (bio->bi_rw_hint < 0 || bio->bi_rw_hint > q->nr_mirrors)
+ goto not_supported;
default:
break;
}
@@ -766,10 +766,10 @@ static struct request *attempt_merge(struct request_queue *q,
return NULL;
/*
- * Don't allow merge of different write hints, or for a hint with
+ * Don't allow merge of different rw hints, or for a hint with
* non-hint IO.
*/
- if (req->write_hint != next->write_hint)
+ if (req->rw_hint != next->rw_hint)
return NULL;
/*
@@ -904,10 +904,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
return false;
/*
- * Don't allow merge of different write hints, or for a hint with
+ * Don't allow merge of different rw hints, or for a hint with
* non-hint IO.
*/
- if (rq->write_hint != bio->bi_write_hint)
+ if (rq->rw_hint != bio->bi_rw_hint)
return false;
return true;
@@ -248,7 +248,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
return NULL;
bio->bi_disk = bio_src->bi_disk;
bio->bi_opf = bio_src->bi_opf;
- bio->bi_write_hint = bio_src->bi_write_hint;
+ bio->bi_rw_hint = bio_src->bi_rw_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
@@ -1102,7 +1102,7 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio,
goto skip_copy;
}
- behind_bio->bi_write_hint = bio->bi_write_hint;
+ behind_bio->bi_rw_hint = bio->bi_rw_hint;
while (i < vcnt && size) {
struct page *page;
@@ -1137,9 +1137,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_iter.bi_size = STRIPE_SIZE;
- bi->bi_write_hint = sh->dev[i].write_hint;
+ bi->bi_rw_hint = sh->dev[i].rw_hint;
if (!rrdev)
- sh->dev[i].write_hint = RWF_WRITE_LIFE_NOT_SET;
+ sh->dev[i].rw_hint = RWF_WRITE_LIFE_NOT_SET;
/*
* If this is discard request, set bi_vcnt 0. We don't
* want to confuse SCSI because SCSI will replace payload
@@ -1191,8 +1191,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
rbi->bi_io_vec[0].bv_offset = 0;
rbi->bi_iter.bi_size = STRIPE_SIZE;
- rbi->bi_write_hint = sh->dev[i].write_hint;
- sh->dev[i].write_hint = RWF_WRITE_LIFE_NOT_SET;
+ rbi->bi_rw_hint = sh->dev[i].rw_hint;
+ sh->dev[i].rw_hint = RWF_WRITE_LIFE_NOT_SET;
/*
* If this is discard request, set bi_vcnt 0. We don't
* want to confuse SCSI because SCSI will replace payload
@@ -3219,7 +3219,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
(unsigned long long)sh->sector);
spin_lock_irq(&sh->stripe_lock);
- sh->dev[dd_idx].write_hint = bi->bi_write_hint;
+ sh->dev[dd_idx].rw_hint = bi->bi_rw_hint;
/* Don't allow new IO added to stripes in batch list */
if (sh->batch_head)
goto overlap;
@@ -257,7 +257,7 @@ struct stripe_head {
sector_t sector; /* sector of this page */
unsigned long flags;
u32 log_checksum;
- unsigned short write_hint;
+ unsigned short rw_hint;
} dev[1]; /* allocated with extra space depending of RAID geometry */
};
@@ -516,7 +516,7 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
struct request *req, u16 *control,
u32 *dsmgmt)
{
- enum rw_hint streamid = req->write_hint;
+ enum rw_hint streamid = req->rw_hint;
if (streamid == WRITE_LIFE_NOT_SET || streamid == WRITE_LIFE_NONE)
streamid = 0;
@@ -214,7 +214,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
bio_init(&bio, vecs, nr_pages);
bio_set_dev(&bio, bdev);
bio.bi_iter.bi_sector = pos >> 9;
- bio.bi_write_hint = iocb->ki_hint;
+ if (iov_iter_rw(iter) == WRITE)
+ bio.bi_rw_hint = iocb->ki_hint;
bio.bi_private = current;
bio.bi_end_io = blkdev_bio_end_io_simple;
bio.bi_ioprio = iocb->ki_ioprio;
@@ -355,7 +356,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
for (;;) {
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = pos >> 9;
- bio->bi_write_hint = iocb->ki_hint;
+ if (!is_read)
+ bio->bi_rw_hint = iocb->ki_hint;
bio->bi_private = dio;
bio->bi_end_io = blkdev_bio_end_io;
bio->bi_ioprio = iocb->ki_ioprio;
@@ -2806,7 +2806,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
bio_add_page(bio, page, page_size, pg_offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
- bio->bi_write_hint = page->mapping->host->i_write_hint;
+ if (opf & REQ_OP_WRITE)
+ bio->bi_rw_hint = page->mapping->host->i_write_hint;
bio->bi_opf = opf;
if (wbc) {
wbc_init_bio(wbc, bio);
@@ -3067,7 +3067,8 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio_set_dev(bio, bh->b_bdev);
- bio->bi_write_hint = write_hint;
+ if (REQ_OP_WRITE & op)
+ bio->bi_rw_hint = write_hint;
bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
BUG_ON(bio->bi_iter.bi_size != bh->b_size);
@@ -445,7 +445,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
else
bio->bi_end_io = dio_bio_end_io;
- bio->bi_write_hint = dio->iocb->ki_hint;
+ if (dio->op == REQ_OP_WRITE)
+ bio->bi_rw_hint = dio->iocb->ki_hint;
sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
@@ -351,7 +351,9 @@ void ext4_io_submit(struct ext4_io_submit *io)
if (bio) {
int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
REQ_SYNC : 0;
- io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
+ if (io->io_bio->bi_opf & REQ_OP_WRITE)
+ io->io_bio->bi_rw_hint =
+ io->io_end->inode->i_write_hint;
bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
submit_bio(io->io_bio);
}
@@ -399,7 +401,8 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
ret = io_submit_init_bio(io, bh);
if (ret)
return ret;
- io->io_bio->bi_write_hint = inode->i_write_hint;
+ if (io->io_bio->bi_opf & REQ_OP_WRITE)
+ io->io_bio->bi_rw_hint = inode->i_write_hint;
}
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
@@ -269,7 +269,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
} else {
bio->bi_end_io = f2fs_write_end_io;
bio->bi_private = sbi;
- bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp);
+ bio->bi_rw_hint = f2fs_io_type_to_rw_hint(sbi, type, temp);
}
if (wbc)
wbc_init_bio(wbc, bio);
@@ -1637,7 +1637,8 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
bio = bio_alloc(GFP_KERNEL, nr_pages);
bio_set_dev(bio, iomap->bdev);
bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
- bio->bi_write_hint = dio->iocb->ki_hint;
+ if (dio->flags & IOMAP_DIO_WRITE)
+ bio->bi_rw_hint = dio->iocb->ki_hint;
bio->bi_ioprio = dio->iocb->ki_ioprio;
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
@@ -639,7 +639,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
goto confused;
wbc_init_bio(wbc, bio);
- bio->bi_write_hint = inode->i_write_hint;
+ bio->bi_rw_hint = inode->i_write_hint;
}
/*
@@ -523,7 +523,7 @@ xfs_submit_ioend(
return status;
}
- ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
+ ioend->io_bio->bi_rw_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
return 0;
}
@@ -577,7 +577,7 @@ xfs_chain_bio(
bio_chain(ioend->io_bio, new);
bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
- ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
+ ioend->io_bio->bi_rw_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
ioend->io_bio = new;
}
@@ -150,7 +150,7 @@ struct bio {
*/
unsigned short bi_flags; /* status, etc and bvec pool number */
unsigned short bi_ioprio;
- unsigned short bi_write_hint;
+ unsigned short bi_rw_hint;
blk_status_t bi_status;
u8 bi_partno;
@@ -234,7 +234,7 @@ struct request {
unsigned short nr_integrity_segments;
#endif
- unsigned short write_hint;
+ unsigned short rw_hint;
unsigned short ioprio;
void *special; /* opaque pointer available for LLD use */