@@ -295,14 +295,19 @@ static void blkdev_bio_end_io_async(struct bio *bio)
ret = blk_status_to_errno(bio->bi_status);
}
- iocb->ki_complete(iocb, ret);
-
if (dio->flags & DIO_SHOULD_DIRTY) {
bio_check_pages_dirty(bio);
} else {
bio_release_pages(bio, false);
- bio_put(bio);
+ if (iocb->ki_flags & IOCB_BIO_PASSBACK) {
+ iocb->ki_flags |= IOCB_PRIV_IS_BIO;
+ iocb->private = bio;
+ } else {
+ bio_put(bio);
+ }
}
+
+ iocb->ki_complete(iocb, ret);
}
static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
@@ -2770,6 +2770,9 @@ static void io_req_task_complete(struct io_kiocb *req, bool *locked)
unsigned int cflags = io_put_rw_kbuf(req);
int res = req->result;
+ if (req->rw.kiocb.ki_flags & IOCB_PRIV_IS_BIO)
+ bio_put(req->rw.kiocb.private);
+
if (*locked) {
io_req_complete_state(req, res, cflags);
io_req_add_compl_list(req);
@@ -2966,6 +2969,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
} else {
if (kiocb->ki_flags & IOCB_HIPRI)
return -EINVAL;
+ kiocb->ki_flags |= IOCB_ALLOC_CACHE | IOCB_BIO_PASSBACK;
kiocb->ki_complete = io_complete_rw;
}
@@ -322,6 +322,10 @@ enum rw_hint {
#define IOCB_NOIO (1 << 20)
/* can use bio alloc cache */
#define IOCB_ALLOC_CACHE (1 << 21)
+/* iocb supports bio passback */
+#define IOCB_BIO_PASSBACK (1 << 22)
+/* iocb->private holds bio to put */
+#define IOCB_PRIV_IS_BIO (1 << 23)
struct kiocb {
struct file *ki_filp;
We currently cannot use the bio recycling allocation cache for IRQ-driven IO, as the cache isn't IRQ safe (by design). Add a way for the completion side to pass back a bio that needs freeing, so we can do it from the io_uring side. io_uring completions always run in task context. This is good for about a 13% improvement in IRQ-driven IO, taking us from around 6.3M IOPS per core to 7.1M IOPS per core. Signed-off-by: Jens Axboe <axboe@kernel.dk> --- Open to suggestions on how to do this more cleanly. The below obviously works, but ideally we'd want to run the whole end_io handler from this context rather than just the bio put. That would enable further optimizations in this area. But the wins are rather large as-is.