[2/2] virtio-blk: support completion batching for the IRQ path

Message ID 20221206141125.93055-2-suwan.kim027@gmail.com (mailing list archive)
State New, archived
Series [1/2] virtio-blk: set req->state to MQ_RQ_COMPLETE after polling I/O is finished

Commit Message

Suwan Kim Dec. 6, 2022, 2:11 p.m. UTC
This patch adds completion batching to the IRQ path. It reuses the batch
completion code of virtblk_poll(). It collects requests into an io_comp_batch
and completes them all at once. It improves performance by about 2%.

To validate the performance improvement and stability, I ran fio tests on
a 4 vCPU VM and a 12 vCPU VM. Both VMs have 8 GB of RAM and as many HW
queues as vCPUs.
The fio command is as follows; I ran fio 5 times and averaged the IOPS.
(io_uring, randread, direct=1, bs=512, iodepth=64, numjobs=2,4)
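For reference, those parameters correspond to a fio invocation roughly like
this (the job name, target device, and runtime are assumptions, not taken
from this message):

  fio --name=randread --ioengine=io_uring --rw=randread --direct=1 \
      --bs=512 --iodepth=64 --numjobs=2 --filename=/dev/vda \
      --runtime=60 --time_based --group_reporting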

The test results show about a 2% improvement.

           4 vcpu VM       |   numjobs=2   |   numjobs=4
      -----------------------------------------------------------
        fio without patch  |  367.2K IOPS  |   397.6K IOPS
      -----------------------------------------------------------
        fio with patch     |  372.8K IOPS  |   407.7K IOPS

           12 vcpu VM      |   numjobs=2   |   numjobs=4
      -----------------------------------------------------------
        fio without patch  |  363.6K IOPS  |   374.8K IOPS
      -----------------------------------------------------------
        fio with patch     |  373.8K IOPS  |   385.3K IOPS

Signed-off-by: Suwan Kim <suwan.kim027@gmail.com>
---
 drivers/block/virtio_blk.c | 38 +++++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 15 deletions(-)

Comments

Stefan Hajnoczi Dec. 7, 2022, 10:05 p.m. UTC | #1
On Tue, Dec 06, 2022 at 11:11:25PM +0900, Suwan Kim wrote:
> This patch adds completion batching to the IRQ path. It reuses the batch
> completion code of virtblk_poll(). It collects requests into an io_comp_batch
> and completes them all at once. It improves performance by about 2%.
> 
> To validate the performance improvement and stability, I ran fio tests on
> a 4 vCPU VM and a 12 vCPU VM. Both VMs have 8 GB of RAM and as many HW
> queues as vCPUs.
> The fio command is as follows; I ran fio 5 times and averaged the IOPS.
> (io_uring, randread, direct=1, bs=512, iodepth=64, numjobs=2,4)
> 
> The test results show about a 2% improvement.
> 
>            4 vcpu VM       |   numjobs=2   |   numjobs=4
>       -----------------------------------------------------------
>         fio without patch  |  367.2K IOPS  |   397.6K IOPS
>       -----------------------------------------------------------
>         fio with patch     |  372.8K IOPS  |   407.7K IOPS
> 
>            12 vcpu VM      |   numjobs=2   |   numjobs=4
>       -----------------------------------------------------------
>         fio without patch  |  363.6K IOPS  |   374.8K IOPS
>       -----------------------------------------------------------
>         fio with patch     |  373.8K IOPS  |   385.3K IOPS
> 
> Signed-off-by: Suwan Kim <suwan.kim027@gmail.com>
> ---
>  drivers/block/virtio_blk.c | 38 +++++++++++++++++++++++---------------
>  1 file changed, 23 insertions(+), 15 deletions(-)

Cool, thanks for doing this!

> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index cf64d256787e..48fcf745f007 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -272,6 +272,18 @@ static inline void virtblk_request_done(struct request *req)
>  	blk_mq_end_request(req, virtblk_result(vbr));
>  }
>  
> +static void virtblk_complete_batch(struct io_comp_batch *iob)
> +{
> +	struct request *req;
> +
> +	rq_list_for_each(&iob->req_list, req) {
> +		virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
> +		virtblk_cleanup_cmd(req);
> +		blk_mq_set_request_complete(req);
> +	}
> +	blk_mq_end_request_batch(iob);
> +}
> +
>  static void virtblk_done(struct virtqueue *vq)
>  {
>  	struct virtio_blk *vblk = vq->vdev->priv;
> @@ -280,6 +292,7 @@ static void virtblk_done(struct virtqueue *vq)
>  	struct virtblk_req *vbr;
>  	unsigned long flags;
>  	unsigned int len;
> +	DEFINE_IO_COMP_BATCH(iob);
>  
>  	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
>  	do {
> @@ -287,7 +300,9 @@ static void virtblk_done(struct virtqueue *vq)
>  		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
>  			struct request *req = blk_mq_rq_from_pdu(vbr);
>  
> -			if (likely(!blk_should_fake_timeout(req->q)))
> +			if (likely(!blk_should_fake_timeout(req->q)) &&
> +				!blk_mq_add_to_batch(req, &iob, vbr->status,
> +							virtblk_complete_batch))
>  				blk_mq_complete_request(req);
>  			req_done = true;
>  		}
> @@ -295,9 +310,14 @@ static void virtblk_done(struct virtqueue *vq)
>  			break;
>  	} while (!virtqueue_enable_cb(vq));
>  
> -	/* In case queue is stopped waiting for more buffers. */
> -	if (req_done)
> +	if (req_done) {
> +		if (!rq_list_empty(iob.req_list))
> +			virtblk_complete_batch(&iob);

A little optimization to avoid the indirect call: iob.complete(&iob) :).
Not sure if it's good style to do that but it works in this case because
we know it can only be virtblk_complete_batch().

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Suwan Kim Dec. 12, 2022, 2:43 p.m. UTC | #2
On Wed, Dec 07, 2022 at 05:05:06PM -0500, Stefan Hajnoczi wrote:
> On Tue, Dec 06, 2022 at 11:11:25PM +0900, Suwan Kim wrote:
> > This patch adds completion batching to the IRQ path. It reuses the batch
> > completion code of virtblk_poll(). It collects requests into an io_comp_batch
> > and completes them all at once. It improves performance by about 2%.
> > 
> > To validate the performance improvement and stability, I ran fio tests on
> > a 4 vCPU VM and a 12 vCPU VM. Both VMs have 8 GB of RAM and as many HW
> > queues as vCPUs.
> > The fio command is as follows; I ran fio 5 times and averaged the IOPS.
> > (io_uring, randread, direct=1, bs=512, iodepth=64, numjobs=2,4)
> > 
> > The test results show about a 2% improvement.
> > 
> >            4 vcpu VM       |   numjobs=2   |   numjobs=4
> >       -----------------------------------------------------------
> >         fio without patch  |  367.2K IOPS  |   397.6K IOPS
> >       -----------------------------------------------------------
> >         fio with patch     |  372.8K IOPS  |   407.7K IOPS
> > 
> >            12 vcpu VM      |   numjobs=2   |   numjobs=4
> >       -----------------------------------------------------------
> >         fio without patch  |  363.6K IOPS  |   374.8K IOPS
> >       -----------------------------------------------------------
> >         fio with patch     |  373.8K IOPS  |   385.3K IOPS
> > 
> > Signed-off-by: Suwan Kim <suwan.kim027@gmail.com>
> > ---
> >  drivers/block/virtio_blk.c | 38 +++++++++++++++++++++++---------------
> >  1 file changed, 23 insertions(+), 15 deletions(-)
> 
> Cool, thanks for doing this!
> 
> > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> > index cf64d256787e..48fcf745f007 100644
> > --- a/drivers/block/virtio_blk.c
> > +++ b/drivers/block/virtio_blk.c
> > @@ -272,6 +272,18 @@ static inline void virtblk_request_done(struct request *req)
> >  	blk_mq_end_request(req, virtblk_result(vbr));
> >  }
> >  
> > +static void virtblk_complete_batch(struct io_comp_batch *iob)
> > +{
> > +	struct request *req;
> > +
> > +	rq_list_for_each(&iob->req_list, req) {
> > +		virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
> > +		virtblk_cleanup_cmd(req);
> > +		blk_mq_set_request_complete(req);
> > +	}
> > +	blk_mq_end_request_batch(iob);
> > +}
> > +
> >  static void virtblk_done(struct virtqueue *vq)
> >  {
> >  	struct virtio_blk *vblk = vq->vdev->priv;
> > @@ -280,6 +292,7 @@ static void virtblk_done(struct virtqueue *vq)
> >  	struct virtblk_req *vbr;
> >  	unsigned long flags;
> >  	unsigned int len;
> > +	DEFINE_IO_COMP_BATCH(iob);
> >  
> >  	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
> >  	do {
> > @@ -287,7 +300,9 @@ static void virtblk_done(struct virtqueue *vq)
> >  		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
> >  			struct request *req = blk_mq_rq_from_pdu(vbr);
> >  
> > -			if (likely(!blk_should_fake_timeout(req->q)))
> > +			if (likely(!blk_should_fake_timeout(req->q)) &&
> > +				!blk_mq_add_to_batch(req, &iob, vbr->status,
> > +							virtblk_complete_batch))
> >  				blk_mq_complete_request(req);
> >  			req_done = true;
> >  		}
> > @@ -295,9 +310,14 @@ static void virtblk_done(struct virtqueue *vq)
> >  			break;
> >  	} while (!virtqueue_enable_cb(vq));
> >  
> > -	/* In case queue is stopped waiting for more buffers. */
> > -	if (req_done)
> > +	if (req_done) {
> > +		if (!rq_list_empty(iob.req_list))
> > +			virtblk_complete_batch(&iob);
> 
> A little optimization to avoid the indirect call: iob.complete(&iob) :).
> Not sure if it's good style to do that but it works in this case because
> we know it can only be virtblk_complete_batch().
> 
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

Hi Stefan,

Thanks for the comment!
It also needs to use blk_mq_complete_request_remote() instead of
blk_mq_set_request_complete().
I will resend it along with the modified patch #1; please review it
again then.

Regards,
Suwan Kim
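
A rough sketch of the change Suwan describes above, inside virtblk_done()'s
completion loop (illustration only, not the actual v2 patch):

	/*
	 * Let blk_mq_complete_request_remote() steer the completion to
	 * another CPU when rq_affinity requires it; only requests that
	 * are completed locally get added to the batch.
	 */
	if (likely(!blk_should_fake_timeout(req->q)) &&
	    !blk_mq_complete_request_remote(req) &&
	    !blk_mq_add_to_batch(req, &iob, vbr->status,
				 virtblk_complete_batch))
		virtblk_request_done(req);

With that check in place, virtblk_complete_batch() would presumably no longer
need the blk_mq_set_request_complete() call for requests completed this way.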

Patch

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index cf64d256787e..48fcf745f007 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -272,6 +272,18 @@  static inline void virtblk_request_done(struct request *req)
 	blk_mq_end_request(req, virtblk_result(vbr));
 }
 
+static void virtblk_complete_batch(struct io_comp_batch *iob)
+{
+	struct request *req;
+
+	rq_list_for_each(&iob->req_list, req) {
+		virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
+		virtblk_cleanup_cmd(req);
+		blk_mq_set_request_complete(req);
+	}
+	blk_mq_end_request_batch(iob);
+}
+
 static void virtblk_done(struct virtqueue *vq)
 {
 	struct virtio_blk *vblk = vq->vdev->priv;
@@ -280,6 +292,7 @@  static void virtblk_done(struct virtqueue *vq)
 	struct virtblk_req *vbr;
 	unsigned long flags;
 	unsigned int len;
+	DEFINE_IO_COMP_BATCH(iob);
 
 	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
 	do {
@@ -287,7 +300,9 @@  static void virtblk_done(struct virtqueue *vq)
 		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
 			struct request *req = blk_mq_rq_from_pdu(vbr);
 
-			if (likely(!blk_should_fake_timeout(req->q)))
+			if (likely(!blk_should_fake_timeout(req->q)) &&
+				!blk_mq_add_to_batch(req, &iob, vbr->status,
+							virtblk_complete_batch))
 				blk_mq_complete_request(req);
 			req_done = true;
 		}
@@ -295,9 +310,14 @@  static void virtblk_done(struct virtqueue *vq)
 			break;
 	} while (!virtqueue_enable_cb(vq));
 
-	/* In case queue is stopped waiting for more buffers. */
-	if (req_done)
+	if (req_done) {
+		if (!rq_list_empty(iob.req_list))
+			virtblk_complete_batch(&iob);
+
+		/* In case queue is stopped waiting for more buffers. */
 		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
+	}
+
 	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 }
 
@@ -832,18 +852,6 @@  static void virtblk_map_queues(struct blk_mq_tag_set *set)
 	}
 }
 
-static void virtblk_complete_batch(struct io_comp_batch *iob)
-{
-	struct request *req;
-
-	rq_list_for_each(&iob->req_list, req) {
-		virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
-		virtblk_cleanup_cmd(req);
-		blk_mq_set_request_complete(req);
-	}
-	blk_mq_end_request_batch(iob);
-}
-
 static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 {
 	struct virtio_blk *vblk = hctx->queue->queuedata;