diff mbox series

[RFC,5/7] scsi: storvsc: Enable swiotlb throttling

Message ID 20240822183718.1234-6-mhklinux@outlook.com (mailing list archive)
State Not Applicable
Headers show
Series Introduce swiotlb throttling | expand

Commit Message

Michael Kelley Aug. 22, 2024, 6:37 p.m. UTC
From: Michael Kelley <mhklinux@outlook.com>

In a CoCo VM, all DMA-based I/O must use swiotlb bounce buffers
because DMA cannot be done to private (encrypted) portions of VM
memory. The bounce buffer memory is marked shared (decrypted) at
boot time, so I/O is done to/from the bounce buffer memory and then
copied by the CPU to/from the final target memory (i.e, "bounced").
Storage devices can be large consumers of bounce buffer memory because it
is possible to have large numbers of I/Os in flight across multiple
devices. Bounce buffer memory must be pre-allocated at boot time, and
it is difficult to know how much memory to allocate to handle peak
storage I/O loads. Consequently, bounce buffer memory is typically
over-provisioned, which wastes memory, and may still not avoid a peak
that exhausts bounce buffer memory and cause storage I/O errors.

To solve this problem for Coco VMs running on Hyper-V, update the
storvsc driver to permit bounce buffer throttling. First, use
scsi_dma_map_attrs() instead of scsi_dma_map(). Then gate the
throttling behavior on a DMA layer check indicating that throttling is
useful, so that no change occurs in a non-CoCo VM. If throttling is
useful, pass the DMA_ATTR_MAY_BLOCK attribute, and set the block queue
flag indicating that the I/O request submission path may sleep, which
could happen when throttling. With these options in place, DMA map
requests are pended when necessary to reduce the likelihood of usage
peaks caused by storvsc that could exhaust bounce buffer memory and
generate errors.

Signed-off-by: Michael Kelley <mhklinux@outlook.com>
---
 drivers/scsi/storvsc_drv.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

Comments

Petr Tesařík Aug. 23, 2024, 8:19 a.m. UTC | #1
On Thu, 22 Aug 2024 11:37:16 -0700
mhkelley58@gmail.com wrote:

> From: Michael Kelley <mhklinux@outlook.com>
> 
> In a CoCo VM, all DMA-based I/O must use swiotlb bounce buffers
> because DMA cannot be done to private (encrypted) portions of VM
> memory. The bounce buffer memory is marked shared (decrypted) at
> boot time, so I/O is done to/from the bounce buffer memory and then
> copied by the CPU to/from the final target memory (i.e, "bounced").
> Storage devices can be large consumers of bounce buffer memory because it
> is possible to have large numbers of I/Os in flight across multiple
> devices. Bounce buffer memory must be pre-allocated at boot time, and
> it is difficult to know how much memory to allocate to handle peak
> storage I/O loads. Consequently, bounce buffer memory is typically
> over-provisioned, which wastes memory, and may still not avoid a peak
> that exhausts bounce buffer memory and cause storage I/O errors.
> 
> To solve this problem for Coco VMs running on Hyper-V, update the
> storvsc driver to permit bounce buffer throttling. First, use
> scsi_dma_map_attrs() instead of scsi_dma_map(). Then gate the
> throttling behavior on a DMA layer check indicating that throttling is
> useful, so that no change occurs in a non-CoCo VM. If throttling is
> useful, pass the DMA_ATTR_MAY_BLOCK attribute, and set the block queue
> flag indicating that the I/O request submission path may sleep, which
> could happen when throttling. With these options in place, DMA map
> requests are pended when necessary to reduce the likelihood of usage
> peaks caused by storvsc that could exhaust bounce buffer memory and
> generate errors.
> 
> Signed-off-by: Michael Kelley <mhklinux@outlook.com>

LGTM, but I'm not familiar with this driver or the SCSI layer. In
particular, I don't know if it's OK to change the value of
host->queuecommand_may_block after scsi_host_alloc() initialized it
from a scsi host template, although it seems to be fine.

Petr T

> ---
>  drivers/scsi/storvsc_drv.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> index 7ceb982040a5..7bedd5502d07 100644
> --- a/drivers/scsi/storvsc_drv.c
> +++ b/drivers/scsi/storvsc_drv.c
> @@ -457,6 +457,7 @@ struct hv_host_device {
>  	struct workqueue_struct *handle_error_wq;
>  	struct work_struct host_scan_work;
>  	struct Scsi_Host *host;
> +	unsigned long dma_attrs;
>  };
>  
>  struct storvsc_scan_work {
> @@ -1810,7 +1811,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
>  		payload->range.len = length;
>  		payload->range.offset = offset_in_hvpg;
>  
> -		sg_count = scsi_dma_map(scmnd);
> +		sg_count = scsi_dma_map_attrs(scmnd, host_dev->dma_attrs);
>  		if (sg_count < 0) {
>  			ret = SCSI_MLQUEUE_DEVICE_BUSY;
>  			goto err_free_payload;
> @@ -2030,6 +2031,12 @@ static int storvsc_probe(struct hv_device *device,
>  	 *    have an offset that is a multiple of HV_HYP_PAGE_SIZE.
>  	 */
>  	host->sg_tablesize = (max_xfer_bytes >> HV_HYP_PAGE_SHIFT) + 1;
> +
> +	if (dma_recommend_may_block(&device->device)) {
> +		host->queuecommand_may_block = true;
> +		host_dev->dma_attrs = DMA_ATTR_MAY_BLOCK;
> +	}
> +
>  	/*
>  	 * For non-IDE disks, the host supports multiple channels.
>  	 * Set the number of HW queues we are supporting.
Michael Kelley Aug. 23, 2024, 8:42 p.m. UTC | #2
From: Petr Tesařík <petr@tesarici.cz> Sent: Friday, August 23, 2024 1:20 AM
> 
> On Thu, 22 Aug 2024 11:37:16 -0700
> mhkelley58@gmail.com wrote:
> 
> > From: Michael Kelley <mhklinux@outlook.com>
> >
> > In a CoCo VM, all DMA-based I/O must use swiotlb bounce buffers
> > because DMA cannot be done to private (encrypted) portions of VM
> > memory. The bounce buffer memory is marked shared (decrypted) at
> > boot time, so I/O is done to/from the bounce buffer memory and then
> > copied by the CPU to/from the final target memory (i.e, "bounced").
> > Storage devices can be large consumers of bounce buffer memory because it
> > is possible to have large numbers of I/Os in flight across multiple
> > devices. Bounce buffer memory must be pre-allocated at boot time, and
> > it is difficult to know how much memory to allocate to handle peak
> > storage I/O loads. Consequently, bounce buffer memory is typically
> > over-provisioned, which wastes memory, and may still not avoid a peak
> > that exhausts bounce buffer memory and cause storage I/O errors.
> >
> > To solve this problem for Coco VMs running on Hyper-V, update the
> > storvsc driver to permit bounce buffer throttling. First, use
> > scsi_dma_map_attrs() instead of scsi_dma_map(). Then gate the
> > throttling behavior on a DMA layer check indicating that throttling is
> > useful, so that no change occurs in a non-CoCo VM. If throttling is
> > useful, pass the DMA_ATTR_MAY_BLOCK attribute, and set the block queue
> > flag indicating that the I/O request submission path may sleep, which
> > could happen when throttling. With these options in place, DMA map
> > requests are pended when necessary to reduce the likelihood of usage
> > peaks caused by storvsc that could exhaust bounce buffer memory and
> > generate errors.
> >
> > Signed-off-by: Michael Kelley <mhklinux@outlook.com>
> 
> LGTM, but I'm not familiar with this driver or the SCSI layer. In
> particular, I don't know if it's OK to change the value of
> host->queuecommand_may_block after scsi_host_alloc() initialized it
> from a scsi host template, although it seems to be fine.
> 
> Petr T

Yes, it's OK to change the value after scsi_host_alloc().
The flag isn't consumed until scsi_add_host() is called
later in storvsc_probe().

Note this maps to BLK_MQ_F_BLOCKING, which you can see in
/sys/kernel/debug/block/<device>/hctx0/flags. Same for NVMe
devices with my Patches 6 and 7. When debugging, I've been
checking that /sys entry to make sure the behavior is as expected. :-)

Michael

> 
> > ---
> >  drivers/scsi/storvsc_drv.c | 9 ++++++++-
> >  1 file changed, 8 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> > index 7ceb982040a5..7bedd5502d07 100644
> > --- a/drivers/scsi/storvsc_drv.c
> > +++ b/drivers/scsi/storvsc_drv.c
> > @@ -457,6 +457,7 @@ struct hv_host_device {
> >  	struct workqueue_struct *handle_error_wq;
> >  	struct work_struct host_scan_work;
> >  	struct Scsi_Host *host;
> > +	unsigned long dma_attrs;
> >  };
> >
> >  struct storvsc_scan_work {
> > @@ -1810,7 +1811,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host,
> struct scsi_cmnd *scmnd)
> >  		payload->range.len = length;
> >  		payload->range.offset = offset_in_hvpg;
> >
> > -		sg_count = scsi_dma_map(scmnd);
> > +		sg_count = scsi_dma_map_attrs(scmnd, host_dev->dma_attrs);
> >  		if (sg_count < 0) {
> >  			ret = SCSI_MLQUEUE_DEVICE_BUSY;
> >  			goto err_free_payload;
> > @@ -2030,6 +2031,12 @@ static int storvsc_probe(struct hv_device *device,
> >  	 *    have an offset that is a multiple of HV_HYP_PAGE_SIZE.
> >  	 */
> >  	host->sg_tablesize = (max_xfer_bytes >> HV_HYP_PAGE_SHIFT) + 1;
> > +
> > +	if (dma_recommend_may_block(&device->device)) {
> > +		host->queuecommand_may_block = true;
> > +		host_dev->dma_attrs = DMA_ATTR_MAY_BLOCK;
> > +	}
> > +
> >  	/*
> >  	 * For non-IDE disks, the host supports multiple channels.
> >  	 * Set the number of HW queues we are supporting.
diff mbox series

Patch

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 7ceb982040a5..7bedd5502d07 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -457,6 +457,7 @@  struct hv_host_device {
 	struct workqueue_struct *handle_error_wq;
 	struct work_struct host_scan_work;
 	struct Scsi_Host *host;
+	unsigned long dma_attrs;
 };
 
 struct storvsc_scan_work {
@@ -1810,7 +1811,7 @@  static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
 		payload->range.len = length;
 		payload->range.offset = offset_in_hvpg;
 
-		sg_count = scsi_dma_map(scmnd);
+		sg_count = scsi_dma_map_attrs(scmnd, host_dev->dma_attrs);
 		if (sg_count < 0) {
 			ret = SCSI_MLQUEUE_DEVICE_BUSY;
 			goto err_free_payload;
@@ -2030,6 +2031,12 @@  static int storvsc_probe(struct hv_device *device,
 	 *    have an offset that is a multiple of HV_HYP_PAGE_SIZE.
 	 */
 	host->sg_tablesize = (max_xfer_bytes >> HV_HYP_PAGE_SHIFT) + 1;
+
+	if (dma_recommend_may_block(&device->device)) {
+		host->queuecommand_may_block = true;
+		host_dev->dma_attrs = DMA_ATTR_MAY_BLOCK;
+	}
+
 	/*
 	 * For non-IDE disks, the host supports multiple channels.
 	 * Set the number of HW queues we are supporting.