diff mbox series

[v3] hw/scsi/lsi53c895a: add timer to scripts processing

Message ID 20240304163727.2320861-1-svens@stackframe.org (mailing list archive)
State New, archived
Headers show
Series [v3] hw/scsi/lsi53c895a: add timer to scripts processing | expand

Commit Message

Sven Schnelle March 4, 2024, 4:37 p.m. UTC
HP-UX 10.20 seems to make the lsi53c895a spinning on a memory location
under certain circumstances. As the SCSI controller and CPU are not
running at the same time this loop will never finish. After some
time, the check loop interrupts with a unexpected device disconnect.
This works, but is slow because the kernel resets the scsi controller.
Instead of signaling UDC, start a timer and exit the loop. Until the
timer fires, the CPU can process instructions which might change the
memory location.

The limit of instructions is also reduced because scripts running on
the SCSI processor are usually very short. This keeps the time until
the loop is exited short.

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Sven Schnelle <svens@stackframe.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/scsi/lsi53c895a.c | 43 +++++++++++++++++++++++++++++++++----------
 hw/scsi/trace-events |  2 ++
 2 files changed, 35 insertions(+), 10 deletions(-)

Comments

Philippe Mathieu-Daudé March 8, 2024, 8:14 a.m. UTC | #1
On 4/3/24 17:37, Sven Schnelle wrote:
> HP-UX 10.20 seems to make the lsi53c895a spinning on a memory location
> under certain circumstances. As the SCSI controller and CPU are not
> running at the same time this loop will never finish. After some
> time, the check loop interrupts with a unexpected device disconnect.
> This works, but is slow because the kernel resets the scsi controller.
> Instead of signaling UDC, start a timer and exit the loop. Until the
> timer fires, the CPU can process instructions which might change the
> memory location.
> 
> The limit of instructions is also reduced because scripts running on
> the SCSI processor are usually very short. This keeps the time until
> the loop is exited short.
> 
> Suggested-by: Peter Maydell <peter.maydell@linaro.org>
> Signed-off-by: Sven Schnelle <svens@stackframe.org>
> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
> ---
>   hw/scsi/lsi53c895a.c | 43 +++++++++++++++++++++++++++++++++----------
>   hw/scsi/trace-events |  2 ++
>   2 files changed, 35 insertions(+), 10 deletions(-)

I suppose Paolo is going to take this patch, otherwise I can
queue it via hw-misc tree.
Michael Tokarev March 10, 2024, 3:35 p.m. UTC | #2
04.03.2024 19:37, Sven Schnelle :
> HP-UX 10.20 seems to make the lsi53c895a spinning on a memory location
> under certain circumstances. As the SCSI controller and CPU are not
> running at the same time this loop will never finish. After some
> time, the check loop interrupts with a unexpected device disconnect.
> This works, but is slow because the kernel resets the scsi controller.
> Instead of signaling UDC, start a timer and exit the loop. Until the
> timer fires, the CPU can process instructions which might change the
> memory location.
> 
> The limit of instructions is also reduced because scripts running on
> the SCSI processor are usually very short. This keeps the time until
> the loop is exited short.

This one is a bit large(ish), yet, - is it okay for -stable?  From the
description it feels like it should be picked up, and it applies cleanly
to 7.2.x too (after picking up the forgotten 8b09b7fe47082c692
"hw/scsi/lsi53c895a: add missing decrement of reentrancy counter" there).

Thanks,

/mjt
Helge Deller March 10, 2024, 6:59 p.m. UTC | #3
On 3/10/24 16:35, Michael Tokarev wrote:
> 04.03.2024 19:37, Sven Schnelle :
>> HP-UX 10.20 seems to make the lsi53c895a spinning on a memory location
>> under certain circumstances. As the SCSI controller and CPU are not
>> running at the same time this loop will never finish. After some
>> time, the check loop interrupts with a unexpected device disconnect.
>> This works, but is slow because the kernel resets the scsi controller.
>> Instead of signaling UDC, start a timer and exit the loop. Until the
>> timer fires, the CPU can process instructions which might change the
>> memory location.
>>
>> The limit of instructions is also reduced because scripts running on
>> the SCSI processor are usually very short. This keeps the time until
>> the loop is exited short.
>
> This one is a bit large(ish), yet, - is it okay for -stable?  From the
> description it feels like it should be picked up, and it applies cleanly
> to 7.2.x too (after picking up the forgotten 8b09b7fe47082c692
> "hw/scsi/lsi53c895a: add missing decrement of reentrancy counter" there).

Please include it in stable.
HP-UX feels incredible sluggish and slow without it, and even with Linux
guests the disconnects can be seen sometimes.

Helge
diff mbox series

Patch

diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index 949b2bbd10..3b5e76bbd9 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -188,7 +188,7 @@  static const char *names[] = {
 #define LSI_TAG_VALID     (1 << 16)
 
 /* Maximum instructions to process. */
-#define LSI_MAX_INSN    10000
+#define LSI_MAX_INSN    100
 
 typedef struct lsi_request {
     SCSIRequest *req;
@@ -205,6 +205,7 @@  enum {
     LSI_WAIT_RESELECT, /* Wait Reselect instruction has been issued */
     LSI_DMA_SCRIPTS, /* processing DMA from lsi_execute_script */
     LSI_DMA_IN_PROGRESS, /* DMA operation is in progress */
+    LSI_WAIT_SCRIPTS, /* SCRIPTS stopped because of instruction count limit */
 };
 
 enum {
@@ -224,6 +225,7 @@  struct LSIState {
     MemoryRegion ram_io;
     MemoryRegion io_io;
     AddressSpace pci_io_as;
+    QEMUTimer *scripts_timer;
 
     int carry; /* ??? Should this be an a visible register somewhere?  */
     int status;
@@ -415,6 +417,7 @@  static void lsi_soft_reset(LSIState *s)
     s->sbr = 0;
     assert(QTAILQ_EMPTY(&s->queue));
     assert(!s->current);
+    timer_del(s->scripts_timer);
 }
 
 static int lsi_dma_40bit(LSIState *s)
@@ -1135,6 +1138,12 @@  static void lsi_wait_reselect(LSIState *s)
     }
 }
 
+static void lsi_scripts_timer_start(LSIState *s)
+{
+    trace_lsi_scripts_timer_start();
+    timer_mod(s->scripts_timer, qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + 500);
+}
+
 static void lsi_execute_script(LSIState *s)
 {
     PCIDevice *pci_dev = PCI_DEVICE(s);
@@ -1144,6 +1153,11 @@  static void lsi_execute_script(LSIState *s)
     int insn_processed = 0;
     static int reentrancy_level;
 
+    if (s->waiting == LSI_WAIT_SCRIPTS) {
+        timer_del(s->scripts_timer);
+        s->waiting = LSI_NOWAIT;
+    }
+
     reentrancy_level++;
 
     s->istat1 |= LSI_ISTAT1_SRUN;
@@ -1151,8 +1165,8 @@  again:
     /*
      * Some windows drivers make the device spin waiting for a memory location
      * to change. If we have executed more than LSI_MAX_INSN instructions then
-     * assume this is the case and force an unexpected device disconnect. This
-     * is apparently sufficient to beat the drivers into submission.
+     * assume this is the case and start a timer. Until the timer fires, the
+     * guest CPU has a chance to run and change the memory location.
      *
      * Another issue (CVE-2023-0330) can occur if the script is programmed to
      * trigger itself again and again. Avoid this problem by stopping after
@@ -1160,13 +1174,8 @@  again:
      * which should be enough for all valid use cases).
      */
     if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) {
-        if (!(s->sien0 & LSI_SIST0_UDC)) {
-            qemu_log_mask(LOG_GUEST_ERROR,
-                          "lsi_scsi: inf. loop with UDC masked");
-        }
-        lsi_script_scsi_interrupt(s, LSI_SIST0_UDC, 0);
-        lsi_disconnect(s);
-        trace_lsi_execute_script_stop();
+        s->waiting = LSI_WAIT_SCRIPTS;
+        lsi_scripts_timer_start(s);
         reentrancy_level--;
         return;
     }
@@ -2205,6 +2214,9 @@  static int lsi_post_load(void *opaque, int version_id)
         return -EINVAL;
     }
 
+    if (s->waiting == LSI_WAIT_SCRIPTS) {
+        lsi_scripts_timer_start(s);
+    }
     return 0;
 }
 
@@ -2302,6 +2314,15 @@  static const struct SCSIBusInfo lsi_scsi_info = {
     .cancel = lsi_request_cancelled
 };
 
+static void scripts_timer_cb(void *opaque)
+{
+    LSIState *s = opaque;
+
+    trace_lsi_scripts_timer_triggered();
+    s->waiting = LSI_NOWAIT;
+    lsi_execute_script(s);
+}
+
 static void lsi_scsi_realize(PCIDevice *dev, Error **errp)
 {
     LSIState *s = LSI53C895A(dev);
@@ -2321,6 +2342,7 @@  static void lsi_scsi_realize(PCIDevice *dev, Error **errp)
                           "lsi-ram", 0x2000);
     memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s,
                           "lsi-io", 256);
+    s->scripts_timer = timer_new_us(QEMU_CLOCK_VIRTUAL, scripts_timer_cb, s);
 
     /*
      * Since we use the address-space API to interact with ram_io, disable the
@@ -2345,6 +2367,7 @@  static void lsi_scsi_exit(PCIDevice *dev)
     LSIState *s = LSI53C895A(dev);
 
     address_space_destroy(&s->pci_io_as);
+    timer_del(s->scripts_timer);
 }
 
 static void lsi_class_init(ObjectClass *klass, void *data)
diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events
index d72f741ed8..f0f2a98c2e 100644
--- a/hw/scsi/trace-events
+++ b/hw/scsi/trace-events
@@ -302,6 +302,8 @@  lsi_execute_script_stop(void) "SCRIPTS execution stopped"
 lsi_awoken(void) "Woken by SIGP"
 lsi_reg_read(const char *name, int offset, uint8_t ret) "Read reg %s 0x%x = 0x%02x"
 lsi_reg_write(const char *name, int offset, uint8_t val) "Write reg %s 0x%x = 0x%02x"
+lsi_scripts_timer_triggered(void) "SCRIPTS timer triggered"
+lsi_scripts_timer_start(void) "SCRIPTS timer started"
 
 # virtio-scsi.c
 virtio_scsi_cmd_req(int lun, uint32_t tag, uint8_t cmd) "virtio_scsi_cmd_req lun=%u tag=0x%x cmd=0x%x"