diff mbox series

[v2,2/3] block-backend: fix edge case in bdrv_next() where BDS associated to BB changes

Message ID 20240321133323.831133-3-f.ebner@proxmox.com (mailing list archive)
State New, archived
Headers show
Series fix two edge cases related to stream block jobs | expand

Commit Message

Fiona Ebner March 21, 2024, 1:33 p.m. UTC
The old_bs variable in bdrv_next() is currently determined by looking
at the old block backend. However, if the block graph changes before
the next bdrv_next() call, it might be that the associated BDS is not
the same that was referenced previously. In that case, the wrong BDS
is unreferenced, leading to an assertion failure later:

> bdrv_unref: Assertion `bs->refcnt > 0' failed.

In particular, this can happen in the context of bdrv_flush_all(),
when polling for bdrv_co_flush() in the generated co-wrapper leads to
a graph change (for example with a stream block job [0]).

A racy reproducer:

> #!/bin/bash
> rm -f /tmp/backing.qcow2
> rm -f /tmp/top.qcow2
> ./qemu-img create /tmp/backing.qcow2 -f qcow2 64M
> ./qemu-io -c "write -P42 0x0 0x1" /tmp/backing.qcow2
> ./qemu-img create /tmp/top.qcow2 -f qcow2 64M -b /tmp/backing.qcow2 -F qcow2
> ./qemu-system-x86_64 --qmp stdio \
> --blockdev qcow2,node-name=node0,file.driver=file,file.filename=/tmp/top.qcow2 \
> <<EOF
> {"execute": "qmp_capabilities"}
> {"execute": "block-stream", "arguments": { "job-id": "stream0", "device": "node0" } }
> {"execute": "quit"}
> EOF

[0]:

> #0  bdrv_replace_child_tran (child=..., new_bs=..., tran=...)
> #1  bdrv_replace_node_noperm (from=..., to=..., auto_skip=..., tran=..., errp=...)
> #2  bdrv_replace_node_common (from=..., to=..., auto_skip=..., detach_subchain=..., errp=...)
> #3  bdrv_drop_filter (bs=..., errp=...)
> #4  bdrv_cor_filter_drop (cor_filter_bs=...)
> #5  stream_prepare (job=...)
> #6  job_prepare_locked (job=...)
> #7  job_txn_apply_locked (fn=..., job=...)
> #8  job_do_finalize_locked (job=...)
> #9  job_exit (opaque=...)
> #10 aio_bh_poll (ctx=...)
> #11 aio_poll (ctx=..., blocking=...)
> #12 bdrv_poll_co (s=...)
> #13 bdrv_flush (bs=...)
> #14 bdrv_flush_all ()
> #15 do_vm_stop (state=..., send_stop=...)
> #16 vm_shutdown ()

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---

Not sure if this is the correct fix, or if the call site should rather
be adapted somehow?

New in v2.

 block/block-backend.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/block/block-backend.c b/block/block-backend.c
index 9c4de79e6b..28af1eb17a 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -599,14 +599,14 @@  BlockDriverState *bdrv_next(BdrvNextIterator *it)
     /* Must be called from the main loop */
     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
 
+    old_bs = it->bs;
+
     /* First, return all root nodes of BlockBackends. In order to avoid
      * returning a BDS twice when multiple BBs refer to it, we only return it
      * if the BB is the first one in the parent list of the BDS. */
     if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
         BlockBackend *old_blk = it->blk;
 
-        old_bs = old_blk ? blk_bs(old_blk) : NULL;
-
         do {
             it->blk = blk_all_next(it->blk);
             bs = it->blk ? blk_bs(it->blk) : NULL;
@@ -620,11 +620,10 @@  BlockDriverState *bdrv_next(BdrvNextIterator *it)
         if (bs) {
             bdrv_ref(bs);
             bdrv_unref(old_bs);
+            it->bs = bs;
             return bs;
         }
         it->phase = BDRV_NEXT_MONITOR_OWNED;
-    } else {
-        old_bs = it->bs;
     }
 
     /* Then return the monitor-owned BDSes without a BB attached. Ignore all