diff mbox

[v2,2/2] blockjob: Fix hang in block_job_finish_sync

Message ID 1453978880-14187-3-git-send-email-famz@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Fam Zheng Jan. 28, 2016, 11:01 a.m. UTC
With a mirror job running on a virtio-blk dataplane disk, sending "q" to
HMP will cause an infinite loop in block_job_finish_sync().

This is because aio_poll() only processes the AIO context of bs, which
has no more work to do, while the main loop BH that is scheduled to set
the job->completed flag is never processed.

Fix this by adding a "ctx" pointer to the BlockJob structure, to track
which context to poll for the block job to make progress (NULL means
bdrv_get_aio_context(bs) is used).

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 blockjob.c               | 3 ++-
 include/block/blockjob.h | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)
diff mbox

Patch

diff --git a/blockjob.c b/blockjob.c
index 4b16720..de11f9a 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -304,7 +304,7 @@  static int block_job_finish_sync(BlockJob *job,
         return -EBUSY;
     }
     while (!job->completed) {
-        aio_poll(bdrv_get_aio_context(bs), true);
+        aio_poll(job->ctx ? : bdrv_get_aio_context(job->bs), true);
     }
     ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
     block_job_unref(job);
@@ -497,6 +497,7 @@  void block_job_coroutine_complete(BlockJob *job,
     data->aio_context = bdrv_get_aio_context(job->bs);
     data->fn = fn;
     data->opaque = opaque;
+    job->ctx = qemu_get_aio_context();
 
     qemu_bh_schedule(data->bh);
 }
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index de59fc2..5c6a884 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -92,6 +92,8 @@  struct BlockJob {
      */
     char *id;
 
+    AioContext *ctx;
+
     /**
      * The coroutine that executes the job.  If not NULL, it is
      * reentered when busy is false and the job is cancelled.