@@ -1044,10 +1044,12 @@ static int find_image_format(BlockBackend *file, const char *filename,
* Set the current 'total_sectors' value
* Return 0 on success, -errno on error.
*/
-int bdrv_refresh_total_sectors(BlockDriverState *bs, int64_t hint)
+int coroutine_fn bdrv_co_refresh_total_sectors(BlockDriverState *bs,
+ int64_t hint)
{
BlockDriver *drv = bs->drv;
IO_CODE();
+ assert_bdrv_graph_readable();
if (!drv) {
return -ENOMEDIUM;
@@ -1611,6 +1613,11 @@ out:
g_free(gen_node_name);
}
+/*
+ * The caller must always hold @bs AioContext lock, because this function calls
+ * bdrv_refresh_total_sectors() which polls when called from non-coroutine
+ * context.
+ */
static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
const char *node_name, QDict *options,
int open_flags, Error **errp)
@@ -3750,6 +3757,10 @@ out:
* The reference parameter may be used to specify an existing block device which
* should be opened. If specified, neither options nor a filename may be given,
* nor can an existing BDS be reused (that is, *pbs has to be NULL).
+ *
+ * The caller must always hold @filename AioContext lock, because this
+ * function eventually calls bdrv_refresh_total_sectors() which polls
+ * when called from non-coroutine context.
*/
static BlockDriverState *bdrv_open_inherit(const char *filename,
const char *reference,
@@ -4038,6 +4049,11 @@ close_and_fail:
return NULL;
}
+/*
+ * The caller must always hold @filename AioContext lock, because this
+ * function eventually calls bdrv_refresh_total_sectors() which polls
+ * when called from non-coroutine context.
+ */
BlockDriverState *bdrv_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp)
{
@@ -5774,16 +5790,17 @@ BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
/**
* Return number of sectors on success, -errno on error.
*/
-int64_t bdrv_nb_sectors(BlockDriverState *bs)
+int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
IO_CODE();
+ assert_bdrv_graph_readable();
if (!drv)
return -ENOMEDIUM;
if (drv->has_variable_length) {
- int ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
+ int ret = bdrv_co_refresh_total_sectors(bs, bs->total_sectors);
if (ret < 0) {
return ret;
}
@@ -5795,11 +5812,13 @@ int64_t bdrv_nb_sectors(BlockDriverState *bs)
* Return length in bytes on success, -errno on error.
* The length is always a multiple of BDRV_SECTOR_SIZE.
*/
-int64_t bdrv_getlength(BlockDriverState *bs)
+int64_t coroutine_fn bdrv_co_getlength(BlockDriverState *bs)
{
- int64_t ret = bdrv_nb_sectors(bs);
+ int64_t ret;
IO_CODE();
+ assert_bdrv_graph_readable();
+ ret = bdrv_co_nb_sectors(bs);
if (ret < 0) {
return ret;
}
@@ -1603,14 +1603,16 @@ BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
}
-int64_t blk_getlength(BlockBackend *blk)
+int64_t coroutine_fn blk_co_getlength(BlockBackend *blk)
{
IO_CODE();
+ GRAPH_RDLOCK_GUARD();
+
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
- return bdrv_getlength(blk_bs(blk));
+ return bdrv_co_getlength(blk_bs(blk));
}
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
@@ -1623,14 +1625,16 @@ void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
}
}
-int64_t blk_nb_sectors(BlockBackend *blk)
+int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk)
{
IO_CODE();
+ GRAPH_RDLOCK_GUARD();
+
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
- return bdrv_nb_sectors(blk_bs(blk));
+ return bdrv_co_nb_sectors(blk_bs(blk));
}
BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
@@ -123,13 +123,13 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
QEMU_AUTO_VFREE void *buf = NULL;
int64_t len, base_len;
- len = blk_getlength(s->top);
+ len = blk_co_getlength(s->top);
if (len < 0) {
return len;
}
job_progress_set_remaining(&s->common.job, len);
- base_len = blk_getlength(s->base);
+ base_len = blk_co_getlength(s->base);
if (base_len < 0) {
return base_len;
}
@@ -139,6 +139,7 @@ block_gen_c = custom_target('block-gen.c',
input: files(
'../include/block/block-io.h',
'../include/block/dirty-bitmap.h',
+ '../include/block/block_int-io.h',
'../include/block/block-global-state.h',
'../include/sysemu/block-backend-io.h',
'coroutines.h'
@@ -915,13 +915,16 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
goto immediate_exit;
}
- s->bdev_length = bdrv_getlength(bs);
+ bdrv_graph_co_rdlock();
+ s->bdev_length = bdrv_co_getlength(bs);
+ bdrv_graph_co_rdunlock();
+
if (s->bdev_length < 0) {
ret = s->bdev_length;
goto immediate_exit;
}
- target_length = blk_getlength(s->target);
+ target_length = blk_co_getlength(s->target);
if (target_length < 0) {
ret = target_length;
goto immediate_exit;
@@ -2332,10 +2332,15 @@ static void scsi_disk_reset(DeviceState *dev)
{
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev);
uint64_t nb_sectors;
+ AioContext *ctx;
scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET));
+ ctx = blk_get_aio_context(s->qdev.conf.blk);
+ aio_context_acquire(ctx);
blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
+ aio_context_release(ctx);
+
nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
if (nb_sectors) {
nb_sectors--;
@@ -68,8 +68,12 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags,
Error **errp);
-int64_t bdrv_nb_sectors(BlockDriverState *bs);
-int64_t bdrv_getlength(BlockDriverState *bs);
+int64_t coroutine_fn bdrv_co_nb_sectors(BlockDriverState *bs);
+int64_t generated_co_wrapper bdrv_nb_sectors(BlockDriverState *bs);
+
+int64_t coroutine_fn bdrv_co_getlength(BlockDriverState *bs);
+int64_t generated_co_wrapper bdrv_getlength(BlockDriverState *bs);
+
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
BlockDriverState *in_bs, Error **errp);
@@ -723,7 +723,8 @@ struct BlockDriver {
int coroutine_fn (*bdrv_co_truncate)(BlockDriverState *bs, int64_t offset,
bool exact, PreallocMode prealloc,
BdrvRequestFlags flags, Error **errp);
- int64_t (*bdrv_getlength)(BlockDriverState *bs);
+ /* Called with graph rdlock held. */
+ int64_t coroutine_fn (*bdrv_getlength)(BlockDriverState *bs);
int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
/* Does not need graph rdlock, since it does not traverse the graph */
@@ -122,7 +122,10 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);
-int bdrv_refresh_total_sectors(BlockDriverState *bs, int64_t hint);
+int coroutine_fn bdrv_co_refresh_total_sectors(BlockDriverState *bs,
+ int64_t hint);
+int generated_co_wrapper bdrv_refresh_total_sectors(BlockDriverState *bs,
+ int64_t hint);
BdrvChild *bdrv_cow_child(BlockDriverState *bs);
BdrvChild *bdrv_filter_child(BlockDriverState *bs);
@@ -57,9 +57,15 @@ bool blk_is_inserted(BlockBackend *blk);
bool blk_is_available(BlockBackend *blk);
void blk_lock_medium(BlockBackend *blk, bool locked);
void blk_eject(BlockBackend *blk, bool eject_flag);
-int64_t blk_getlength(BlockBackend *blk);
+
+int64_t coroutine_fn blk_co_getlength(BlockBackend *blk);
+int64_t generated_co_wrapper_blk blk_getlength(BlockBackend *blk);
+
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
-int64_t blk_nb_sectors(BlockBackend *blk);
+
+int64_t coroutine_fn blk_co_nb_sectors(BlockBackend *blk);
+int64_t generated_co_wrapper_blk blk_nb_sectors(BlockBackend *blk);
+
void *blk_try_blockalign(BlockBackend *blk, size_t size);
void *blk_blockalign(BlockBackend *blk, size_t size);
bool blk_is_writable(BlockBackend *blk);
@@ -831,7 +831,14 @@ static void test_attach_second_node(void)
qdict_put_str(options, "driver", "raw");
qdict_put_str(options, "file", "base");
+ /*
+ * Acquire iothread AioContext, since bs is in ctx and bdrv_open will
+ * call bdrv_refresh_total_sectors(), a generated_co_wrapper function
+ * that therefore expects ctx lock to be held.
+ */
+ aio_context_acquire(ctx);
filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort);
+ aio_context_release(ctx);
g_assert(blk_get_aio_context(blk) == ctx);
g_assert(bdrv_get_aio_context(bs) == ctx);
g_assert(bdrv_get_aio_context(filter) == ctx);
BlockDriver->bdrv_getlength is categorized as IO callb ack, and it currently doesn't run in a coroutine. This makes very difficult to add the graph rdlock, sin ce the callback traverses the block nodes graph. Therefore use generated_co_wrapper to automatically create a wrapper for bdrv_refresh_total_sectors. Unfortunately we cannot use a generated_co_wrapper_sim ple, because the function is called by mixed functions that run both in coroutine and non-coroutine context (for example bd rv_open_driver()). Unfortunately this callback requires multiple bdrv_* a nd blk_* callbacks to be converted, because there are different mixed callers (coroutines and not) calling this callback fro m different function paths. Because now this function creates a new coroutine and polls, we need to take the AioContext lock where it is missing, for the only reason that internally g_c_w calls AIO_WAIT_WHILE and it expects to release the AioContext lock. This is especially messy when a g_c_w creates a coroutine and polls in bdrv_open_driver, because this function has so many callers in so many context that it can easily lead to deadlocks. Therefore the new rule for bdrv_open_driver is that the caller must always hold the AioContext lock of the given bs (except if it is a coroutine), because the function calls bdrv_refresh_total_sectors() which is a g_c_w. Once the rwlock is ultimated and placed in every place it needs to be, we will poll using AIO_WAIT_WHILE_UNLOCKED and remove the AioContext lock. Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com> --- block.c | 29 ++++++++++++++++++++++++----- block/block-backend.c | 12 ++++++++---- block/commit.c | 4 ++-- block/meson.build | 1 + block/mirror.c | 7 +++++-- hw/scsi/scsi-disk.c | 5 +++++ include/block/block-io.h | 8 ++++++-- include/block/block_int-common.h | 3 ++- include/block/block_int-io.h | 5 ++++- include/sysemu/block-backend-io.h | 10 ++++++++-- tests/unit/test-block-iothread.c | 7 +++++++ 11 files changed, 72 insertions(+), 19 deletions(-)