Message ID | 20190218130702.32575-6-jean-philippe.brucker@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Disk fixes and AIO reset | expand |
On Mon, 18 Feb 2019 13:06:58 +0000 Jean-Philippe Brucker <jean-philippe.brucker@arm.com> wrote: > Add an 'async' attribute to disk_image_operations, that describes if they > can submit async I/O or not. disk_image->async is now set iff > CONFIG_HAS_AIO and the ops do use AIO. The difference between disk->async and disk_ops->async is somewhat easy to miss sometimes, but the patch seems to be correct to me. > This fixes qcow1, which used to set async = 1 even though the qcow > operations don't use AIO. The disk core would perform the read/write > operation without pushing the completion onto the virtio queue, and the > guest would be stuck waiting. Yes, I can confirm that this patch fixes QCOW, both 1 and 2, actually. > Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com> Reviewed-by: Andre Przywara <andre.przywara@arm.com> Cheers, Andre. > --- > disk/aio.c | 9 +++++++++ > disk/blk.c | 9 ++------- > disk/qcow.c | 2 -- > disk/raw.c | 15 +++------------ > include/kvm/disk-image.h | 1 + > 5 files changed, 15 insertions(+), 21 deletions(-) > > diff --git a/disk/aio.c b/disk/aio.c > index 6afcffe5a..007415c69 100644 > --- a/disk/aio.c > +++ b/disk/aio.c > @@ -90,6 +90,10 @@ int disk_aio_setup(struct disk_image *disk) > int r; > pthread_t thread; > > + /* No need to setup AIO if the disk ops won't make use of it */ > + if (!disk->ops->async) > + return 0; > + > disk->evt = eventfd(0, 0); > if (disk->evt < 0) > return -errno; > @@ -101,11 +105,16 @@ int disk_aio_setup(struct disk_image *disk) > close(disk->evt); > return r; > } > + > + disk->async = true; > return 0; > } > > void disk_aio_destroy(struct disk_image *disk) > { > + if (!disk->async) > + return; > + > close(disk->evt); > io_destroy(disk->ctx); > } > diff --git a/disk/blk.c b/disk/blk.c > index 37581d331..48922e028 100644 > --- a/disk/blk.c > +++ b/disk/blk.c > @@ -9,6 +9,7 @@ > static struct disk_image_operations blk_dev_ops = { > .read = raw_image__read, > .write = raw_image__write, > + .async = true, > }; > > static bool is_mounted(struct stat *st) > @@ -35,7 +36,6 @@ static bool is_mounted(struct stat *st) > > struct disk_image *blkdev__probe(const char *filename, int flags, struct stat *st) > { > - struct disk_image *disk; > int fd, r; > u64 size; > > @@ -67,10 +67,5 @@ struct disk_image *blkdev__probe(const char *filename, int flags, struct stat *s > * mmap large disk. There is not enough virtual address space > * in 32-bit host. However, this works on 64-bit host. > */ > - disk = disk_image__new(fd, size, &blk_dev_ops, DISK_IMAGE_REGULAR); > -#ifdef CONFIG_HAS_AIO > - if (!IS_ERR_OR_NULL(disk)) > - disk->async = 1; > -#endif > - return disk; > + return disk_image__new(fd, size, &blk_dev_ops, DISK_IMAGE_REGULAR); > } > diff --git a/disk/qcow.c b/disk/qcow.c > index bed70c65c..dd6be62ee 100644 > --- a/disk/qcow.c > +++ b/disk/qcow.c > @@ -1337,7 +1337,6 @@ static struct disk_image *qcow2_probe(int fd, bool readonly) > if (IS_ERR_OR_NULL(disk_image)) > goto free_refcount_table; > > - disk_image->async = 0; > disk_image->priv = q; > > return disk_image; > @@ -1474,7 +1473,6 @@ static struct disk_image *qcow1_probe(int fd, bool readonly) > if (!disk_image) > goto free_l1_table; > > - disk_image->async = 1; > disk_image->priv = q; > > return disk_image; > diff --git a/disk/raw.c b/disk/raw.c > index 09da7e081..e869d6cc2 100644 > --- a/disk/raw.c > +++ b/disk/raw.c > @@ -67,6 +67,7 @@ int raw_image__close(struct disk_image *disk) > static struct disk_image_operations raw_image_regular_ops = { > .read = raw_image__read, > .write = raw_image__write, > + .async = true, > }; > > struct disk_image_operations ro_ops = { > @@ -77,12 +78,11 @@ struct disk_image_operations ro_ops = { > > struct disk_image_operations ro_ops_nowrite = { > .read = raw_image__read, > + .async = true, > }; > > struct disk_image *raw_image__probe(int fd, struct stat *st, bool readonly) > { > - struct disk_image *disk; > - > if (readonly) { > /* > * Use mmap's MAP_PRIVATE to implement non-persistent write > @@ -93,10 +93,6 @@ struct disk_image *raw_image__probe(int fd, struct stat *st, bool readonly) > disk = disk_image__new(fd, st->st_size, &ro_ops, DISK_IMAGE_MMAP); > if (IS_ERR_OR_NULL(disk)) { > disk = disk_image__new(fd, st->st_size, &ro_ops_nowrite, DISK_IMAGE_REGULAR); > -#ifdef CONFIG_HAS_AIO > - if (!IS_ERR_OR_NULL(disk)) > - disk->async = 1; > -#endif > } > > return disk; > @@ -104,11 +100,6 @@ struct disk_image *raw_image__probe(int fd, struct stat *st, bool readonly) > /* > * Use read/write instead of mmap > */ > - disk = disk_image__new(fd, st->st_size, &raw_image_regular_ops, DISK_IMAGE_REGULAR); > -#ifdef CONFIG_HAS_AIO > - if (!IS_ERR_OR_NULL(disk)) > - disk->async = 1; > -#endif > - return disk; > + return disk_image__new(fd, st->st_size, &raw_image_regular_ops, DISK_IMAGE_REGULAR); > } > } > diff --git a/include/kvm/disk-image.h b/include/kvm/disk-image.h > index 953beb2d5..adc9fe465 100644 > --- a/include/kvm/disk-image.h > +++ b/include/kvm/disk-image.h > @@ -42,6 +42,7 @@ struct disk_image_operations { > int iovcount, void *param); > int (*flush)(struct disk_image *disk); > int (*close)(struct disk_image *disk); > + bool async; > }; > > struct disk_image_params {
diff --git a/disk/aio.c b/disk/aio.c index 6afcffe5a..007415c69 100644 --- a/disk/aio.c +++ b/disk/aio.c @@ -90,6 +90,10 @@ int disk_aio_setup(struct disk_image *disk) int r; pthread_t thread; + /* No need to setup AIO if the disk ops won't make use of it */ + if (!disk->ops->async) + return 0; + disk->evt = eventfd(0, 0); if (disk->evt < 0) return -errno; @@ -101,11 +105,16 @@ int disk_aio_setup(struct disk_image *disk) close(disk->evt); return r; } + + disk->async = true; return 0; } void disk_aio_destroy(struct disk_image *disk) { + if (!disk->async) + return; + close(disk->evt); io_destroy(disk->ctx); } diff --git a/disk/blk.c b/disk/blk.c index 37581d331..48922e028 100644 --- a/disk/blk.c +++ b/disk/blk.c @@ -9,6 +9,7 @@ static struct disk_image_operations blk_dev_ops = { .read = raw_image__read, .write = raw_image__write, + .async = true, }; static bool is_mounted(struct stat *st) @@ -35,7 +36,6 @@ static bool is_mounted(struct stat *st) struct disk_image *blkdev__probe(const char *filename, int flags, struct stat *st) { - struct disk_image *disk; int fd, r; u64 size; @@ -67,10 +67,5 @@ struct disk_image *blkdev__probe(const char *filename, int flags, struct stat *s * mmap large disk. There is not enough virtual address space * in 32-bit host. However, this works on 64-bit host. */ - disk = disk_image__new(fd, size, &blk_dev_ops, DISK_IMAGE_REGULAR); -#ifdef CONFIG_HAS_AIO - if (!IS_ERR_OR_NULL(disk)) - disk->async = 1; -#endif - return disk; + return disk_image__new(fd, size, &blk_dev_ops, DISK_IMAGE_REGULAR); } diff --git a/disk/qcow.c b/disk/qcow.c index bed70c65c..dd6be62ee 100644 --- a/disk/qcow.c +++ b/disk/qcow.c @@ -1337,7 +1337,6 @@ static struct disk_image *qcow2_probe(int fd, bool readonly) if (IS_ERR_OR_NULL(disk_image)) goto free_refcount_table; - disk_image->async = 0; disk_image->priv = q; return disk_image; @@ -1474,7 +1473,6 @@ static struct disk_image *qcow1_probe(int fd, bool readonly) if (!disk_image) goto free_l1_table; - disk_image->async = 1; disk_image->priv = q; return disk_image; diff --git a/disk/raw.c b/disk/raw.c index 09da7e081..e869d6cc2 100644 --- a/disk/raw.c +++ b/disk/raw.c @@ -67,6 +67,7 @@ int raw_image__close(struct disk_image *disk) static struct disk_image_operations raw_image_regular_ops = { .read = raw_image__read, .write = raw_image__write, + .async = true, }; struct disk_image_operations ro_ops = { @@ -77,12 +78,11 @@ struct disk_image_operations ro_ops = { struct disk_image_operations ro_ops_nowrite = { .read = raw_image__read, + .async = true, }; struct disk_image *raw_image__probe(int fd, struct stat *st, bool readonly) { - struct disk_image *disk; - if (readonly) { /* * Use mmap's MAP_PRIVATE to implement non-persistent write @@ -93,10 +93,6 @@ struct disk_image *raw_image__probe(int fd, struct stat *st, bool readonly) disk = disk_image__new(fd, st->st_size, &ro_ops, DISK_IMAGE_MMAP); if (IS_ERR_OR_NULL(disk)) { disk = disk_image__new(fd, st->st_size, &ro_ops_nowrite, DISK_IMAGE_REGULAR); -#ifdef CONFIG_HAS_AIO - if (!IS_ERR_OR_NULL(disk)) - disk->async = 1; -#endif } return disk; @@ -104,11 +100,6 @@ struct disk_image *raw_image__probe(int fd, struct stat *st, bool readonly) /* * Use read/write instead of mmap */ - disk = disk_image__new(fd, st->st_size, &raw_image_regular_ops, DISK_IMAGE_REGULAR); -#ifdef CONFIG_HAS_AIO - if (!IS_ERR_OR_NULL(disk)) - disk->async = 1; -#endif - return disk; + return disk_image__new(fd, st->st_size, &raw_image_regular_ops, DISK_IMAGE_REGULAR); } } diff --git a/include/kvm/disk-image.h b/include/kvm/disk-image.h index 953beb2d5..adc9fe465 100644 --- a/include/kvm/disk-image.h +++ b/include/kvm/disk-image.h @@ -42,6 +42,7 @@ struct disk_image_operations { int iovcount, void *param); int (*flush)(struct disk_image *disk); int (*close)(struct disk_image *disk); + bool async; }; struct disk_image_params {
Add an 'async' attribute to disk_image_operations, that describes if they can submit async I/O or not. disk_image->async is now set iff CONFIG_HAS_AIO and the ops do use AIO. This fixes qcow1, which used to set async = 1 even though the qcow operations don't use AIO. The disk core would perform the read/write operation without pushing the completion onto the virtio queue, and the guest would be stuck waiting. Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com> --- disk/aio.c | 9 +++++++++ disk/blk.c | 9 ++------- disk/qcow.c | 2 -- disk/raw.c | 15 +++------------ include/kvm/disk-image.h | 1 + 5 files changed, 15 insertions(+), 21 deletions(-)