Message ID | 20190509145927.293369-1-sgarzare@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3] block/rbd: increase dynamically the image size | expand |
Kindly ping. Thanks, Stefano On Thu, May 09, 2019 at 04:59:27PM +0200, Stefano Garzarella wrote: > RBD APIs don't allow us to write more than the size set with > rbd_create() or rbd_resize(). > In order to support growing images (eg. qcow2), we resize the > image before write operations that exceed the current size. > > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > --- > v3: > - add 'image_size' field in the BDRVRBDState to keep track of the > current size of the RBD image [Jason, Kevin] > --- > block/rbd.c | 42 +++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 39 insertions(+), 3 deletions(-) > > diff --git a/block/rbd.c b/block/rbd.c > index 0c549c9935..b0355a2ce0 100644 > --- a/block/rbd.c > +++ b/block/rbd.c > @@ -102,6 +102,7 @@ typedef struct BDRVRBDState { > rbd_image_t image; > char *image_name; > char *snap; > + uint64_t image_size; > } BDRVRBDState; > > static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, > @@ -777,6 +778,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, > goto failed_open; > } > > + r = rbd_get_size(s->image, &s->image_size); > + if (r < 0) { > + error_setg_errno(errp, -r, "error getting image size from %s", > + s->image_name); > + rbd_close(s->image); > + goto failed_open; > + } > + > /* If we are using an rbd snapshot, we must be r/o, otherwise > * leave as-is */ > if (s->snap != NULL) { > @@ -833,6 +842,22 @@ static void qemu_rbd_close(BlockDriverState *bs) > rados_shutdown(s->cluster); > } > > +/* Resize the RBD image and update the 'image_size' with the current size */ > +static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size) > +{ > + BDRVRBDState *s = bs->opaque; > + int r; > + > + r = rbd_resize(s->image, size); > + if (r < 0) { > + return r; > + } > + > + s->image_size = size; > + > + return 0; > +} > + > static const AIOCBInfo rbd_aiocb_info = { > .aiocb_size = sizeof(RBDAIOCB), > }; > @@ -934,13 +959,25 @@ static BlockAIOCB 
*rbd_start_aio(BlockDriverState *bs, > } > > switch (cmd) { > - case RBD_AIO_WRITE: > + case RBD_AIO_WRITE: { > + /* > + * RBD APIs don't allow us to write more than actual size, so in order > + * to support growing images, we resize the image before write > + * operations that exceed the current size. > + */ > + if (off + size > s->image_size) { > + r = qemu_rbd_resize(bs, off + size); > + if (r < 0) { > + goto failed_completion; > + } > + } > #ifdef LIBRBD_SUPPORTS_IOVEC > r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); > #else > r = rbd_aio_write(s->image, off, size, rcb->buf, c); > #endif > break; > + } > case RBD_AIO_READ: > #ifdef LIBRBD_SUPPORTS_IOVEC > r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); > @@ -1051,7 +1088,6 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, > PreallocMode prealloc, > Error **errp) > { > - BDRVRBDState *s = bs->opaque; > int r; > > if (prealloc != PREALLOC_MODE_OFF) { > @@ -1060,7 +1096,7 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, > return -ENOTSUP; > } > > - r = rbd_resize(s->image, offset); > + r = qemu_rbd_resize(bs, offset); > if (r < 0) { > error_setg_errno(errp, -r, "Failed to resize file"); > return r; > -- > 2.20.1 > >
Ping. Thanks, Stefano On Tue, May 21, 2019 at 10:56 AM Stefano Garzarella <sgarzare@redhat.com> wrote: > > Kindly ping. > > Thanks, > Stefano > > On Thu, May 09, 2019 at 04:59:27PM +0200, Stefano Garzarella wrote: > > RBD APIs don't allow us to write more than the size set with > > rbd_create() or rbd_resize(). > > In order to support growing images (eg. qcow2), we resize the > > image before write operations that exceed the current size. > > > > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > > --- > > v3: > > - add 'image_size' field in the BDRVRBDState to keep track of the > > current size of the RBD image [Jason, Kevin] > > --- > > block/rbd.c | 42 +++++++++++++++++++++++++++++++++++++++--- > > 1 file changed, 39 insertions(+), 3 deletions(-) > > > > diff --git a/block/rbd.c b/block/rbd.c > > index 0c549c9935..b0355a2ce0 100644 > > --- a/block/rbd.c > > +++ b/block/rbd.c > > @@ -102,6 +102,7 @@ typedef struct BDRVRBDState { > > rbd_image_t image; > > char *image_name; > > char *snap; > > + uint64_t image_size; > > } BDRVRBDState; > > > > static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, > > @@ -777,6 +778,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, > > goto failed_open; > > } > > > > + r = rbd_get_size(s->image, &s->image_size); > > + if (r < 0) { > > + error_setg_errno(errp, -r, "error getting image size from %s", > > + s->image_name); > > + rbd_close(s->image); > > + goto failed_open; > > + } > > + > > /* If we are using an rbd snapshot, we must be r/o, otherwise > > * leave as-is */ > > if (s->snap != NULL) { > > @@ -833,6 +842,22 @@ static void qemu_rbd_close(BlockDriverState *bs) > > rados_shutdown(s->cluster); > > } > > > > +/* Resize the RBD image and update the 'image_size' with the current size */ > > +static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size) > > +{ > > + BDRVRBDState *s = bs->opaque; > > + int r; > > + > > + r = rbd_resize(s->image, size); > > + if (r < 0) { > > + 
return r; > > + } > > + > > + s->image_size = size; > > + > > + return 0; > > +} > > + > > static const AIOCBInfo rbd_aiocb_info = { > > .aiocb_size = sizeof(RBDAIOCB), > > }; > > @@ -934,13 +959,25 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, > > } > > > > switch (cmd) { > > - case RBD_AIO_WRITE: > > + case RBD_AIO_WRITE: { > > + /* > > + * RBD APIs don't allow us to write more than actual size, so in order > > + * to support growing images, we resize the image before write > > + * operations that exceed the current size. > > + */ > > + if (off + size > s->image_size) { > > + r = qemu_rbd_resize(bs, off + size); > > + if (r < 0) { > > + goto failed_completion; > > + } > > + } > > #ifdef LIBRBD_SUPPORTS_IOVEC > > r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); > > #else > > r = rbd_aio_write(s->image, off, size, rcb->buf, c); > > #endif > > break; > > + } > > case RBD_AIO_READ: > > #ifdef LIBRBD_SUPPORTS_IOVEC > > r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); > > @@ -1051,7 +1088,6 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, > > PreallocMode prealloc, > > Error **errp) > > { > > - BDRVRBDState *s = bs->opaque; > > int r; > > > > if (prealloc != PREALLOC_MODE_OFF) { > > @@ -1060,7 +1096,7 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, > > return -ENOTSUP; > > } > > > > - r = rbd_resize(s->image, offset); > > + r = qemu_rbd_resize(bs, offset); > > if (r < 0) { > > error_setg_errno(errp, -r, "Failed to resize file"); > > return r; > > -- > > 2.20.1 > > > > > > -- > > Stefano Garzarella > Software Engineer @ Red Hat
On 09.05.19 16:59, Stefano Garzarella wrote: > RBD APIs don't allow us to write more than the size set with > rbd_create() or rbd_resize(). > In order to support growing images (eg. qcow2), we resize the > image before write operations that exceed the current size. > > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > --- > v3: > - add 'image_size' field in the BDRVRBDState to keep track of the > current size of the RBD image [Jason, Kevin] > --- > block/rbd.c | 42 +++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 39 insertions(+), 3 deletions(-) > > diff --git a/block/rbd.c b/block/rbd.c > index 0c549c9935..b0355a2ce0 100644 > --- a/block/rbd.c > +++ b/block/rbd.c [...] > @@ -833,6 +842,22 @@ static void qemu_rbd_close(BlockDriverState *bs) > rados_shutdown(s->cluster); > } > > +/* Resize the RBD image and update the 'image_size' with the current size */ > +static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size) > +{ > + BDRVRBDState *s = bs->opaque; > + int r; > + > + r = rbd_resize(s->image, size); > + if (r < 0) { > + return r; > + } > + > + s->image_size = size; I think this should update bs->total_sectors, too. In fact, I’m wondering why you don’t just use bs->total_sectors (or bdrv_getlength(), which returns bs->total_sectors * 512) instead of adding this new field? Max > + > + return 0; > +} > + > static const AIOCBInfo rbd_aiocb_info = { > .aiocb_size = sizeof(RBDAIOCB), > };
On Tue, Jun 25, 2019 at 04:02:04PM +0200, Max Reitz wrote: > On 09.05.19 16:59, Stefano Garzarella wrote: > > RBD APIs don't allow us to write more than the size set with > > rbd_create() or rbd_resize(). > > In order to support growing images (eg. qcow2), we resize the > > image before write operations that exceed the current size. > > > > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > > --- > > v3: > > - add 'image_size' field in the BDRVRBDState to keep track of the > > current size of the RBD image [Jason, Kevin] > > --- > > block/rbd.c | 42 +++++++++++++++++++++++++++++++++++++++--- > > 1 file changed, 39 insertions(+), 3 deletions(-) > > > > diff --git a/block/rbd.c b/block/rbd.c > > index 0c549c9935..b0355a2ce0 100644 > > --- a/block/rbd.c > > +++ b/block/rbd.c > > [...] > > > @@ -833,6 +842,22 @@ static void qemu_rbd_close(BlockDriverState *bs) > > rados_shutdown(s->cluster); > > } > > > > +/* Resize the RBD image and update the 'image_size' with the current size */ > > +static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size) > > +{ > > + BDRVRBDState *s = bs->opaque; > > + int r; > > + > > + r = rbd_resize(s->image, size); > > + if (r < 0) { > > + return r; > > + } > > + > > + s->image_size = size; > > I think this should update bs->total_sectors, too. In fact, I’m > wondering why you don’t just use bs->total_sectors (or bdrv_getlength(), > which returns bs->total_sectors * 512) instead of adding this new field? > Hi Max, thanks for taking a look! I used bs->total_sectors in v2, but Jason pointed out a possible issue with this, so I proposed adding a variable to BDRVRBDState to track the latest resize, and Kevin acked [1]. IIUC what Kevin said in his comment, 'bs->total_sectors' should be updated by bdrv_co_write_req_finish(); for this reason I didn't update it. [1] https://www.mail-archive.com/qemu-devel@nongnu.org/msg615195.html Thanks, Stefano
On 25.06.19 16:47, Stefano Garzarella wrote: > On Tue, Jun 25, 2019 at 04:02:04PM +0200, Max Reitz wrote: >> On 09.05.19 16:59, Stefano Garzarella wrote: >>> RBD APIs don't allow us to write more than the size set with >>> rbd_create() or rbd_resize(). >>> In order to support growing images (eg. qcow2), we resize the >>> image before write operations that exceed the current size. >>> >>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> >>> --- >>> v3: >>> - add 'image_size' field in the BDRVRBDState to keep track of the >>> current size of the RBD image [Jason, Kevin] >>> --- >>> block/rbd.c | 42 +++++++++++++++++++++++++++++++++++++++--- >>> 1 file changed, 39 insertions(+), 3 deletions(-) >>> >>> diff --git a/block/rbd.c b/block/rbd.c >>> index 0c549c9935..b0355a2ce0 100644 >>> --- a/block/rbd.c >>> +++ b/block/rbd.c >> >> [...] >> >>> @@ -833,6 +842,22 @@ static void qemu_rbd_close(BlockDriverState *bs) >>> rados_shutdown(s->cluster); >>> } >>> >>> +/* Resize the RBD image and update the 'image_size' with the current size */ >>> +static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size) >>> +{ >>> + BDRVRBDState *s = bs->opaque; >>> + int r; >>> + >>> + r = rbd_resize(s->image, size); >>> + if (r < 0) { >>> + return r; >>> + } >>> + >>> + s->image_size = size; >> >> I think this should update bs->total_sectors, too. In fact, I’m >> wondering why you don’t just use bs->total_sectors (or bdrv_getlength(), >> which returns bs->total_sectors * 512) instead of adding this new field? >> > > Hi Max, > thanks for taking a look! > > I used bs->total_sectors in the v2, but Jason pointed out a possible > issue with this, so I proposed to add a variable in the BDRVRBDState to > track the latest resize and Kevin acked [1]. > > IIUC what Kevin said on his comment, the 'bs->total_sectors' should be > updated by bdrv_co_write_req_finish(), for this reason I didn't update > it. > > [1] https://www.mail-archive.com/qemu-devel@nongnu.org/msg615195.html Ah, right! 
Yeah, sure, now that I think about it, the block layer must have general code for successful writes beyond EOF... (Read: Now that I’m pointed towards it...) OK then; thanks for the patch, applied to my block branch: https://git.xanclic.moe/XanClic/qemu/commits/branch/block Max
On Tue, Jun 25, 2019 at 04:57:53PM +0200, Max Reitz wrote: > On 25.06.19 16:47, Stefano Garzarella wrote: > > On Tue, Jun 25, 2019 at 04:02:04PM +0200, Max Reitz wrote: > >> On 09.05.19 16:59, Stefano Garzarella wrote: > >>> RBD APIs don't allow us to write more than the size set with > >>> rbd_create() or rbd_resize(). > >>> In order to support growing images (eg. qcow2), we resize the > >>> image before write operations that exceed the current size. > >>> > >>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > >>> --- > >>> v3: > >>> - add 'image_size' field in the BDRVRBDState to keep track of the > >>> current size of the RBD image [Jason, Kevin] > >>> --- > >>> block/rbd.c | 42 +++++++++++++++++++++++++++++++++++++++--- > >>> 1 file changed, 39 insertions(+), 3 deletions(-) > >>> > >>> diff --git a/block/rbd.c b/block/rbd.c > >>> index 0c549c9935..b0355a2ce0 100644 > >>> --- a/block/rbd.c > >>> +++ b/block/rbd.c > >> > >> [...] > >> > >>> @@ -833,6 +842,22 @@ static void qemu_rbd_close(BlockDriverState *bs) > >>> rados_shutdown(s->cluster); > >>> } > >>> > >>> +/* Resize the RBD image and update the 'image_size' with the current size */ > >>> +static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size) > >>> +{ > >>> + BDRVRBDState *s = bs->opaque; > >>> + int r; > >>> + > >>> + r = rbd_resize(s->image, size); > >>> + if (r < 0) { > >>> + return r; > >>> + } > >>> + > >>> + s->image_size = size; > >> > >> I think this should update bs->total_sectors, too. In fact, I’m > >> wondering why you don’t just use bs->total_sectors (or bdrv_getlength(), > >> which returns bs->total_sectors * 512) instead of adding this new field? > >> > > > > Hi Max, > > thanks for taking a look! > > > > I used bs->total_sectors in the v2, but Jason pointed out a possible > > issue with this, so I proposed to add a variable in the BDRVRBDState to > > track the latest resize and Kevin acked [1]. 
> > > > IIUC what Kevin said on his comment, the 'bs->total_sectors' should be > > updated by bdrv_co_write_req_finish(), for this reason I didn't update > > it. > > > > [1] https://www.mail-archive.com/qemu-devel@nongnu.org/msg615195.html > > Ah, right! Yeah, sure, now that I think about it, the block layer must > have general code for successful writes beyond EOF... (Read: Now that > I’m pointed towards it...) Yeah, do you mean, for example, calling .bdrv_co_truncate() (or a new callback implemented only in drivers that need to be resized, like rbd) in bdrv_driver_pwritev() if we are going to write beyond EOF? > > OK then; thanks for the patch, applied to my block branch: Thanks for taking this! Stefano
On 25.06.19 17:28, Stefano Garzarella wrote: > On Tue, Jun 25, 2019 at 04:57:53PM +0200, Max Reitz wrote: >> On 25.06.19 16:47, Stefano Garzarella wrote: >>> On Tue, Jun 25, 2019 at 04:02:04PM +0200, Max Reitz wrote: >>>> On 09.05.19 16:59, Stefano Garzarella wrote: >>>>> RBD APIs don't allow us to write more than the size set with >>>>> rbd_create() or rbd_resize(). >>>>> In order to support growing images (eg. qcow2), we resize the >>>>> image before write operations that exceed the current size. >>>>> >>>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> >>>>> --- >>>>> v3: >>>>> - add 'image_size' field in the BDRVRBDState to keep track of the >>>>> current size of the RBD image [Jason, Kevin] >>>>> --- >>>>> block/rbd.c | 42 +++++++++++++++++++++++++++++++++++++++--- >>>>> 1 file changed, 39 insertions(+), 3 deletions(-) >>>>> >>>>> diff --git a/block/rbd.c b/block/rbd.c >>>>> index 0c549c9935..b0355a2ce0 100644 >>>>> --- a/block/rbd.c >>>>> +++ b/block/rbd.c >>>> >>>> [...] >>>> >>>>> @@ -833,6 +842,22 @@ static void qemu_rbd_close(BlockDriverState *bs) >>>>> rados_shutdown(s->cluster); >>>>> } >>>>> >>>>> +/* Resize the RBD image and update the 'image_size' with the current size */ >>>>> +static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size) >>>>> +{ >>>>> + BDRVRBDState *s = bs->opaque; >>>>> + int r; >>>>> + >>>>> + r = rbd_resize(s->image, size); >>>>> + if (r < 0) { >>>>> + return r; >>>>> + } >>>>> + >>>>> + s->image_size = size; >>>> >>>> I think this should update bs->total_sectors, too. In fact, I’m >>>> wondering why you don’t just use bs->total_sectors (or bdrv_getlength(), >>>> which returns bs->total_sectors * 512) instead of adding this new field? >>>> >>> >>> Hi Max, >>> thanks for taking a look! >>> >>> I used bs->total_sectors in the v2, but Jason pointed out a possible >>> issue with this, so I proposed to add a variable in the BDRVRBDState to >>> track the latest resize and Kevin acked [1]. 
>>> >>> IIUC what Kevin said on his comment, the 'bs->total_sectors' should be >>> updated by bdrv_co_write_req_finish(), for this reason I didn't update >>> it. >>> >>> [1] https://www.mail-archive.com/qemu-devel@nongnu.org/msg615195.html >> >> Ah, right! Yeah, sure, now that I think about it, the block layer must >> have general code for successful writes beyond EOF... (Read: Now that >> I’m pointed towards it...) > > Yeah, do you mean for example to call .bdrv_co_truncate() (or a new > callback implemented only in the driver that need to be resized like > rbd) in the bdrv_driver_pwritev() if we will write beyond EOF? No, I just mean that in theory all drivers should support resizing by writing beyond the EOF (in practice, it only matters for protocol drivers). The general block layer code needs to recognize this and handle it as if the BDS was explicitly resized with bdrv_co_truncate(). And it does do that, specifically in bdrv_co_write_req_finish(), as you wrote. Max
diff --git a/block/rbd.c b/block/rbd.c index 0c549c9935..b0355a2ce0 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -102,6 +102,7 @@ typedef struct BDRVRBDState { rbd_image_t image; char *image_name; char *snap; + uint64_t image_size; } BDRVRBDState; static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx, @@ -777,6 +778,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, goto failed_open; } + r = rbd_get_size(s->image, &s->image_size); + if (r < 0) { + error_setg_errno(errp, -r, "error getting image size from %s", + s->image_name); + rbd_close(s->image); + goto failed_open; + } + /* If we are using an rbd snapshot, we must be r/o, otherwise * leave as-is */ if (s->snap != NULL) { @@ -833,6 +842,22 @@ static void qemu_rbd_close(BlockDriverState *bs) rados_shutdown(s->cluster); } +/* Resize the RBD image and update the 'image_size' with the current size */ +static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size) +{ + BDRVRBDState *s = bs->opaque; + int r; + + r = rbd_resize(s->image, size); + if (r < 0) { + return r; + } + + s->image_size = size; + + return 0; +} + static const AIOCBInfo rbd_aiocb_info = { .aiocb_size = sizeof(RBDAIOCB), }; @@ -934,13 +959,25 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, } switch (cmd) { - case RBD_AIO_WRITE: + case RBD_AIO_WRITE: { + /* + * RBD APIs don't allow us to write more than actual size, so in order + * to support growing images, we resize the image before write + * operations that exceed the current size. 
+ */ + if (off + size > s->image_size) { + r = qemu_rbd_resize(bs, off + size); + if (r < 0) { + goto failed_completion; + } + } #ifdef LIBRBD_SUPPORTS_IOVEC r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c); #else r = rbd_aio_write(s->image, off, size, rcb->buf, c); #endif break; + } case RBD_AIO_READ: #ifdef LIBRBD_SUPPORTS_IOVEC r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c); @@ -1051,7 +1088,6 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, PreallocMode prealloc, Error **errp) { - BDRVRBDState *s = bs->opaque; int r; if (prealloc != PREALLOC_MODE_OFF) { @@ -1060,7 +1096,7 @@ static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs, return -ENOTSUP; } - r = rbd_resize(s->image, offset); + r = qemu_rbd_resize(bs, offset); if (r < 0) { error_setg_errno(errp, -r, "Failed to resize file"); return r;
RBD APIs don't allow us to write beyond the size set with rbd_create() or rbd_resize(). In order to support growing images (e.g. qcow2), we resize the image before write operations that exceed the current size. Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> --- v3: - add 'image_size' field in the BDRVRBDState to keep track of the current size of the RBD image [Jason, Kevin] --- block/rbd.c | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-)