Message ID | 20201227164236.10143-7-pl@kamp.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | block/rbd: migrate to coroutines and add write zeroes support | expand |
On Sun, Dec 27, 2020 at 11:42 AM Peter Lieven <pl@kamp.de> wrote: > > Signed-off-by: Peter Lieven <pl@kamp.de> > --- > block/rbd.c | 31 ++++++++++++++++++++++++++++++- > 1 file changed, 30 insertions(+), 1 deletion(-) > > diff --git a/block/rbd.c b/block/rbd.c > index 2d77d0007f..27b4404adf 100644 > --- a/block/rbd.c > +++ b/block/rbd.c > @@ -63,7 +63,8 @@ typedef enum { > RBD_AIO_READ, > RBD_AIO_WRITE, > RBD_AIO_DISCARD, > - RBD_AIO_FLUSH > + RBD_AIO_FLUSH, > + RBD_AIO_WRITE_ZEROES > } RBDAIOCmd; > > typedef struct BDRVRBDState { > @@ -221,8 +222,12 @@ done: > > static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp) > { > + BDRVRBDState *s = bs->opaque; > /* XXX Does RBD support AIO on less than 512-byte alignment? */ > bs->bl.request_alignment = 512; > +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES > + bs->bl.pwrite_zeroes_alignment = s->object_size; The optimal alignment is 512 bytes -- but it can safely work just fine down to 1 byte alignment since it will pad the request with additional writes if needed. > +#endif > } > > > @@ -695,6 +700,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, > } > > s->aio_context = bdrv_get_aio_context(bs); > +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES > + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; > +#endif > > /* When extending regular files, we get zeros from the OS */ > bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; > @@ -808,6 +816,11 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs, > case RBD_AIO_FLUSH: > r = rbd_aio_flush(s->image, c); > break; > +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES > + case RBD_AIO_WRITE_ZEROES: > + r = rbd_aio_write_zeroes(s->image, offset, bytes, c, 0, 0); > + break; > +#endif > default: > r = -EINVAL; > } > @@ -880,6 +893,19 @@ static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs, > return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD); > } > > +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES > +static int > +coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, > + int count, BdrvRequestFlags flags) > +{ > + if (!(flags & BDRV_REQ_MAY_UNMAP)) { > + return -ENOTSUP; > + } There is a "RBD_WRITE_ZEROES_FLAG_THICK_PROVISION" flag that you can use to optionally disable unmap. > + return qemu_rbd_start_co(bs, offset, count, NULL, flags, > + RBD_AIO_WRITE_ZEROES); > +} > +#endif > + > static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi) > { > BDRVRBDState *s = bs->opaque; > @@ -1108,6 +1134,9 @@ static BlockDriver bdrv_rbd = { > .bdrv_co_pwritev = qemu_rbd_co_pwritev, > .bdrv_co_flush_to_disk = qemu_rbd_co_flush, > .bdrv_co_pdiscard = qemu_rbd_co_pdiscard, > +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES > + .bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes, > +#endif > > .bdrv_snapshot_create = qemu_rbd_snap_create, > .bdrv_snapshot_delete = qemu_rbd_snap_remove, > -- > 2.17.1 > > -- Jason
Am 14.01.21 um 20:19 schrieb Jason Dillaman: > On Sun, Dec 27, 2020 at 11:42 AM Peter Lieven <pl@kamp.de> wrote: >> Signed-off-by: Peter Lieven <pl@kamp.de> >> --- >> block/rbd.c | 31 ++++++++++++++++++++++++++++++- >> 1 file changed, 30 insertions(+), 1 deletion(-) >> >> diff --git a/block/rbd.c b/block/rbd.c >> index 2d77d0007f..27b4404adf 100644 >> --- a/block/rbd.c >> +++ b/block/rbd.c >> @@ -63,7 +63,8 @@ typedef enum { >> RBD_AIO_READ, >> RBD_AIO_WRITE, >> RBD_AIO_DISCARD, >> - RBD_AIO_FLUSH >> + RBD_AIO_FLUSH, >> + RBD_AIO_WRITE_ZEROES >> } RBDAIOCmd; >> >> typedef struct BDRVRBDState { >> @@ -221,8 +222,12 @@ done: >> >> static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp) >> { >> + BDRVRBDState *s = bs->opaque; >> /* XXX Does RBD support AIO on less than 512-byte alignment? */ >> bs->bl.request_alignment = 512; >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES >> + bs->bl.pwrite_zeroes_alignment = s->object_size; > The optimal alignment is 512 bytes -- but it can safely work just fine > down to 1 byte alignment since it will pad the request with additional > writes if needed. Okay and this will likely be faster than having Qemu doing that request split, right? If we drop the alignment hint Qemu will pass the original request. > >> +#endif >> } >> >> >> @@ -695,6 +700,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, >> } >> >> s->aio_context = bdrv_get_aio_context(bs); >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES >> + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; >> +#endif >> >> /* When extending regular files, we get zeros from the OS */ >> bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; >> @@ -808,6 +816,11 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs, >> case RBD_AIO_FLUSH: >> r = rbd_aio_flush(s->image, c); >> break; >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES >> + case RBD_AIO_WRITE_ZEROES: >> + r = rbd_aio_write_zeroes(s->image, offset, bytes, c, 0, 0); >> + break; >> +#endif >> default: >> r = -EINVAL; >> } >> @@ -880,6 +893,19 @@ static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs, >> return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD); >> } >> >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES >> +static int >> +coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, >> + int count, BdrvRequestFlags flags) >> +{ >> + if (!(flags & BDRV_REQ_MAY_UNMAP)) { >> + return -ENOTSUP; >> + } > There is a "RBD_WRITE_ZEROES_FLAG_THICK_PROVISION" flag that you can > use to optionally disable unmap. I have seen that. If you want I can support for this, too. But afaik this is only available since Octopus release? Peter
On Thu, Jan 14, 2021 at 2:41 PM Peter Lieven <pl@kamp.de> wrote: > > Am 14.01.21 um 20:19 schrieb Jason Dillaman: > > On Sun, Dec 27, 2020 at 11:42 AM Peter Lieven <pl@kamp.de> wrote: > >> Signed-off-by: Peter Lieven <pl@kamp.de> > >> --- > >> block/rbd.c | 31 ++++++++++++++++++++++++++++++- > >> 1 file changed, 30 insertions(+), 1 deletion(-) > >> > >> diff --git a/block/rbd.c b/block/rbd.c > >> index 2d77d0007f..27b4404adf 100644 > >> --- a/block/rbd.c > >> +++ b/block/rbd.c > >> @@ -63,7 +63,8 @@ typedef enum { > >> RBD_AIO_READ, > >> RBD_AIO_WRITE, > >> RBD_AIO_DISCARD, > >> - RBD_AIO_FLUSH > >> + RBD_AIO_FLUSH, > >> + RBD_AIO_WRITE_ZEROES > >> } RBDAIOCmd; > >> > >> typedef struct BDRVRBDState { > >> @@ -221,8 +222,12 @@ done: > >> > >> static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp) > >> { > >> + BDRVRBDState *s = bs->opaque; > >> /* XXX Does RBD support AIO on less than 512-byte alignment? */ > >> bs->bl.request_alignment = 512; > >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES > >> + bs->bl.pwrite_zeroes_alignment = s->object_size; > > The optimal alignment is 512 bytes -- but it can safely work just fine > > down to 1 byte alignment since it will pad the request with additional > > writes if needed. > > > Okay and this will likely be faster than having Qemu doing that request split, right? > > If we drop the alignment hint Qemu will pass the original request. > > > > > >> +#endif > >> } > >> > >> > >> @@ -695,6 +700,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, > >> } > >> > >> s->aio_context = bdrv_get_aio_context(bs); > >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES > >> + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; > >> +#endif > >> > >> /* When extending regular files, we get zeros from the OS */ > >> bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; > >> @@ -808,6 +816,11 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs, > >> case RBD_AIO_FLUSH: > >> r = rbd_aio_flush(s->image, c); > >> break; > >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES > >> + case RBD_AIO_WRITE_ZEROES: > >> + r = rbd_aio_write_zeroes(s->image, offset, bytes, c, 0, 0); > >> + break; > >> +#endif > >> default: > >> r = -EINVAL; > >> } > >> @@ -880,6 +893,19 @@ static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs, > >> return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD); > >> } > >> > >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES > >> +static int > >> +coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, > >> + int count, BdrvRequestFlags flags) > >> +{ > >> + if (!(flags & BDRV_REQ_MAY_UNMAP)) { > >> + return -ENOTSUP; > >> + } > > There is a "RBD_WRITE_ZEROES_FLAG_THICK_PROVISION" flag that you can > > use to optionally disable unmap. > > > I have seen that. If you want I can support for this, too. But afaik this > > is only available since Octopus release? True -- I didn't backport that support to Nautilus since it was a new feature vs the bug-fix that the write-zeroes API was introduced to fix. > > Peter > >
diff --git a/block/rbd.c b/block/rbd.c index 2d77d0007f..27b4404adf 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -63,7 +63,8 @@ typedef enum { RBD_AIO_READ, RBD_AIO_WRITE, RBD_AIO_DISCARD, - RBD_AIO_FLUSH + RBD_AIO_FLUSH, + RBD_AIO_WRITE_ZEROES } RBDAIOCmd; typedef struct BDRVRBDState { @@ -221,8 +222,12 @@ done: static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp) { + BDRVRBDState *s = bs->opaque; /* XXX Does RBD support AIO on less than 512-byte alignment? */ bs->bl.request_alignment = 512; +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES + bs->bl.pwrite_zeroes_alignment = s->object_size; +#endif } @@ -695,6 +700,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, } s->aio_context = bdrv_get_aio_context(bs); +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; +#endif /* When extending regular files, we get zeros from the OS */ bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; @@ -808,6 +816,11 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs, case RBD_AIO_FLUSH: r = rbd_aio_flush(s->image, c); break; +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES + case RBD_AIO_WRITE_ZEROES: + r = rbd_aio_write_zeroes(s->image, offset, bytes, c, 0, 0); + break; +#endif default: r = -EINVAL; } @@ -880,6 +893,19 @@ static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs, return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD); } +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES +static int +coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int count, BdrvRequestFlags flags) +{ + if (!(flags & BDRV_REQ_MAY_UNMAP)) { + return -ENOTSUP; + } + return qemu_rbd_start_co(bs, offset, count, NULL, flags, + RBD_AIO_WRITE_ZEROES); +} +#endif + static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi) { BDRVRBDState *s = bs->opaque; @@ -1108,6 +1134,9 @@ static BlockDriver bdrv_rbd = { .bdrv_co_pwritev = qemu_rbd_co_pwritev, .bdrv_co_flush_to_disk = qemu_rbd_co_flush, .bdrv_co_pdiscard = qemu_rbd_co_pdiscard, +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES + .bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes, +#endif .bdrv_snapshot_create = qemu_rbd_snap_create, .bdrv_snapshot_delete = qemu_rbd_snap_remove,
Signed-off-by: Peter Lieven <pl@kamp.de> --- block/rbd.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-)