Message ID | 20210809134136.23140-1-pl@kamp.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | block/rbd: implement bdrv_co_block_status | expand |
On Mon, Aug 09, 2021 at 03:41:36PM +0200, Peter Lieven wrote: Please, can you add a description? For example also describing what happens if RBD image does not support RBD_FEATURE_FAST_DIFF. >Signed-off-by: Peter Lieven <pl@kamp.de> >--- > block/rbd.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 119 insertions(+) > >diff --git a/block/rbd.c b/block/rbd.c >index dcf82b15b8..ef1eaa6af3 100644 >--- a/block/rbd.c >+++ b/block/rbd.c >@@ -88,6 +88,7 @@ typedef struct BDRVRBDState { > char *namespace; > uint64_t image_size; > uint64_t object_size; >+ uint64_t features; > } BDRVRBDState; > > typedef struct RBDTask { >@@ -983,6 +984,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, > s->image_size = info.size; > s->object_size = info.obj_size; > >+ r = rbd_get_features(s->image, &s->features); >+ if (r < 0) { >+ error_setg_errno(errp, -r, "error getting image features from %s", >+ s->image_name); >+ rbd_close(s->image); >+ goto failed_open; ^ You can use `failed_post_open` label here, so you can avoid to call rbd_close(). >+ } >+ > /* If we are using an rbd snapshot, we must be r/o, otherwise > * leave as-is */ > if (s->snap != NULL) { >@@ -1259,6 +1268,115 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs, > return spec_info; > } > >+typedef struct rbd_diff_req { >+ uint64_t offs; >+ uint64_t bytes; >+ int exists; >+} rbd_diff_req; >+ >+static int qemu_rbd_co_block_status_cb(uint64_t offs, size_t len, >+ int exists, void *opaque) >+{ >+ struct rbd_diff_req *req = opaque; >+ >+ assert(req->offs + req->bytes <= offs); >+ assert(offs >= req->offs + req->bytes); I think just one of the two asserts is enough, isn't that the same condition? >+ >+ if (req->exists && offs > req->offs + req->bytes) { >+ /* >+ * we started in an allocated area and jumped over an unallocated area, >+ * req->bytes contains the length of the allocated area before the >+ * unallocated area. stop further processing. >+ */ >+ return -9000; ^ What is this magical value? Please add a macro (with a comment) and also use it below in other places. >+ } >+ if (req->exists && !exists) { >+ /* >+ * we started in an allocated area and reached a hole. >req->bytes >+ * contains the length of the allocated area before the hole. >+ * stop further processing. >+ */ >+ return -9000; >+ } >+ if (!req->exists && exists && offs > req->offs) { >+ /* >+ * we started in an unallocated area and hit the first allocated >+ * block. req->bytes must be set to the length of the unallocated area >+ * before the allocated area. stop further processing. >+ */ >+ req->bytes = offs - req->offs; >+ return -9000; >+ } >+ >+ /* >+ * assert that we catched all cases above and allocation state has not >+ * changed during callbacks. >+ */ >+ assert(exists == req->exists || !req->bytes); >+ req->exists = exists; >+ >+ /* >+ * assert that we either return an unallocated block or have got callbacks >+ * for all allocated blocks present. >+ */ >+ assert(!req->exists || offs == req->offs + req->bytes); >+ req->bytes = offs + len - req->offs; >+ >+ return 0; >+} >+ >+static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, >+ bool want_zero, int64_t offset, >+ int64_t bytes, int64_t *pnum, >+ int64_t *map, >+ BlockDriverState **file) >+{ >+ BDRVRBDState *s = bs->opaque; >+ int ret, r; >+ struct rbd_diff_req req = { .offs = offset }; >+ >+ assert(offset + bytes <= s->image_size); >+ >+ /* default to all sectors allocated */ >+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; >+ if (map) { >+ *map = offset; >+ } >+ *pnum = bytes; >+ >+ /* RBD image does not support fast-diff */ >+ if (!(s->features & RBD_FEATURE_FAST_DIFF)) { >+ goto out; >+ } >+ >+ r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, >+ qemu_rbd_co_block_status_cb, &req); >+ if (r < 0 && r != -9000) { >+ goto out; >+ } >+ assert(req.bytes <= bytes); >+ if (!req.exists) { >+ if (r == 0 && !req.bytes) { >+ /* >+ * rbd_diff_iterate2 does not invoke callbacks for >unallocated areas >+ * except for the case where an overlay has a hole where >the parent >+ * has not. This here catches the case where no callback >was >+ * invoked at all. >+ */ >+ req.bytes = bytes; >+ } >+ ret &= ~BDRV_BLOCK_DATA; >+ ret |= BDRV_BLOCK_ZERO; >+ } >+ *pnum = req.bytes; >+ >+out: >+ if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) { Can ret be zero at this point? Doesn't BDRV_BLOCK_OFFSET_VALID always stay set? Thanks, Stefano
Am 10.08.21 um 10:51 schrieb Stefano Garzarella: > On Mon, Aug 09, 2021 at 03:41:36PM +0200, Peter Lieven wrote: > > Please, can you add a description? > For example also describing what happens if RBD image does not support RBD_FEATURE_FAST_DIFF. Sure. > >> Signed-off-by: Peter Lieven <pl@kamp.de> >> --- >> block/rbd.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 119 insertions(+) >> >> diff --git a/block/rbd.c b/block/rbd.c >> index dcf82b15b8..ef1eaa6af3 100644 >> --- a/block/rbd.c >> +++ b/block/rbd.c >> @@ -88,6 +88,7 @@ typedef struct BDRVRBDState { >> char *namespace; >> uint64_t image_size; >> uint64_t object_size; >> + uint64_t features; >> } BDRVRBDState; >> >> typedef struct RBDTask { >> @@ -983,6 +984,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, >> s->image_size = info.size; >> s->object_size = info.obj_size; >> >> + r = rbd_get_features(s->image, &s->features); >> + if (r < 0) { >> + error_setg_errno(errp, -r, "error getting image features from %s", >> + s->image_name); >> + rbd_close(s->image); >> + goto failed_open; > ^ > You can use `failed_post_open` label here, so you can avoid to call rbd_close(). Bad me, I developed this patch in a Qemu version where failed_post_open wasn't present... > >> + } >> + >> /* If we are using an rbd snapshot, we must be r/o, otherwise >> * leave as-is */ >> if (s->snap != NULL) { >> @@ -1259,6 +1268,115 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs, >> return spec_info; >> } >> >> +typedef struct rbd_diff_req { >> + uint64_t offs; >> + uint64_t bytes; >> + int exists; >> +} rbd_diff_req; >> + >> +static int qemu_rbd_co_block_status_cb(uint64_t offs, size_t len, >> + int exists, void *opaque) >> +{ >> + struct rbd_diff_req *req = opaque; >> + >> + assert(req->offs + req->bytes <= offs); >> + assert(offs >= req->offs + req->bytes); > > I think just one of the two asserts is enough, isn't that the same condition? Right. > >> + >> + if (req->exists && offs > req->offs + req->bytes) { >> + /* >> + * we started in an allocated area and jumped over an unallocated area, >> + * req->bytes contains the length of the allocated area before the >> + * unallocated area. stop further processing. >> + */ >> + return -9000; > ^ > What is this magical value? > > Please add a macro (with a comment) and also use it below in other places. Will add in V2. > >> + } >> + if (req->exists && !exists) { >> + /* >> + * we started in an allocated area and reached a hole. req->bytes >> + * contains the length of the allocated area before the hole. >> + * stop further processing. >> + */ >> + return -9000; >> + } >> + if (!req->exists && exists && offs > req->offs) { >> + /* >> + * we started in an unallocated area and hit the first allocated >> + * block. req->bytes must be set to the length of the unallocated area >> + * before the allocated area. stop further processing. >> + */ >> + req->bytes = offs - req->offs; >> + return -9000; >> + } >> + >> + /* >> + * assert that we catched all cases above and allocation state has not >> + * changed during callbacks. >> + */ >> + assert(exists == req->exists || !req->bytes); >> + req->exists = exists; >> + >> + /* >> + * assert that we either return an unallocated block or have got callbacks >> + * for all allocated blocks present. >> + */ >> + assert(!req->exists || offs == req->offs + req->bytes); >> + req->bytes = offs + len - req->offs; >> + >> + return 0; >> +} >> + >> +static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, >> + bool want_zero, int64_t offset, >> + int64_t bytes, int64_t *pnum, >> + int64_t *map, >> + BlockDriverState **file) >> +{ >> + BDRVRBDState *s = bs->opaque; >> + int ret, r; >> + struct rbd_diff_req req = { .offs = offset }; >> + >> + assert(offset + bytes <= s->image_size); >> + >> + /* default to all sectors allocated */ >> + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; >> + if (map) { >> + *map = offset; >> + } >> + *pnum = bytes; >> + >> + /* RBD image does not support fast-diff */ >> + if (!(s->features & RBD_FEATURE_FAST_DIFF)) { >> + goto out; >> + } >> + >> + r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, >> + qemu_rbd_co_block_status_cb, &req); >> + if (r < 0 && r != -9000) { >> + goto out; >> + } >> + assert(req.bytes <= bytes); >> + if (!req.exists) { >> + if (r == 0 && !req.bytes) { >> + /* >> + * rbd_diff_iterate2 does not invoke callbacks for unallocated areas >> + * except for the case where an overlay has a hole where the parent >> + * has not. This here catches the case where no callback was >> + * invoked at all. >> + */ >> + req.bytes = bytes; >> + } >> + ret &= ~BDRV_BLOCK_DATA; >> + ret |= BDRV_BLOCK_ZERO; >> + } >> + *pnum = req.bytes; >> + >> +out: >> + if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) { > > Can ret be zero at this point? > Doesn't BDRV_BLOCK_OFFSET_VALID always stay set? Right, I decided to declare any area as allocated if rbd_diff_iterate2 would fail so this can't happen. I will send a V2 shortly. Peter
diff --git a/block/rbd.c b/block/rbd.c index dcf82b15b8..ef1eaa6af3 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -88,6 +88,7 @@ typedef struct BDRVRBDState { char *namespace; uint64_t image_size; uint64_t object_size; + uint64_t features; } BDRVRBDState; typedef struct RBDTask { @@ -983,6 +984,14 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, s->image_size = info.size; s->object_size = info.obj_size; + r = rbd_get_features(s->image, &s->features); + if (r < 0) { + error_setg_errno(errp, -r, "error getting image features from %s", + s->image_name); + rbd_close(s->image); + goto failed_open; + } + /* If we are using an rbd snapshot, we must be r/o, otherwise * leave as-is */ if (s->snap != NULL) { @@ -1259,6 +1268,115 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs, return spec_info; } +typedef struct rbd_diff_req { + uint64_t offs; + uint64_t bytes; + int exists; +} rbd_diff_req; + +static int qemu_rbd_co_block_status_cb(uint64_t offs, size_t len, + int exists, void *opaque) +{ + struct rbd_diff_req *req = opaque; + + assert(req->offs + req->bytes <= offs); + assert(offs >= req->offs + req->bytes); + + if (req->exists && offs > req->offs + req->bytes) { + /* + * we started in an allocated area and jumped over an unallocated area, + * req->bytes contains the length of the allocated area before the + * unallocated area. stop further processing. + */ + return -9000; + } + if (req->exists && !exists) { + /* + * we started in an allocated area and reached a hole. req->bytes + * contains the length of the allocated area before the hole. + * stop further processing. + */ + return -9000; + } + if (!req->exists && exists && offs > req->offs) { + /* + * we started in an unallocated area and hit the first allocated + * block. req->bytes must be set to the length of the unallocated area + * before the allocated area. stop further processing. + */ + req->bytes = offs - req->offs; + return -9000; + } + + /* + * assert that we catched all cases above and allocation state has not + * changed during callbacks. + */ + assert(exists == req->exists || !req->bytes); + req->exists = exists; + + /* + * assert that we either return an unallocated block or have got callbacks + * for all allocated blocks present. + */ + assert(!req->exists || offs == req->offs + req->bytes); + req->bytes = offs + len - req->offs; + + return 0; +} + +static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + bool want_zero, int64_t offset, + int64_t bytes, int64_t *pnum, + int64_t *map, + BlockDriverState **file) +{ + BDRVRBDState *s = bs->opaque; + int ret, r; + struct rbd_diff_req req = { .offs = offset }; + + assert(offset + bytes <= s->image_size); + + /* default to all sectors allocated */ + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; + if (map) { + *map = offset; + } + *pnum = bytes; + + /* RBD image does not support fast-diff */ + if (!(s->features & RBD_FEATURE_FAST_DIFF)) { + goto out; + } + + r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, + qemu_rbd_co_block_status_cb, &req); + if (r < 0 && r != -9000) { + goto out; + } + assert(req.bytes <= bytes); + if (!req.exists) { + if (r == 0 && !req.bytes) { + /* + * rbd_diff_iterate2 does not invoke callbacks for unallocated areas + * except for the case where an overlay has a hole where the parent + * has not. This here catches the case where no callback was + * invoked at all. + */ + req.bytes = bytes; + } + ret &= ~BDRV_BLOCK_DATA; + ret |= BDRV_BLOCK_ZERO; + } + *pnum = req.bytes; + +out: + if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) { + *file = bs; + } + return ret; +} + static int64_t qemu_rbd_getlength(BlockDriverState *bs) { BDRVRBDState *s = bs->opaque; @@ -1494,6 +1612,7 @@ static BlockDriver bdrv_rbd = { #ifdef LIBRBD_SUPPORTS_WRITE_ZEROES .bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes, #endif + .bdrv_co_block_status = qemu_rbd_co_block_status, .bdrv_snapshot_create = qemu_rbd_snap_create, .bdrv_snapshot_delete = qemu_rbd_snap_remove,
Signed-off-by: Peter Lieven <pl@kamp.de> --- block/rbd.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+)