Message ID | 1455615450-15138-3-git-send-email-xiecl.fnst@cn.fujitsu.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 16.02.2016 10:37, Changlong Xie wrote: > From: Wen Congyang <wency@cn.fujitsu.com> > > Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> > Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> > Signed-off-by: Gonglei <arei.gonglei@huawei.com> > Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com> > --- > block.c | 8 ++-- > block/quorum.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++- > include/block/block.h | 4 ++ > 3 files changed, 128 insertions(+), 6 deletions(-) > > diff --git a/block.c b/block.c > index 08aa979..c3c9dc0 100644 > --- a/block.c > +++ b/block.c > @@ -1198,10 +1198,10 @@ static int bdrv_fill_options(QDict **options, const char *filename, > return 0; > } > > -static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, > - BlockDriverState *child_bs, > - const char *child_name, > - const BdrvChildRole *child_role) > +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, > + BlockDriverState *child_bs, > + const char *child_name, > + const BdrvChildRole *child_role) > { > BdrvChild *child = g_new(BdrvChild, 1); > *child = (BdrvChild) { > diff --git a/block/quorum.c b/block/quorum.c > index a5ae4b8..e5a7e4f 100644 > --- a/block/quorum.c > +++ b/block/quorum.c > @@ -24,6 +24,7 @@ > #include "qapi/qmp/qstring.h" > #include "qapi-event.h" > #include "crypto/hash.h" > +#include "qemu/bitmap.h" > > #define HASH_LENGTH 32 > > @@ -81,6 +82,8 @@ typedef struct BDRVQuorumState { > bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted > * block if Quorum is reached. 
> */ > + unsigned long *index_bitmap; > + int bsize; > > QuorumReadPattern read_pattern; > } BDRVQuorumState; > @@ -876,9 +879,9 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, > ret = -EINVAL; > goto exit; > } > - if (s->num_children < 2) { > + if (s->num_children < 1) { > error_setg(&local_err, > - "Number of provided children must be greater than 1"); > + "Number of provided children must be 1 or more"); Side note: Actually, we could work with 0 children, too. Quorum would then need to implement bdrv_is_inserted() and return false if there are no children. But that is something that can be implemented later on if the need arises. > ret = -EINVAL; > goto exit; > } > @@ -927,6 +930,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, > /* allocate the children array */ > s->children = g_new0(BdrvChild *, s->num_children); > opened = g_new0(bool, s->num_children); > + s->index_bitmap = bitmap_new(s->num_children); > > for (i = 0; i < s->num_children; i++) { > char indexstr[32]; > @@ -942,6 +946,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, > > opened[i] = true; > } > + bitmap_set(s->index_bitmap, 0, s->num_children); > + s->bsize = s->num_children; > > g_free(opened); > goto exit; > @@ -998,6 +1004,115 @@ static void quorum_attach_aio_context(BlockDriverState *bs, > } > } > > +static int get_new_child_index(BDRVQuorumState *s) > +{ > + int index; > + > + index = find_next_zero_bit(s->index_bitmap, s->bsize, 0); > + if (index < s->bsize) { > + return index; > + } > + > + if ((s->bsize % BITS_PER_LONG) == 0) { > + s->index_bitmap = bitmap_zero_extend(s->index_bitmap, s->bsize, > + s->bsize + 1); I think this function needs to be called unconditionally. Looking into its implementation, its call to g_realloc() will not do anything (and it will probably be pretty quick at that), but the following bitmap_clear() will only clear the bits from old_nbits (s->bsize) to new_nbits (s->bsize + 1). 
Thus, if you only call this function every 32nd/64th child, only that child's bit will be initialized to zero. All the rest is undefined. You probably didn't notice because bitmap_new() returns a zero-initialized bitmap, and thus you'd have to create around 64 children (on an x64 machine) to notice. > + } > + > + return s->bsize++; > +} > + > +static void remove_child_index(BDRVQuorumState *s, int index) > +{ > + int last_index; > + long new_len; size_t would be the more appropriate type. > + > + assert(index < s->bsize); > + > + clear_bit(index, s->index_bitmap); > + if (index < s->bsize - 1) { > + /* > + * The last bit is always set, and we don't clear s/don't/didn't/ > + * the last bit. > + */ > + return; > + } > + > + last_index = find_last_bit(s->index_bitmap, s->bsize); An assert(last_index < s->bsize); here wouldn't hurt. (last_index == s->bsize would be the case if no bit is set in s->index_bitmap anymore, which should be impossible.) > + s->bsize = last_index + 1; > + if (BITS_TO_LONGS(last_index + 1) == BITS_TO_LONGS(s->bsize)) { > + return; > + } > + > + new_len = BITS_TO_LONGS(last_index + 1) * sizeof(unsigned long); s/last_index + 1/s->bsize/ looks better to me. 
> + s->index_bitmap = g_realloc(s->index_bitmap, new_len); > +} > + > +static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, > + Error **errp) > +{ > + BDRVQuorumState *s = bs->opaque; > + BdrvChild *child; > + char indexstr[32]; > + int index, ret; > + > + index = get_new_child_index(s); > + ret = snprintf(indexstr, 32, "children.%d", index); > + if (ret < 0 || ret >= 32) { > + error_setg(errp, "cannot generate child name"); > + return; > + } > + > + bdrv_drain(bs); > + > + assert(s->num_children <= INT_MAX / sizeof(BdrvChild *)); > + if (s->num_children == INT_MAX / sizeof(BdrvChild *)) { > + error_setg(errp, "Too many children"); > + return; > + } > + s->children = g_renew(BdrvChild *, s->children, s->num_children + 1); > + > + bdrv_ref(child_bs); > + child = bdrv_attach_child(bs, child_bs, indexstr, &child_format); > + s->children[s->num_children++] = child; > + set_bit(index, s->index_bitmap); > +} > + > +static void quorum_del_child(BlockDriverState *bs, BlockDriverState *child_bs, > + Error **errp) > +{ > + BDRVQuorumState *s = bs->opaque; > + BdrvChild *child; > + int i, index; > + > + for (i = 0; i < s->num_children; i++) { > + if (s->children[i]->bs == child_bs) { > + break; > + } > + } > + > + /* we have checked it in bdrv_del_child() */ > + assert(i < s->num_children); > + child = s->children[i]; > + > + if (s->num_children <= s->threshold) { > + error_setg(errp, > + "The number of children cannot be lower than the vote threshold %d", > + s->threshold); > + return; > + } > + > + /* child->name is "children.%d" */ Optional: assert(!strncmp(child->name, "children.", 9)); > + index = atoi(child->name + 9); Optional: Assert absence of an error: unsigned long index; char *endptr; index = strtoul(child->name + 9, &endptr, 10); assert(index >= 0 && !*endptr); Max > + > + bdrv_drain(bs); > + /* We can safely remove this child now */ > + memmove(&s->children[i], &s->children[i + 1], > + (s->num_children - i - 1) * sizeof(void *)); > + 
s->children = g_renew(BdrvChild *, s->children, --s->num_children); > + remove_child_index(s, index); > + bdrv_unref_child(bs, child); > +} > + > static void quorum_refresh_filename(BlockDriverState *bs, QDict *options) > { > BDRVQuorumState *s = bs->opaque; > @@ -1053,6 +1168,9 @@ static BlockDriver bdrv_quorum = { > .bdrv_detach_aio_context = quorum_detach_aio_context, > .bdrv_attach_aio_context = quorum_attach_aio_context, > > + .bdrv_add_child = quorum_add_child, > + .bdrv_del_child = quorum_del_child, > + > .is_filter = true, > .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, > }; > diff --git a/include/block/block.h b/include/block/block.h > index ecde190..4b787d2 100644 > --- a/include/block/block.h > +++ b/include/block/block.h > @@ -517,6 +517,10 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs); > void bdrv_ref(BlockDriverState *bs); > void bdrv_unref(BlockDriverState *bs); > void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); > +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, > + BlockDriverState *child_bs, > + const char *child_name, > + const BdrvChildRole *child_role); > > bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); > void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); >
On 03/06/2016 02:13 AM, Max Reitz wrote: > On 16.02.2016 10:37, Changlong Xie wrote: >> From: Wen Congyang <wency@cn.fujitsu.com> >> >> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> >> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com> >> Signed-off-by: Gonglei <arei.gonglei@huawei.com> >> Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com> >> --- >> block.c | 8 ++-- >> block/quorum.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++- >> include/block/block.h | 4 ++ >> 3 files changed, 128 insertions(+), 6 deletions(-) >> >> diff --git a/block.c b/block.c >> index 08aa979..c3c9dc0 100644 >> --- a/block.c >> +++ b/block.c >> @@ -1198,10 +1198,10 @@ static int bdrv_fill_options(QDict **options, const char *filename, >> return 0; >> } >> >> -static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, >> - BlockDriverState *child_bs, >> - const char *child_name, >> - const BdrvChildRole *child_role) >> +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, >> + BlockDriverState *child_bs, >> + const char *child_name, >> + const BdrvChildRole *child_role) >> { >> BdrvChild *child = g_new(BdrvChild, 1); >> *child = (BdrvChild) { >> diff --git a/block/quorum.c b/block/quorum.c >> index a5ae4b8..e5a7e4f 100644 >> --- a/block/quorum.c >> +++ b/block/quorum.c >> @@ -24,6 +24,7 @@ >> #include "qapi/qmp/qstring.h" >> #include "qapi-event.h" >> #include "crypto/hash.h" >> +#include "qemu/bitmap.h" >> >> #define HASH_LENGTH 32 >> >> @@ -81,6 +82,8 @@ typedef struct BDRVQuorumState { >> bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted >> * block if Quorum is reached. 
>> */ >> + unsigned long *index_bitmap; >> + int bsize; >> >> QuorumReadPattern read_pattern; >> } BDRVQuorumState; >> @@ -876,9 +879,9 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, >> ret = -EINVAL; >> goto exit; >> } >> - if (s->num_children < 2) { >> + if (s->num_children < 1) { >> error_setg(&local_err, >> - "Number of provided children must be greater than 1"); >> + "Number of provided children must be 1 or more"); > > Side note: Actually, we could work with 0 children, too. Quorum would > then need to implement bdrv_is_inserted() and return false if there are > no children. > > But that is something that can be implemented later on if the need arises. Hi Max Thanks for pointing it out. > >> ret = -EINVAL; >> goto exit; >> } >> @@ -927,6 +930,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, >> /* allocate the children array */ >> s->children = g_new0(BdrvChild *, s->num_children); >> opened = g_new0(bool, s->num_children); >> + s->index_bitmap = bitmap_new(s->num_children); >> >> for (i = 0; i < s->num_children; i++) { >> char indexstr[32]; >> @@ -942,6 +946,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, >> >> opened[i] = true; >> } >> + bitmap_set(s->index_bitmap, 0, s->num_children); >> + s->bsize = s->num_children; >> >> g_free(opened); >> goto exit; >> @@ -998,6 +1004,115 @@ static void quorum_attach_aio_context(BlockDriverState *bs, >> } >> } >> >> +static int get_new_child_index(BDRVQuorumState *s) >> +{ >> + int index; >> + >> + index = find_next_zero_bit(s->index_bitmap, s->bsize, 0); >> + if (index < s->bsize) { >> + return index; >> + } >> + >> + if ((s->bsize % BITS_PER_LONG) == 0) { >> + s->index_bitmap = bitmap_zero_extend(s->index_bitmap, s->bsize, >> + s->bsize + 1); > > I think this function needs to be called unconditionally. 
Looking into > its implementation, its call to g_realloc() will not do anything (and it > will probably be pretty quick at that), but the following bitmap_clear() Yes. If "BITS_TO_LONGS(new_nbits) == BITS_TO_LONGS(old_nbits)", g_realloc will do nothing. > will only clear the bits from old_nbits (s->bsize) to new_nbits > (s->bsize + 1). > > Thus, if you only call this function every 32nd/64th child, only that > child's bit will be initialized to zero. All the rest is undefined. > > You probably didn't notice because bitmap_new() returns a > zero-initialized bitmap, and thus you'd have to create around 64 > children (on an x64 machine) to notice OOH! you're catching a *BIG* fish here. I'll remove the wrong "if" condition next version. *Thanks* > >> + } >> + >> + return s->bsize++; >> +} >> + >> +static void remove_child_index(BDRVQuorumState *s, int index) >> +{ >> + int last_index; >> + long new_len; > > size_t would be the more appropriate type. okay > >> + >> + assert(index < s->bsize); >> + >> + clear_bit(index, s->index_bitmap); >> + if (index < s->bsize - 1) { >> + /* >> + * The last bit is always set, and we don't clear > > s/don't/didn't/ I'm going to remove "and we don't clear the last bit" here. > >> + * the last bit. >> + */ >> + return; >> + } >> + >> + last_index = find_last_bit(s->index_bitmap, s->bsize); > > An assert(last_index < s->bsize); here wouldn't hurt. > okay. > (last_index == s->bsize would be the case if no bit is set in > s->index_bitmap anymore, which should be impossible.) > >> + s->bsize = last_index + 1; >> + if (BITS_TO_LONGS(last_index + 1) == BITS_TO_LONGS(s->bsize)) { I correct myself here, it should be "BITS_TO_LONGS(old_bsize) == BITS_TO_LONGS(s->bsize)". >> + return; >> + } >> + >> + new_len = BITS_TO_LONGS(last_index + 1) * sizeof(unsigned long); > > s/last_index + 1/s->bsize/ looks better to me. okay. 
> >> + s->index_bitmap = g_realloc(s->index_bitmap, new_len); >> +} >> + >> +static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, >> + Error **errp) >> +{ >> + BDRVQuorumState *s = bs->opaque; >> + BdrvChild *child; >> + char indexstr[32]; >> + int index, ret; >> + >> + index = get_new_child_index(s); >> + ret = snprintf(indexstr, 32, "children.%d", index); >> + if (ret < 0 || ret >= 32) { >> + error_setg(errp, "cannot generate child name"); >> + return; >> + } >> + >> + bdrv_drain(bs); >> + >> + assert(s->num_children <= INT_MAX / sizeof(BdrvChild *)); >> + if (s->num_children == INT_MAX / sizeof(BdrvChild *)) { >> + error_setg(errp, "Too many children"); >> + return; >> + } >> + s->children = g_renew(BdrvChild *, s->children, s->num_children + 1); >> + >> + bdrv_ref(child_bs); >> + child = bdrv_attach_child(bs, child_bs, indexstr, &child_format); >> + s->children[s->num_children++] = child; >> + set_bit(index, s->index_bitmap); >> +} >> + >> +static void quorum_del_child(BlockDriverState *bs, BlockDriverState *child_bs, >> + Error **errp) >> +{ >> + BDRVQuorumState *s = bs->opaque; >> + BdrvChild *child; >> + int i, index; >> + >> + for (i = 0; i < s->num_children; i++) { >> + if (s->children[i]->bs == child_bs) { >> + break; >> + } >> + } >> + >> + /* we have checked it in bdrv_del_child() */ >> + assert(i < s->num_children); >> + child = s->children[i]; >> + >> + if (s->num_children <= s->threshold) { >> + error_setg(errp, >> + "The number of children cannot be lower than the vote threshold %d", >> + s->threshold); >> + return; >> + } >> + >> + /* child->name is "children.%d" */ > > Optional: assert(!strncmp(child->name, "children.", 9)); > >> + index = atoi(child->name + 9); > > Optional: Assert absence of an error: > > unsigned long index; > char *endptr; > > index = strtoul(child->name + 9, &endptr, 10); > assert(index >= 0 && !*endptr); Really useful, but since we strictly named 'child->name' in quorum_add_child, let's just keep 
the original one. Thanks -Xie > > Max > >> + >> + bdrv_drain(bs); >> + /* We can safely remove this child now */ >> + memmove(&s->children[i], &s->children[i + 1], >> + (s->num_children - i - 1) * sizeof(void *)); >> + s->children = g_renew(BdrvChild *, s->children, --s->num_children); >> + remove_child_index(s, index); >> + bdrv_unref_child(bs, child); >> +} >> + >> static void quorum_refresh_filename(BlockDriverState *bs, QDict *options) >> { >> BDRVQuorumState *s = bs->opaque; >> @@ -1053,6 +1168,9 @@ static BlockDriver bdrv_quorum = { >> .bdrv_detach_aio_context = quorum_detach_aio_context, >> .bdrv_attach_aio_context = quorum_attach_aio_context, >> >> + .bdrv_add_child = quorum_add_child, >> + .bdrv_del_child = quorum_del_child, >> + >> .is_filter = true, >> .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, >> }; >> diff --git a/include/block/block.h b/include/block/block.h >> index ecde190..4b787d2 100644 >> --- a/include/block/block.h >> +++ b/include/block/block.h >> @@ -517,6 +517,10 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs); >> void bdrv_ref(BlockDriverState *bs); >> void bdrv_unref(BlockDriverState *bs); >> void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); >> +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, >> + BlockDriverState *child_bs, >> + const char *child_name, >> + const BdrvChildRole *child_role); >> >> bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); >> void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); >> > >
On 03/05/2016 11:13 AM, Max Reitz wrote: >> + index = atoi(child->name + 9); > > Optional: Assert absence of an error: > Indeed, atoi() is worthless, because it cannot do error detection. > unsigned long index; > char *endptr; > > index = strtoul(child->name + 9, &endptr, 10); > assert(index >= 0 && !*endptr); Still incorrect; you aren't handling errno properly for detecting all errors. Even better is to use qemu_strtoul(), which already handles proper error detection.
On 07.03.2016 17:02, Eric Blake wrote: > On 03/05/2016 11:13 AM, Max Reitz wrote: > >>> + index = atoi(child->name + 9); >> >> Optional: Assert absence of an error: >> > > Indeed, atoi() is worthless, because it cannot do error detection. > >> unsigned long index; >> char *endptr; >> >> index = strtoul(child->name + 9, &endptr, 10); >> assert(index >= 0 && !*endptr); > > Still incorrect; you aren't handling errno properly for detecting all > errors. Even better is to use qemu_strtoul(), which already handles > proper error detection. Yeah, I keep forgetting that it returns ULONG_MAX on range error... Max
On 03/08/2016 12:02 AM, Eric Blake wrote: > On 03/05/2016 11:13 AM, Max Reitz wrote: > >>> + index = atoi(child->name + 9); >> >> Optional: Assert absence of an error: >> > > Indeed, atoi() is worthless, because it cannot do error detection. > >> unsigned long index; >> char *endptr; >> >> index = strtoul(child->name + 9, &endptr, 10); >> assert(index >= 0 && !*endptr); > > Still incorrect; you aren't handling errno properly for detecting all > errors. Even better is to use qemu_strtoul(), which already handles > proper error detection. > Will fix this in next version, thanks for pointing it out. Thanks -Xie
On 03/08/2016 12:02 AM, Max Reitz wrote: > On 07.03.2016 17:02, Eric Blake wrote: >> On 03/05/2016 11:13 AM, Max Reitz wrote: >> >>>> + index = atoi(child->name + 9); >>> >>> Optional: Assert absence of an error: >>> >> >> Indeed, atoi() is worthless, because it cannot do error detection. >> >>> unsigned long index; >>> char *endptr; >>> >>> index = strtoul(child->name + 9, &endptr, 10); >>> assert(index >= 0 && !*endptr); >> >> Still incorrect; you aren't handling errno properly for detecting all >> errors. Even better is to use qemu_strtoul(), which already handles >> proper error detection. > > Yeah, I keep forgetting that it returns ULONG_MAX on range error... Yes, we should limit the range to INT_MAX. How do you like the following code? I just took it from xen_host_pci_get_value(). int rc; const char *endptr; unsigned long value; assert(!strncmp(child->name, "children.", 9)); rc = qemu_strtoul(child->name + 9, &endptr, 10, &value); if (!rc) { assert(value <= INT_MAX); index = value; } else { error_setg_errno(errp, -rc, "Failed to parse value '%s'", child->name + 9); return; } Thanks -Xie > > Max >
On 08.03.2016 03:57, Changlong Xie wrote: > On 03/08/2016 12:02 AM, Max Reitz wrote: >> On 07.03.2016 17:02, Eric Blake wrote: >>> On 03/05/2016 11:13 AM, Max Reitz wrote: >>> >>>>> + index = atoi(child->name + 9); >>>> >>>> Optional: Assert absence of an error: >>>> >>> >>> Indeed, atoi() is worthless, because it cannot do error detection. >>> >>>> unsigned long index; >>>> char *endptr; >>>> >>>> index = strtoul(child->name + 9, &endptr, 10); >>>> assert(index >= 0 && !*endptr); >>> >>> Still incorrect; you aren't handling errno properly for detecting all >>> errors. Even better is to use qemu_strtoul(), which already handles >>> proper error detection. >> >> Yeah, I keep forgetting that it returns ULONG_MAX on range error... > > Yes, we should limit the range to INT_MAX. How do you like the following > codes, i just steal it from xen_host_pci_get_value(). > > int rc; > const char *endptr; > unsigned long value; > > assert(!strncmp(child->name, "children.", 9)); > rc = qemu_strtoul(child->name + 9, &endptr, 10, &value); Passing NULL instead of &endptr will make qemu_strtoul() check that the string passed to it (child->name + 9) only consists of a number; which should be true here, so you can do that (pass NULL instead of &endptr). > if (!rc) { > assert(value <= INT_MAX); > index = value; > } else { > error_setg_errno(errp, -rc, "Failed to parse value '%s'", > child->name + 9); > return; > } You could simplify this as assert(!rc && value <= INT_MAX); index = value; (It should be impossible for qemu_strtoul() to return an error here, so an assert() is just as fine as a normal error.) And you could get rid of the index = value assignment by making index an unsigned long and replacing all instances of "value" by "index". Max > > Thanks > -Xie > >> >> Max >> > >
diff --git a/block.c b/block.c index 08aa979..c3c9dc0 100644 --- a/block.c +++ b/block.c @@ -1198,10 +1198,10 @@ static int bdrv_fill_options(QDict **options, const char *filename, return 0; } -static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, - BlockDriverState *child_bs, - const char *child_name, - const BdrvChildRole *child_role) +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const char *child_name, + const BdrvChildRole *child_role) { BdrvChild *child = g_new(BdrvChild, 1); *child = (BdrvChild) { diff --git a/block/quorum.c b/block/quorum.c index a5ae4b8..e5a7e4f 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -24,6 +24,7 @@ #include "qapi/qmp/qstring.h" #include "qapi-event.h" #include "crypto/hash.h" +#include "qemu/bitmap.h" #define HASH_LENGTH 32 @@ -81,6 +82,8 @@ typedef struct BDRVQuorumState { bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted * block if Quorum is reached. */ + unsigned long *index_bitmap; + int bsize; QuorumReadPattern read_pattern; } BDRVQuorumState; @@ -876,9 +879,9 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, ret = -EINVAL; goto exit; } - if (s->num_children < 2) { + if (s->num_children < 1) { error_setg(&local_err, - "Number of provided children must be greater than 1"); + "Number of provided children must be 1 or more"); ret = -EINVAL; goto exit; } @@ -927,6 +930,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, /* allocate the children array */ s->children = g_new0(BdrvChild *, s->num_children); opened = g_new0(bool, s->num_children); + s->index_bitmap = bitmap_new(s->num_children); for (i = 0; i < s->num_children; i++) { char indexstr[32]; @@ -942,6 +946,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, opened[i] = true; } + bitmap_set(s->index_bitmap, 0, s->num_children); + s->bsize = s->num_children; g_free(opened); goto exit; @@ -998,6 +1004,115 @@ static 
void quorum_attach_aio_context(BlockDriverState *bs, } } +static int get_new_child_index(BDRVQuorumState *s) +{ + int index; + + index = find_next_zero_bit(s->index_bitmap, s->bsize, 0); + if (index < s->bsize) { + return index; + } + + if ((s->bsize % BITS_PER_LONG) == 0) { + s->index_bitmap = bitmap_zero_extend(s->index_bitmap, s->bsize, + s->bsize + 1); + } + + return s->bsize++; +} + +static void remove_child_index(BDRVQuorumState *s, int index) +{ + int last_index; + long new_len; + + assert(index < s->bsize); + + clear_bit(index, s->index_bitmap); + if (index < s->bsize - 1) { + /* + * The last bit is always set, and we don't clear + * the last bit. + */ + return; + } + + last_index = find_last_bit(s->index_bitmap, s->bsize); + s->bsize = last_index + 1; + if (BITS_TO_LONGS(last_index + 1) == BITS_TO_LONGS(s->bsize)) { + return; + } + + new_len = BITS_TO_LONGS(last_index + 1) * sizeof(unsigned long); + s->index_bitmap = g_realloc(s->index_bitmap, new_len); +} + +static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs, + Error **errp) +{ + BDRVQuorumState *s = bs->opaque; + BdrvChild *child; + char indexstr[32]; + int index, ret; + + index = get_new_child_index(s); + ret = snprintf(indexstr, 32, "children.%d", index); + if (ret < 0 || ret >= 32) { + error_setg(errp, "cannot generate child name"); + return; + } + + bdrv_drain(bs); + + assert(s->num_children <= INT_MAX / sizeof(BdrvChild *)); + if (s->num_children == INT_MAX / sizeof(BdrvChild *)) { + error_setg(errp, "Too many children"); + return; + } + s->children = g_renew(BdrvChild *, s->children, s->num_children + 1); + + bdrv_ref(child_bs); + child = bdrv_attach_child(bs, child_bs, indexstr, &child_format); + s->children[s->num_children++] = child; + set_bit(index, s->index_bitmap); +} + +static void quorum_del_child(BlockDriverState *bs, BlockDriverState *child_bs, + Error **errp) +{ + BDRVQuorumState *s = bs->opaque; + BdrvChild *child; + int i, index; + + for (i = 0; i < 
s->num_children; i++) { + if (s->children[i]->bs == child_bs) { + break; + } + } + + /* we have checked it in bdrv_del_child() */ + assert(i < s->num_children); + child = s->children[i]; + + if (s->num_children <= s->threshold) { + error_setg(errp, + "The number of children cannot be lower than the vote threshold %d", + s->threshold); + return; + } + + /* child->name is "children.%d" */ + index = atoi(child->name + 9); + + bdrv_drain(bs); + /* We can safely remove this child now */ + memmove(&s->children[i], &s->children[i + 1], + (s->num_children - i - 1) * sizeof(void *)); + s->children = g_renew(BdrvChild *, s->children, --s->num_children); + remove_child_index(s, index); + bdrv_unref_child(bs, child); +} + static void quorum_refresh_filename(BlockDriverState *bs, QDict *options) { BDRVQuorumState *s = bs->opaque; @@ -1053,6 +1168,9 @@ static BlockDriver bdrv_quorum = { .bdrv_detach_aio_context = quorum_detach_aio_context, .bdrv_attach_aio_context = quorum_attach_aio_context, + .bdrv_add_child = quorum_add_child, + .bdrv_del_child = quorum_del_child, + .is_filter = true, .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, }; diff --git a/include/block/block.h b/include/block/block.h index ecde190..4b787d2 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -517,6 +517,10 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs); void bdrv_ref(BlockDriverState *bs); void bdrv_unref(BlockDriverState *bs); void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const char *child_name, + const BdrvChildRole *child_role); bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);