Message ID: 1494842563-6534-1-git-send-email-pl@kamp.de (mailing list archive)
State: New, archived
On Mon, 05/15 12:02, Peter Lieven wrote:
> Hi Block developers,
>
> I would like to add a feature to QEMU to drain all traffic from a block device so
> that I can take external snapshots without the risk of hitting it in the middle of
> a write operation. It is meant for cases where QGA freeze/thaw is not available.
>
> For me it is enough to have this through qemu-io, but Kevin asked me to check
> whether it would not be worth having a stable API for it and presenting it via
> QMP/HMP.
>
> What are your thoughts?

For debugging purposes or "hacky" usage where you know what you are doing, it may be
fine to have this. The only issue is that it should be a separate flag, like
BlockJob.user_paused.

What happens from the guest perspective? In the case of virtio, the request queue is
not handled and -ETIMEDOUT may happen. With IDE, I/O commands are still handled, so
the command is not effective (or rather the implementation is not complete).

Fam
On 15.05.2017 at 12:50, Fam Zheng wrote:
> For debugging purposes or "hacky" usage where you know what you are doing, it may
> be fine to have this. The only issue is that it should be a separate flag, like
> BlockJob.user_paused.

How can I add and remove such a flag?

> What happens from the guest perspective? In the case of virtio, the request queue
> is not handled and -ETIMEDOUT may happen. With IDE, I/O commands are still handled,
> so the command is not effective (or rather the implementation is not complete).

That it only works with virtio is fine. However, the cases where it does not work
correctly then also apply to all other users of the drained_begin/end functions,
right? As for the timeout, I only plan to drain the device for about one second.

Thanks,
Peter
On Mon, 05/15 13:26, Peter Lieven wrote:
>> The only issue is that it should be a separate flag, like BlockJob.user_paused.
>
> How can I add and remove such a flag?

Like bs->user_drained. Set it in the "drain" command, then increment
bs->quiesce_counter if it toggled; vice versa for "undrain".

> That it only works with virtio is fine. However, the cases where it does not work
> correctly then also apply to all other users of the drained_begin/end functions,
> right? As for the timeout, I only plan to drain the device for about one second.

It doesn't matter because for IDE, the invariant (staying quiesced as long as
necessary) is already ensured by the BQL. Virtio is different because it supports
ioeventfd and data plane.

Fam
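For illustration, a minimal sketch of the flag toggle being discussed. The
bs->user_drained field and the helper name are hypothetical, not existing QEMU code;
it pairs the flag with bdrv_drained_begin/end (the variant the thread below settles
on) so that "drain" and "undrain" cannot be applied twice:

    /* Hypothetical helper, not part of QEMU: toggle a user-requested drain. */
    static void user_drain_set(BlockDriverState *bs, bool drain)
    {
        if (drain && !bs->user_drained) {
            bs->user_drained = true;
            bdrv_drained_begin(bs);  /* quiesce the node, wait for in-flight I/O */
        } else if (!drain && bs->user_drained) {
            bs->user_drained = false;
            bdrv_drained_end(bs);    /* resume I/O */
        }
    }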
On 15.05.2017 at 13:53, Fam Zheng wrote:
> Like bs->user_drained. Set it in the "drain" command, then increment
> bs->quiesce_counter if it toggled; vice versa for "undrain".

Ah okay. You wouldn't use bdrv_drained_begin/end? Because in these functions the
counter is incremented already.

>> That it only works with virtio is fine. However, the cases where it does not work
>> correctly then also apply to all other users of the drained_begin/end functions,
>> right? As for the timeout, I only plan to drain the device for about one second.
>
> It doesn't matter because for IDE, the invariant (staying quiesced as long as
> necessary) is already ensured by the BQL. Virtio is different because it supports
> ioeventfd and data plane.

Okay, understood. So my use of bdrv_drained_begin/end is more an abuse of these
functions?

Do you have another idea how to achieve what I want? I was thinking of throttling
the I/O to zero. It would be enough to do this for writes; reading doesn't hurt in
my case.

Peter
On Mon, 05/15 13:58, Peter Lieven wrote:
> Ah okay. You wouldn't use bdrv_drained_begin/end? Because in these functions the
> counter is incremented already.

You're right, calling bdrv_drained_begin() is better.

> Okay, understood. So my use of bdrv_drained_begin/end is more an abuse of these
> functions?

Sort of. But it's not unreasonable to "extend" bdrv_drained_begin/end to cover IDE,
I just haven't thought about "how".

> Do you have another idea how to achieve what I want? I was thinking of throttling
> the I/O to zero. It would be enough to do this for writes; reading doesn't hurt in
> my case.

Maybe add a block filter on top of the drained node, drain it when doing so, then
queue all further requests with a CoQueue until "undrain". (It is then not quite a
"drain" but a "halt" or "pause", though.)

Fam
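A rough sketch of the filter idea suggested above. The driver name, the state
structure and the handlers are hypothetical; only the general CoQueue pattern is
taken from the suggestion, and the qemu_co_queue_wait() signature shown is the
mutex-taking variant from the QEMU 2.9 era:

    /* Hypothetical pause filter, not existing QEMU code. */
    typedef struct PauseFilterState {
        bool paused;          /* set by "pause"/"drain", cleared by "undrain" */
        CoQueue waiting;      /* requests parked while paused */
    } PauseFilterState;

    static coroutine_fn int pause_filter_co_pwritev(BlockDriverState *bs,
                                                    uint64_t offset, uint64_t bytes,
                                                    QEMUIOVector *qiov, int flags)
    {
        PauseFilterState *s = bs->opaque;

        while (s->paused) {
            qemu_co_queue_wait(&s->waiting, NULL);  /* park until "undrain" */
        }
        return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
    }

    static void pause_filter_resume(BlockDriverState *bs)
    {
        PauseFilterState *s = bs->opaque;

        s->paused = false;
        qemu_co_queue_restart_all(&s->waiting);     /* wake all parked requests */
    }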
On 15.05.2017 at 14:28, Fam Zheng wrote:
> Maybe add a block filter on top of the drained node, drain it when doing so, then
> queue all further requests with a CoQueue until "undrain". (It is then not quite a
> "drain" but a "halt" or "pause", though.)

Getting the drain for free was why I was looking at this approach. If I read you
correctly, as long as I keep using bdrv_drained_begin/end it is too hacky to
implement in QMP?

If yes, would you support adding it to qemu-io?

Thanks,
Peter
On Mon, 05/15 14:32, Peter Lieven wrote:
> Getting the drain for free was why I was looking at this approach. If I read you
> correctly, as long as I keep using bdrv_drained_begin/end it is too hacky to
> implement in QMP?

I think so.

> If yes, would you support adding it to qemu-io?

I'm under the impression that you are looking at a real use case, so I don't think
I like the idea. Also, accessing the image from other processes while QEMU is using
it is strongly discouraged, and there is the coming image locking mechanism to
prevent this from happening. Why is the blockdev-snapshot command not enough?

Fam
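For reference, the external-snapshot flow referred to here already exists as a QMP
command; a minimal example (the device name and file name are placeholders):

    { "execute": "blockdev-snapshot-sync",
      "arguments": { "device": "drive0",
                     "snapshot-file": "/images/vm-overlay.qcow2",
                     "format": "qcow2" } }

After the now read-only backing file has been copied away, the overlay can be merged
back with block-commit (an active commit job that is finished with
block-job-complete).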
On 15.05.2017 at 14:52, Fam Zheng wrote:
> I'm under the impression that you are looking at a real use case, so I don't think
> I like the idea. Also, accessing the image from other processes while QEMU is
> using it is strongly discouraged, and there is the coming image locking mechanism
> to prevent this from happening. Why is the blockdev-snapshot command not enough?

blockdev-snapshot is enough, but I still fear the case where there is suddenly too
much I/O for the live commit, and that the whole snapshot/commit code is more
sensitive than just stopping I/O for a second or two.

Do you have a pointer to the image locking mechanism?

Peter
On 15.05.2017 at 14:52, Fam Zheng wrote:
> On Mon, 05/15 14:32, Peter Lieven wrote:
>> If yes, would you support adding it to qemu-io?
>
> I'm under the impression that you are looking at a real use case, so I don't think
> I like the idea. Also, accessing the image from other processes while QEMU is
> using it is strongly discouraged, and there is the coming image locking mechanism
> to prevent this from happening.

Thinking a bit more about this, it looks to me as if what we really want is
inactivating the image. Should we add an option to the 'stop' command (or introduce
another command to be used on an already stopped VM) that inactivates all/some
images? And then 'cont' regains control over the images, just like after migration.

Automatically stopping the VM while the snapshot is taken also makes it work with
IDE and prevents guests from running into timeouts, which makes it look much more
like a proper solution to me.

But then, stop/cont would already achieve something very similar today (as 'stop'
calls bdrv_drain_all(); just the locking part wouldn't be covered), so maybe there
is a reason not to use it in your case, Peter?

Kevin
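For illustration, the stop/cont variant mentioned above needs nothing beyond today's
QMP commands; only the middle step happens outside QEMU:

    { "execute": "stop" }
    (the guest is paused and all block devices are drained)
    ... take the external snapshot on the storage backend ...
    { "execute": "cont" }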
On Mon, 05/15 15:01, Peter Lieven wrote:
> blockdev-snapshot is enough, but I still fear the case where there is suddenly too
> much I/O for the live commit, and that the whole snapshot/commit code is more
> sensitive than just stopping I/O for a second or two.

In this case, the image fleecing approach may be what you need. It creates a
temporary point-in-time snapshot which is lightweight and disposable. Something
like:

https://lists.gnu.org/archive/html/qemu-devel/2014-03/msg01359.html

(CCing John, who may have more up-to-date pointers)

> Do you have a pointer to the image locking mechanism?

It hit qemu.git master just a moment ago. See raw_check_perm.

Fam
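As a rough outline (not taken from the linked series; node names, file names and the
exact option set are placeholders), image fleecing amounts to creating a temporary
overlay whose backing node is the live image (the qcow2 file is created beforehand
with qemu-img create) and then starting a backup job with sync=none into it, so that
the overlay preserves the point-in-time view while the guest keeps writing:

    { "execute": "blockdev-add",
      "arguments": { "driver": "qcow2", "node-name": "fleece0",
                     "file": { "driver": "file",
                               "filename": "/images/fleecing-tmp.qcow2" },
                     "backing": "drive0" } }

    { "execute": "blockdev-backup",
      "arguments": { "device": "drive0", "target": "fleece0", "sync": "none" } }

The fleece0 node can then be read (e.g. exported over NBD) to obtain the snapshot
and is simply discarded afterwards.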
On 15.05.2017 at 15:35, Fam Zheng wrote:
> On Mon, 05/15 15:01, Peter Lieven wrote:
>> blockdev-snapshot is enough, but I still fear the case where there is suddenly
>> too much I/O for the live commit, and that the whole snapshot/commit code is more
>> sensitive than just stopping I/O for a second or two.
>
> In this case, the image fleecing approach may be what you need. It creates a
> temporary point-in-time snapshot which is lightweight and disposable. Something
> like:
>
> https://lists.gnu.org/archive/html/qemu-devel/2014-03/msg01359.html
>
> (CCing John, who may have more up-to-date pointers)

We discussed it a few months ago. But maybe he has an update.

>> Do you have a pointer to the image locking mechanism?
>
> It hit qemu.git master just a moment ago. See raw_check_perm.

Which master are you looking at?

$ git fetch upstream
$ git log upstream/master --oneline
3a87606 Merge tag 'tracing-pull-request' into staging
b54933e Merge tag 'block-pull-request' into staging
3753e25 Merge remote-tracking branch 'kwolf/tags/for-upstream' into staging
5651743 trace: add sanity check
321d1db aio: add missing aio_notify() to aio_enable_external()
ee29d6a block: Simplify BDRV_BLOCK_RAW recursion
33c53c5 coroutine: remove GThread implementation
ecc1f5a maintainers: Add myself as linux-user reviewer
d541e20 Merge remote-tracking branch 'mreitz/tags/pull-block-2017-05-11' into queue-block
8dd30c8 MAINTAINERS: Add qemu-progress to the block layer
d2cb36a qcow2: Discard/zero clusters by byte count
f10ee13 qcow2: Assert that cluster operations are aligned
fbaa6bb qcow2: Optimize write zero of unaligned tail cluster
e249d51 iotests: Add test 179 to cover write zeroes with unmap
d9ca221 iotests: Improve _filter_qemu_img_map
06cc5e2 qcow2: Optimize zero_single_l2() to minimize L2 churn
fdfab37 qcow2: Make distinction between zero cluster types obvious
3ef9521 qcow2: Name typedef for cluster type
4341df8 qcow2: Correctly report status of preallocated zero clusters
4c41cb4 block: Update comments on BDRV_BLOCK_* meanings

Thanks,
Peter
On Mon, 05/15 16:02, Peter Lieven wrote:
>>> Do you have a pointer to the image locking mechanism?
>>
>> It hit qemu.git master just a moment ago. See raw_check_perm.
>
> Which master are you looking at?
>
> $ git fetch upstream
> $ git log upstream/master --oneline
> [...]

It's 244a5668 "file-posix: Add image locking to perm operations".

Fam
On 15.05.2017 at 15:02, Kevin Wolf wrote:
> Thinking a bit more about this, it looks to me as if what we really want is
> inactivating the image. Should we add an option to the 'stop' command (or
> introduce another command to be used on an already stopped VM) that inactivates
> all/some images? And then 'cont' regains control over the images, just like after
> migration.
>
> Automatically stopping the VM while the snapshot is taken also makes it work with
> IDE and prevents guests from running into timeouts, which makes it look much more
> like a proper solution to me.
>
> But then, stop/cont would already achieve something very similar today (as 'stop'
> calls bdrv_drain_all(); just the locking part wouldn't be covered), so maybe there
> is a reason not to use it in your case, Peter?

Maybe this is indeed the best solution. Locking won't work in my case because I
don't use a POSIX file in the backend. Clever solution, thanks Kevin.

Peter
On Mon, May 15, 2017 at 12:02:43PM +0200, Peter Lieven wrote:
> I would like to add a feature to QEMU to drain all traffic from a block device so
> that I can take external snapshots without the risk of hitting it in the middle of
> a write operation. It is meant for cases where QGA freeze/thaw is not available.
>
> For me it is enough to have this through qemu-io, but Kevin asked me to check
> whether it would not be worth having a stable API for it and presenting it via
> QMP/HMP.
>
> What are your thoughts?

Are the existing snapshot commands and block jobs not sufficient for taking external
snapshots?
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
index 312fc6d..49d82fe 100644
--- a/qemu-io-cmds.c
+++ b/qemu-io-cmds.c
@@ -1565,6 +1565,82 @@ static const cmdinfo_t flush_cmd = {
     .oneline = "flush all in-core file state to disk",
 };
 
+static const cmdinfo_t drain_cmd;
+
+static int drain_f(BlockBackend *blk, int argc, char **argv)
+{
+    BlockDriverState *bs = blk_bs(blk);
+    bool flush = false;
+    int c;
+
+    while ((c = getopt(argc, argv, "f")) != -1) {
+        switch (c) {
+        case 'f':
+            flush = true;
+            break;
+        default:
+            return qemuio_command_usage(&drain_cmd);
+        }
+    }
+
+    if (optind != argc) {
+        return qemuio_command_usage(&drain_cmd);
+    }
+
+
+    if (bs->quiesce_counter) {
+        printf("drain failed: device is already drained!\n");
+        return 1;
+    }
+
+    bdrv_drained_begin(bs); /* complete I/O */
+    if (flush) {
+        bdrv_flush(bs);
+        bdrv_drain(bs); /* in case flush left pending I/O */
+        printf("flushed all pending I/O\n");
+    }
+    printf("drain successful\n");
+    return 0;
+}
+
+static void drain_help(void)
+{
+    printf(
+"\n"
+" Drains all external I/O from the device\n"
+"\n"
+" -f, -- flush all in-core file state to disk\n"
+"\n");
+}
+
+static const cmdinfo_t drain_cmd = {
+    .name = "drain",
+    .cfunc = drain_f,
+    .args = "[-f]",
+    .argmin = 0,
+    .argmax = -1,
+    .oneline = "cease to send I/O to the device",
+    .help = drain_help
+};
+
+static int undrain_f(BlockBackend *blk, int argc, char **argv)
+{
+    BlockDriverState *bs = blk_bs(blk);
+    if (!bs->quiesce_counter) {
+        printf("undrain failed: device is not drained!\n");
+        return 1;
+    }
+    bdrv_drained_end(bs);
+    printf("undrain successful\n");
+    return 0;
+}
+
+static const cmdinfo_t undrain_cmd = {
+    .name = "undrain",
+    .cfunc = undrain_f,
+    .oneline = "continue I/O to a drained device",
+};
+
 static int truncate_f(BlockBackend *blk, int argc, char **argv)
 {
     int64_t offset;
@@ -2296,6 +2372,8 @@ static void __attribute((constructor)) init_qemuio_commands(void)
     qemuio_add_command(&aio_write_cmd);
     qemuio_add_command(&aio_flush_cmd);
     qemuio_add_command(&flush_cmd);
+    qemuio_add_command(&drain_cmd);
+    qemuio_add_command(&undrain_cmd);
     qemuio_add_command(&truncate_cmd);
     qemuio_add_command(&length_cmd);
     qemuio_add_command(&info_cmd);
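For context, one plausible way the new commands would be used on a running guest is
via the HMP "qemu-io" monitor command, assuming this patch is applied (the drive
name is a placeholder):

    (qemu) qemu-io drive0 "drain -f"
    ... take the external snapshot on the storage backend ...
    (qemu) qemu-io drive0 "undrain"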