diff mbox

[v2,1/3] block: add bdrv_co_drain_end callback

Message ID 20170921131707.17364-2-el13635@mail.ntua.gr (mailing list archive)
State New, archived
Headers show

Commit Message

Manos Pitsidianakis Sept. 21, 2017, 1:17 p.m. UTC
BlockDriverState has a bdrv_do_drain() callback but no equivalent for the end
of the drain. The throttle driver (block/throttle.c) needs a way to mark the
end of the drain in order to toggle io_limits_disabled correctly, thus
bdrv_co_drain_end is needed.

Signed-off-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
---
 include/block/block_int.h |  6 ++++++
 block/io.c                | 48 +++++++++++++++++++++++++++++++++--------------
 2 files changed, 40 insertions(+), 14 deletions(-)

Comments

Fam Zheng Sept. 21, 2017, 1:29 p.m. UTC | #1
On Thu, 09/21 16:17, Manos Pitsidianakis wrote:
> BlockDriverState has a bdrv_do_drain() callback but no equivalent for the end

s/bdrv_do_drain/bdrv_co_drain/

> of the drain. The throttle driver (block/throttle.c) needs a way to mark the
> end of the drain in order to toggle io_limits_disabled correctly, thus
> bdrv_co_drain_end is needed.
> 
> Signed-off-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
> ---
>  include/block/block_int.h |  6 ++++++
>  block/io.c                | 48 +++++++++++++++++++++++++++++++++--------------
>  2 files changed, 40 insertions(+), 14 deletions(-)
> 
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index ba4c383393..21950cfda3 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -356,8 +356,14 @@ struct BlockDriver {
>      /**
>       * Drain and stop any internal sources of requests in the driver, and
>       * remain so until next I/O callback (e.g. bdrv_co_writev) is called.

This line needs update too, maybe:

       /**
        * bdrv_co_drain drains and stops any ... and remain so until
        * bdrv_co_drain_end is called.

> +     *
> +     * The callbacks are called at the beginning and ending of the drain
> +     * respectively. They should be implemented if the driver needs to e.g.

As implied by above change, should we explicitly require "should both be
implemented"? It may not be technically required, but I think is cleaner and
easier to reason about.

> +     * manage scheduled I/O requests, or toggle an internal state. After an

s/After an/After a/

> +     * bdrv_co_drain_end() invocation new requests will continue normally.
>       */
>      void coroutine_fn (*bdrv_co_drain)(BlockDriverState *bs);
> +    void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
>  
>      void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
>                             Error **errp);
> diff --git a/block/io.c b/block/io.c
> index 4378ae4c7d..b0a10ad3ef 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -153,6 +153,7 @@ typedef struct {
>      Coroutine *co;
>      BlockDriverState *bs;
>      bool done;
> +    bool begin;
>  } BdrvCoDrainData;
>  
>  static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
> @@ -160,18 +161,23 @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
>      BdrvCoDrainData *data = opaque;
>      BlockDriverState *bs = data->bs;
>  
> -    bs->drv->bdrv_co_drain(bs);
> +    if (data->begin) {
> +        bs->drv->bdrv_co_drain(bs);
> +    } else {
> +        bs->drv->bdrv_co_drain_end(bs);
> +    }
>  
>      /* Set data->done before reading bs->wakeup.  */
>      atomic_mb_set(&data->done, true);
>      bdrv_wakeup(bs);
>  }
>  
> -static void bdrv_drain_invoke(BlockDriverState *bs)
> +static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
>  {
> -    BdrvCoDrainData data = { .bs = bs, .done = false };
> +    BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
>  
> -    if (!bs->drv || !bs->drv->bdrv_co_drain) {
> +    if (!bs->drv || (begin && !bs->drv->bdrv_co_drain) ||
> +            (!begin && !bs->drv->bdrv_co_drain_end)) {
>          return;
>      }
>  
> @@ -180,15 +186,16 @@ static void bdrv_drain_invoke(BlockDriverState *bs)
>      BDRV_POLL_WHILE(bs, !data.done);
>  }
>  
> -static bool bdrv_drain_recurse(BlockDriverState *bs)
> +static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
>  {
>      BdrvChild *child, *tmp;
>      bool waited;
>  
> -    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
> -
>      /* Ensure any pending metadata writes are submitted to bs->file.  */
> -    bdrv_drain_invoke(bs);
> +    bdrv_drain_invoke(bs, begin);
> +
> +    /* Wait for drained requests to finish */
> +    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
>  
>      QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
>          BlockDriverState *bs = child->bs;
> @@ -205,7 +212,7 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
>               */
>              bdrv_ref(bs);
>          }
> -        waited |= bdrv_drain_recurse(bs);
> +        waited |= bdrv_drain_recurse(bs, begin);
>          if (in_main_loop) {
>              bdrv_unref(bs);
>          }
> @@ -221,12 +228,18 @@ static void bdrv_co_drain_bh_cb(void *opaque)
>      BlockDriverState *bs = data->bs;
>  
>      bdrv_dec_in_flight(bs);
> -    bdrv_drained_begin(bs);
> +    if (data->begin) {
> +        bdrv_drained_begin(bs);
> +    } else {
> +        bdrv_drained_end(bs);
> +    }
> +
>      data->done = true;
>      aio_co_wake(co);
>  }
>  
> -static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
> +static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
> +                                                bool begin)
>  {
>      BdrvCoDrainData data;
>  
> @@ -239,6 +252,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
>          .co = qemu_coroutine_self(),
>          .bs = bs,
>          .done = false,
> +        .begin = begin,
>      };
>      bdrv_inc_in_flight(bs);
>      aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
> @@ -253,7 +267,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
>  void bdrv_drained_begin(BlockDriverState *bs)
>  {
>      if (qemu_in_coroutine()) {
> -        bdrv_co_yield_to_drain(bs);
> +        bdrv_co_yield_to_drain(bs, true);
>          return;
>      }
>  
> @@ -262,17 +276,22 @@ void bdrv_drained_begin(BlockDriverState *bs)
>          bdrv_parent_drained_begin(bs);
>      }
>  
> -    bdrv_drain_recurse(bs);
> +    bdrv_drain_recurse(bs, true);
>  }
>  
>  void bdrv_drained_end(BlockDriverState *bs)
>  {
> +    if (qemu_in_coroutine()) {
> +        bdrv_co_yield_to_drain(bs, false);
> +        return;
> +    }
>      assert(bs->quiesce_counter > 0);
>      if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
>          return;
>      }
>  
>      bdrv_parent_drained_end(bs);
> +    bdrv_drain_recurse(bs, false);
>      aio_enable_external(bdrv_get_aio_context(bs));
>  }
>  
> @@ -350,7 +369,7 @@ void bdrv_drain_all_begin(void)
>              aio_context_acquire(aio_context);
>              for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
>                  if (aio_context == bdrv_get_aio_context(bs)) {
> -                    waited |= bdrv_drain_recurse(bs);
> +                    waited |= bdrv_drain_recurse(bs, true);
>                  }
>              }
>              aio_context_release(aio_context);
> @@ -371,6 +390,7 @@ void bdrv_drain_all_end(void)
>          aio_context_acquire(aio_context);
>          aio_enable_external(aio_context);
>          bdrv_parent_drained_end(bs);
> +        bdrv_drain_recurse(bs, false);
>          aio_context_release(aio_context);
>      }
>  
> -- 
> 2.11.0
>
Fam Zheng Sept. 21, 2017, 1:30 p.m. UTC | #2
On Thu, 09/21 16:17, Manos Pitsidianakis wrote:
> BlockDriverState has a bdrv_do_drain() callback but no equivalent for the end
> of the drain. The throttle driver (block/throttle.c) needs a way to mark the
> end of the drain in order to toggle io_limits_disabled correctly, thus
> bdrv_co_drain_end is needed.
> 
> Signed-off-by: Manos Pitsidianakis <el13635@mail.ntua.gr>

Reviewed-by: Fam Zheng <famz@redhat.com>
Manos Pitsidianakis Sept. 21, 2017, 3:39 p.m. UTC | #3
On Thu, Sep 21, 2017 at 09:29:43PM +0800, Fam Zheng wrote:
>On Thu, 09/21 16:17, Manos Pitsidianakis wrote:
>> BlockDriverState has a bdrv_do_drain() callback but no equivalent for the end
>
>s/bdrv_do_drain/bdrv_co_drain/
>
>> of the drain. The throttle driver (block/throttle.c) needs a way to mark the
>> end of the drain in order to toggle io_limits_disabled correctly, thus
>> bdrv_co_drain_end is needed.
>>
>> Signed-off-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
>> ---
>>  include/block/block_int.h |  6 ++++++
>>  block/io.c                | 48 +++++++++++++++++++++++++++++++++--------------
>>  2 files changed, 40 insertions(+), 14 deletions(-)
>>
>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>> index ba4c383393..21950cfda3 100644
>> --- a/include/block/block_int.h
>> +++ b/include/block/block_int.h
>> @@ -356,8 +356,14 @@ struct BlockDriver {
>>      /**
>>       * Drain and stop any internal sources of requests in the driver, and
>>       * remain so until next I/O callback (e.g. bdrv_co_writev) is called.
>
>This line needs update too, maybe:
>
>       /**
>        * bdrv_co_drain drains and stops any ... and remain so until
>        * bdrv_co_drain_end is called.
>
>> +     *
>> +     * The callbacks are called at the beginning and ending of the drain
>> +     * respectively. They should be implemented if the driver needs to e.g.
>
>As implied by above change, should we explicitly require "should both be
>implemented"? It may not be technically required, but I think is cleaner and
>easier to reason about.
>

It might imply to someone that there's an 
assert(drv->bdrv_co_drain_begin && drv->bdrv_co_drain_end) somewhere 
unless you state they don't have to be implemented at the same time. How 
about we be completely explicit:

  bdrv_co_drain_begin is called if implemented in the beginning of a
  drain operation to drain and stop any internal sources of requests in
  the driver.
  bdrv_co_drain_end is called if implemented at the end of the drain.

  They should be used by the driver to e.g. manage scheduled I/O
  requests, or toggle an internal state. After the end of the drain new
  requests will continue normally.

I hope this is easier for a reader to understand!

>> +     * manage scheduled I/O requests, or toggle an internal state. 
>> After an
>
>s/After an/After a/
>
>> +     * bdrv_co_drain_end() invocation new requests will continue normally.
>>       */
>>      void coroutine_fn (*bdrv_co_drain)(BlockDriverState *bs);
>> +    void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
>>
>>      void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
>>                             Error **errp);
>> diff --git a/block/io.c b/block/io.c
>> index 4378ae4c7d..b0a10ad3ef 100644
>> --- a/block/io.c
>> +++ b/block/io.c
>> @@ -153,6 +153,7 @@ typedef struct {
>>      Coroutine *co;
>>      BlockDriverState *bs;
>>      bool done;
>> +    bool begin;
>>  } BdrvCoDrainData;
>>
>>  static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
>> @@ -160,18 +161,23 @@ static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
>>      BdrvCoDrainData *data = opaque;
>>      BlockDriverState *bs = data->bs;
>>
>> -    bs->drv->bdrv_co_drain(bs);
>> +    if (data->begin) {
>> +        bs->drv->bdrv_co_drain(bs);
>> +    } else {
>> +        bs->drv->bdrv_co_drain_end(bs);
>> +    }
>>
>>      /* Set data->done before reading bs->wakeup.  */
>>      atomic_mb_set(&data->done, true);
>>      bdrv_wakeup(bs);
>>  }
>>
>> -static void bdrv_drain_invoke(BlockDriverState *bs)
>> +static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
>>  {
>> -    BdrvCoDrainData data = { .bs = bs, .done = false };
>> +    BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
>>
>> -    if (!bs->drv || !bs->drv->bdrv_co_drain) {
>> +    if (!bs->drv || (begin && !bs->drv->bdrv_co_drain) ||
>> +            (!begin && !bs->drv->bdrv_co_drain_end)) {
>>          return;
>>      }
>>
>> @@ -180,15 +186,16 @@ static void bdrv_drain_invoke(BlockDriverState *bs)
>>      BDRV_POLL_WHILE(bs, !data.done);
>>  }
>>
>> -static bool bdrv_drain_recurse(BlockDriverState *bs)
>> +static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
>>  {
>>      BdrvChild *child, *tmp;
>>      bool waited;
>>
>> -    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
>> -
>>      /* Ensure any pending metadata writes are submitted to bs->file.  */
>> -    bdrv_drain_invoke(bs);
>> +    bdrv_drain_invoke(bs, begin);
>> +
>> +    /* Wait for drained requests to finish */
>> +    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
>>
>>      QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
>>          BlockDriverState *bs = child->bs;
>> @@ -205,7 +212,7 @@ static bool bdrv_drain_recurse(BlockDriverState *bs)
>>               */
>>              bdrv_ref(bs);
>>          }
>> -        waited |= bdrv_drain_recurse(bs);
>> +        waited |= bdrv_drain_recurse(bs, begin);
>>          if (in_main_loop) {
>>              bdrv_unref(bs);
>>          }
>> @@ -221,12 +228,18 @@ static void bdrv_co_drain_bh_cb(void *opaque)
>>      BlockDriverState *bs = data->bs;
>>
>>      bdrv_dec_in_flight(bs);
>> -    bdrv_drained_begin(bs);
>> +    if (data->begin) {
>> +        bdrv_drained_begin(bs);
>> +    } else {
>> +        bdrv_drained_end(bs);
>> +    }
>> +
>>      data->done = true;
>>      aio_co_wake(co);
>>  }
>>
>> -static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
>> +static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
>> +                                                bool begin)
>>  {
>>      BdrvCoDrainData data;
>>
>> @@ -239,6 +252,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
>>          .co = qemu_coroutine_self(),
>>          .bs = bs,
>>          .done = false,
>> +        .begin = begin,
>>      };
>>      bdrv_inc_in_flight(bs);
>>      aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
>> @@ -253,7 +267,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
>>  void bdrv_drained_begin(BlockDriverState *bs)
>>  {
>>      if (qemu_in_coroutine()) {
>> -        bdrv_co_yield_to_drain(bs);
>> +        bdrv_co_yield_to_drain(bs, true);
>>          return;
>>      }
>>
>> @@ -262,17 +276,22 @@ void bdrv_drained_begin(BlockDriverState *bs)
>>          bdrv_parent_drained_begin(bs);
>>      }
>>
>> -    bdrv_drain_recurse(bs);
>> +    bdrv_drain_recurse(bs, true);
>>  }
>>
>>  void bdrv_drained_end(BlockDriverState *bs)
>>  {
>> +    if (qemu_in_coroutine()) {
>> +        bdrv_co_yield_to_drain(bs, false);
>> +        return;
>> +    }
>>      assert(bs->quiesce_counter > 0);
>>      if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
>>          return;
>>      }
>>
>>      bdrv_parent_drained_end(bs);
>> +    bdrv_drain_recurse(bs, false);
>>      aio_enable_external(bdrv_get_aio_context(bs));
>>  }
>>
>> @@ -350,7 +369,7 @@ void bdrv_drain_all_begin(void)
>>              aio_context_acquire(aio_context);
>>              for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
>>                  if (aio_context == bdrv_get_aio_context(bs)) {
>> -                    waited |= bdrv_drain_recurse(bs);
>> +                    waited |= bdrv_drain_recurse(bs, true);
>>                  }
>>              }
>>              aio_context_release(aio_context);
>> @@ -371,6 +390,7 @@ void bdrv_drain_all_end(void)
>>          aio_context_acquire(aio_context);
>>          aio_enable_external(aio_context);
>>          bdrv_parent_drained_end(bs);
>> +        bdrv_drain_recurse(bs, false);
>>          aio_context_release(aio_context);
>>      }
>>
>> --
>> 2.11.0
>>
>
Fam Zheng Sept. 22, 2017, 2:30 a.m. UTC | #4
On Thu, 09/21 18:39, Manos Pitsidianakis wrote:
> On Thu, Sep 21, 2017 at 09:29:43PM +0800, Fam Zheng wrote:
> > On Thu, 09/21 16:17, Manos Pitsidianakis wrote:
> > > BlockDriverState has a bdrv_do_drain() callback but no equivalent for the end
> > 
> > s/bdrv_do_drain/bdrv_co_drain/
> > 
> > > of the drain. The throttle driver (block/throttle.c) needs a way to mark the
> > > end of the drain in order to toggle io_limits_disabled correctly, thus
> > > bdrv_co_drain_end is needed.
> > > 
> > > Signed-off-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
> > > ---
> > >  include/block/block_int.h |  6 ++++++
> > >  block/io.c                | 48 +++++++++++++++++++++++++++++++++--------------
> > >  2 files changed, 40 insertions(+), 14 deletions(-)
> > > 
> > > diff --git a/include/block/block_int.h b/include/block/block_int.h
> > > index ba4c383393..21950cfda3 100644
> > > --- a/include/block/block_int.h
> > > +++ b/include/block/block_int.h
> > > @@ -356,8 +356,14 @@ struct BlockDriver {
> > >      /**
> > >       * Drain and stop any internal sources of requests in the driver, and
> > >       * remain so until next I/O callback (e.g. bdrv_co_writev) is called.
> > 
> > This line needs update too, maybe:
> > 
> >       /**
> >        * bdrv_co_drain drains and stops any ... and remain so until
> >        * bdrv_co_drain_end is called.
> > 
> > > +     *
> > > +     * The callbacks are called at the beginning and ending of the drain
> > > +     * respectively. They should be implemented if the driver needs to e.g.
> > 
> > As implied by above change, should we explicitly require "should both be
> > implemented"? It may not be technically required, but I think is cleaner and
> > easier to reason about.
> > 
> 
> It might imply to someone that there's an assert(drv->bdrv_co_drain_begin &&
> drv->bdrv_co_drain_end) somewhere unless you state they don't have to be
> implemented at the same time. How about we be completely explicit:
> 
>  bdrv_co_drain_begin is called if implemented in the beginning of a
>  drain operation to drain and stop any internal sources of requests in
>  the driver.
>  bdrv_co_drain_end is called if implemented at the end of the drain.
> 
>  They should be used by the driver to e.g. manage scheduled I/O
>  requests, or toggle an internal state. After the end of the drain new
>  requests will continue normally.
> 
> I hope this is easier for a reader to understand!

I don't like the inconsistent semantics of when the drained section ends, if we
allow drivers to implement bdrv_co_drain_begin but omit bdrv_co_drain_end.
Currently the point where the section ends is, as said in the comment, when next
I/O callback is invoked. Now we are adding the explicit ".bdrv_co_drain_end"
into the fomular, if we still keep the previous convention, the interface
contract is just mixed of two things for no good reason. I don't think it's
technically necessary.

Let's just add the assert:

    assert(!!drv->bdrv_co_drain_begin == !!drv->bdrv_co_drain_end);

in bdrv_drain_invoke, add a comment here, then add an empty .bdrv_co_drain_end
in qed.

Fam
Kevin Wolf Sept. 22, 2017, 11:03 a.m. UTC | #5
Am 22.09.2017 um 04:30 hat Fam Zheng geschrieben:
> On Thu, 09/21 18:39, Manos Pitsidianakis wrote:
> > On Thu, Sep 21, 2017 at 09:29:43PM +0800, Fam Zheng wrote:
> > > On Thu, 09/21 16:17, Manos Pitsidianakis wrote:
> > It might imply to someone that there's an assert(drv->bdrv_co_drain_begin &&
> > drv->bdrv_co_drain_end) somewhere unless you state they don't have to be
> > implemented at the same time. How about we be completely explicit:
> > 
> >  bdrv_co_drain_begin is called if implemented in the beginning of a
> >  drain operation to drain and stop any internal sources of requests in
> >  the driver.
> >  bdrv_co_drain_end is called if implemented at the end of the drain.
> > 
> >  They should be used by the driver to e.g. manage scheduled I/O
> >  requests, or toggle an internal state. After the end of the drain new
> >  requests will continue normally.
> > 
> > I hope this is easier for a reader to understand!
> 
> I don't like the inconsistent semantics of when the drained section
> ends, if we allow drivers to implement bdrv_co_drain_begin but omit
> bdrv_co_drain_end.  Currently the point where the section ends is,
> as said in the comment, when next I/O callback is invoked. Now we are
> adding the explicit ".bdrv_co_drain_end" into the formula, if we still
> keep the previous convention, the interface contract is just mixed of
> two things for no good reason. I don't think it's technically
> necessary.

We don't keep the convention with the next I/O callback. We just allow
drivers to omit an empty implementation of either callback, which seems
to be a very sensible default to me.

> Let's just add the assert:
> 
>     assert(!!drv->bdrv_co_drain_begin == !!drv->bdrv_co_drain_end);
> 
> in bdrv_drain_invoke, add a comment here

I'm not in favour of this, but if we do want to have it, wouldn't
bdrv_register() be a much better place for the assertion?

> then add an empty .bdrv_co_drain_end in qed.

If you need empty functions anywhere, it's a sign that we have a bad
default behaviour.

Kevin
Fam Zheng Sept. 22, 2017, 12:34 p.m. UTC | #6
On Fri, 09/22 13:03, Kevin Wolf wrote:
> Am 22.09.2017 um 04:30 hat Fam Zheng geschrieben:
> > On Thu, 09/21 18:39, Manos Pitsidianakis wrote:
> > > On Thu, Sep 21, 2017 at 09:29:43PM +0800, Fam Zheng wrote:
> > > > On Thu, 09/21 16:17, Manos Pitsidianakis wrote:
> > > It might imply to someone that there's an assert(drv->bdrv_co_drain_begin &&
> > > drv->bdrv_co_drain_end) somewhere unless you state they don't have to be
> > > implemented at the same time. How about we be completely explicit:
> > > 
> > >  bdrv_co_drain_begin is called if implemented in the beginning of a
> > >  drain operation to drain and stop any internal sources of requests in
> > >  the driver.
> > >  bdrv_co_drain_end is called if implemented at the end of the drain.
> > > 
> > >  They should be used by the driver to e.g. manage scheduled I/O
> > >  requests, or toggle an internal state. After the end of the drain new
> > >  requests will continue normally.
> > > 
> > > I hope this is easier for a reader to understand!
> > 
> > I don't like the inconsistent semantics of when the drained section
> > ends, if we allow drivers to implement bdrv_co_drain_begin but omit
> > bdrv_co_drain_end.  Currently the point where the section ends is,
> > as said in the comment, when next I/O callback is invoked. Now we are
> > adding the explicit ".bdrv_co_drain_end" into the formula, if we still
> > keep the previous convention, the interface contract is just mixed of
> > two things for no good reason. I don't think it's technically
> > necessary.
> 
> We don't keep the convention with the next I/O callback. We just allow
> drivers to omit an empty implementation of either callback, which seems
> to be a very sensible default to me.
> 

OK, I'm fine with this.

Fam
diff mbox

Patch

diff --git a/include/block/block_int.h b/include/block/block_int.h
index ba4c383393..21950cfda3 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -356,8 +356,14 @@  struct BlockDriver {
     /**
      * Drain and stop any internal sources of requests in the driver, and
      * remain so until next I/O callback (e.g. bdrv_co_writev) is called.
+     *
+     * The callbacks are called at the beginning and ending of the drain
+     * respectively. They should be implemented if the driver needs to e.g.
+     * manage scheduled I/O requests, or toggle an internal state. After an
+     * bdrv_co_drain_end() invocation new requests will continue normally.
      */
     void coroutine_fn (*bdrv_co_drain)(BlockDriverState *bs);
+    void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
 
     void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
                            Error **errp);
diff --git a/block/io.c b/block/io.c
index 4378ae4c7d..b0a10ad3ef 100644
--- a/block/io.c
+++ b/block/io.c
@@ -153,6 +153,7 @@  typedef struct {
     Coroutine *co;
     BlockDriverState *bs;
     bool done;
+    bool begin;
 } BdrvCoDrainData;
 
 static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
@@ -160,18 +161,23 @@  static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
     BdrvCoDrainData *data = opaque;
     BlockDriverState *bs = data->bs;
 
-    bs->drv->bdrv_co_drain(bs);
+    if (data->begin) {
+        bs->drv->bdrv_co_drain(bs);
+    } else {
+        bs->drv->bdrv_co_drain_end(bs);
+    }
 
     /* Set data->done before reading bs->wakeup.  */
     atomic_mb_set(&data->done, true);
     bdrv_wakeup(bs);
 }
 
-static void bdrv_drain_invoke(BlockDriverState *bs)
+static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
 {
-    BdrvCoDrainData data = { .bs = bs, .done = false };
+    BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
 
-    if (!bs->drv || !bs->drv->bdrv_co_drain) {
+    if (!bs->drv || (begin && !bs->drv->bdrv_co_drain) ||
+            (!begin && !bs->drv->bdrv_co_drain_end)) {
         return;
     }
 
@@ -180,15 +186,16 @@  static void bdrv_drain_invoke(BlockDriverState *bs)
     BDRV_POLL_WHILE(bs, !data.done);
 }
 
-static bool bdrv_drain_recurse(BlockDriverState *bs)
+static bool bdrv_drain_recurse(BlockDriverState *bs, bool begin)
 {
     BdrvChild *child, *tmp;
     bool waited;
 
-    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
-
     /* Ensure any pending metadata writes are submitted to bs->file.  */
-    bdrv_drain_invoke(bs);
+    bdrv_drain_invoke(bs, begin);
+
+    /* Wait for drained requests to finish */
+    waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
 
     QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
         BlockDriverState *bs = child->bs;
@@ -205,7 +212,7 @@  static bool bdrv_drain_recurse(BlockDriverState *bs)
              */
             bdrv_ref(bs);
         }
-        waited |= bdrv_drain_recurse(bs);
+        waited |= bdrv_drain_recurse(bs, begin);
         if (in_main_loop) {
             bdrv_unref(bs);
         }
@@ -221,12 +228,18 @@  static void bdrv_co_drain_bh_cb(void *opaque)
     BlockDriverState *bs = data->bs;
 
     bdrv_dec_in_flight(bs);
-    bdrv_drained_begin(bs);
+    if (data->begin) {
+        bdrv_drained_begin(bs);
+    } else {
+        bdrv_drained_end(bs);
+    }
+
     data->done = true;
     aio_co_wake(co);
 }
 
-static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
+static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
+                                                bool begin)
 {
     BdrvCoDrainData data;
 
@@ -239,6 +252,7 @@  static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
         .co = qemu_coroutine_self(),
         .bs = bs,
         .done = false,
+        .begin = begin,
     };
     bdrv_inc_in_flight(bs);
     aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
@@ -253,7 +267,7 @@  static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
 void bdrv_drained_begin(BlockDriverState *bs)
 {
     if (qemu_in_coroutine()) {
-        bdrv_co_yield_to_drain(bs);
+        bdrv_co_yield_to_drain(bs, true);
         return;
     }
 
@@ -262,17 +276,22 @@  void bdrv_drained_begin(BlockDriverState *bs)
         bdrv_parent_drained_begin(bs);
     }
 
-    bdrv_drain_recurse(bs);
+    bdrv_drain_recurse(bs, true);
 }
 
 void bdrv_drained_end(BlockDriverState *bs)
 {
+    if (qemu_in_coroutine()) {
+        bdrv_co_yield_to_drain(bs, false);
+        return;
+    }
     assert(bs->quiesce_counter > 0);
     if (atomic_fetch_dec(&bs->quiesce_counter) > 1) {
         return;
     }
 
     bdrv_parent_drained_end(bs);
+    bdrv_drain_recurse(bs, false);
     aio_enable_external(bdrv_get_aio_context(bs));
 }
 
@@ -350,7 +369,7 @@  void bdrv_drain_all_begin(void)
             aio_context_acquire(aio_context);
             for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
                 if (aio_context == bdrv_get_aio_context(bs)) {
-                    waited |= bdrv_drain_recurse(bs);
+                    waited |= bdrv_drain_recurse(bs, true);
                 }
             }
             aio_context_release(aio_context);
@@ -371,6 +390,7 @@  void bdrv_drain_all_end(void)
         aio_context_acquire(aio_context);
         aio_enable_external(aio_context);
         bdrv_parent_drained_end(bs);
+        bdrv_drain_recurse(bs, false);
         aio_context_release(aio_context);
     }