diff mbox

[RFC,v3,3/8] block: add throttle block filter driver

Message ID 20170623124700.1389-4-el13635@mail.ntua.gr (mailing list archive)
State New, archived
Headers show

Commit Message

Manos Pitsidianakis June 23, 2017, 12:46 p.m. UTC
block/throttle.c uses existing I/O throttle infrastructure inside a
block filter driver. I/O operations are intercepted in the filter's
read/write coroutines, and referred to block/throttle-groups.c

The driver can be used with the command
-drive driver=throttle,file.filename=foo.qcow2,iops-total=...
The configuration flags and semantics are identical to the hardcoded
throttling ones.

Signed-off-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
---
 block/Makefile.objs             |   1 +
 block/throttle.c                | 427 ++++++++++++++++++++++++++++++++++++++++
 include/qemu/throttle-options.h |  60 ++++--
 3 files changed, 469 insertions(+), 19 deletions(-)
 create mode 100644 block/throttle.c

Comments

Manos Pitsidianakis June 26, 2017, 2 p.m. UTC | #1
On Fri, Jun 23, 2017 at 03:46:55PM +0300, Manos Pitsidianakis wrote:
>+static QemuOptsList throttle_opts = {
>+    .name = "throttle",
>+    .head = QTAILQ_HEAD_INITIALIZER(throttle_opts.head),
>+    .desc = {
>+        {
>+            .name = QEMU_OPT_IOPS_TOTAL,

throttle_opts should use THROTTLE_OPTS from throttle-options.h (how many 
times can you say throttle opts in a sentence?), this will be corrected.

>+
>+static int throttle_configure_tgm(BlockDriverState *bs, ThrottleGroupMember *tgm,
>+                                                    QDict *options, Error **errp)
>+{
> ....
>+    /* Copy previous configuration */
>+    throttle_get_config(ts, &cfg);
> ....
>+    /* Update group configuration */
>+    throttle_config(ts, tt, &cfg);

These should be throttle_group_get_config() and throttle_group_config() 
respectively, to use the throttle group locks.

>+
>+block_init(bdrv_throttle_init);
>diff --git a/include/qemu/throttle-options.h b/include/qemu/throttle-options.h
>index 3133d1ca40..508ee72625 100644
>--- a/include/qemu/throttle-options.h
>+++ b/include/qemu/throttle-options.h
>@@ -10,81 +10,103 @@
> #ifndef THROTTLE_OPTIONS_H
> #define THROTTLE_OPTIONS_H
>
>+#define QEMU_OPT_IOPS_TOTAL "iops-total"
>+#define QEMU_OPT_IOPS_TOTAL_MAX "iops-total-max"
>+#define QEMU_OPT_IOPS_TOTAL_MAX_LENGTH "iops-total-max-length"
>+#define QEMU_OPT_IOPS_READ "iops-read"
>+#define QEMU_OPT_IOPS_READ_MAX "iops-read-max"
>+#define QEMU_OPT_IOPS_READ_MAX_LENGTH "iops-read-max-length"
>+#define QEMU_OPT_IOPS_WRITE "iops-write"
>+#define QEMU_OPT_IOPS_WRITE_MAX "iops-write-max"
>+#define QEMU_OPT_IOPS_WRITE_MAX_LENGTH "iops-write-max-length"
>+#define QEMU_OPT_BPS_TOTAL "bps-total"
>+#define QEMU_OPT_BPS_TOTAL_MAX "bps-total-max"
>+#define QEMU_OPT_BPS_TOTAL_MAX_LENGTH "bps-total-max-length"
>+#define QEMU_OPT_BPS_READ "bps-read"
>+#define QEMU_OPT_BPS_READ_MAX "bps-read-max"
>+#define QEMU_OPT_BPS_READ_MAX_LENGTH "bps-read-max-length"
>+#define QEMU_OPT_BPS_WRITE "bps-write"
>+#define QEMU_OPT_BPS_WRITE_MAX "bps-write-max"
>+#define QEMU_OPT_BPS_WRITE_MAX_LENGTH "bps-write-max-length"
>+#define QEMU_OPT_IOPS_SIZE "iops-size"
>+#define QEMU_OPT_THROTTLE_GROUP_NAME "throttling-group"
>+
>+#define THROTTLE_OPT_PREFIX "throttling."
> #define THROTTLE_OPTS \
>           { \
>-            .name = "throttling.iops-total",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "limit total I/O operations per second",\
>         },{ \
>-            .name = "throttling.iops-read",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "limit read operations per second",\
>         },{ \
>-            .name = "throttling.iops-write",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "limit write operations per second",\
>         },{ \
>-            .name = "throttling.bps-total",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "limit total bytes per second",\
>         },{ \
>-            .name = "throttling.bps-read",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "limit read bytes per second",\
>         },{ \
>-            .name = "throttling.bps-write",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "limit write bytes per second",\
>         },{ \
>-            .name = "throttling.iops-total-max",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL_MAX,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "I/O operations burst",\
>         },{ \
>-            .name = "throttling.iops-read-max",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ_MAX,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "I/O operations read burst",\
>         },{ \
>-            .name = "throttling.iops-write-max",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE_MAX,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "I/O operations write burst",\
>         },{ \
>-            .name = "throttling.bps-total-max",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL_MAX,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "total bytes burst",\
>         },{ \
>-            .name = "throttling.bps-read-max",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ_MAX,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "total bytes read burst",\
>         },{ \
>-            .name = "throttling.bps-write-max",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE_MAX,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "total bytes write burst",\
>         },{ \
>-            .name = "throttling.iops-total-max-length",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL_MAX_LENGTH,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "length of the iops-total-max burst period, in seconds",\
>         },{ \
>-            .name = "throttling.iops-read-max-length",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ_MAX_LENGTH,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "length of the iops-read-max burst period, in seconds",\
>         },{ \
>-            .name = "throttling.iops-write-max-length",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE_MAX_LENGTH,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "length of the iops-write-max burst period, in seconds",\
>         },{ \
>-            .name = "throttling.bps-total-max-length",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL_MAX_LENGTH,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "length of the bps-total-max burst period, in seconds",\
>         },{ \
>-            .name = "throttling.bps-read-max-length",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ_MAX_LENGTH,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "length of the bps-read-max burst period, in seconds",\
>         },{ \
>-            .name = "throttling.bps-write-max-length",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE_MAX_LENGTH,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "length of the bps-write-max burst period, in seconds",\
>         },{ \
>-            .name = "throttling.iops-size",\
>+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_SIZE,\
>             .type = QEMU_OPT_NUMBER,\
>             .help = "when limiting by iops max size of an I/O in bytes",\
>         }
>-- 
>2.11.0
>
>
>
Stefan Hajnoczi June 26, 2017, 2:30 p.m. UTC | #2
On Fri, Jun 23, 2017 at 03:46:55PM +0300, Manos Pitsidianakis wrote:
> +static int throttle_configure_tgm(BlockDriverState *bs, ThrottleGroupMember *tgm,
> +                                                    QDict *options, Error **errp)
> +{
> +    int ret = 0;
> +    ThrottleState *ts;
> +    ThrottleTimers *tt;
> +    ThrottleConfig cfg;
> +    QemuOpts *opts = NULL;
> +    const char *group_name = NULL;
> +    Error *local_err = NULL;
> +
> +    opts = qemu_opts_create(&throttle_opts, NULL, 0, &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return -EINVAL;
> +    }
> +    qemu_opts_absorb_qdict(opts, options, &local_err);
> +    if (local_err) {
> +        goto err;
> +    }
> +
> +    group_name = qemu_opt_get(opts, QEMU_OPT_THROTTLE_GROUP_NAME);
> +    if (!group_name) {
> +        group_name = bdrv_get_device_or_node_name(bs);
> +        if (!strlen(group_name)) {
> +            error_setg(&local_err,
> +                       "A group name must be specified for this device.");

To make the error message clearer:
s/A group name/A throttle group name/

> +static int throttle_open(BlockDriverState *bs, QDict *options,
> +                            int flags, Error **errp)
> +{
> +    ThrottleGroupMember *tgm = bs->opaque;
> +    Error *local_err = NULL;
> +
> +    bs->open_flags = flags;

Why is this necessary?  The other block drivers don't do it.  It should
be taken care of in block.c.

> +    bs->file = bdrv_open_child(NULL, options, "file",
> +                                    bs, &child_file, false, &local_err);
> +
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return -EINVAL;
> +    }
> +
> +    qdict_flatten(options);
> +    return throttle_configure_tgm(bs, tgm, options, errp);

Who destroys bs->file on error?

> +}
> +
> +static void throttle_close(BlockDriverState *bs)
> +{
> +    ThrottleGroupMember *tgm = bs->opaque;
> +    bdrv_drained_begin(bs);

Why is this necessary?  bdrv_close() already encloses drv->bdrv_close()
in a bdrv_drained_begin/end region.

> +    throttle_group_unregister_tgm(tgm);
> +    bdrv_drained_end(bs);
> +    return;

Please omit return at the end of a void function.  They are a
distraction.

> +static void throttle_reopen_commit(BDRVReopenState *state)
> +{
> +    ThrottleGroupMember *tgm = state->bs->opaque;
> +    BlockDriverState *bs = state->bs;
> +
> +    bdrv_drained_begin(bs);

Why is this necessary?  bdrv_reopen_multiple() calls
bdrv_reopen_commit() from within a bdrv_drain_all_begin()/end() region.

> +static BlockDriver bdrv_throttle = {
> +    .format_name                        =   "throttle",
> +    .protocol_name                      =   "throttle",
> +    .instance_size                      =   sizeof(ThrottleGroupMember),
> +
> +    .bdrv_file_open                     =   throttle_open,
> +    .bdrv_close                         =   throttle_close,
> +    .bdrv_co_flush                      =   throttle_co_flush,
> +
> +    .bdrv_child_perm                    =   bdrv_filter_default_perms,
> +
> +    .bdrv_getlength                     =   throttle_getlength,
> +
> +    .bdrv_co_preadv                     =   throttle_co_preadv,
> +    .bdrv_co_pwritev                    =   throttle_co_pwritev,
> +
> +    .bdrv_co_pwrite_zeroes              =   throttle_co_pwrite_zeroes,
> +    .bdrv_co_pdiscard                   =   throttle_co_pdiscard,
> +
> +    .bdrv_recurse_is_first_non_filter   =   bdrv_recurse_is_first_non_filter,
> +
> +    .bdrv_attach_aio_context            =   throttle_attach_aio_context,
> +    .bdrv_detach_aio_context            =   throttle_detach_aio_context,
> +
> +    .bdrv_reopen_prepare                =   throttle_reopen_prepare,
> +    .bdrv_reopen_commit                 =   throttle_reopen_commit,
> +    .bdrv_reopen_abort                  =   throttle_reopen_abort,
> +
> +    .is_filter                          =   true,
> +};

Missing:
bdrv_co_get_block_status()
bdrv_truncate()
bdrv_get_info()
bdrv_probe_blocksizes()
bdrv_probe_geometry()
bdrv_media_changed()
bdrv_eject()
bdrv_lock_medium()
bdrv_co_ioctl()

See block/raw-format.c.

I think most of these could be modified in block.c or block/io.c to
automatically call bs->file's function if drv doesn't implement them.
This way all block drivers would transparently pass them through by
default and block/raw-format.c code could be eliminated.
Stefan Hajnoczi June 26, 2017, 2:34 p.m. UTC | #3
On Fri, Jun 23, 2017 at 03:46:55PM +0300, Manos Pitsidianakis wrote:
> +    qemu_co_queue_init(&tgm->throttled_reqs[0]);
> +    qemu_co_queue_init(&tgm->throttled_reqs[1]);

Where is reqs_lock initialized?

It would be cleaner to move the throttled_reqs[] and reqs_lock
initialization into the tgm registration function.  That way it's done
in a single place.
Manos Pitsidianakis June 26, 2017, 4:01 p.m. UTC | #4
On Mon, Jun 26, 2017 at 03:30:55PM +0100, Stefan Hajnoczi wrote:
>> +static BlockDriver bdrv_throttle = {
>> +    .format_name                        =   "throttle",
>> +    .protocol_name                      =   "throttle",
>> +    .instance_size                      =   sizeof(ThrottleGroupMember),
>> +
>> +    .bdrv_file_open                     =   throttle_open,
>> +    .bdrv_close                         =   throttle_close,
>> +    .bdrv_co_flush                      =   throttle_co_flush,
>> +
>> +    .bdrv_child_perm                    =   bdrv_filter_default_perms,
>> +
>> +    .bdrv_getlength                     =   throttle_getlength,
>> +
>> +    .bdrv_co_preadv                     =   throttle_co_preadv,
>> +    .bdrv_co_pwritev                    =   throttle_co_pwritev,
>> +
>> +    .bdrv_co_pwrite_zeroes              =   throttle_co_pwrite_zeroes,
>> +    .bdrv_co_pdiscard                   =   throttle_co_pdiscard,
>> +
>> +    .bdrv_recurse_is_first_non_filter   =   bdrv_recurse_is_first_non_filter,
>> +
>> +    .bdrv_attach_aio_context            =   throttle_attach_aio_context,
>> +    .bdrv_detach_aio_context            =   throttle_detach_aio_context,
>> +
>> +    .bdrv_reopen_prepare                =   throttle_reopen_prepare,
>> +    .bdrv_reopen_commit                 =   throttle_reopen_commit,
>> +    .bdrv_reopen_abort                  =   throttle_reopen_abort,
>> +
>> +    .is_filter                          =   true,
>> +};
>
>Missing:
>bdrv_co_get_block_status()
>bdrv_truncate()
>bdrv_get_info()
>bdrv_probe_blocksizes()
>bdrv_probe_geometry()
>bdrv_media_changed()
>bdrv_eject()
>bdrv_lock_medium()
>bdrv_co_ioctl()
>
>See block/raw-format.c.
>
>I think most of these could be modified in block.c or block/io.c to
>automatically call bs->file's function if drv doesn't implement them.
>This way all block drivers would transparently pass them through by
>default and block/raw-format.c code could be eliminated.

Are these truly necessary? Because other filter drivers (ie quorum, 
blkverify) don't implement them.
Manos Pitsidianakis June 26, 2017, 4:26 p.m. UTC | #5
On Mon, Jun 26, 2017 at 03:30:55PM +0100, Stefan Hajnoczi wrote:
>> +    bs->file = bdrv_open_child(NULL, options, "file",
>> +                                    bs, &child_file, false, &local_err);
>> +
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return -EINVAL;
>> +    }
>> +
>> +    qdict_flatten(options);
>> +    return throttle_configure_tgm(bs, tgm, options, errp);
>
>Who destroys bs->file on error?

It is reaped by bdrv_open_inherit() on failure, if I'm not mistaken.
That's how other drivers handle this as well. Some (eg block/qcow2.c)
check if bs->file is NULL instead of the error pointer they pass, so
this is not not very consistent.
Stefan Hajnoczi June 27, 2017, 12:42 p.m. UTC | #6
On Mon, Jun 26, 2017 at 07:01:18PM +0300, Manos Pitsidianakis wrote:
> On Mon, Jun 26, 2017 at 03:30:55PM +0100, Stefan Hajnoczi wrote:
> > > +static BlockDriver bdrv_throttle = {
> > > +    .format_name                        =   "throttle",
> > > +    .protocol_name                      =   "throttle",
> > > +    .instance_size                      =   sizeof(ThrottleGroupMember),
> > > +
> > > +    .bdrv_file_open                     =   throttle_open,
> > > +    .bdrv_close                         =   throttle_close,
> > > +    .bdrv_co_flush                      =   throttle_co_flush,
> > > +
> > > +    .bdrv_child_perm                    =   bdrv_filter_default_perms,
> > > +
> > > +    .bdrv_getlength                     =   throttle_getlength,
> > > +
> > > +    .bdrv_co_preadv                     =   throttle_co_preadv,
> > > +    .bdrv_co_pwritev                    =   throttle_co_pwritev,
> > > +
> > > +    .bdrv_co_pwrite_zeroes              =   throttle_co_pwrite_zeroes,
> > > +    .bdrv_co_pdiscard                   =   throttle_co_pdiscard,
> > > +
> > > +    .bdrv_recurse_is_first_non_filter   =   bdrv_recurse_is_first_non_filter,
> > > +
> > > +    .bdrv_attach_aio_context            =   throttle_attach_aio_context,
> > > +    .bdrv_detach_aio_context            =   throttle_detach_aio_context,
> > > +
> > > +    .bdrv_reopen_prepare                =   throttle_reopen_prepare,
> > > +    .bdrv_reopen_commit                 =   throttle_reopen_commit,
> > > +    .bdrv_reopen_abort                  =   throttle_reopen_abort,
> > > +
> > > +    .is_filter                          =   true,
> > > +};
> > 
> > Missing:
> > bdrv_co_get_block_status()
> > bdrv_truncate()
> > bdrv_get_info()
> > bdrv_probe_blocksizes()
> > bdrv_probe_geometry()
> > bdrv_media_changed()
> > bdrv_eject()
> > bdrv_lock_medium()
> > bdrv_co_ioctl()
> > 
> > See block/raw-format.c.
> > 
> > I think most of these could be modified in block.c or block/io.c to
> > automatically call bs->file's function if drv doesn't implement them.
> > This way all block drivers would transparently pass them through by
> > default and block/raw-format.c code could be eliminated.
> 
> Are these truly necessary? Because other filter drivers (ie quorum,
> blkverify) don't implement them.

Both quorum and blkverify are rarely used.  This explains why the issue
hasn't been found yet.

These are the callbacks I identified which do not automatically forward
to bs->file.  Therefore the throttle driver will break these features
when bs->file supports them.

That's why I suggest forwarding to bs->file in block.c.  Then individual
drivers do not have to implement these callbacks just to forward to
bs->file.  And if the driver wishes to prohibit a feature, it can
implement the callback and return -ENOTSUP.

You can send this fix as a separate patch series, independent of the
throttle driver.  Once it has been merged the throttle driver will gain
support for these features.

Stefan
Stefan Hajnoczi June 27, 2017, 12:45 p.m. UTC | #7
On Mon, Jun 26, 2017 at 07:26:41PM +0300, Manos Pitsidianakis wrote:
> On Mon, Jun 26, 2017 at 03:30:55PM +0100, Stefan Hajnoczi wrote:
> > > +    bs->file = bdrv_open_child(NULL, options, "file",
> > > +                                    bs, &child_file, false, &local_err);
> > > +
> > > +    if (local_err) {
> > > +        error_propagate(errp, local_err);
> > > +        return -EINVAL;
> > > +    }
> > > +
> > > +    qdict_flatten(options);
> > > +    return throttle_configure_tgm(bs, tgm, options, errp);
> > 
> > Who destroys bs->file on error?
> 
> It is reaped by bdrv_open_inherit() on failure, if I'm not mistaken.
> That's how other drivers handle this as well. Some (eg block/qcow2.c)
> check if bs->file is NULL instead of the error pointer they pass, so
> this is not not very consistent.

Maybe I'm missing it but I don't see relevant bs->file cleanup in
bdrv_open_inherit() or bdrv_open_common().

Please post the exact line where it happens.

Stefan
Manos Pitsidianakis June 27, 2017, 1:34 p.m. UTC | #8
On Tue, Jun 27, 2017 at 01:45:40PM +0100, Stefan Hajnoczi wrote:
>On Mon, Jun 26, 2017 at 07:26:41PM +0300, Manos Pitsidianakis wrote:
>> On Mon, Jun 26, 2017 at 03:30:55PM +0100, Stefan Hajnoczi wrote:
>> > > +    bs->file = bdrv_open_child(NULL, options, "file",
>> > > +                                    bs, &child_file, false, &local_err);
>> > > +
>> > > +    if (local_err) {
>> > > +        error_propagate(errp, local_err);
>> > > +        return -EINVAL;
>> > > +    }
>> > > +
>> > > +    qdict_flatten(options);
>> > > +    return throttle_configure_tgm(bs, tgm, options, errp);
>> >
>> > Who destroys bs->file on error?
>>
>> It is reaped by bdrv_open_inherit() on failure, if I'm not mistaken.
>> That's how other drivers handle this as well. Some (eg block/qcow2.c)
>> check if bs->file is NULL instead of the error pointer they pass, so
>> this is not not very consistent.
>
>Maybe I'm missing it but I don't see relevant bs->file cleanup in
>bdrv_open_inherit() or bdrv_open_common().
>
>Please post the exact line where it happens.
>
>Stefan

Relevant commit: de234897b60e034ba94b307fc289e2dc692c9251 block: Do not 
unref bs->file on error in BD's open

bdrv_open_inherit() does this on failure:

fail:
    blk_unref(file);
    if (bs->file != NULL) {
        bdrv_unref_child(bs, bs->file);
    }

While looking into this I noticed bdrv_new_open_driver() doesn't handle 
bs->file on failure. It simply unrefs the bs but because its child's ref 
still remains, it is leaked.
Stefan Hajnoczi June 28, 2017, 12:11 p.m. UTC | #9
On Tue, Jun 27, 2017 at 04:34:22PM +0300, Manos Pitsidianakis wrote:
> On Tue, Jun 27, 2017 at 01:45:40PM +0100, Stefan Hajnoczi wrote:
> > On Mon, Jun 26, 2017 at 07:26:41PM +0300, Manos Pitsidianakis wrote:
> > > On Mon, Jun 26, 2017 at 03:30:55PM +0100, Stefan Hajnoczi wrote:
> > > > > +    bs->file = bdrv_open_child(NULL, options, "file",
> > > > > +                                    bs, &child_file, false, &local_err);
> > > > > +
> > > > > +    if (local_err) {
> > > > > +        error_propagate(errp, local_err);
> > > > > +        return -EINVAL;
> > > > > +    }
> > > > > +
> > > > > +    qdict_flatten(options);
> > > > > +    return throttle_configure_tgm(bs, tgm, options, errp);
> > > >
> > > > Who destroys bs->file on error?
> > > 
> > > It is reaped by bdrv_open_inherit() on failure, if I'm not mistaken.
> > > That's how other drivers handle this as well. Some (eg block/qcow2.c)
> > > check if bs->file is NULL instead of the error pointer they pass, so
> > > this is not not very consistent.
> > 
> > Maybe I'm missing it but I don't see relevant bs->file cleanup in
> > bdrv_open_inherit() or bdrv_open_common().
> > 
> > Please post the exact line where it happens.
> > 
> > Stefan
> 
> Relevant commit: de234897b60e034ba94b307fc289e2dc692c9251 block: Do not
> unref bs->file on error in BD's open
> 
> bdrv_open_inherit() does this on failure:
> 
> fail:
>    blk_unref(file);
>    if (bs->file != NULL) {
>        bdrv_unref_child(bs, bs->file);
>    }

Thanks, you are right.  I missed it.

> While looking into this I noticed bdrv_new_open_driver() doesn't handle
> bs->file on failure. It simply unrefs the bs but because its child's ref
> still remains, it is leaked.

That's a good candidate for a separate bug fix patch.
Kevin Wolf June 28, 2017, 2:40 p.m. UTC | #10
Am 23.06.2017 um 14:46 hat Manos Pitsidianakis geschrieben:
> block/throttle.c uses existing I/O throttle infrastructure inside a
> block filter driver. I/O operations are intercepted in the filter's
> read/write coroutines, and referred to block/throttle-groups.c
> 
> The driver can be used with the command
> -drive driver=throttle,file.filename=foo.qcow2,iops-total=...
> The configuration flags and semantics are identical to the hardcoded
> throttling ones.
> 
> Signed-off-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
> ---
>  block/Makefile.objs             |   1 +
>  block/throttle.c                | 427 ++++++++++++++++++++++++++++++++++++++++
>  include/qemu/throttle-options.h |  60 ++++--
>  3 files changed, 469 insertions(+), 19 deletions(-)
>  create mode 100644 block/throttle.c
> 
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index ea955302c8..bb811a4d01 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -25,6 +25,7 @@ block-obj-y += accounting.o dirty-bitmap.o
>  block-obj-y += write-threshold.o
>  block-obj-y += backup.o
>  block-obj-$(CONFIG_REPLICATION) += replication.o
> +block-obj-y += throttle.o
>  
>  block-obj-y += crypto.o
>  
> diff --git a/block/throttle.c b/block/throttle.c
> new file mode 100644
> index 0000000000..0c17051161
> --- /dev/null
> +++ b/block/throttle.c
> @@ -0,0 +1,427 @@
> +/*
> + * QEMU block throttling filter driver infrastructure
> + *
> + * Copyright (c) 2017 Manos Pitsidianakis
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation; either version 2 or
> + * (at your option) version 3 of the License.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, see <http://www.gnu.org/licenses/>.
> + */

Please consider using the LGPL. We're still hoping to turn the block
layer into a library one day, and almost all code in it is licensed
liberally (MIT or LGPL).

> +#include "qemu/osdep.h"
> +#include "block/throttle-groups.h"
> +#include "qemu/throttle-options.h"
> +#include "qapi/error.h"
> +
> +
> +static QemuOptsList throttle_opts = {
> +    .name = "throttle",
> +    .head = QTAILQ_HEAD_INITIALIZER(throttle_opts.head),
> +    .desc = {
> +        {
> +            .name = QEMU_OPT_IOPS_TOTAL,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "limit total I/O operations per second",
> +        },{
> +            .name = QEMU_OPT_IOPS_READ,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "limit read operations per second",
> +        },{
> +            .name = QEMU_OPT_IOPS_WRITE,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "limit write operations per second",
> +        },{
> +            .name = QEMU_OPT_BPS_TOTAL,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "limit total bytes per second",
> +        },{
> +            .name = QEMU_OPT_BPS_READ,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "limit read bytes per second",
> +        },{
> +            .name = QEMU_OPT_BPS_WRITE,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "limit write bytes per second",
> +        },{
> +            .name = QEMU_OPT_IOPS_TOTAL_MAX,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "I/O operations burst",
> +        },{
> +            .name = QEMU_OPT_IOPS_READ_MAX,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "I/O operations read burst",
> +        },{
> +            .name = QEMU_OPT_IOPS_WRITE_MAX,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "I/O operations write burst",
> +        },{
> +            .name = QEMU_OPT_BPS_TOTAL_MAX,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "total bytes burst",
> +        },{
> +            .name = QEMU_OPT_BPS_READ_MAX,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "total bytes read burst",
> +        },{
> +            .name = QEMU_OPT_BPS_WRITE_MAX,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "total bytes write burst",
> +        },{
> +            .name = QEMU_OPT_IOPS_TOTAL_MAX_LENGTH,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "length of the iopstotalmax burst period, in seconds",
> +        },{
> +            .name = QEMU_OPT_IOPS_READ_MAX_LENGTH,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "length of the iopsreadmax burst period, in seconds",
> +        },{
> +            .name = QEMU_OPT_IOPS_WRITE_MAX_LENGTH,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "length of the iopswritemax burst period, in seconds",
> +        },{
> +            .name = QEMU_OPT_BPS_TOTAL_MAX_LENGTH,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "length of the bpstotalmax burst period, in seconds",
> +        },{
> +            .name = QEMU_OPT_BPS_READ_MAX_LENGTH,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "length of the bpsreadmax burst period, in seconds",
> +        },{
> +            .name = QEMU_OPT_BPS_WRITE_MAX_LENGTH,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "length of the bpswritemax burst period, in seconds",
> +        },{
> +            .name = QEMU_OPT_IOPS_SIZE,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "when limiting by iops max size of an I/O in bytes",
> +        },
> +        {
> +            .name = QEMU_OPT_THROTTLE_GROUP_NAME,
> +            .type = QEMU_OPT_STRING,
> +            .help = "throttle group name",
> +        },
> +        { /* end of list */ }
> +    },
> +};
> +
> +/* Extract ThrottleConfig options. Assumes cfg is initialized and will be
> + * checked for validity.
> + */
> +
> +static void throttle_extract_options(QemuOpts *opts, ThrottleConfig *cfg)
> +{
> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_TOTAL)) {
> +        cfg->buckets[THROTTLE_BPS_TOTAL].avg =
> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_TOTAL, 0);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_READ)) {
> +        cfg->buckets[THROTTLE_BPS_READ].avg  =
> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_READ, 0);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_WRITE)) {
> +        cfg->buckets[THROTTLE_BPS_WRITE].avg =
> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_WRITE, 0);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_TOTAL)) {
> +        cfg->buckets[THROTTLE_OPS_TOTAL].avg =
> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_TOTAL, 0);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_READ)) {
> +        cfg->buckets[THROTTLE_OPS_READ].avg =
> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_READ, 0);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_WRITE)) {
> +        cfg->buckets[THROTTLE_OPS_WRITE].avg =
> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_WRITE, 0);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_TOTAL_MAX)) {
> +        cfg->buckets[THROTTLE_BPS_TOTAL].max =
> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_TOTAL_MAX, 0);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_READ_MAX)) {
> +        cfg->buckets[THROTTLE_BPS_READ].max  =
> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_READ_MAX, 0);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_WRITE_MAX)) {
> +        cfg->buckets[THROTTLE_BPS_WRITE].max =
> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_WRITE_MAX, 0);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_TOTAL_MAX)) {
> +        cfg->buckets[THROTTLE_OPS_TOTAL].max =
> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_TOTAL_MAX, 0);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_READ_MAX)) {
> +        cfg->buckets[THROTTLE_OPS_READ].max =
> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_READ_MAX, 0);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_WRITE_MAX)) {
> +        cfg->buckets[THROTTLE_OPS_WRITE].max =
> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_WRITE_MAX, 0);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_TOTAL_MAX_LENGTH)) {
> +        cfg->buckets[THROTTLE_BPS_TOTAL].burst_length =
> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_TOTAL_MAX_LENGTH, 1);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_READ_MAX_LENGTH)) {
> +        cfg->buckets[THROTTLE_BPS_READ].burst_length  =
> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_READ_MAX_LENGTH, 1);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_WRITE_MAX_LENGTH)) {
> +        cfg->buckets[THROTTLE_BPS_WRITE].burst_length =
> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_WRITE_MAX_LENGTH, 1);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_TOTAL_MAX_LENGTH)) {
> +        cfg->buckets[THROTTLE_OPS_TOTAL].burst_length =
> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_TOTAL_MAX_LENGTH, 1);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_READ_MAX_LENGTH)) {
> +        cfg->buckets[THROTTLE_OPS_READ].burst_length =
> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_READ_MAX_LENGTH, 1);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_WRITE_MAX_LENGTH)) {
> +        cfg->buckets[THROTTLE_OPS_WRITE].burst_length =
> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_WRITE_MAX_LENGTH, 1);
> +    }
> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_SIZE)) {
> +        cfg->op_size =
> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_SIZE, 0);
> +    }
> +}
> +
> +static int throttle_configure_tgm(BlockDriverState *bs, ThrottleGroupMember *tgm,
> +                                                    QDict *options, Error **errp)

Both lines exceed 80 characters. The indentation is off, too: QDict on
the second line should be aligned with BlockDriverState on the first
one.

> +{
> +    int ret = 0;
> +    ThrottleState *ts;
> +    ThrottleTimers *tt;
> +    ThrottleConfig cfg;
> +    QemuOpts *opts = NULL;
> +    const char *group_name = NULL;
> +    Error *local_err = NULL;
> +
> +    opts = qemu_opts_create(&throttle_opts, NULL, 0, &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return -EINVAL;
> +    }
> +    qemu_opts_absorb_qdict(opts, options, &local_err);
> +    if (local_err) {
> +        goto err;
> +    }
> +
> +    group_name = qemu_opt_get(opts, QEMU_OPT_THROTTLE_GROUP_NAME);
> +    if (!group_name) {
> +        group_name = bdrv_get_device_or_node_name(bs);

This is a legacy function, we should avoid adding new callers.

Worse yet, if the group isn't specified, we want to create a new
group internally just for this throttle node. But nothing stops the user
from creating a group that is named like the device or node name of
another throttle node, so we might end up attaching to an existing
throttle group instead!

I think throttle groups should be anonymous rather than having a default
name if they are created implicitly.

> +        if (!strlen(group_name)) {

More efficiently written as if (!*group_name), but it should go away
anyway when you address the above comment.

> +            error_setg(&local_err,
> +                       "A group name must be specified for this device.");

You can directly set errp and avoid the error_propagate() later.

> +            goto err;
> +        }
> +    }
> +
> +    tgm->aio_context = bdrv_get_aio_context(bs);
> +    /* Register membership to group with name group_name */
> +    throttle_group_register_tgm(tgm, group_name);
> +
> +    ts = tgm->throttle_state;
> +    /* Copy previous configuration */
> +    throttle_get_config(ts, &cfg);
> +
> +    /* Change limits if user has specified them */
> +    throttle_extract_options(opts, &cfg);
> +    if (!throttle_is_valid(&cfg, &local_err)) {

Here errp can directly be used, too.

You only need the &local_err idiom if you want to check whether an error
was set (because the caller might pass errp == NULL and then you can't
tell whether an error occurred or not).

> +        throttle_group_unregister_tgm(tgm);
> +        goto err;
> +    }
> +    tt = &tgm->throttle_timers;
> +    /* Update group configuration */
> +    throttle_config(ts, tt, &cfg);
> +
> +    qemu_co_queue_init(&tgm->throttled_reqs[0]);
> +    qemu_co_queue_init(&tgm->throttled_reqs[1]);
> +
> +    goto fin;

I prefer an explicit ret = 0 right here because ret tends to be used to
store the return value for all kinds of functions. In this specific
case, it's still 0 from the initialisation, but that's easy to break
accidentally in a future patch.

> +
> +err:
> +    error_propagate(errp, local_err);
> +    ret = -EINVAL;
> +fin:
> +    qemu_opts_del(opts);
> +    return ret;
> +}
> +
> +static int throttle_open(BlockDriverState *bs, QDict *options,
> +                            int flags, Error **errp)
> +{
> +    ThrottleGroupMember *tgm = bs->opaque;
> +    Error *local_err = NULL;
> +
> +    bs->open_flags = flags;
> +    bs->file = bdrv_open_child(NULL, options, "file",
> +                                    bs, &child_file, false, &local_err);
> +
> +    if (local_err) {

If you check bs->file == NULL instead, you can avoid local_err.

> +        error_propagate(errp, local_err);
> +        return -EINVAL;
> +    }
> +
> +    qdict_flatten(options);

Why do you need this? options should already be flattened.

> +    return throttle_configure_tgm(bs, tgm, options, errp);
> +}
> +
> +static void throttle_close(BlockDriverState *bs)
> +{
> +    ThrottleGroupMember *tgm = bs->opaque;
> +    bdrv_drained_begin(bs);
> +    throttle_group_unregister_tgm(tgm);
> +    bdrv_drained_end(bs);
> +    return;

This is a useless return statement. I think I've seen compilers warn
about it (and we use -Werror, so this would be a build failure on them).

bdrv_drained_begin/end should be unnecessary in .bdrv_close
implementations, we always drain all requests before calling the
callback.

> +}
> +
> +
> +static int64_t throttle_getlength(BlockDriverState *bs)
> +{
> +    return bdrv_getlength(bs->file->bs);
> +}
> +
> +
> +static int coroutine_fn throttle_co_preadv(BlockDriverState *bs, uint64_t offset,

This line exceeds 80 characters.

> +                                            uint64_t bytes, QEMUIOVector *qiov,
> +                                            int flags)

Indentation is off by one.

> +{
> +
> +    ThrottleGroupMember *tgm = bs->opaque;
> +    throttle_group_co_io_limits_intercept(tgm, bytes, false);
> +
> +    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
> +}
> +
> +static int coroutine_fn throttle_co_pwritev(BlockDriverState *bs, uint64_t offset,

Another long line.

> +                                            uint64_t bytes, QEMUIOVector *qiov,
> +                                            int flags)
> +{
> +    ThrottleGroupMember *tgm = bs->opaque;
> +    throttle_group_co_io_limits_intercept(tgm, bytes, true);
> +
> +    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
> +}

Kevin
Manos Pitsidianakis June 28, 2017, 3:22 p.m. UTC | #11
On Wed, Jun 28, 2017 at 04:40:12PM +0200, Kevin Wolf wrote:
>Am 23.06.2017 um 14:46 hat Manos Pitsidianakis geschrieben:
>> block/throttle.c uses existing I/O throttle infrastructure inside a
>> block filter driver. I/O operations are intercepted in the filter's
>> read/write coroutines, and referred to block/throttle-groups.c
>>
>> The driver can be used with the command
>> -drive driver=throttle,file.filename=foo.qcow2,iops-total=...
>> The configuration flags and semantics are identical to the hardcoded
>> throttling ones.
>>
>> Signed-off-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
>> ---
>>  block/Makefile.objs             |   1 +
>>  block/throttle.c                | 427 ++++++++++++++++++++++++++++++++++++++++
>>  include/qemu/throttle-options.h |  60 ++++--
>>  3 files changed, 469 insertions(+), 19 deletions(-)
>>  create mode 100644 block/throttle.c
>>
>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>> index ea955302c8..bb811a4d01 100644
>> --- a/block/Makefile.objs
>> +++ b/block/Makefile.objs
>> @@ -25,6 +25,7 @@ block-obj-y += accounting.o dirty-bitmap.o
>>  block-obj-y += write-threshold.o
>>  block-obj-y += backup.o
>>  block-obj-$(CONFIG_REPLICATION) += replication.o
>> +block-obj-y += throttle.o
>>
>>  block-obj-y += crypto.o
>>
>> diff --git a/block/throttle.c b/block/throttle.c
>> new file mode 100644
>> index 0000000000..0c17051161
>> --- /dev/null
>> +++ b/block/throttle.c
>> @@ -0,0 +1,427 @@
>> +/*
>> + * QEMU block throttling filter driver infrastructure
>> + *
>> + * Copyright (c) 2017 Manos Pitsidianakis
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License as
>> + * published by the Free Software Foundation; either version 2 or
>> + * (at your option) version 3 of the License.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> + * along with this program; if not, see <http://www.gnu.org/licenses/>.
>> + */
>
>Please consider using the LGPL. We're still hoping to turn the block
>layer into a library one day, and almost all code in it is licensed
>liberally (MIT or LGPL).
>
>> +#include "qemu/osdep.h"
>> +#include "block/throttle-groups.h"
>> +#include "qemu/throttle-options.h"
>> +#include "qapi/error.h"
>> +
>> +
>> +static QemuOptsList throttle_opts = {
>> +    .name = "throttle",
>> +    .head = QTAILQ_HEAD_INITIALIZER(throttle_opts.head),
>> +    .desc = {
>> +        {
>> +            .name = QEMU_OPT_IOPS_TOTAL,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit total I/O operations per second",
>> +        },{
>> +            .name = QEMU_OPT_IOPS_READ,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit read operations per second",
>> +        },{
>> +            .name = QEMU_OPT_IOPS_WRITE,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit write operations per second",
>> +        },{
>> +            .name = QEMU_OPT_BPS_TOTAL,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit total bytes per second",
>> +        },{
>> +            .name = QEMU_OPT_BPS_READ,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit read bytes per second",
>> +        },{
>> +            .name = QEMU_OPT_BPS_WRITE,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "limit write bytes per second",
>> +        },{
>> +            .name = QEMU_OPT_IOPS_TOTAL_MAX,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "I/O operations burst",
>> +        },{
>> +            .name = QEMU_OPT_IOPS_READ_MAX,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "I/O operations read burst",
>> +        },{
>> +            .name = QEMU_OPT_IOPS_WRITE_MAX,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "I/O operations write burst",
>> +        },{
>> +            .name = QEMU_OPT_BPS_TOTAL_MAX,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "total bytes burst",
>> +        },{
>> +            .name = QEMU_OPT_BPS_READ_MAX,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "total bytes read burst",
>> +        },{
>> +            .name = QEMU_OPT_BPS_WRITE_MAX,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "total bytes write burst",
>> +        },{
>> +            .name = QEMU_OPT_IOPS_TOTAL_MAX_LENGTH,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "length of the iopstotalmax burst period, in seconds",
>> +        },{
>> +            .name = QEMU_OPT_IOPS_READ_MAX_LENGTH,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "length of the iopsreadmax burst period, in seconds",
>> +        },{
>> +            .name = QEMU_OPT_IOPS_WRITE_MAX_LENGTH,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "length of the iopswritemax burst period, in seconds",
>> +        },{
>> +            .name = QEMU_OPT_BPS_TOTAL_MAX_LENGTH,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "length of the bpstotalmax burst period, in seconds",
>> +        },{
>> +            .name = QEMU_OPT_BPS_READ_MAX_LENGTH,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "length of the bpsreadmax burst period, in seconds",
>> +        },{
>> +            .name = QEMU_OPT_BPS_WRITE_MAX_LENGTH,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "length of the bpswritemax burst period, in seconds",
>> +        },{
>> +            .name = QEMU_OPT_IOPS_SIZE,
>> +            .type = QEMU_OPT_NUMBER,
>> +            .help = "when limiting by iops max size of an I/O in bytes",
>> +        },
>> +        {
>> +            .name = QEMU_OPT_THROTTLE_GROUP_NAME,
>> +            .type = QEMU_OPT_STRING,
>> +            .help = "throttle group name",
>> +        },
>> +        { /* end of list */ }
>> +    },
>> +};
>> +
>> +/* Extract ThrottleConfig options. Assumes cfg is initialized and will be
>> + * checked for validity.
>> + */
>> +
>> +static void throttle_extract_options(QemuOpts *opts, ThrottleConfig *cfg)
>> +{
>> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_TOTAL)) {
>> +        cfg->buckets[THROTTLE_BPS_TOTAL].avg =
>> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_TOTAL, 0);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_READ)) {
>> +        cfg->buckets[THROTTLE_BPS_READ].avg  =
>> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_READ, 0);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_WRITE)) {
>> +        cfg->buckets[THROTTLE_BPS_WRITE].avg =
>> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_WRITE, 0);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_TOTAL)) {
>> +        cfg->buckets[THROTTLE_OPS_TOTAL].avg =
>> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_TOTAL, 0);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_READ)) {
>> +        cfg->buckets[THROTTLE_OPS_READ].avg =
>> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_READ, 0);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_WRITE)) {
>> +        cfg->buckets[THROTTLE_OPS_WRITE].avg =
>> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_WRITE, 0);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_TOTAL_MAX)) {
>> +        cfg->buckets[THROTTLE_BPS_TOTAL].max =
>> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_TOTAL_MAX, 0);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_READ_MAX)) {
>> +        cfg->buckets[THROTTLE_BPS_READ].max  =
>> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_READ_MAX, 0);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_WRITE_MAX)) {
>> +        cfg->buckets[THROTTLE_BPS_WRITE].max =
>> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_WRITE_MAX, 0);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_TOTAL_MAX)) {
>> +        cfg->buckets[THROTTLE_OPS_TOTAL].max =
>> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_TOTAL_MAX, 0);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_READ_MAX)) {
>> +        cfg->buckets[THROTTLE_OPS_READ].max =
>> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_READ_MAX, 0);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_WRITE_MAX)) {
>> +        cfg->buckets[THROTTLE_OPS_WRITE].max =
>> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_WRITE_MAX, 0);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_TOTAL_MAX_LENGTH)) {
>> +        cfg->buckets[THROTTLE_BPS_TOTAL].burst_length =
>> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_TOTAL_MAX_LENGTH, 1);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_READ_MAX_LENGTH)) {
>> +        cfg->buckets[THROTTLE_BPS_READ].burst_length  =
>> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_READ_MAX_LENGTH, 1);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_BPS_WRITE_MAX_LENGTH)) {
>> +        cfg->buckets[THROTTLE_BPS_WRITE].burst_length =
>> +            qemu_opt_get_number(opts, QEMU_OPT_BPS_WRITE_MAX_LENGTH, 1);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_TOTAL_MAX_LENGTH)) {
>> +        cfg->buckets[THROTTLE_OPS_TOTAL].burst_length =
>> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_TOTAL_MAX_LENGTH, 1);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_READ_MAX_LENGTH)) {
>> +        cfg->buckets[THROTTLE_OPS_READ].burst_length =
>> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_READ_MAX_LENGTH, 1);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_WRITE_MAX_LENGTH)) {
>> +        cfg->buckets[THROTTLE_OPS_WRITE].burst_length =
>> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_WRITE_MAX_LENGTH, 1);
>> +    }
>> +    if (qemu_opt_get(opts, QEMU_OPT_IOPS_SIZE)) {
>> +        cfg->op_size =
>> +            qemu_opt_get_number(opts, QEMU_OPT_IOPS_SIZE, 0);
>> +    }
>> +}
>> +
>> +static int throttle_configure_tgm(BlockDriverState *bs, ThrottleGroupMember *tgm,
>> +                                                    QDict *options, Error **errp)
>
>Both lines exceed 80 characters. The indentation is off, too: QDict on
>the second line should be aligned with BlockDriverState on the first
>one.
>
>> +{
>> +    int ret = 0;
>> +    ThrottleState *ts;
>> +    ThrottleTimers *tt;
>> +    ThrottleConfig cfg;
>> +    QemuOpts *opts = NULL;
>> +    const char *group_name = NULL;
>> +    Error *local_err = NULL;
>> +
>> +    opts = qemu_opts_create(&throttle_opts, NULL, 0, &local_err);
>> +    if (local_err) {
>> +        error_propagate(errp, local_err);
>> +        return -EINVAL;
>> +    }
>> +    qemu_opts_absorb_qdict(opts, options, &local_err);
>> +    if (local_err) {
>> +        goto err;
>> +    }
>> +
>> +    group_name = qemu_opt_get(opts, QEMU_OPT_THROTTLE_GROUP_NAME);
>> +    if (!group_name) {
>> +        group_name = bdrv_get_device_or_node_name(bs);
>
>This is a legacy function, we should avoid adding new callers.
>
>Worse yet, if the group isn't specified, we want to create a new
>group internally just for this throttle node. But nothing stops the user
>from creating a group that is named like the device or node name of
>another throttle node, so we might end up attaching to an existing
>throttle group instead!
>
>I think throttle groups should be anonymous rather than having a default
>name if they are created implicitly.

Since we're moving groups to QOM we will need ids for each group. Can 
objects be anonymous?


>
>> +        if (!strlen(group_name)) {
>
>More efficiently written as if (!*group_name), but it should go away
>anyway when you address the above comment.
>
>> +            error_setg(&local_err,
>> +                       "A group name must be specified for this device.");
>
>You can directly set errp and avoid the error_propagate() later.
>
>> +            goto err;
>> +        }
>> +    }
>> +
>> +    tgm->aio_context = bdrv_get_aio_context(bs);
>> +    /* Register membership to group with name group_name */
>> +    throttle_group_register_tgm(tgm, group_name);
>> +
>> +    ts = tgm->throttle_state;
>> +    /* Copy previous configuration */
>> +    throttle_get_config(ts, &cfg);
>> +
>> +    /* Change limits if user has specified them */
>> +    throttle_extract_options(opts, &cfg);
>> +    if (!throttle_is_valid(&cfg, &local_err)) {
>
>Here errp can directly be used, too.
>
>You only need the &local_err idiom if you want to check whether an error
>was set (because the caller might pass errp == NULL and then you can't
>tell whether an error occurred or not).
>
>> +        throttle_group_unregister_tgm(tgm);
>> +        goto err;
>> +    }
>> +    tt = &tgm->throttle_timers;
>> +    /* Update group configuration */
>> +    throttle_config(ts, tt, &cfg);
>> +
>> +    qemu_co_queue_init(&tgm->throttled_reqs[0]);
>> +    qemu_co_queue_init(&tgm->throttled_reqs[1]);
>> +
>> +    goto fin;
>
>I prefer an explicit ret = 0 right here because ret tends to be used to
>store the return value for all kinds of functions. In this specific
>case, it's still 0 from the initialisation, but that's easy to break
>accidentally in a future patch.
>
>> +
>> +err:
>> +    error_propagate(errp, local_err);
>> +    ret = -EINVAL;
>> +fin:
>> +    qemu_opts_del(opts);
>> +    return ret;
>> +}
>> +
>> +static int throttle_open(BlockDriverState *bs, QDict *options,
>> +                            int flags, Error **errp)
>> +{
>> +    ThrottleGroupMember *tgm = bs->opaque;
>> +    Error *local_err = NULL;
>> +
>> +    bs->open_flags = flags;
>> +    bs->file = bdrv_open_child(NULL, options, "file",
>> +                                    bs, &child_file, false, &local_err);
>> +
>> +    if (local_err) {
>
>If you check bs->file == NULL instead, you can avoid local_err.
>
>> +        error_propagate(errp, local_err);
>> +        return -EINVAL;
>> +    }
>> +
>> +    qdict_flatten(options);
>
>Why do you need this? options should already be flattened.
>
>> +    return throttle_configure_tgm(bs, tgm, options, errp);
>> +}
>> +
>> +static void throttle_close(BlockDriverState *bs)
>> +{
>> +    ThrottleGroupMember *tgm = bs->opaque;
>> +    bdrv_drained_begin(bs);
>> +    throttle_group_unregister_tgm(tgm);
>> +    bdrv_drained_end(bs);
>> +    return;
>
>This is a useless return statement. I think I've seen compilers warn
>about it (and we use -Werror, so this would be a build failure on them).
>
>bdrv_drained_begin/end should be unnecessary in .bdrv_close
>implementations, we always drain all requests before calling the
>callback.
>
>> +}
>> +
>> +
>> +static int64_t throttle_getlength(BlockDriverState *bs)
>> +{
>> +    return bdrv_getlength(bs->file->bs);
>> +}
>> +
>> +
>> +static int coroutine_fn throttle_co_preadv(BlockDriverState *bs, uint64_t offset,
>
>This line exceeds 80 characters.
>
>> +                                            uint64_t bytes, QEMUIOVector *qiov,
>> +                                            int flags)
>
>Indentation is off by one.
>
>> +{
>> +
>> +    ThrottleGroupMember *tgm = bs->opaque;
>> +    throttle_group_co_io_limits_intercept(tgm, bytes, false);
>> +
>> +    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
>> +}
>> +
>> +static int coroutine_fn throttle_co_pwritev(BlockDriverState *bs, uint64_t offset,
>
>Another long line.
>
>> +                                            uint64_t bytes, QEMUIOVector *qiov,
>> +                                            int flags)
>> +{
>> +    ThrottleGroupMember *tgm = bs->opaque;
>> +    throttle_group_co_io_limits_intercept(tgm, bytes, true);
>> +
>> +    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
>> +}
>
>Kevin

Thanks a lot for the comments. (This was one of the first things I 
wrote, so this is why it is not very idiomatic)
Kevin Wolf June 28, 2017, 3:36 p.m. UTC | #12
Am 28.06.2017 um 17:22 hat Manos Pitsidianakis geschrieben:
> On Wed, Jun 28, 2017 at 04:40:12PM +0200, Kevin Wolf wrote:
> >Am 23.06.2017 um 14:46 hat Manos Pitsidianakis geschrieben:
> >>block/throttle.c uses existing I/O throttle infrastructure inside a
> >>block filter driver. I/O operations are intercepted in the filter's
> >>read/write coroutines, and referred to block/throttle-groups.c
> >>
> >>The driver can be used with the command
> >>-drive driver=throttle,file.filename=foo.qcow2,iops-total=...
> >>The configuration flags and semantics are identical to the hardcoded
> >>throttling ones.
> >>
> >>Signed-off-by: Manos Pitsidianakis <el13635@mail.ntua.gr>
> >>---
> >> block/Makefile.objs             |   1 +
> >> block/throttle.c                | 427 ++++++++++++++++++++++++++++++++++++++++
> >> include/qemu/throttle-options.h |  60 ++++--
> >> 3 files changed, 469 insertions(+), 19 deletions(-)
> >> create mode 100644 block/throttle.c
> >>
> >>diff --git a/block/Makefile.objs b/block/Makefile.objs
> >>index ea955302c8..bb811a4d01 100644
> >>--- a/block/Makefile.objs
> >>+++ b/block/Makefile.objs
> >>@@ -25,6 +25,7 @@ block-obj-y += accounting.o dirty-bitmap.o
> >> block-obj-y += write-threshold.o
> >> block-obj-y += backup.o
> >> block-obj-$(CONFIG_REPLICATION) += replication.o
> >>+block-obj-y += throttle.o
> >>
> >> block-obj-y += crypto.o
> >>
> >>diff --git a/block/throttle.c b/block/throttle.c
> >>new file mode 100644
> >>index 0000000000..0c17051161
> >>--- /dev/null
> >>+++ b/block/throttle.c
> >>@@ -0,0 +1,427 @@
> >>+/*
> >>+ * QEMU block throttling filter driver infrastructure
> >>+ *
> >>+ * Copyright (c) 2017 Manos Pitsidianakis
> >>+ *
> >>+ * This program is free software; you can redistribute it and/or
> >>+ * modify it under the terms of the GNU General Public License as
> >>+ * published by the Free Software Foundation; either version 2 or
> >>+ * (at your option) version 3 of the License.
> >>+ *
> >>+ * This program is distributed in the hope that it will be useful,
> >>+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> >>+ * GNU General Public License for more details.
> >>+ *
> >>+ * You should have received a copy of the GNU General Public License
> >>+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
> >>+ */
> >
> >Please consider using the LGPL. We're still hoping to turn the block
> >layer into a library one day, and almost all code in it is licensed
> >liberally (MIT or LGPL).
> >
> >>+#include "qemu/osdep.h"
> >>+#include "block/throttle-groups.h"
> >>+#include "qemu/throttle-options.h"
> >>+#include "qapi/error.h"
> >>+
> >>+
> >>+static QemuOptsList throttle_opts = {
> >>+    .name = "throttle",
> >>+    .head = QTAILQ_HEAD_INITIALIZER(throttle_opts.head),
> >>+    .desc = {
> >>+        {
> >>+            .name = QEMU_OPT_IOPS_TOTAL,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "limit total I/O operations per second",
> >>+        },{
> >>+            .name = QEMU_OPT_IOPS_READ,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "limit read operations per second",
> >>+        },{
> >>+            .name = QEMU_OPT_IOPS_WRITE,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "limit write operations per second",
> >>+        },{
> >>+            .name = QEMU_OPT_BPS_TOTAL,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "limit total bytes per second",
> >>+        },{
> >>+            .name = QEMU_OPT_BPS_READ,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "limit read bytes per second",
> >>+        },{
> >>+            .name = QEMU_OPT_BPS_WRITE,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "limit write bytes per second",
> >>+        },{
> >>+            .name = QEMU_OPT_IOPS_TOTAL_MAX,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "I/O operations burst",
> >>+        },{
> >>+            .name = QEMU_OPT_IOPS_READ_MAX,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "I/O operations read burst",
> >>+        },{
> >>+            .name = QEMU_OPT_IOPS_WRITE_MAX,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "I/O operations write burst",
> >>+        },{
> >>+            .name = QEMU_OPT_BPS_TOTAL_MAX,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "total bytes burst",
> >>+        },{
> >>+            .name = QEMU_OPT_BPS_READ_MAX,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "total bytes read burst",
> >>+        },{
> >>+            .name = QEMU_OPT_BPS_WRITE_MAX,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "total bytes write burst",
> >>+        },{
> >>+            .name = QEMU_OPT_IOPS_TOTAL_MAX_LENGTH,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "length of the iopstotalmax burst period, in seconds",
> >>+        },{
> >>+            .name = QEMU_OPT_IOPS_READ_MAX_LENGTH,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "length of the iopsreadmax burst period, in seconds",
> >>+        },{
> >>+            .name = QEMU_OPT_IOPS_WRITE_MAX_LENGTH,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "length of the iopswritemax burst period, in seconds",
> >>+        },{
> >>+            .name = QEMU_OPT_BPS_TOTAL_MAX_LENGTH,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "length of the bpstotalmax burst period, in seconds",
> >>+        },{
> >>+            .name = QEMU_OPT_BPS_READ_MAX_LENGTH,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "length of the bpsreadmax burst period, in seconds",
> >>+        },{
> >>+            .name = QEMU_OPT_BPS_WRITE_MAX_LENGTH,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "length of the bpswritemax burst period, in seconds",
> >>+        },{
> >>+            .name = QEMU_OPT_IOPS_SIZE,
> >>+            .type = QEMU_OPT_NUMBER,
> >>+            .help = "when limiting by iops max size of an I/O in bytes",
> >>+        },
> >>+        {
> >>+            .name = QEMU_OPT_THROTTLE_GROUP_NAME,
> >>+            .type = QEMU_OPT_STRING,
> >>+            .help = "throttle group name",
> >>+        },
> >>+        { /* end of list */ }
> >>+    },
> >>+};
> >>+
> >>+/* Extract ThrottleConfig options. Assumes cfg is initialized and will be
> >>+ * checked for validity.
> >>+ */
> >>+
> >>+static void throttle_extract_options(QemuOpts *opts, ThrottleConfig *cfg)
> >>+{
> >>+    if (qemu_opt_get(opts, QEMU_OPT_BPS_TOTAL)) {
> >>+        cfg->buckets[THROTTLE_BPS_TOTAL].avg =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_BPS_TOTAL, 0);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_BPS_READ)) {
> >>+        cfg->buckets[THROTTLE_BPS_READ].avg  =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_BPS_READ, 0);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_BPS_WRITE)) {
> >>+        cfg->buckets[THROTTLE_BPS_WRITE].avg =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_BPS_WRITE, 0);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_TOTAL)) {
> >>+        cfg->buckets[THROTTLE_OPS_TOTAL].avg =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_TOTAL, 0);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_READ)) {
> >>+        cfg->buckets[THROTTLE_OPS_READ].avg =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_READ, 0);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_WRITE)) {
> >>+        cfg->buckets[THROTTLE_OPS_WRITE].avg =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_WRITE, 0);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_BPS_TOTAL_MAX)) {
> >>+        cfg->buckets[THROTTLE_BPS_TOTAL].max =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_BPS_TOTAL_MAX, 0);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_BPS_READ_MAX)) {
> >>+        cfg->buckets[THROTTLE_BPS_READ].max  =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_BPS_READ_MAX, 0);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_BPS_WRITE_MAX)) {
> >>+        cfg->buckets[THROTTLE_BPS_WRITE].max =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_BPS_WRITE_MAX, 0);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_TOTAL_MAX)) {
> >>+        cfg->buckets[THROTTLE_OPS_TOTAL].max =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_TOTAL_MAX, 0);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_READ_MAX)) {
> >>+        cfg->buckets[THROTTLE_OPS_READ].max =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_READ_MAX, 0);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_WRITE_MAX)) {
> >>+        cfg->buckets[THROTTLE_OPS_WRITE].max =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_WRITE_MAX, 0);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_BPS_TOTAL_MAX_LENGTH)) {
> >>+        cfg->buckets[THROTTLE_BPS_TOTAL].burst_length =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_BPS_TOTAL_MAX_LENGTH, 1);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_BPS_READ_MAX_LENGTH)) {
> >>+        cfg->buckets[THROTTLE_BPS_READ].burst_length  =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_BPS_READ_MAX_LENGTH, 1);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_BPS_WRITE_MAX_LENGTH)) {
> >>+        cfg->buckets[THROTTLE_BPS_WRITE].burst_length =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_BPS_WRITE_MAX_LENGTH, 1);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_TOTAL_MAX_LENGTH)) {
> >>+        cfg->buckets[THROTTLE_OPS_TOTAL].burst_length =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_TOTAL_MAX_LENGTH, 1);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_READ_MAX_LENGTH)) {
> >>+        cfg->buckets[THROTTLE_OPS_READ].burst_length =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_READ_MAX_LENGTH, 1);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_WRITE_MAX_LENGTH)) {
> >>+        cfg->buckets[THROTTLE_OPS_WRITE].burst_length =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_WRITE_MAX_LENGTH, 1);
> >>+    }
> >>+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_SIZE)) {
> >>+        cfg->op_size =
> >>+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_SIZE, 0);
> >>+    }
> >>+}
> >>+
> >>+static int throttle_configure_tgm(BlockDriverState *bs, ThrottleGroupMember *tgm,
> >>+                                                    QDict *options, Error **errp)
> >
> >Both lines exceed 80 characters. The indentation is off, too: QDict on
> >the second line should be aligned with BlockDriverState on the first
> >one.
> >
> >>+{
> >>+    int ret = 0;
> >>+    ThrottleState *ts;
> >>+    ThrottleTimers *tt;
> >>+    ThrottleConfig cfg;
> >>+    QemuOpts *opts = NULL;
> >>+    const char *group_name = NULL;
> >>+    Error *local_err = NULL;
> >>+
> >>+    opts = qemu_opts_create(&throttle_opts, NULL, 0, &local_err);
> >>+    if (local_err) {
> >>+        error_propagate(errp, local_err);
> >>+        return -EINVAL;
> >>+    }
> >>+    qemu_opts_absorb_qdict(opts, options, &local_err);
> >>+    if (local_err) {
> >>+        goto err;
> >>+    }
> >>+
> >>+    group_name = qemu_opt_get(opts, QEMU_OPT_THROTTLE_GROUP_NAME);
> >>+    if (!group_name) {
> >>+        group_name = bdrv_get_device_or_node_name(bs);
> >
> >This is a legacy function, we should avoid adding new callers.
> >
> >Worse yet, if the group isn't specified, we want to create a new
> >group internally just for this throttle node. But nothing stops the user
> >from creating a group that is named like the device or node name of
> >another throttle node, so we might end up attaching to an existing
> >throttle group instead!
> >
> >I think throttle groups should be anonymous rather than having a default
> >name if they are created implicitly.
> 
> Since we're moving groups to QOM we will need ids for each group.
> Can objects be anonymous?

Hm, that's a good question. But object_new() doesn't take an ID, so I
think they can be anonymous.

Looking a bit closer, strcut Object doesn't even have a field for the
ID. It seems that what the ID really is is the name of a property in a
parent object that points to the new object. So as long as you don't
want to have another QOM object point to it, there is no such thing as
an ID.

Anyway, I followed the call chain from throttle_group_register_tgm() and
it ends in throttle_group_incref() where we simply have this:

    tg = g_new0(ThrottleGroup, 1);
    tg->name = g_strdup(name);

Shouldn't this be using something a little more QOMy now?

Kevin
Manos Pitsidianakis June 28, 2017, 3:50 p.m. UTC | #13
On Wed, Jun 28, 2017 at 05:36:54PM +0200, Kevin Wolf wrote:
>Am 28.06.2017 um 17:22 hat Manos Pitsidianakis geschrieben:
>> Since we're moving groups to QOM we will need ids for each group.
>> Can objects be anonymous?
>
>Hm, that's a good question. But object_new() doesn't take an ID, so I
>think they can be anonymous.
>
>Looking a bit closer, strcut Object doesn't even have a field for the
>ID. It seems that what the ID really is is the name of a property in a
>parent object that points to the new object. So as long as you don't
>want to have another QOM object point to it, there is no such thing as
>an ID.
>
>Anyway, I followed the call chain from throttle_group_register_tgm() and
>it ends in throttle_group_incref() where we simply have this:
>
>    tg = g_new0(ThrottleGroup, 1);
>    tg->name = g_strdup(name);
>
>Shouldn't this be using something a little more QOMy now?

Yes, like it was mentioned in the QOM patch's thread 
block/throttle-groups.c should use QOM internally. I will change this 
for the next revision and also look into anonymity.
diff mbox

Patch

diff --git a/block/Makefile.objs b/block/Makefile.objs
index ea955302c8..bb811a4d01 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -25,6 +25,7 @@  block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
 block-obj-y += backup.o
 block-obj-$(CONFIG_REPLICATION) += replication.o
+block-obj-y += throttle.o
 
 block-obj-y += crypto.o
 
diff --git a/block/throttle.c b/block/throttle.c
new file mode 100644
index 0000000000..0c17051161
--- /dev/null
+++ b/block/throttle.c
@@ -0,0 +1,427 @@ 
+/*
+ * QEMU block throttling filter driver infrastructure
+ *
+ * Copyright (c) 2017 Manos Pitsidianakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "block/throttle-groups.h"
+#include "qemu/throttle-options.h"
+#include "qapi/error.h"
+
+
+static QemuOptsList throttle_opts = {
+    .name = "throttle",
+    .head = QTAILQ_HEAD_INITIALIZER(throttle_opts.head),
+    .desc = {
+        {
+            .name = QEMU_OPT_IOPS_TOTAL,
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit total I/O operations per second",
+        },{
+            .name = QEMU_OPT_IOPS_READ,
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit read operations per second",
+        },{
+            .name = QEMU_OPT_IOPS_WRITE,
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit write operations per second",
+        },{
+            .name = QEMU_OPT_BPS_TOTAL,
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit total bytes per second",
+        },{
+            .name = QEMU_OPT_BPS_READ,
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit read bytes per second",
+        },{
+            .name = QEMU_OPT_BPS_WRITE,
+            .type = QEMU_OPT_NUMBER,
+            .help = "limit write bytes per second",
+        },{
+            .name = QEMU_OPT_IOPS_TOTAL_MAX,
+            .type = QEMU_OPT_NUMBER,
+            .help = "I/O operations burst",
+        },{
+            .name = QEMU_OPT_IOPS_READ_MAX,
+            .type = QEMU_OPT_NUMBER,
+            .help = "I/O operations read burst",
+        },{
+            .name = QEMU_OPT_IOPS_WRITE_MAX,
+            .type = QEMU_OPT_NUMBER,
+            .help = "I/O operations write burst",
+        },{
+            .name = QEMU_OPT_BPS_TOTAL_MAX,
+            .type = QEMU_OPT_NUMBER,
+            .help = "total bytes burst",
+        },{
+            .name = QEMU_OPT_BPS_READ_MAX,
+            .type = QEMU_OPT_NUMBER,
+            .help = "total bytes read burst",
+        },{
+            .name = QEMU_OPT_BPS_WRITE_MAX,
+            .type = QEMU_OPT_NUMBER,
+            .help = "total bytes write burst",
+        },{
+            .name = QEMU_OPT_IOPS_TOTAL_MAX_LENGTH,
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the iopstotalmax burst period, in seconds",
+        },{
+            .name = QEMU_OPT_IOPS_READ_MAX_LENGTH,
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the iopsreadmax burst period, in seconds",
+        },{
+            .name = QEMU_OPT_IOPS_WRITE_MAX_LENGTH,
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the iopswritemax burst period, in seconds",
+        },{
+            .name = QEMU_OPT_BPS_TOTAL_MAX_LENGTH,
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the bpstotalmax burst period, in seconds",
+        },{
+            .name = QEMU_OPT_BPS_READ_MAX_LENGTH,
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the bpsreadmax burst period, in seconds",
+        },{
+            .name = QEMU_OPT_BPS_WRITE_MAX_LENGTH,
+            .type = QEMU_OPT_NUMBER,
+            .help = "length of the bpswritemax burst period, in seconds",
+        },{
+            .name = QEMU_OPT_IOPS_SIZE,
+            .type = QEMU_OPT_NUMBER,
+            .help = "when limiting by iops max size of an I/O in bytes",
+        },
+        {
+            .name = QEMU_OPT_THROTTLE_GROUP_NAME,
+            .type = QEMU_OPT_STRING,
+            .help = "throttle group name",
+        },
+        { /* end of list */ }
+    },
+};
+
+/* Extract ThrottleConfig options. Assumes cfg is initialized and will be
+ * checked for validity.
+ */
+
+static void throttle_extract_options(QemuOpts *opts, ThrottleConfig *cfg)
+{
+    if (qemu_opt_get(opts, QEMU_OPT_BPS_TOTAL)) {
+        cfg->buckets[THROTTLE_BPS_TOTAL].avg =
+            qemu_opt_get_number(opts, QEMU_OPT_BPS_TOTAL, 0);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_BPS_READ)) {
+        cfg->buckets[THROTTLE_BPS_READ].avg  =
+            qemu_opt_get_number(opts, QEMU_OPT_BPS_READ, 0);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_BPS_WRITE)) {
+        cfg->buckets[THROTTLE_BPS_WRITE].avg =
+            qemu_opt_get_number(opts, QEMU_OPT_BPS_WRITE, 0);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_TOTAL)) {
+        cfg->buckets[THROTTLE_OPS_TOTAL].avg =
+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_TOTAL, 0);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_READ)) {
+        cfg->buckets[THROTTLE_OPS_READ].avg =
+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_READ, 0);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_WRITE)) {
+        cfg->buckets[THROTTLE_OPS_WRITE].avg =
+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_WRITE, 0);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_BPS_TOTAL_MAX)) {
+        cfg->buckets[THROTTLE_BPS_TOTAL].max =
+            qemu_opt_get_number(opts, QEMU_OPT_BPS_TOTAL_MAX, 0);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_BPS_READ_MAX)) {
+        cfg->buckets[THROTTLE_BPS_READ].max  =
+            qemu_opt_get_number(opts, QEMU_OPT_BPS_READ_MAX, 0);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_BPS_WRITE_MAX)) {
+        cfg->buckets[THROTTLE_BPS_WRITE].max =
+            qemu_opt_get_number(opts, QEMU_OPT_BPS_WRITE_MAX, 0);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_TOTAL_MAX)) {
+        cfg->buckets[THROTTLE_OPS_TOTAL].max =
+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_TOTAL_MAX, 0);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_READ_MAX)) {
+        cfg->buckets[THROTTLE_OPS_READ].max =
+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_READ_MAX, 0);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_WRITE_MAX)) {
+        cfg->buckets[THROTTLE_OPS_WRITE].max =
+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_WRITE_MAX, 0);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_BPS_TOTAL_MAX_LENGTH)) {
+        cfg->buckets[THROTTLE_BPS_TOTAL].burst_length =
+            qemu_opt_get_number(opts, QEMU_OPT_BPS_TOTAL_MAX_LENGTH, 1);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_BPS_READ_MAX_LENGTH)) {
+        cfg->buckets[THROTTLE_BPS_READ].burst_length  =
+            qemu_opt_get_number(opts, QEMU_OPT_BPS_READ_MAX_LENGTH, 1);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_BPS_WRITE_MAX_LENGTH)) {
+        cfg->buckets[THROTTLE_BPS_WRITE].burst_length =
+            qemu_opt_get_number(opts, QEMU_OPT_BPS_WRITE_MAX_LENGTH, 1);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_TOTAL_MAX_LENGTH)) {
+        cfg->buckets[THROTTLE_OPS_TOTAL].burst_length =
+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_TOTAL_MAX_LENGTH, 1);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_READ_MAX_LENGTH)) {
+        cfg->buckets[THROTTLE_OPS_READ].burst_length =
+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_READ_MAX_LENGTH, 1);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_WRITE_MAX_LENGTH)) {
+        cfg->buckets[THROTTLE_OPS_WRITE].burst_length =
+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_WRITE_MAX_LENGTH, 1);
+    }
+    if (qemu_opt_get(opts, QEMU_OPT_IOPS_SIZE)) {
+        cfg->op_size =
+            qemu_opt_get_number(opts, QEMU_OPT_IOPS_SIZE, 0);
+    }
+}
+
+static int throttle_configure_tgm(BlockDriverState *bs, ThrottleGroupMember *tgm,
+                                                    QDict *options, Error **errp)
+{
+    int ret = 0;
+    ThrottleState *ts;
+    ThrottleTimers *tt;
+    ThrottleConfig cfg;
+    QemuOpts *opts = NULL;
+    const char *group_name = NULL;
+    Error *local_err = NULL;
+
+    opts = qemu_opts_create(&throttle_opts, NULL, 0, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return -EINVAL;
+    }
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        goto err;
+    }
+
+    group_name = qemu_opt_get(opts, QEMU_OPT_THROTTLE_GROUP_NAME);
+    if (!group_name) {
+        group_name = bdrv_get_device_or_node_name(bs);
+        if (!strlen(group_name)) {
+            error_setg(&local_err,
+                       "A group name must be specified for this device.");
+            goto err;
+        }
+    }
+
+    tgm->aio_context = bdrv_get_aio_context(bs);
+    /* Register membership to group with name group_name */
+    throttle_group_register_tgm(tgm, group_name);
+
+    ts = tgm->throttle_state;
+    /* Copy previous configuration */
+    throttle_get_config(ts, &cfg);
+
+    /* Change limits if user has specified them */
+    throttle_extract_options(opts, &cfg);
+    if (!throttle_is_valid(&cfg, &local_err)) {
+        throttle_group_unregister_tgm(tgm);
+        goto err;
+    }
+    tt = &tgm->throttle_timers;
+    /* Update group configuration */
+    throttle_config(ts, tt, &cfg);
+
+    qemu_co_queue_init(&tgm->throttled_reqs[0]);
+    qemu_co_queue_init(&tgm->throttled_reqs[1]);
+
+    goto fin;
+
+err:
+    error_propagate(errp, local_err);
+    ret = -EINVAL;
+fin:
+    qemu_opts_del(opts);
+    return ret;
+}
+
+static int throttle_open(BlockDriverState *bs, QDict *options,
+                            int flags, Error **errp)
+{
+    ThrottleGroupMember *tgm = bs->opaque;
+    Error *local_err = NULL;
+
+    bs->open_flags = flags;
+    bs->file = bdrv_open_child(NULL, options, "file",
+                                    bs, &child_file, false, &local_err);
+
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return -EINVAL;
+    }
+
+    qdict_flatten(options);
+    return throttle_configure_tgm(bs, tgm, options, errp);
+}
+
+static void throttle_close(BlockDriverState *bs)
+{
+    ThrottleGroupMember *tgm = bs->opaque;
+    bdrv_drained_begin(bs);
+    throttle_group_unregister_tgm(tgm);
+    bdrv_drained_end(bs);
+    return;
+}
+
+
+static int64_t throttle_getlength(BlockDriverState *bs)
+{
+    return bdrv_getlength(bs->file->bs);
+}
+
+
+static int coroutine_fn throttle_co_preadv(BlockDriverState *bs, uint64_t offset,
+                                            uint64_t bytes, QEMUIOVector *qiov,
+                                            int flags)
+{
+
+    ThrottleGroupMember *tgm = bs->opaque;
+    throttle_group_co_io_limits_intercept(tgm, bytes, false);
+
+    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn throttle_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                                            uint64_t bytes, QEMUIOVector *qiov,
+                                            int flags)
+{
+    ThrottleGroupMember *tgm = bs->opaque;
+    throttle_group_co_io_limits_intercept(tgm, bytes, true);
+
+    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
+}
+
+static int coroutine_fn throttle_co_pwrite_zeroes(BlockDriverState *bs,
+        int64_t offset, int bytes, BdrvRequestFlags flags)
+{
+    ThrottleGroupMember *tgm = bs->opaque;
+    throttle_group_co_io_limits_intercept(tgm, bytes, true);
+
+    return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
+}
+
+static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs,
+        int64_t offset, int bytes)
+{
+    ThrottleGroupMember *tgm = bs->opaque;
+    throttle_group_co_io_limits_intercept(tgm, bytes, true);
+
+    return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
+}
+
+static int throttle_co_flush(BlockDriverState *bs)
+{
+    return bdrv_co_flush(bs->file->bs);
+}
+
+static void throttle_detach_aio_context(BlockDriverState *bs)
+{
+    ThrottleGroupMember *tgm = bs->opaque;
+    throttle_timers_detach_aio_context(&tgm->throttle_timers);
+}
+
+static void throttle_attach_aio_context(BlockDriverState *bs,
+                                    AioContext *new_context)
+{
+    ThrottleGroupMember *tgm = bs->opaque;
+    throttle_timers_attach_aio_context(&tgm->throttle_timers, new_context);
+}
+
+static int throttle_reopen_prepare(BDRVReopenState *reopen_state,
+                                      BlockReopenQueue *queue, Error **errp)
+{
+    ThrottleGroupMember *tgm = NULL;
+
+    assert(reopen_state != NULL);
+    assert(reopen_state->bs != NULL);
+
+    reopen_state->opaque = g_new0(ThrottleGroupMember, 1);
+    tgm = reopen_state->opaque;
+
+    return throttle_configure_tgm(reopen_state->bs, tgm, reopen_state->options,
+            errp);
+}
+
+static void throttle_reopen_commit(BDRVReopenState *state)
+{
+    ThrottleGroupMember *tgm = state->bs->opaque;
+    BlockDriverState *bs = state->bs;
+
+    bdrv_drained_begin(bs);
+    throttle_group_unregister_tgm(tgm);
+    g_free(state->bs->opaque);
+    state->bs->opaque = state->opaque;
+    bdrv_drained_end(bs);
+
+    state->opaque = NULL;
+}
+
+static void throttle_reopen_abort(BDRVReopenState *state)
+{
+    ThrottleGroupMember *tgm = state->opaque;
+    throttle_group_unregister_tgm(tgm);
+    g_free(state->opaque);
+    state->opaque = NULL;
+}
+
+static BlockDriver bdrv_throttle = {
+    .format_name                        =   "throttle",
+    .protocol_name                      =   "throttle",
+    .instance_size                      =   sizeof(ThrottleGroupMember),
+
+    .bdrv_file_open                     =   throttle_open,
+    .bdrv_close                         =   throttle_close,
+    .bdrv_co_flush                      =   throttle_co_flush,
+
+    .bdrv_child_perm                    =   bdrv_filter_default_perms,
+
+    .bdrv_getlength                     =   throttle_getlength,
+
+    .bdrv_co_preadv                     =   throttle_co_preadv,
+    .bdrv_co_pwritev                    =   throttle_co_pwritev,
+
+    .bdrv_co_pwrite_zeroes              =   throttle_co_pwrite_zeroes,
+    .bdrv_co_pdiscard                   =   throttle_co_pdiscard,
+
+    .bdrv_recurse_is_first_non_filter   =   bdrv_recurse_is_first_non_filter,
+
+    .bdrv_attach_aio_context            =   throttle_attach_aio_context,
+    .bdrv_detach_aio_context            =   throttle_detach_aio_context,
+
+    .bdrv_reopen_prepare                =   throttle_reopen_prepare,
+    .bdrv_reopen_commit                 =   throttle_reopen_commit,
+    .bdrv_reopen_abort                  =   throttle_reopen_abort,
+
+    .is_filter                          =   true,
+};
+
+static void bdrv_throttle_init(void)
+{
+    bdrv_register(&bdrv_throttle);
+}
+
+block_init(bdrv_throttle_init);
diff --git a/include/qemu/throttle-options.h b/include/qemu/throttle-options.h
index 3133d1ca40..508ee72625 100644
--- a/include/qemu/throttle-options.h
+++ b/include/qemu/throttle-options.h
@@ -10,81 +10,103 @@ 
 #ifndef THROTTLE_OPTIONS_H
 #define THROTTLE_OPTIONS_H
 
+#define QEMU_OPT_IOPS_TOTAL "iops-total"
+#define QEMU_OPT_IOPS_TOTAL_MAX "iops-total-max"
+#define QEMU_OPT_IOPS_TOTAL_MAX_LENGTH "iops-total-max-length"
+#define QEMU_OPT_IOPS_READ "iops-read"
+#define QEMU_OPT_IOPS_READ_MAX "iops-read-max"
+#define QEMU_OPT_IOPS_READ_MAX_LENGTH "iops-read-max-length"
+#define QEMU_OPT_IOPS_WRITE "iops-write"
+#define QEMU_OPT_IOPS_WRITE_MAX "iops-write-max"
+#define QEMU_OPT_IOPS_WRITE_MAX_LENGTH "iops-write-max-length"
+#define QEMU_OPT_BPS_TOTAL "bps-total"
+#define QEMU_OPT_BPS_TOTAL_MAX "bps-total-max"
+#define QEMU_OPT_BPS_TOTAL_MAX_LENGTH "bps-total-max-length"
+#define QEMU_OPT_BPS_READ "bps-read"
+#define QEMU_OPT_BPS_READ_MAX "bps-read-max"
+#define QEMU_OPT_BPS_READ_MAX_LENGTH "bps-read-max-length"
+#define QEMU_OPT_BPS_WRITE "bps-write"
+#define QEMU_OPT_BPS_WRITE_MAX "bps-write-max"
+#define QEMU_OPT_BPS_WRITE_MAX_LENGTH "bps-write-max-length"
+#define QEMU_OPT_IOPS_SIZE "iops-size"
+#define QEMU_OPT_THROTTLE_GROUP_NAME "throttling-group"
+
+#define THROTTLE_OPT_PREFIX "throttling."
 #define THROTTLE_OPTS \
           { \
-            .name = "throttling.iops-total",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL,\
             .type = QEMU_OPT_NUMBER,\
             .help = "limit total I/O operations per second",\
         },{ \
-            .name = "throttling.iops-read",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ,\
             .type = QEMU_OPT_NUMBER,\
             .help = "limit read operations per second",\
         },{ \
-            .name = "throttling.iops-write",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE,\
             .type = QEMU_OPT_NUMBER,\
             .help = "limit write operations per second",\
         },{ \
-            .name = "throttling.bps-total",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL,\
             .type = QEMU_OPT_NUMBER,\
             .help = "limit total bytes per second",\
         },{ \
-            .name = "throttling.bps-read",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ,\
             .type = QEMU_OPT_NUMBER,\
             .help = "limit read bytes per second",\
         },{ \
-            .name = "throttling.bps-write",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE,\
             .type = QEMU_OPT_NUMBER,\
             .help = "limit write bytes per second",\
         },{ \
-            .name = "throttling.iops-total-max",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL_MAX,\
             .type = QEMU_OPT_NUMBER,\
             .help = "I/O operations burst",\
         },{ \
-            .name = "throttling.iops-read-max",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ_MAX,\
             .type = QEMU_OPT_NUMBER,\
             .help = "I/O operations read burst",\
         },{ \
-            .name = "throttling.iops-write-max",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE_MAX,\
             .type = QEMU_OPT_NUMBER,\
             .help = "I/O operations write burst",\
         },{ \
-            .name = "throttling.bps-total-max",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL_MAX,\
             .type = QEMU_OPT_NUMBER,\
             .help = "total bytes burst",\
         },{ \
-            .name = "throttling.bps-read-max",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ_MAX,\
             .type = QEMU_OPT_NUMBER,\
             .help = "total bytes read burst",\
         },{ \
-            .name = "throttling.bps-write-max",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE_MAX,\
             .type = QEMU_OPT_NUMBER,\
             .help = "total bytes write burst",\
         },{ \
-            .name = "throttling.iops-total-max-length",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_TOTAL_MAX_LENGTH,\
             .type = QEMU_OPT_NUMBER,\
             .help = "length of the iops-total-max burst period, in seconds",\
         },{ \
-            .name = "throttling.iops-read-max-length",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_READ_MAX_LENGTH,\
             .type = QEMU_OPT_NUMBER,\
             .help = "length of the iops-read-max burst period, in seconds",\
         },{ \
-            .name = "throttling.iops-write-max-length",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_WRITE_MAX_LENGTH,\
             .type = QEMU_OPT_NUMBER,\
             .help = "length of the iops-write-max burst period, in seconds",\
         },{ \
-            .name = "throttling.bps-total-max-length",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_TOTAL_MAX_LENGTH,\
             .type = QEMU_OPT_NUMBER,\
             .help = "length of the bps-total-max burst period, in seconds",\
         },{ \
-            .name = "throttling.bps-read-max-length",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_READ_MAX_LENGTH,\
             .type = QEMU_OPT_NUMBER,\
             .help = "length of the bps-read-max burst period, in seconds",\
         },{ \
-            .name = "throttling.bps-write-max-length",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_BPS_WRITE_MAX_LENGTH,\
             .type = QEMU_OPT_NUMBER,\
             .help = "length of the bps-write-max burst period, in seconds",\
         },{ \
-            .name = "throttling.iops-size",\
+            .name = THROTTLE_OPT_PREFIX QEMU_OPT_IOPS_SIZE,\
             .type = QEMU_OPT_NUMBER,\
             .help = "when limiting by iops max size of an I/O in bytes",\
         }