diff mbox

nvme: Support image creation

Message ID 20180613074655.16289-1-famz@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Fam Zheng June 13, 2018, 7:46 a.m. UTC
Similar to the host_device's implementation, we check the requested
length against the namespace size.

Truncation is necessary to make qcow2 creation work.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/nvme.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

Comments

Kevin Wolf June 13, 2018, 8:06 a.m. UTC | #1
Am 13.06.2018 um 09:46 hat Fam Zheng geschrieben:
> Similar to the host_device's implementation, we check the requested
> length against the namespace size.
> 
> Truncation is necessary to make qcow2 creation work.
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>

> +static int coroutine_fn nvme_co_create_opts(const char *filename, QemuOpts *opts,
> +                                            Error **errp)
> +{
> +    int ret = 0;
> +    BlockDriverState *bs = NULL;
> +    int64_t size;
> +
> +    if (strncmp(filename, "nvme://", strlen("nvme://"))) {
> +        error_setg(errp, "Invalid filename (must start with \"nvme://\")");
> +        ret = -EINVAL;
> +        goto out;
> +    }
> +
> +    bs = bdrv_open(filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
> +    if (!bs) {
> +        ret = -EINVAL;
> +        goto out;
> +    }
> +
> +    size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
> +
> +    if (size < 0 || bdrv_getlength(bs) < size) {
> +        error_setg(errp, "Invalid image size");
> +        ret = -EINVAL;
> +    }
> +
> +out:
> +    bdrv_unref(bs);
> +    /* Hold breath for a little while before letting image format creation run.
> +     * The problem is when testing with Intel P3700, the controller doesn't
> +     * like the immediate open after close, as a result, nvme_init() will fail.
> +     * This works around that.
> +     **/
> +    g_usleep(2000000);

This suggests that nbd_init() is buggy.

If we need to sleep here (for two whole seconds?!), I'm sure there are
other cases that would have to sleep as well. So even if we can't find a
solution other than sleeping - which feels horribly wrong - the sleep
should probably be in nvme_init() rather than here.

What kind of error are you running into without the sleep?

Kevin
Fam Zheng June 13, 2018, 9:17 a.m. UTC | #2
On Wed, 06/13 10:06, Kevin Wolf wrote:
> Am 13.06.2018 um 09:46 hat Fam Zheng geschrieben:
> > Similar to the host_device's implementation, we check the requested
> > length against the namespace size.
> > 
> > Truncation is necessary to make qcow2 creation work.
> > 
> > Signed-off-by: Fam Zheng <famz@redhat.com>
> 
> > +static int coroutine_fn nvme_co_create_opts(const char *filename, QemuOpts *opts,
> > +                                            Error **errp)
> > +{
> > +    int ret = 0;
> > +    BlockDriverState *bs = NULL;
> > +    int64_t size;
> > +
> > +    if (strncmp(filename, "nvme://", strlen("nvme://"))) {
> > +        error_setg(errp, "Invalid filename (must start with \"nvme://\")");
> > +        ret = -EINVAL;
> > +        goto out;
> > +    }
> > +
> > +    bs = bdrv_open(filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
> > +    if (!bs) {
> > +        ret = -EINVAL;
> > +        goto out;
> > +    }
> > +
> > +    size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
> > +
> > +    if (size < 0 || bdrv_getlength(bs) < size) {
> > +        error_setg(errp, "Invalid image size");
> > +        ret = -EINVAL;
> > +    }
> > +
> > +out:
> > +    bdrv_unref(bs);
> > +    /* Hold breath for a little while before letting image format creation run.
> > +     * The problem is when testing with Intel P3700, the controller doesn't
> > +     * like the immediate open after close, as a result, nvme_init() will fail.
> > +     * This works around that.
> > +     **/
> > +    g_usleep(2000000);
> 
> This suggests that nbd_init() is buggy.
> 
> If we need to sleep here (for two whole seconds?!), I'm sure there are
> other cases that would have to sleep as well. So even if we can't find a
> solution other than sleeping - which feels horribly wrong - the sleep
> should probably be in nvme_init() rather than here.
> 
> What kind of error are you running into without the sleep?

The error would be the "Timeout while waiting for device to start..." in
nvme_init(), which happens after waiting for 20 seconds after setting the
device's enable bit.

If we put a sleep in nvme_init() it will hurt the blockdev-add command and QEMU
launch badly, whereas being here it hurts x-blockdev-create, qemu-img create,
etc.  Both are really bad, but the first is worse.

BTW nvme_init() already has to spin for a few seconds waiting for bit 0 in this
loop:

    while (!(le32_to_cpu(s->regs->csts) & 0x1)) {
        if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
            error_setg(errp, "Timeout while waiting for device to start (%"
                             PRId64 " ms)",
                       timeout_ms);
            ret = -ETIMEDOUT;
            goto fail_queue;
        }
    }

(we should probably insert a g_usleep(100) in the loop body, but it doesn't make
nvme_init return any faster.)

My wild guess is that the controller doesn't respond to the setting of CC.EN
(device enable) bit correctly when it is still internally busy due after a
previous reset in nvme_close(). But perhaps it probably the cleanup in
nvme_close() which is lame in the first place, compared to the complex de-init
procedure we have in vfio_pci_reset(), and that unbinding the device from Linux
nvme.ko coincidentally takes exactly 2 seconds when nvme_close() takes near 0.
What this suggests is that cleanly shutting down the device does take about two
seconds, but with the simplistic nvme_close(), the work is left asynchrously to
the controller or kernel.  I'll see if I can figure out what is missing.

Fam
diff mbox

Patch

diff --git a/block/nvme.c b/block/nvme.c
index 6f71122bf5..ec3d18e790 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -21,6 +21,7 @@ 
 #include "qemu/option.h"
 #include "qemu/vfio-helpers.h"
 #include "block/block_int.h"
+#include "sysemu/block-backend.h"
 #include "trace.h"
 
 #include "block/nvme.h"
@@ -1154,6 +1155,73 @@  static void nvme_unregister_buf(BlockDriverState *bs, void *host)
     qemu_vfio_dma_unmap(s->vfio, host);
 }
 
+static QemuOptsList nvme_create_opts = {
+    .name = "nvme-create-opts",
+    .head = QTAILQ_HEAD_INITIALIZER(nvme_create_opts.head),
+    .desc = {
+        {
+            .name = BLOCK_OPT_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "Virtual disk size"
+        },
+        { /* end of list */ }
+    }
+};
+
+static int coroutine_fn nvme_co_create_opts(const char *filename, QemuOpts *opts,
+                                            Error **errp)
+{
+    int ret = 0;
+    BlockDriverState *bs = NULL;
+    int64_t size;
+
+    if (strncmp(filename, "nvme://", strlen("nvme://"))) {
+        error_setg(errp, "Invalid filename (must start with \"nvme://\")");
+        ret = -EINVAL;
+        goto out;
+    }
+
+    bs = bdrv_open(filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+    if (!bs) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+
+    if (size < 0 || bdrv_getlength(bs) < size) {
+        error_setg(errp, "Invalid image size");
+        ret = -EINVAL;
+    }
+
+out:
+    bdrv_unref(bs);
+    /* Hold breath for a little while before letting image format creation run.
+     * The problem is when testing with Intel P3700, the controller doesn't
+     * like the immediate open after close, as a result, nvme_init() will fail.
+     * This works around that.
+     **/
+    g_usleep(2000000);
+    return ret;
+}
+
+static int nvme_truncate(BlockDriverState *bs, int64_t offset,
+                         PreallocMode prealloc, Error **errp)
+{
+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Preallocation mode '%s' unsupported",
+                   PreallocMode_str(prealloc));
+        return -ENOTSUP;
+    }
+
+    if (offset > nvme_getlength(bs)) {
+        error_setg(errp, "Cannot grow device files");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
 static BlockDriver bdrv_nvme = {
     .format_name              = "nvme",
     .protocol_name            = "nvme",
@@ -1180,6 +1248,10 @@  static BlockDriver bdrv_nvme = {
 
     .bdrv_register_buf        = nvme_register_buf,
     .bdrv_unregister_buf      = nvme_unregister_buf,
+
+    .create_opts              = &nvme_create_opts,
+    .bdrv_co_create_opts      = nvme_co_create_opts,
+    .bdrv_truncate            = nvme_truncate,
 };
 
 static void bdrv_nvme_init(void)