Message ID | 20190124172323.230296-3-sgarzare@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | virtio-blk: add DISCARD and WRITE ZEROES features | expand |
On 2019-01-24 18:23, Stefano Garzarella wrote: > If the WRITE_ZEROES feature is enabled, we check this > command in the test_basic(). > > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > --- > tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 63 insertions(+) > > diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c > index 04c608764b..8cabbcb85a 100644 > --- a/tests/virtio-blk-test.c > +++ b/tests/virtio-blk-test.c > @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, > > guest_free(alloc, req_addr); > > + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { > + struct virtio_blk_discard_write_zeroes *dwz_hdr; > + void *expected; > + > + /* > + * WRITE_ZEROES request on the same sector of previous test where > + * we wrote "TEST". > + */ > + req.type = VIRTIO_BLK_T_WRITE_ZEROES; > + req.data = g_malloc0(512); Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or something similar here, to see whether zeroes or 0xaa is written? > + dwz_hdr = (struct virtio_blk_discard_write_zeroes *)req.data; > + dwz_hdr->sector = 0; > + dwz_hdr->num_sectors = 1; > + dwz_hdr->flags = 0; > + > + req_addr = virtio_blk_request(alloc, dev, &req, 512); > + > + g_free(req.data); > + > + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); > + qvirtqueue_add(vq, req_addr + 16, 512, false, true); > + qvirtqueue_add(vq, req_addr + 528, 1, true, false); > + > + qvirtqueue_kick(dev, vq, free_head); > + > + qvirtio_wait_used_elem(dev, vq, free_head, NULL, > + QVIRTIO_BLK_TIMEOUT_US); > + status = readb(req_addr + 528); > + g_assert_cmpint(status, ==, 0); > + > + guest_free(alloc, req_addr); > + > + /* Read request to check if the sector contains all zeroes */ > + req.type = VIRTIO_BLK_T_IN; > + req.ioprio = 1; > + req.sector = 0; > + req.data = g_malloc0(512); > + > + req_addr = virtio_blk_request(alloc, dev, &req, 512); > + > + g_free(req.data); > + > + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); > + qvirtqueue_add(vq, req_addr + 16, 512, true, true); > + qvirtqueue_add(vq, req_addr + 528, 1, true, false); > + > + qvirtqueue_kick(dev, vq, free_head); > + > + qvirtio_wait_used_elem(dev, vq, free_head, NULL, > + QVIRTIO_BLK_TIMEOUT_US); > + status = readb(req_addr + 528); > + g_assert_cmpint(status, ==, 0); > + > + data = g_malloc(512); > + expected = g_malloc0(512); > + memread(req_addr + 16, data, 512); > + g_assert_cmpmem(data, 512, expected, 512); > + g_free(expected); > + g_free(data); > + > + guest_free(alloc, req_addr); > + } > + > if (features & (1u << VIRTIO_F_ANY_LAYOUT)) { > /* Write and read with 2 descriptor layout */ > /* Write request */ >
On 2019-01-25 07:01, Thomas Huth wrote: > On 2019-01-24 18:23, Stefano Garzarella wrote: >> If the WRITE_ZEROES feature is enabled, we check this >> command in the test_basic(). >> >> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> >> --- >> tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 63 insertions(+) >> >> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c >> index 04c608764b..8cabbcb85a 100644 >> --- a/tests/virtio-blk-test.c >> +++ b/tests/virtio-blk-test.c >> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, >> >> guest_free(alloc, req_addr); >> >> + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { >> + struct virtio_blk_discard_write_zeroes *dwz_hdr; >> + void *expected; >> + >> + /* >> + * WRITE_ZEROES request on the same sector of previous test where >> + * we wrote "TEST". >> + */ >> + req.type = VIRTIO_BLK_T_WRITE_ZEROES; >> + req.data = g_malloc0(512); > > Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or > something similar here, to see whether zeroes or 0xaa is written? Ah, never mind, I thought req.data would be a sector buffer here, but looking at the lines below, it apparently is something different. Why do you allocate 512 bytes here? I'd rather expect g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and then you could also use a local "struct virtio_blk_discard_write_zeroes dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely? >> + dwz_hdr = (struct virtio_blk_discard_write_zeroes *)req.data; >> + dwz_hdr->sector = 0; >> + dwz_hdr->num_sectors = 1; >> + dwz_hdr->flags = 0; >> + >> + req_addr = virtio_blk_request(alloc, dev, &req, 512); >> + >> + g_free(req.data); >> + >> + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); >> + qvirtqueue_add(vq, req_addr + 16, 512, false, true); >> + qvirtqueue_add(vq, req_addr + 528, 1, true, false); >> + >> + qvirtqueue_kick(dev, vq, free_head); >> + >> + qvirtio_wait_used_elem(dev, vq, free_head, NULL, >> + QVIRTIO_BLK_TIMEOUT_US); >> + status = readb(req_addr + 528); >> + g_assert_cmpint(status, ==, 0); >> + >> + guest_free(alloc, req_addr); >> + >> + /* Read request to check if the sector contains all zeroes */ >> + req.type = VIRTIO_BLK_T_IN; >> + req.ioprio = 1; >> + req.sector = 0; >> + req.data = g_malloc0(512); >> + >> + req_addr = virtio_blk_request(alloc, dev, &req, 512); >> + >> + g_free(req.data); >> + >> + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); >> + qvirtqueue_add(vq, req_addr + 16, 512, true, true); >> + qvirtqueue_add(vq, req_addr + 528, 1, true, false); >> + >> + qvirtqueue_kick(dev, vq, free_head); >> + >> + qvirtio_wait_used_elem(dev, vq, free_head, NULL, >> + QVIRTIO_BLK_TIMEOUT_US); >> + status = readb(req_addr + 528); >> + g_assert_cmpint(status, ==, 0); >> + >> + data = g_malloc(512); >> + expected = g_malloc0(512); >> + memread(req_addr + 16, data, 512); >> + g_assert_cmpmem(data, 512, expected, 512); >> + g_free(expected); >> + g_free(data); >> + >> + guest_free(alloc, req_addr); >> + } >> + >> if (features & (1u << VIRTIO_F_ANY_LAYOUT)) { >> /* Write and read with 2 descriptor layout */ >> /* Write request */ >> > >
On 2019-01-25 07:01, Thomas Huth wrote: > On 2019-01-24 18:23, Stefano Garzarella wrote: >> If the WRITE_ZEROES feature is enabled, we check this >> command in the test_basic(). >> >> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> >> --- >> tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 63 insertions(+) >> >> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c >> index 04c608764b..8cabbcb85a 100644 >> --- a/tests/virtio-blk-test.c >> +++ b/tests/virtio-blk-test.c >> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, >> >> guest_free(alloc, req_addr); >> >> + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { >> + struct virtio_blk_discard_write_zeroes *dwz_hdr; >> + void *expected; >> + >> + /* >> + * WRITE_ZEROES request on the same sector of previous test where >> + * we wrote "TEST". >> + */ >> + req.type = VIRTIO_BLK_T_WRITE_ZEROES; >> + req.data = g_malloc0(512); > > Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or > something similar here, to see whether zeroes or 0xaa is written? Ah, never mind, I thought req.data would be a sector buffer here, but looking at the lines below, it apparently is something different. Why do you allocate 512 bytes here? I'd rather expect g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and then you could also use a local "struct virtio_blk_discard_write_zeroes dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely? >> + dwz_hdr = (struct virtio_blk_discard_write_zeroes *)req.data; >> + dwz_hdr->sector = 0; >> + dwz_hdr->num_sectors = 1; >> + dwz_hdr->flags = 0; >> + >> + req_addr = virtio_blk_request(alloc, dev, &req, 512); >> + >> + g_free(req.data); >> + >> + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); >> + qvirtqueue_add(vq, req_addr + 16, 512, false, true); >> + qvirtqueue_add(vq, req_addr + 528, 1, true, false); >> + >> + qvirtqueue_kick(dev, vq, free_head); >> + >> + qvirtio_wait_used_elem(dev, vq, free_head, NULL, >> + QVIRTIO_BLK_TIMEOUT_US); >> + status = readb(req_addr + 528); >> + g_assert_cmpint(status, ==, 0); >> + >> + guest_free(alloc, req_addr); >> + >> + /* Read request to check if the sector contains all zeroes */ >> + req.type = VIRTIO_BLK_T_IN; >> + req.ioprio = 1; >> + req.sector = 0; >> + req.data = g_malloc0(512); >> + >> + req_addr = virtio_blk_request(alloc, dev, &req, 512); >> + >> + g_free(req.data); >> + >> + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); >> + qvirtqueue_add(vq, req_addr + 16, 512, true, true); >> + qvirtqueue_add(vq, req_addr + 528, 1, true, false); >> + >> + qvirtqueue_kick(dev, vq, free_head); >> + >> + qvirtio_wait_used_elem(dev, vq, free_head, NULL, >> + QVIRTIO_BLK_TIMEOUT_US); >> + status = readb(req_addr + 528); >> + g_assert_cmpint(status, ==, 0); >> + >> + data = g_malloc(512); >> + expected = g_malloc0(512); >> + memread(req_addr + 16, data, 512); >> + g_assert_cmpmem(data, 512, expected, 512); >> + g_free(expected); >> + g_free(data); >> + >> + guest_free(alloc, req_addr); >> + } >> + >> if (features & (1u << VIRTIO_F_ANY_LAYOUT)) { >> /* Write and read with 2 descriptor layout */ >> /* Write request */ >> > >
On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote: > On 2019-01-25 07:01, Thomas Huth wrote: > > On 2019-01-24 18:23, Stefano Garzarella wrote: > >> If the WRITE_ZEROES feature is enabled, we check this > >> command in the test_basic(). > >> > >> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > >> --- > >> tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++ > >> 1 file changed, 63 insertions(+) > >> > >> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c > >> index 04c608764b..8cabbcb85a 100644 > >> --- a/tests/virtio-blk-test.c > >> +++ b/tests/virtio-blk-test.c > >> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, > >> > >> guest_free(alloc, req_addr); > >> > >> + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { > >> + struct virtio_blk_discard_write_zeroes *dwz_hdr; > >> + void *expected; > >> + > >> + /* > >> + * WRITE_ZEROES request on the same sector of previous test where > >> + * we wrote "TEST". > >> + */ > >> + req.type = VIRTIO_BLK_T_WRITE_ZEROES; > >> + req.data = g_malloc0(512); > > > > Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or > > something similar here, to see whether zeroes or 0xaa is written? > > Ah, never mind, I thought req.data would be a sector buffer here, but > looking at the lines below, it apparently is something different. > > Why do you allocate 512 bytes here? I'd rather expect > g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and > then you could also use a local "struct virtio_blk_discard_write_zeroes > dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely? > Hi Thomas, it was my initial implementation, but on the first test I discovered that virtio_blk_request() has an assert on the data_size and it requires a multiple of 512 bytes. Then I looked at the virtio-spec #1, and it seems that data should be multiple of 512 bytes also if it contains the struct virtio_blk_discard_write_zeroes. (I'm not sure) Anyway I tried to allocate only the space for that struct, commented the assert and the test works well. How do you suggest to proceed? [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944) Thanks, Stefano
On 2019-01-25 09:16, Stefano Garzarella wrote: > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote: >> On 2019-01-25 07:01, Thomas Huth wrote: >>> On 2019-01-24 18:23, Stefano Garzarella wrote: >>>> If the WRITE_ZEROES feature is enabled, we check this >>>> command in the test_basic(). >>>> >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> >>>> --- >>>> tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++ >>>> 1 file changed, 63 insertions(+) >>>> >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c >>>> index 04c608764b..8cabbcb85a 100644 >>>> --- a/tests/virtio-blk-test.c >>>> +++ b/tests/virtio-blk-test.c >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, >>>> >>>> guest_free(alloc, req_addr); >>>> >>>> + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { >>>> + struct virtio_blk_discard_write_zeroes *dwz_hdr; >>>> + void *expected; >>>> + >>>> + /* >>>> + * WRITE_ZEROES request on the same sector of previous test where >>>> + * we wrote "TEST". >>>> + */ >>>> + req.type = VIRTIO_BLK_T_WRITE_ZEROES; >>>> + req.data = g_malloc0(512); >>> >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or >>> something similar here, to see whether zeroes or 0xaa is written? >> >> Ah, never mind, I thought req.data would be a sector buffer here, but >> looking at the lines below, it apparently is something different. >> >> Why do you allocate 512 bytes here? I'd rather expect >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and >> then you could also use a local "struct virtio_blk_discard_write_zeroes >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely? >> > > Hi Thomas, > it was my initial implementation, but on the first test I discovered > that virtio_blk_request() has an assert on the data_size and it requires > a multiple of 512 bytes. > Then I looked at the virtio-spec #1, and it seems that data should be > multiple of 512 bytes also if it contains the struct > virtio_blk_discard_write_zeroes. (I'm not sure) > > Anyway I tried to allocate only the space for that struct, commented the > assert and the test works well. > > How do you suggest to proceed? Wow, that's a tough question. Looking at the virtio spec, I agree with you, it looks like struct virtio_blk_discard_write_zeroes should be padded to 512 bytes here. But when I look at the Linux sources (drivers/block/virtio_blk.c), I fail to see that they are doing the padding there (but maybe I'm just too blind). Looking at the QEMU sources, it seems like it can deal with both and always sets the status right behind the last byte: req->in = (void *)in_iov[in_num - 1].iov_base + in_iov[in_num - 1].iov_len - sizeof(struct virtio_blk_inhdr); Anyway, I think the virtio spec should be clearer here to avoid bad implementations in the future, so maybe Changpeng or Michael could update the spec here a little bit? Thomas > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944) > > > Thanks, > Stefano >
> -----Original Message----- > From: Thomas Huth [mailto:thuth@redhat.com] > Sent: Friday, January 25, 2019 4:49 PM > To: Stefano Garzarella <sgarzare@redhat.com>; Michael S. Tsirkin > <mst@redhat.com>; Liu, Changpeng <changpeng.liu@intel.com> > Cc: qemu-devel@nongnu.org; Laurent Vivier <lvivier@redhat.com>; Kevin Wolf > <kwolf@redhat.com>; qemu-block@nongnu.org; Max Reitz > <mreitz@redhat.com>; Stefan Hajnoczi <stefanha@redhat.com>; Paolo Bonzini > <pbonzini@redhat.com> > Subject: Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for > WRITE_ZEROES command > > On 2019-01-25 09:16, Stefano Garzarella wrote: > > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote: > >> On 2019-01-25 07:01, Thomas Huth wrote: > >>> On 2019-01-24 18:23, Stefano Garzarella wrote: > >>>> If the WRITE_ZEROES feature is enabled, we check this > >>>> command in the test_basic(). > >>>> > >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > >>>> --- > >>>> tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++ > >>>> 1 file changed, 63 insertions(+) > >>>> > >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c > >>>> index 04c608764b..8cabbcb85a 100644 > >>>> --- a/tests/virtio-blk-test.c > >>>> +++ b/tests/virtio-blk-test.c > >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, > QGuestAllocator *alloc, > >>>> > >>>> guest_free(alloc, req_addr); > >>>> > >>>> + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { > >>>> + struct virtio_blk_discard_write_zeroes *dwz_hdr; > >>>> + void *expected; > >>>> + > >>>> + /* > >>>> + * WRITE_ZEROES request on the same sector of previous test where > >>>> + * we wrote "TEST". > >>>> + */ > >>>> + req.type = VIRTIO_BLK_T_WRITE_ZEROES; > >>>> + req.data = g_malloc0(512); > >>> > >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or > >>> something similar here, to see whether zeroes or 0xaa is written? > >> > >> Ah, never mind, I thought req.data would be a sector buffer here, but > >> looking at the lines below, it apparently is something different. > >> > >> Why do you allocate 512 bytes here? I'd rather expect > >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and > >> then you could also use a local "struct virtio_blk_discard_write_zeroes > >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely? > >> > > > > Hi Thomas, > > it was my initial implementation, but on the first test I discovered > > that virtio_blk_request() has an assert on the data_size and it requires > > a multiple of 512 bytes. > > Then I looked at the virtio-spec #1, and it seems that data should be > > multiple of 512 bytes also if it contains the struct > > virtio_blk_discard_write_zeroes. (I'm not sure) > > > > Anyway I tried to allocate only the space for that struct, commented the > > assert and the test works well. > > > > How do you suggest to proceed? > > Wow, that's a tough question. Looking at the virtio spec, I agree with > you, it looks like struct virtio_blk_discard_write_zeroes should be > padded to 512 bytes here. But when I look at the Linux sources > (drivers/block/virtio_blk.c), I fail to see that they are doing the > padding there (but maybe I'm just too blind). > > Looking at the QEMU sources, it seems like it can deal with both and > always sets the status right behind the last byte: > > req->in = (void *)in_iov[in_num - 1].iov_base > + in_iov[in_num - 1].iov_len > - sizeof(struct virtio_blk_inhdr); > > Anyway, I think the virtio spec should be clearer here to avoid bad > implementations in the future, so maybe Changpeng or Michael could > update the spec here a little bit? The data for Discard and Write Zeroes commands are struct virtio_blk_discard_write_zeroes aligned, that means you can pass 16 bytes aligned data, based on the segments number supported, this is also aligned with NVMe specification and the SCSI specification. > > Thomas > > > > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944) > > > > > > Thanks, > > Stefano > >
On 2019-01-25 12:58, Liu, Changpeng wrote: > > >> -----Original Message----- >> From: Thomas Huth [mailto:thuth@redhat.com] >> Sent: Friday, January 25, 2019 4:49 PM >> To: Stefano Garzarella <sgarzare@redhat.com>; Michael S. Tsirkin >> <mst@redhat.com>; Liu, Changpeng <changpeng.liu@intel.com> >> Cc: qemu-devel@nongnu.org; Laurent Vivier <lvivier@redhat.com>; Kevin Wolf >> <kwolf@redhat.com>; qemu-block@nongnu.org; Max Reitz >> <mreitz@redhat.com>; Stefan Hajnoczi <stefanha@redhat.com>; Paolo Bonzini >> <pbonzini@redhat.com> >> Subject: Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for >> WRITE_ZEROES command >> >> On 2019-01-25 09:16, Stefano Garzarella wrote: >>> On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote: >>>> On 2019-01-25 07:01, Thomas Huth wrote: >>>>> On 2019-01-24 18:23, Stefano Garzarella wrote: >>>>>> If the WRITE_ZEROES feature is enabled, we check this >>>>>> command in the test_basic(). >>>>>> >>>>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> >>>>>> --- >>>>>> tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++ >>>>>> 1 file changed, 63 insertions(+) >>>>>> >>>>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c >>>>>> index 04c608764b..8cabbcb85a 100644 >>>>>> --- a/tests/virtio-blk-test.c >>>>>> +++ b/tests/virtio-blk-test.c >>>>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, >> QGuestAllocator *alloc, >>>>>> >>>>>> guest_free(alloc, req_addr); >>>>>> >>>>>> + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { >>>>>> + struct virtio_blk_discard_write_zeroes *dwz_hdr; >>>>>> + void *expected; >>>>>> + >>>>>> + /* >>>>>> + * WRITE_ZEROES request on the same sector of previous test where >>>>>> + * we wrote "TEST". >>>>>> + */ >>>>>> + req.type = VIRTIO_BLK_T_WRITE_ZEROES; >>>>>> + req.data = g_malloc0(512); >>>>> >>>>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or >>>>> something similar here, to see whether zeroes or 0xaa is written? >>>> >>>> Ah, never mind, I thought req.data would be a sector buffer here, but >>>> looking at the lines below, it apparently is something different. >>>> >>>> Why do you allocate 512 bytes here? I'd rather expect >>>> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and >>>> then you could also use a local "struct virtio_blk_discard_write_zeroes >>>> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely? >>>> >>> >>> Hi Thomas, >>> it was my initial implementation, but on the first test I discovered >>> that virtio_blk_request() has an assert on the data_size and it requires >>> a multiple of 512 bytes. >>> Then I looked at the virtio-spec #1, and it seems that data should be >>> multiple of 512 bytes also if it contains the struct >>> virtio_blk_discard_write_zeroes. (I'm not sure) >>> >>> Anyway I tried to allocate only the space for that struct, commented the >>> assert and the test works well. >>> >>> How do you suggest to proceed? >> >> Wow, that's a tough question. Looking at the virtio spec, I agree with >> you, it looks like struct virtio_blk_discard_write_zeroes should be >> padded to 512 bytes here. But when I look at the Linux sources >> (drivers/block/virtio_blk.c), I fail to see that they are doing the >> padding there (but maybe I'm just too blind). >> >> Looking at the QEMU sources, it seems like it can deal with both and >> always sets the status right behind the last byte: >> >> req->in = (void *)in_iov[in_num - 1].iov_base >> + in_iov[in_num - 1].iov_len >> - sizeof(struct virtio_blk_inhdr); >> >> Anyway, I think the virtio spec should be clearer here to avoid bad >> implementations in the future, so maybe Changpeng or Michael could >> update the spec here a little bit? > The data for Discard and Write Zeroes commands are struct virtio_blk_discard_write_zeroes > aligned, that means you can pass 16 bytes aligned data, based on the segments number supported, > this is also aligned with NVMe specification and the SCSI specification. Ok, thanks, so the "u8 data[][512];" is wrong in the virtio spec in this case? See: https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944 At least this should be mentioned in the description of the data field, I think. Thomas
On Fri, Jan 25, 2019 at 09:49:03AM +0100, Thomas Huth wrote: > On 2019-01-25 09:16, Stefano Garzarella wrote: > > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote: > >> On 2019-01-25 07:01, Thomas Huth wrote: > >>> On 2019-01-24 18:23, Stefano Garzarella wrote: > >>>> If the WRITE_ZEROES feature is enabled, we check this > >>>> command in the test_basic(). > >>>> > >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > >>>> --- > >>>> tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++ > >>>> 1 file changed, 63 insertions(+) > >>>> > >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c > >>>> index 04c608764b..8cabbcb85a 100644 > >>>> --- a/tests/virtio-blk-test.c > >>>> +++ b/tests/virtio-blk-test.c > >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, > >>>> > >>>> guest_free(alloc, req_addr); > >>>> > >>>> + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { > >>>> + struct virtio_blk_discard_write_zeroes *dwz_hdr; > >>>> + void *expected; > >>>> + > >>>> + /* > >>>> + * WRITE_ZEROES request on the same sector of previous test where > >>>> + * we wrote "TEST". > >>>> + */ > >>>> + req.type = VIRTIO_BLK_T_WRITE_ZEROES; > >>>> + req.data = g_malloc0(512); > >>> > >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or > >>> something similar here, to see whether zeroes or 0xaa is written? > >> > >> Ah, never mind, I thought req.data would be a sector buffer here, but > >> looking at the lines below, it apparently is something different. > >> > >> Why do you allocate 512 bytes here? I'd rather expect > >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and > >> then you could also use a local "struct virtio_blk_discard_write_zeroes > >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely? > >> > > > > Hi Thomas, > > it was my initial implementation, but on the first test I discovered > > that virtio_blk_request() has an assert on the data_size and it requires > > a multiple of 512 bytes. > > Then I looked at the virtio-spec #1, and it seems that data should be > > multiple of 512 bytes also if it contains the struct > > virtio_blk_discard_write_zeroes. (I'm not sure) > > > > Anyway I tried to allocate only the space for that struct, commented the > > assert and the test works well. > > > > How do you suggest to proceed? > > Wow, that's a tough question. Looking at the virtio spec, I agree with > you, it looks like struct virtio_blk_discard_write_zeroes should be > padded to 512 bytes here. But when I look at the Linux sources > (drivers/block/virtio_blk.c), I fail to see that they are doing the > padding there (but maybe I'm just too blind). The only evidence for "pad to 512 bytes" interpretation that I see in the spec is "u8 data[][512];". Or have I missed something more explicit? Based on the Linux guest driver code and the lack of more evidence in the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes for discard/write zero requests. > Looking at the QEMU sources, it seems like it can deal with both and > always sets the status right behind the last byte: > > req->in = (void *)in_iov[in_num - 1].iov_base > + in_iov[in_num - 1].iov_len > - sizeof(struct virtio_blk_inhdr); > > Anyway, I think the virtio spec should be clearer here to avoid bad > implementations in the future, so maybe Changpeng or Michael could > update the spec here a little bit? Yep, good point. VIRTIO 1.1 is available for public comments, so I've CCed the list. Stefan > Thomas > > > > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944) > > > > > > Thanks, > > Stefano > > > >
On Fri, Jan 25, 2019 at 09:49:03AM +0100, Thomas Huth wrote: > On 2019-01-25 09:16, Stefano Garzarella wrote: > > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote: > >> On 2019-01-25 07:01, Thomas Huth wrote: > >>> On 2019-01-24 18:23, Stefano Garzarella wrote: > >>>> If the WRITE_ZEROES feature is enabled, we check this > >>>> command in the test_basic(). > >>>> > >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > >>>> --- > >>>> tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++ > >>>> 1 file changed, 63 insertions(+) > >>>> > >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c > >>>> index 04c608764b..8cabbcb85a 100644 > >>>> --- a/tests/virtio-blk-test.c > >>>> +++ b/tests/virtio-blk-test.c > >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, > >>>> > >>>> guest_free(alloc, req_addr); > >>>> > >>>> + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { > >>>> + struct virtio_blk_discard_write_zeroes *dwz_hdr; > >>>> + void *expected; > >>>> + > >>>> + /* > >>>> + * WRITE_ZEROES request on the same sector of previous test where > >>>> + * we wrote "TEST". > >>>> + */ > >>>> + req.type = VIRTIO_BLK_T_WRITE_ZEROES; > >>>> + req.data = g_malloc0(512); > >>> > >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or > >>> something similar here, to see whether zeroes or 0xaa is written? > >> > >> Ah, never mind, I thought req.data would be a sector buffer here, but > >> looking at the lines below, it apparently is something different. > >> > >> Why do you allocate 512 bytes here? I'd rather expect > >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and > >> then you could also use a local "struct virtio_blk_discard_write_zeroes > >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely? > >> > > > > Hi Thomas, > > it was my initial implementation, but on the first test I discovered > > that virtio_blk_request() has an assert on the data_size and it requires > > a multiple of 512 bytes. > > Then I looked at the virtio-spec #1, and it seems that data should be > > multiple of 512 bytes also if it contains the struct > > virtio_blk_discard_write_zeroes. (I'm not sure) > > > > Anyway I tried to allocate only the space for that struct, commented the > > assert and the test works well. > > > > How do you suggest to proceed? > > Wow, that's a tough question. Looking at the virtio spec, I agree with > you, it looks like struct virtio_blk_discard_write_zeroes should be > padded to 512 bytes here. But when I look at the Linux sources > (drivers/block/virtio_blk.c), I fail to see that they are doing the > padding there (but maybe I'm just too blind). > > Looking at the QEMU sources, it seems like it can deal with both and > always sets the status right behind the last byte: > > req->in = (void *)in_iov[in_num - 1].iov_base > + in_iov[in_num - 1].iov_len > - sizeof(struct virtio_blk_inhdr); > > Anyway, I think the virtio spec should be clearer here to avoid bad > implementations in the future, so maybe Changpeng or Michael could > update the spec here a little bit? > > Thomas I agree the spec makes it look like data is padded to 512 bytes. I'm happy to write it up but let's decide what it is we want to support. Arbitrary padding the way qemu does it? Or packed format? > > > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944) > > > > > > Thanks, > > Stefano > >
On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote: > On Fri, Jan 25, 2019 at 09:49:03AM +0100, Thomas Huth wrote: > > On 2019-01-25 09:16, Stefano Garzarella wrote: > > > On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote: > > >> On 2019-01-25 07:01, Thomas Huth wrote: > > >>> On 2019-01-24 18:23, Stefano Garzarella wrote: > > >>>> If the WRITE_ZEROES feature is enabled, we check this > > >>>> command in the test_basic(). > > >>>> > > >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > > >>>> --- > > >>>> tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++ > > >>>> 1 file changed, 63 insertions(+) > > >>>> > > >>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c > > >>>> index 04c608764b..8cabbcb85a 100644 > > >>>> --- a/tests/virtio-blk-test.c > > >>>> +++ b/tests/virtio-blk-test.c > > >>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, > > >>>> > > >>>> guest_free(alloc, req_addr); > > >>>> > > >>>> + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { > > >>>> + struct virtio_blk_discard_write_zeroes *dwz_hdr; > > >>>> + void *expected; > > >>>> + > > >>>> + /* > > >>>> + * WRITE_ZEROES request on the same sector of previous test where > > >>>> + * we wrote "TEST". > > >>>> + */ > > >>>> + req.type = VIRTIO_BLK_T_WRITE_ZEROES; > > >>>> + req.data = g_malloc0(512); > > >>> > > >>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or > > >>> something similar here, to see whether zeroes or 0xaa is written? > > >> > > >> Ah, never mind, I thought req.data would be a sector buffer here, but > > >> looking at the lines below, it apparently is something different. > > >> > > >> Why do you allocate 512 bytes here? I'd rather expect > > >> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and > > >> then you could also use a local "struct virtio_blk_discard_write_zeroes > > >> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely? > > >> > > > > > > Hi Thomas, > > > it was my initial implementation, but on the first test I discovered > > > that virtio_blk_request() has an assert on the data_size and it requires > > > a multiple of 512 bytes. > > > Then I looked at the virtio-spec #1, and it seems that data should be > > > multiple of 512 bytes also if it contains the struct > > > virtio_blk_discard_write_zeroes. (I'm not sure) > > > > > > Anyway I tried to allocate only the space for that struct, commented the > > > assert and the test works well. > > > > > > How do you suggest to proceed? > > > > Wow, that's a tough question. Looking at the virtio spec, I agree with > > you, it looks like struct virtio_blk_discard_write_zeroes should be > > padded to 512 bytes here. But when I look at the Linux sources > > (drivers/block/virtio_blk.c), I fail to see that they are doing the > > padding there (but maybe I'm just too blind). > > The only evidence for "pad to 512 bytes" interpretation that I see in > the spec is "u8 data[][512];". Or have I missed something more > explicit? That's it. But if it doesn't mean "any number of 512 byte chunks" then what does it mean? > Based on the Linux guest driver code and the lack of more evidence in > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes > for discard/write zero requests. OK. Must devices support such padding? > > Looking at the QEMU sources, it seems like it can deal with both and > > always sets the status right behind the last byte: > > > > req->in = (void *)in_iov[in_num - 1].iov_base > > + in_iov[in_num - 1].iov_len > > - sizeof(struct virtio_blk_inhdr); > > > > Anyway, I think the virtio spec should be clearer here to avoid bad > > implementations in the future, so maybe Changpeng or Michael could > > update the spec here a little bit? > > Yep, good point. VIRTIO 1.1 is available for public comments, so I've > CCed the list. > > Stefan Thanks! Care creating a github issue? And maybe proposing a spec patch. > > Thomas > > > > > > > [1](https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944) > > > > > > > > > Thanks, > > > Stefano > > > > > > >
On Fri, Jan 25, 2019 at 01:48:26PM +0100, Thomas Huth wrote: > On 2019-01-25 12:58, Liu, Changpeng wrote: > > > > > >> -----Original Message----- > >> From: Thomas Huth [mailto:thuth@redhat.com] > >> Sent: Friday, January 25, 2019 4:49 PM > >> To: Stefano Garzarella <sgarzare@redhat.com>; Michael S. Tsirkin > >> <mst@redhat.com>; Liu, Changpeng <changpeng.liu@intel.com> > >> Cc: qemu-devel@nongnu.org; Laurent Vivier <lvivier@redhat.com>; Kevin Wolf > >> <kwolf@redhat.com>; qemu-block@nongnu.org; Max Reitz > >> <mreitz@redhat.com>; Stefan Hajnoczi <stefanha@redhat.com>; Paolo Bonzini > >> <pbonzini@redhat.com> > >> Subject: Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for > >> WRITE_ZEROES command > >> > >> On 2019-01-25 09:16, Stefano Garzarella wrote: > >>> On Fri, Jan 25, 2019 at 07:07:35AM +0100, Thomas Huth wrote: > >>>> On 2019-01-25 07:01, Thomas Huth wrote: > >>>>> On 2019-01-24 18:23, Stefano Garzarella wrote: > >>>>>> If the WRITE_ZEROES feature is enabled, we check this > >>>>>> command in the test_basic(). > >>>>>> > >>>>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > >>>>>> --- > >>>>>> tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++ > >>>>>> 1 file changed, 63 insertions(+) > >>>>>> > >>>>>> diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c > >>>>>> index 04c608764b..8cabbcb85a 100644 > >>>>>> --- a/tests/virtio-blk-test.c > >>>>>> +++ b/tests/virtio-blk-test.c > >>>>>> @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, > >> QGuestAllocator *alloc, > >>>>>> > >>>>>> guest_free(alloc, req_addr); > >>>>>> > >>>>>> + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { > >>>>>> + struct virtio_blk_discard_write_zeroes *dwz_hdr; > >>>>>> + void *expected; > >>>>>> + > >>>>>> + /* > >>>>>> + * WRITE_ZEROES request on the same sector of previous test where > >>>>>> + * we wrote "TEST". > >>>>>> + */ > >>>>>> + req.type = VIRTIO_BLK_T_WRITE_ZEROES; > >>>>>> + req.data = g_malloc0(512); > >>>>> > >>>>> Wouldn't it be more interesting to do a memset(req.data, 0xaa, 512) or > >>>>> something similar here, to see whether zeroes or 0xaa is written? > >>>> > >>>> Ah, never mind, I thought req.data would be a sector buffer here, but > >>>> looking at the lines below, it apparently is something different. > >>>> > >>>> Why do you allocate 512 bytes here? I'd rather expect > >>>> g_malloc0(sizeof(struct virtio_blk_discard_write_zeroes)) here. ... and > >>>> then you could also use a local "struct virtio_blk_discard_write_zeroes > >>>> dwz_hdr" variable instead of a pointer, and drop the g_malloc0() completely? > >>>> > >>> > >>> Hi Thomas, > >>> it was my initial implementation, but on the first test I discovered > >>> that virtio_blk_request() has an assert on the data_size and it requires > >>> a multiple of 512 bytes. > >>> Then I looked at the virtio-spec #1, and it seems that data should be > >>> multiple of 512 bytes also if it contains the struct > >>> virtio_blk_discard_write_zeroes. (I'm not sure) > >>> > >>> Anyway I tried to allocate only the space for that struct, commented the > >>> assert and the test works well. > >>> > >>> How do you suggest to proceed? > >> > >> Wow, that's a tough question. Looking at the virtio spec, I agree with > >> you, it looks like struct virtio_blk_discard_write_zeroes should be > >> padded to 512 bytes here. But when I look at the Linux sources > >> (drivers/block/virtio_blk.c), I fail to see that they are doing the > >> padding there (but maybe I'm just too blind). > >> > >> Looking at the QEMU sources, it seems like it can deal with both and > >> always sets the status right behind the last byte: > >> > >> req->in = (void *)in_iov[in_num - 1].iov_base > >> + in_iov[in_num - 1].iov_len > >> - sizeof(struct virtio_blk_inhdr); > >> > >> Anyway, I think the virtio spec should be clearer here to avoid bad > >> implementations in the future, so maybe Changpeng or Michael could > >> update the spec here a little bit? > > The data for Discard and Write Zeroes commands are struct virtio_blk_discard_write_zeroes > > aligned, that means you can pass 16 bytes aligned data, based on the segments number supported, > > this is also aligned with NVMe specification and the SCSI specification. > > Ok, thanks, so the "u8 data[][512];" is wrong in the virtio spec in this > case? See: > > https://github.com/oasis-tcs/virtio-spec/blob/master/content.tex#L3944 > > At least this should be mentioned in the description of the data field, > I think. > > Thomas OK. Is it a multiple of 512 for all other operations?
On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote: > On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote: > > Based on the Linux guest driver code and the lack of more evidence in > > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes > > for discard/write zero requests. > > OK. Must devices support such padding? I see no reason to require padding. Host APIs and physical storage controllers do not require it. > > > Looking at the QEMU sources, it seems like it can deal with both and > > > always sets the status right behind the last byte: > > > > > > req->in = (void *)in_iov[in_num - 1].iov_base > > > + in_iov[in_num - 1].iov_len > > > - sizeof(struct virtio_blk_inhdr); > > > > > > Anyway, I think the virtio spec should be clearer here to avoid bad > > > implementations in the future, so maybe Changpeng or Michael could > > > update the spec here a little bit? > > > > Yep, good point. VIRTIO 1.1 is available for public comments, so I've > > CCed the list. > > > > Stefan > > Thanks! > Care creating a github issue? And maybe proposing a spec patch. Okay. I will do this on Wednesday 30th of January. Stefan
On Sun, Jan 27, 2019 at 12:57:20PM +0000, Stefan Hajnoczi wrote: > On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote: > > On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote: > > > Based on the Linux guest driver code and the lack of more evidence in > > > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes > > > for discard/write zero requests. > > > > OK. Must devices support such padding? > > I see no reason to require padding. Host APIs and physical storage > controllers do not require it. I'm not talking about requiring padding I'm talking about supporting padding on device side. One reason would be ease of extending the spec for guest. E.g. imagine adding more fields to the command. With suppport for padding guest simply adds fields to its structure, and the unused ones are ignored by device. Without support for padding you need two versions of the structure. Another reason would be compatibility. For better or worse the spec does make it look like 512 padding is present. This patch was merged very recently so I agree it's not too late to clarify it that it's not required, but it seems a bit too much to disallow that completely. Let's assume for a minute a device turns up that already implemented the 512 byte assumption? If padding is allowed then we can write a driver that works with both devices. > > > > Looking at the QEMU sources, it seems like it can deal with both and > > > > always sets the status right behind the last byte: > > > > > > > > req->in = (void *)in_iov[in_num - 1].iov_base > > > > + in_iov[in_num - 1].iov_len > > > > - sizeof(struct virtio_blk_inhdr); > > > > > > > > Anyway, I think the virtio spec should be clearer here to avoid bad > > > > implementations in the future, so maybe Changpeng or Michael could > > > > update the spec here a little bit? > > > > > > Yep, good point. VIRTIO 1.1 is available for public comments, so I've > > > CCed the list. > > > > > > Stefan > > > > Thanks! > > Care creating a github issue? And maybe proposing a spec patch. > > Okay. I will do this on Wednesday 30th of January. > > Stefan
On Sun, Jan 27, 2019 at 01:42:03PM -0500, Michael S. Tsirkin wrote: > On Sun, Jan 27, 2019 at 12:57:20PM +0000, Stefan Hajnoczi wrote: > > On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote: > > > On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote: > > > > Based on the Linux guest driver code and the lack of more evidence in > > > > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes > > > > for discard/write zero requests. > > > > > > OK. Must devices support such padding? > > > > I see no reason to require padding. Host APIs and physical storage > > controllers do not require it. > > I'm not talking about requiring padding I'm talking about > supporting padding on device side. > > One reason would be ease of extending the spec for guest. > > E.g. imagine adding more fields to the command. > With suppport for padding guest simply adds fields > to its structure, and the unused ones are ignored > by device. Without support for padding you need two > versions of the structure. The simplest way is to say each struct virtio_blk_discard_write_zeroes (currently 16 bytes long) is padded to 512 bytes, but this wastes a lot of memory. Stefano and I looked at the status of multiple segment discard/write zeroes in Linux: only the NVMe driver supports multiple segments. Even SCSI sd doesn't support multiple segments. Userspace APIs do not offer a way for applications to submit multiple segments in a single call. Only the block I/O scheduler creates requests with multiple segments. SPDK's virtio-blk driver and vhost-user-blk device backend also only support 1 segment per request. Given this situation, I'm not sure it's worth the complexity to spec out a fancy padding scheme that no one will implement. Wasting 512 - 16 bytes per segment also seems unattractive. IMO it's acceptable to introduce a feature bit if struct virtio_blk_discard_write_zeroes needs extension in the future. > Another reason would be compatibility. For better or worse the spec > does make it look like 512 padding is present. This patch was merged very > recently so I agree it's not too late to clarify it that it's not > required, but it seems a bit too much to disallow that completely. > Let's assume for a minute a device turns up that already > implemented the 512 byte assumption? If padding is allowed then we can > write a driver that works with both devices. The Linux guest driver doesn't honor 512 byte padding, so device backends would already need to make an exception if we spec 512 byte padding. Let's begin VIRTIO 1.1 with the state of real-world implementations: data[] must be a multiple of sizeof(struct virtio_blk_discard_write_zeroes). I'll send a patch to the virtio TC and we can discuss further in that thread. Stefan
> -----Original Message----- > From: Stefan Hajnoczi [mailto:stefanha@redhat.com] > Sent: Wednesday, January 30, 2019 3:40 PM > To: Michael S. Tsirkin <mst@redhat.com> > Cc: Stefan Hajnoczi <stefanha@gmail.com>; Thomas Huth <thuth@redhat.com>; > Stefano Garzarella <sgarzare@redhat.com>; Liu, Changpeng > <changpeng.liu@intel.com>; Laurent Vivier <lvivier@redhat.com>; Kevin Wolf > <kwolf@redhat.com>; qemu-block@nongnu.org; qemu-devel@nongnu.org; Max > Reitz <mreitz@redhat.com>; Paolo Bonzini <pbonzini@redhat.com>; virtio- > comment@lists.oasis-open.org > Subject: Re: [Qemu-devel] [PATCH RFC 2/2] tests/virtio-blk: add test for > WRITE_ZEROES command > > On Sun, Jan 27, 2019 at 01:42:03PM -0500, Michael S. Tsirkin wrote: > > On Sun, Jan 27, 2019 at 12:57:20PM +0000, Stefan Hajnoczi wrote: > > > On Fri, Jan 25, 2019 at 02:17:01PM -0500, Michael S. Tsirkin wrote: > > > > On Fri, Jan 25, 2019 at 03:12:45PM +0000, Stefan Hajnoczi wrote: > > > > > Based on the Linux guest driver code and the lack of more evidence in > > > > > the spec, I'm pretty sure data[] doesn't need to be padded to 512 bytes > > > > > for discard/write zero requests. > > > > > > > > OK. Must devices support such padding? > > > > > > I see no reason to require padding. Host APIs and physical storage > > > controllers do not require it. > > > > I'm not talking about requiring padding I'm talking about > > supporting padding on device side. > > > > One reason would be ease of extending the spec for guest. > > > > E.g. imagine adding more fields to the command. > > With suppport for padding guest simply adds fields > > to its structure, and the unused ones are ignored > > by device. Without support for padding you need two > > versions of the structure. > > The simplest way is to say each struct virtio_blk_discard_write_zeroes > (currently 16 bytes long) is padded to 512 bytes, but this wastes a lot > of memory. > > Stefano and I looked at the status of multiple segment discard/write > zeroes in Linux: only the NVMe driver supports multiple segments. Even > SCSI sd doesn't support multiple segments. > > Userspace APIs do not offer a way for applications to submit multiple > segments in a single call. Only the block I/O scheduler creates > requests with multiple segments. > > SPDK's virtio-blk driver and vhost-user-blk device backend also only > support 1 segment per request. > > Given this situation, I'm not sure it's worth the complexity to spec out > a fancy padding scheme that no one will implement. Wasting 512 - 16 > bytes per segment also seems unattractive. IMO it's acceptable to > introduce a feature bit if struct virtio_blk_discard_write_zeroes needs > extension in the future. > > > Another reason would be compatibility. For better or worse the spec > > does make it look like 512 padding is present. This patch was merged very > > recently so I agree it's not too late to clarify it that it's not > > required, but it seems a bit too much to disallow that completely. > > Let's assume for a minute a device turns up that already > > implemented the 512 byte assumption? If padding is allowed then we can > > write a driver that works with both devices. > > The Linux guest driver doesn't honor 512 byte padding, so device > backends would already need to make an exception if we spec 512 byte > padding. > > Let's begin VIRTIO 1.1 with the state of real-world implementations: > data[] must be a multiple of sizeof(struct > virtio_blk_discard_write_zeroes). Actually that's the purpose at the first beginning, padding to 512 bytes will waste some memory space, it's nice to have an exception other Read/Write commands. > > I'll send a patch to the virtio TC and we can discuss further in that > thread. > > Stefan
diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 04c608764b..8cabbcb85a 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -231,6 +231,69 @@ static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, guest_free(alloc, req_addr); + if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) { + struct virtio_blk_discard_write_zeroes *dwz_hdr; + void *expected; + + /* + * WRITE_ZEROES request on the same sector of previous test where + * we wrote "TEST". + */ + req.type = VIRTIO_BLK_T_WRITE_ZEROES; + req.data = g_malloc0(512); + dwz_hdr = (struct virtio_blk_discard_write_zeroes *)req.data; + dwz_hdr->sector = 0; + dwz_hdr->num_sectors = 1; + dwz_hdr->flags = 0; + + req_addr = virtio_blk_request(alloc, dev, &req, 512); + + g_free(req.data); + + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); + qvirtqueue_add(vq, req_addr + 16, 512, false, true); + qvirtqueue_add(vq, req_addr + 528, 1, true, false); + + qvirtqueue_kick(dev, vq, free_head); + + qvirtio_wait_used_elem(dev, vq, free_head, NULL, + QVIRTIO_BLK_TIMEOUT_US); + status = readb(req_addr + 528); + g_assert_cmpint(status, ==, 0); + + guest_free(alloc, req_addr); + + /* Read request to check if the sector contains all zeroes */ + req.type = VIRTIO_BLK_T_IN; + req.ioprio = 1; + req.sector = 0; + req.data = g_malloc0(512); + + req_addr = virtio_blk_request(alloc, dev, &req, 512); + + g_free(req.data); + + free_head = qvirtqueue_add(vq, req_addr, 16, false, true); + qvirtqueue_add(vq, req_addr + 16, 512, true, true); + qvirtqueue_add(vq, req_addr + 528, 1, true, false); + + qvirtqueue_kick(dev, vq, free_head); + + qvirtio_wait_used_elem(dev, vq, free_head, NULL, + QVIRTIO_BLK_TIMEOUT_US); + status = readb(req_addr + 528); + g_assert_cmpint(status, ==, 0); + + data = g_malloc(512); + expected = g_malloc0(512); + memread(req_addr + 16, data, 512); + g_assert_cmpmem(data, 512, expected, 512); + g_free(expected); + g_free(data); + + guest_free(alloc, req_addr); + } + if (features & (1u << VIRTIO_F_ANY_LAYOUT)) { /* Write and read with 2 descriptor layout */ /* Write request */
If the WRITE_ZEROES feature is enabled, we check this command in the test_basic(). Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> --- tests/virtio-blk-test.c | 63 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+)