Message ID | fec3b2318fe49e39d026a18fdccdb2737226aafc.1636057885.git.john.g.johnson@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio-user client | expand |
On Mon, 8 Nov 2021 16:46:40 -0800 John Johnson <john.g.johnson@oracle.com> wrote: > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> > --- > hw/vfio/pci.h | 1 + > hw/vfio/user-protocol.h | 12 +++++ > hw/vfio/user.h | 1 + > include/hw/vfio/vfio-common.h | 1 + > hw/vfio/common.c | 7 ++- > hw/vfio/pci.c | 7 +++ > hw/vfio/user.c | 101 ++++++++++++++++++++++++++++++++++++++++++ > 7 files changed, 129 insertions(+), 1 deletion(-) > > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h > index ec9f345..643ff75 100644 > --- a/hw/vfio/pci.h > +++ b/hw/vfio/pci.h > @@ -194,6 +194,7 @@ struct VFIOUserPCIDevice { > VFIOPCIDevice device; > char *sock_name; > bool send_queued; /* all sends are queued */ > + bool no_post; /* all regions write are sync */ > }; > > /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */ > diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h > index 104bf4f..56904cf 100644 > --- a/hw/vfio/user-protocol.h > +++ b/hw/vfio/user-protocol.h > @@ -109,4 +109,16 @@ typedef struct { > uint64_t offset; > } VFIOUserRegionInfo; > > +/* > + * VFIO_USER_REGION_READ > + * VFIO_USER_REGION_WRITE > + */ > +typedef struct { > + VFIOUserHdr hdr; > + uint64_t offset; > + uint32_t region; > + uint32_t count; > + char data[]; > +} VFIOUserRegionRW; > + > #endif /* VFIO_USER_PROTOCOL_H */ > diff --git a/hw/vfio/user.h b/hw/vfio/user.h > index 19edd84..f2098f2 100644 > --- a/hw/vfio/user.h > +++ b/hw/vfio/user.h > @@ -75,6 +75,7 @@ typedef struct VFIOProxy { > /* VFIOProxy flags */ > #define VFIO_PROXY_CLIENT 0x1 > #define VFIO_PROXY_FORCE_QUEUED 0x4 > +#define VFIO_PROXY_NO_POST 0x8 > > VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp); > void vfio_user_disconnect(VFIOProxy *proxy); > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index e2d7ee1..b498964 100644 > --- 
a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -56,6 +56,7 @@ typedef struct VFIORegion { > uint32_t nr_mmaps; > VFIOMmap *mmaps; > uint8_t nr; /* cache the region number for debug */ > + bool post_wr; /* writes can be posted */ As with the fd in the previous patch, this is where the concept of posted writes should be introduced throughout. Or maybe even better would be to introduce write support without posting and the next patch could expose posted writes. Thanks, Alex > int remfd; /* fd if exported from remote process */ > } VFIORegion; > > diff --git a/hw/vfio/common.c b/hw/vfio/common.c > index 47ec28f..e19f321 100644 > --- a/hw/vfio/common.c > +++ b/hw/vfio/common.c > @@ -213,6 +213,7 @@ void vfio_region_write(void *opaque, hwaddr addr, > uint32_t dword; > uint64_t qword; > } buf; > + bool post = region->post_wr; > int ret; > > switch (size) { > @@ -233,7 +234,11 @@ void vfio_region_write(void *opaque, hwaddr addr, > break; > } > > - ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, &buf, false); > + /* read-after-write hazard if guest can directly access region */ > + if (region->nr_mmaps) { > + post = false; > + } > + ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, &buf, post); > if (ret != size) { > const char *err = ret < 0 ? strerror(-ret) : "short write"; > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index 40eb9e6..d5f9987 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -1665,6 +1665,9 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr) > bar->type = pci_bar & (bar->ioport ? 
~PCI_BASE_ADDRESS_IO_MASK : > ~PCI_BASE_ADDRESS_MEM_MASK); > bar->size = bar->region.size; > + > + /* IO regions are sync, memory can be async */ > + bar->region.post_wr = (bar->ioport == 0); > } > > static void vfio_bars_prepare(VFIOPCIDevice *vdev) > @@ -3513,6 +3516,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) > if (udev->send_queued) { > proxy->flags |= VFIO_PROXY_FORCE_QUEUED; > } > + if (udev->no_post) { > + proxy->flags |= VFIO_PROXY_NO_POST; > + } > > vfio_user_validate_version(vbasedev, &err); > if (err != NULL) { > @@ -3565,6 +3571,7 @@ static void vfio_user_instance_finalize(Object *obj) > static Property vfio_user_pci_dev_properties[] = { > DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name), > DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false), > + DEFINE_PROP_BOOL("x-no-posted-writes", VFIOUserPCIDevice, no_post, false), > DEFINE_PROP_END_OF_LIST(), > }; > > diff --git a/hw/vfio/user.c b/hw/vfio/user.c > index b40c4ed..781cbfd 100644 > --- a/hw/vfio/user.c > +++ b/hw/vfio/user.c > @@ -50,6 +50,8 @@ static void vfio_user_cb(void *opaque); > > static void vfio_user_request(void *opaque); > static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg); > +static void vfio_user_send_async(VFIOProxy *proxy, VFIOUserHdr *hdr, > + VFIOUserFDs *fds); > static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr, > VFIOUserFDs *fds, int rsize, bool nobql); > static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, > @@ -533,6 +535,33 @@ static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg) > return 0; > } > > +/* > + * async send - msg can be queued, but will be freed when sent > + */ > +static void vfio_user_send_async(VFIOProxy *proxy, VFIOUserHdr *hdr, > + VFIOUserFDs *fds) > +{ > + VFIOUserMsg *msg; > + int ret; > + > + if (!(hdr->flags & (VFIO_USER_NO_REPLY|VFIO_USER_REPLY))) { > + error_printf("vfio_user_send_async on sync message\n"); > + return; > + } > + > + 
QEMU_LOCK_GUARD(&proxy->lock); > + > + msg = vfio_user_getmsg(proxy, hdr, fds); > + msg->id = hdr->id; > + msg->rsize = 0; > + msg->type = VFIO_MSG_ASYNC; > + > + ret = vfio_user_send_queued(proxy, msg); > + if (ret < 0) { > + vfio_user_recycle(proxy, msg); > + } > +} > + > static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr, > VFIOUserFDs *fds, int rsize, bool nobql) > { > @@ -957,6 +986,62 @@ static int vfio_user_get_region_info(VFIOProxy *proxy, > return 0; > } > > +static int vfio_user_region_read(VFIOProxy *proxy, uint8_t index, off_t offset, > + uint32_t count, void *data) > +{ > + g_autofree VFIOUserRegionRW *msgp = NULL; > + int size = sizeof(*msgp) + count; > + > + msgp = g_malloc0(size); > + vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_READ, sizeof(*msgp), 0); > + msgp->offset = offset; > + msgp->region = index; > + msgp->count = count; > + > + vfio_user_send_wait(proxy, &msgp->hdr, NULL, size, false); > + if (msgp->hdr.flags & VFIO_USER_ERROR) { > + return -msgp->hdr.error_reply; > + } else if (msgp->count > count) { > + return -E2BIG; > + } else { > + memcpy(data, &msgp->data, msgp->count); > + } > + > + return msgp->count; > +} > + > +static int vfio_user_region_write(VFIOProxy *proxy, uint8_t index, off_t offset, > + uint32_t count, void *data, bool post) > +{ > + VFIOUserRegionRW *msgp = NULL; > + int flags = post ? 
VFIO_USER_NO_REPLY : 0; > + int size = sizeof(*msgp) + count; > + int ret; > + > + msgp = g_malloc0(size); > + vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags); > + msgp->offset = offset; > + msgp->region = index; > + msgp->count = count; > + memcpy(&msgp->data, data, count); > + > + /* async send will free msg after it's sent */ > + if (post && !(proxy->flags & VFIO_PROXY_NO_POST)) { > + vfio_user_send_async(proxy, &msgp->hdr, NULL); > + return count; > + } > + > + vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, false); > + if (msgp->hdr.flags & VFIO_USER_ERROR) { > + ret = -msgp->hdr.error_reply; > + } else { > + ret = count; > + } > + > + g_free(msgp); > + return ret; > +} > + > > /* > * Socket-based io_ops > @@ -990,8 +1075,24 @@ static int vfio_user_io_get_region_info(VFIODevice *vbasedev, > return VDEV_VALID_REGION_INFO(vbasedev, info, fd); > } > > +static int vfio_user_io_region_read(VFIODevice *vbasedev, uint8_t index, > + off_t off, uint32_t size, void *data) > +{ > + return vfio_user_region_read(vbasedev->proxy, index, off, size, data); > +} > + > +static int vfio_user_io_region_write(VFIODevice *vbasedev, uint8_t index, > + off_t off, unsigned size, void *data, > + bool post) > +{ > + return vfio_user_region_write(vbasedev->proxy, index, off, size, data, > + post); > +} > + > VFIODevIO vfio_dev_io_sock = { > .get_info = vfio_user_io_get_info, > .get_region_info = vfio_user_io_get_region_info, > + .region_read = vfio_user_io_region_read, > + .region_write = vfio_user_io_region_write, > }; >
> On Nov 19, 2021, at 2:42 PM, Alex Williamson <alex.williamson@redhat.com> wrote:
>
> On Mon, 8 Nov 2021 16:46:40 -0800
> John Johnson <john.g.johnson@oracle.com> wrote:
>
>>
>> VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp);
>> void vfio_user_disconnect(VFIOProxy *proxy);
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index e2d7ee1..b498964 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -56,6 +56,7 @@ typedef struct VFIORegion {
>> uint32_t nr_mmaps;
>> VFIOMmap *mmaps;
>> uint8_t nr; /* cache the region number for debug */
>> + bool post_wr; /* writes can be posted */
>
> As with the fd in the previous patch, this is where the concept of
> posted writes should be introduced throughout. Or maybe even better
> would be to introduce write support without posting and the next patch
> could expose posted writes. Thanks,
>

I can place it with the region write ops patch where it’s used.

JJ
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index ec9f345..643ff75 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -194,6 +194,7 @@ struct VFIOUserPCIDevice { VFIOPCIDevice device; char *sock_name; bool send_queued; /* all sends are queued */ + bool no_post; /* all regions write are sync */ }; /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */ diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h index 104bf4f..56904cf 100644 --- a/hw/vfio/user-protocol.h +++ b/hw/vfio/user-protocol.h @@ -109,4 +109,16 @@ typedef struct { uint64_t offset; } VFIOUserRegionInfo; +/* + * VFIO_USER_REGION_READ + * VFIO_USER_REGION_WRITE + */ +typedef struct { + VFIOUserHdr hdr; + uint64_t offset; + uint32_t region; + uint32_t count; + char data[]; +} VFIOUserRegionRW; + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio/user.h b/hw/vfio/user.h index 19edd84..f2098f2 100644 --- a/hw/vfio/user.h +++ b/hw/vfio/user.h @@ -75,6 +75,7 @@ typedef struct VFIOProxy { /* VFIOProxy flags */ #define VFIO_PROXY_CLIENT 0x1 #define VFIO_PROXY_FORCE_QUEUED 0x4 +#define VFIO_PROXY_NO_POST 0x8 VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp); void vfio_user_disconnect(VFIOProxy *proxy); diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index e2d7ee1..b498964 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -56,6 +56,7 @@ typedef struct VFIORegion { uint32_t nr_mmaps; VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ + bool post_wr; /* writes can be posted */ int remfd; /* fd if exported from remote process */ } VFIORegion; diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 47ec28f..e19f321 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -213,6 +213,7 @@ void vfio_region_write(void *opaque, hwaddr addr, uint32_t dword; uint64_t qword; } buf; + bool post = region->post_wr; int ret; switch (size) { @@ -233,7 +234,11 @@ void vfio_region_write(void *opaque, 
hwaddr addr, break; } - ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, &buf, false); + /* read-after-write hazard if guest can directly access region */ + if (region->nr_mmaps) { + post = false; + } + ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, &buf, post); if (ret != size) { const char *err = ret < 0 ? strerror(-ret) : "short write"; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 40eb9e6..d5f9987 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1665,6 +1665,9 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr) bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK); bar->size = bar->region.size; + + /* IO regions are sync, memory can be async */ + bar->region.post_wr = (bar->ioport == 0); } static void vfio_bars_prepare(VFIOPCIDevice *vdev) @@ -3513,6 +3516,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) if (udev->send_queued) { proxy->flags |= VFIO_PROXY_FORCE_QUEUED; } + if (udev->no_post) { + proxy->flags |= VFIO_PROXY_NO_POST; + } vfio_user_validate_version(vbasedev, &err); if (err != NULL) { @@ -3565,6 +3571,7 @@ static void vfio_user_instance_finalize(Object *obj) static Property vfio_user_pci_dev_properties[] = { DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name), DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false), + DEFINE_PROP_BOOL("x-no-posted-writes", VFIOUserPCIDevice, no_post, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/vfio/user.c b/hw/vfio/user.c index b40c4ed..781cbfd 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -50,6 +50,8 @@ static void vfio_user_cb(void *opaque); static void vfio_user_request(void *opaque); static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg); +static void vfio_user_send_async(VFIOProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds); static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr, VFIOUserFDs *fds, int rsize, bool nobql); static void 
vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, @@ -533,6 +535,33 @@ static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg) return 0; } +/* + * async send - msg can be queued, but will be freed when sent + */ +static void vfio_user_send_async(VFIOProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds) +{ + VFIOUserMsg *msg; + int ret; + + if (!(hdr->flags & (VFIO_USER_NO_REPLY|VFIO_USER_REPLY))) { + error_printf("vfio_user_send_async on sync message\n"); + return; + } + + QEMU_LOCK_GUARD(&proxy->lock); + + msg = vfio_user_getmsg(proxy, hdr, fds); + msg->id = hdr->id; + msg->rsize = 0; + msg->type = VFIO_MSG_ASYNC; + + ret = vfio_user_send_queued(proxy, msg); + if (ret < 0) { + vfio_user_recycle(proxy, msg); + } +} + static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr, VFIOUserFDs *fds, int rsize, bool nobql) { @@ -957,6 +986,62 @@ static int vfio_user_get_region_info(VFIOProxy *proxy, return 0; } +static int vfio_user_region_read(VFIOProxy *proxy, uint8_t index, off_t offset, + uint32_t count, void *data) +{ + g_autofree VFIOUserRegionRW *msgp = NULL; + int size = sizeof(*msgp) + count; + + msgp = g_malloc0(size); + vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_READ, sizeof(*msgp), 0); + msgp->offset = offset; + msgp->region = index; + msgp->count = count; + + vfio_user_send_wait(proxy, &msgp->hdr, NULL, size, false); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } else if (msgp->count > count) { + return -E2BIG; + } else { + memcpy(data, &msgp->data, msgp->count); + } + + return msgp->count; +} + +static int vfio_user_region_write(VFIOProxy *proxy, uint8_t index, off_t offset, + uint32_t count, void *data, bool post) +{ + VFIOUserRegionRW *msgp = NULL; + int flags = post ? 
VFIO_USER_NO_REPLY : 0; + int size = sizeof(*msgp) + count; + int ret; + + msgp = g_malloc0(size); + vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags); + msgp->offset = offset; + msgp->region = index; + msgp->count = count; + memcpy(&msgp->data, data, count); + + /* async send will free msg after it's sent */ + if (post && !(proxy->flags & VFIO_PROXY_NO_POST)) { + vfio_user_send_async(proxy, &msgp->hdr, NULL); + return count; + } + + vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, false); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + ret = -msgp->hdr.error_reply; + } else { + ret = count; + } + + g_free(msgp); + return ret; +} + /* * Socket-based io_ops @@ -990,8 +1075,24 @@ static int vfio_user_io_get_region_info(VFIODevice *vbasedev, return VDEV_VALID_REGION_INFO(vbasedev, info, fd); } +static int vfio_user_io_region_read(VFIODevice *vbasedev, uint8_t index, + off_t off, uint32_t size, void *data) +{ + return vfio_user_region_read(vbasedev->proxy, index, off, size, data); +} + +static int vfio_user_io_region_write(VFIODevice *vbasedev, uint8_t index, + off_t off, unsigned size, void *data, + bool post) +{ + return vfio_user_region_write(vbasedev->proxy, index, off, size, data, + post); +} + VFIODevIO vfio_dev_io_sock = { .get_info = vfio_user_io_get_info, .get_region_info = vfio_user_io_get_region_info, + .region_read = vfio_user_io_region_read, + .region_write = vfio_user_io_region_write, };