diff mbox series

[v9,10/17] vfio-user: run vfio-user context

Message ID 7350f4813b73af783965b758ecf39d0a6a76db53.1651586203.git.jag.raman@oracle.com (mailing list archive)
State New, archived
Headers show
Series vfio-user server in QEMU | expand

Commit Message

Jag Raman May 3, 2022, 2:16 p.m. UTC
Setup a handler to run vfio-user context. The context is driven by
messages to the file descriptor associated with it - get the fd for
the context and hook up the handler with it

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi/misc.json            |  30 +++++++++++
 hw/remote/vfio-user-obj.c | 102 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 131 insertions(+), 1 deletion(-)

Comments

Markus Armbruster May 4, 2022, 11:42 a.m. UTC | #1
Jagannathan Raman <jag.raman@oracle.com> writes:

> Setup a handler to run vfio-user context. The context is driven by
> messages to the file descriptor associated with it - get the fd for
> the context and hook up the handler with it
>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  qapi/misc.json            |  30 +++++++++++
>  hw/remote/vfio-user-obj.c | 102 +++++++++++++++++++++++++++++++++++++-
>  2 files changed, 131 insertions(+), 1 deletion(-)
>
> diff --git a/qapi/misc.json b/qapi/misc.json
> index b83cc39029..fa49f2876a 100644
> --- a/qapi/misc.json
> +++ b/qapi/misc.json
> @@ -553,3 +553,33 @@
>  ##
>  { 'event': 'RTC_CHANGE',
>    'data': { 'offset': 'int', 'qom-path': 'str' } }
> +
> +##
> +# @VFU_CLIENT_HANGUP:
> +#
> +# Emitted when the client of a TYPE_VFIO_USER_SERVER closes the
> +# communication channel
> +#
> +# @vfu-id: ID of the TYPE_VFIO_USER_SERVER object
> +#
> +# @vfu-qom-path: path to the TYPE_VFIO_USER_SERVER object in the QOM tree
> +#
> +# @dev-id: ID of attached PCI device
> +#
> +# @dev-qom-path: path to attached PCI device in the QOM tree

I'm still unsure what kind(s) of ID @vfu-id and @dev-id are.  See below.

> +#
> +# Since: 7.1
> +#
> +# Example:
> +#
> +# <- { "event": "VFU_CLIENT_HANGUP",
> +#      "data": { "vfu-id": "vfu1",
> +#                "vfu-qom-path": "/objects/vfu1",
> +#                "dev-id": "sas1",
> +#                "dev-qom-path": "/machine/peripheral/sas1" },
> +#      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
> +#
> +##
> +{ 'event': 'VFU_CLIENT_HANGUP',
> +  'data': { 'vfu-id': 'str', 'vfu-qom-path': 'str',
> +            'dev-id': 'str', 'dev-qom-path': 'str' } }
> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
> index 3ca6aa2b45..3a4c6a9fa0 100644
> --- a/hw/remote/vfio-user-obj.c
> +++ b/hw/remote/vfio-user-obj.c
> @@ -27,6 +27,9 @@
>   *
>   * device - id of a device on the server, a required option. PCI devices
>   *          alone are supported presently.
> + *
> + * notes - x-vfio-user-server could block IO and monitor during the
> + *         initialization phase.
>   */
>  
>  #include "qemu/osdep.h"
> @@ -40,11 +43,14 @@
>  #include "hw/remote/machine.h"
>  #include "qapi/error.h"
>  #include "qapi/qapi-visit-sockets.h"
> +#include "qapi/qapi-events-misc.h"
>  #include "qemu/notify.h"
> +#include "qemu/thread.h"
>  #include "sysemu/sysemu.h"
>  #include "libvfio-user.h"
>  #include "hw/qdev-core.h"
>  #include "hw/pci/pci.h"
> +#include "qemu/timer.h"
>  
>  #define TYPE_VFU_OBJECT "x-vfio-user-server"
>  OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
> @@ -86,6 +92,8 @@ struct VfuObject {
>      PCIDevice *pci_dev;
>  
>      Error *unplug_blocker;
> +
> +    int vfu_poll_fd;
>  };
>  
>  static void vfu_object_init_ctx(VfuObject *o, Error **errp);
> @@ -164,6 +172,76 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
>      vfu_object_init_ctx(o, errp);
>  }
>  
> +static void vfu_object_ctx_run(void *opaque)
> +{
> +    VfuObject *o = opaque;
> +    const char *vfu_id;
> +    char *vfu_path, *pci_dev_path;
> +    int ret = -1;
> +
> +    while (ret != 0) {
> +        ret = vfu_run_ctx(o->vfu_ctx);
> +        if (ret < 0) {
> +            if (errno == EINTR) {
> +                continue;
> +            } else if (errno == ENOTCONN) {
> +                vfu_id = object_get_canonical_path_component(OBJECT(o));
> +                vfu_path = object_get_canonical_path(OBJECT(o));

Hmm.  @vfu_id is always the last component of @vfu_path.  Why do we need
to send both?

> +                g_assert(o->pci_dev);
> +                pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
> +                qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
> +                                                  o->device, pci_dev_path);

Where is o->device set?  I'm asking because I it must not be null here,
and that's not locally obvious.

> +                qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
> +                o->vfu_poll_fd = -1;
> +                object_unparent(OBJECT(o));
> +                g_free(vfu_path);
> +                g_free(pci_dev_path);
> +                break;
> +            } else {
> +                VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s",
> +                                 o->device, strerror(errno));
> +                break;
> +            }
> +        }
> +    }
> +}

[...]
Jag Raman May 4, 2022, 3:22 p.m. UTC | #2
> On May 4, 2022, at 7:42 AM, Markus Armbruster <armbru@redhat.com> wrote:
> 
> Jagannathan Raman <jag.raman@oracle.com> writes:
> 
>> Setup a handler to run vfio-user context. The context is driven by
>> messages to the file descriptor associated with it - get the fd for
>> the context and hook up the handler with it
>> 
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
>> ---
>> qapi/misc.json | 30 +++++++++++
>> hw/remote/vfio-user-obj.c | 102 +++++++++++++++++++++++++++++++++++++-
>> 2 files changed, 131 insertions(+), 1 deletion(-)
>> 
>> diff --git a/qapi/misc.json b/qapi/misc.json
>> index b83cc39029..fa49f2876a 100644
>> --- a/qapi/misc.json
>> +++ b/qapi/misc.json
>> @@ -553,3 +553,33 @@
>> ##
>> { 'event': 'RTC_CHANGE',
>> 'data': { 'offset': 'int', 'qom-path': 'str' } }
>> +
>> +##
>> +# @VFU_CLIENT_HANGUP:
>> +#
>> +# Emitted when the client of a TYPE_VFIO_USER_SERVER closes the
>> +# communication channel
>> +#
>> +# @vfu-id: ID of the TYPE_VFIO_USER_SERVER object
>> +#
>> +# @vfu-qom-path: path to the TYPE_VFIO_USER_SERVER object in the QOM tree
>> +#
>> +# @dev-id: ID of attached PCI device
>> +#
>> +# @dev-qom-path: path to attached PCI device in the QOM tree
> 
> I'm still unsure what kind(s) of ID @vfu-id and @dev-id are. See below.

I’m not sure what you mean by kind of ID - I thought of ID as a
unique string. I’ll try my best to explain.

dev-id and vfu-id are the “id" sub-options of “-device” and “-object” command-line
options respectively.

"dev-id” is the “id” member of “DeviceState” which QEMU sets using
qdev_set_id() when the device is added. 

The Object ID (vfu-id in this case) is a bit tricky. It’s also the “id”
command-line sub-option, but QEMU stores it as a child property
of the parent object.

> 
>> +#
>> +# Since: 7.1
>> +#
>> +# Example:
>> +#
>> +# <- { "event": "VFU_CLIENT_HANGUP",
>> +# "data": { "vfu-id": "vfu1",
>> +# "vfu-qom-path": "/objects/vfu1",
>> +# "dev-id": "sas1",
>> +# "dev-qom-path": "/machine/peripheral/sas1" },
>> +# "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
>> +#
>> +##
>> +{ 'event': 'VFU_CLIENT_HANGUP',
>> + 'data': { 'vfu-id': 'str', 'vfu-qom-path': 'str',
>> + 'dev-id': 'str', 'dev-qom-path': 'str' } }
>> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
>> index 3ca6aa2b45..3a4c6a9fa0 100644
>> --- a/hw/remote/vfio-user-obj.c
>> +++ b/hw/remote/vfio-user-obj.c
>> @@ -27,6 +27,9 @@
>> *
>> * device - id of a device on the server, a required option. PCI devices
>> * alone are supported presently.
>> + *
>> + * notes - x-vfio-user-server could block IO and monitor during the
>> + * initialization phase.
>> */
>> 
>> #include "qemu/osdep.h"
>> @@ -40,11 +43,14 @@
>> #include "hw/remote/machine.h"
>> #include "qapi/error.h"
>> #include "qapi/qapi-visit-sockets.h"
>> +#include "qapi/qapi-events-misc.h"
>> #include "qemu/notify.h"
>> +#include "qemu/thread.h"
>> #include "sysemu/sysemu.h"
>> #include "libvfio-user.h"
>> #include "hw/qdev-core.h"
>> #include "hw/pci/pci.h"
>> +#include "qemu/timer.h"
>> 
>> #define TYPE_VFU_OBJECT "x-vfio-user-server"
>> OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
>> @@ -86,6 +92,8 @@ struct VfuObject {
>> PCIDevice *pci_dev;
>> 
>> Error *unplug_blocker;
>> +
>> + int vfu_poll_fd;
>> };
>> 
>> static void vfu_object_init_ctx(VfuObject *o, Error **errp);
>> @@ -164,6 +172,76 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
>> vfu_object_init_ctx(o, errp);
>> }
>> 
>> +static void vfu_object_ctx_run(void *opaque)
>> +{
>> + VfuObject *o = opaque;
>> + const char *vfu_id;
>> + char *vfu_path, *pci_dev_path;
>> + int ret = -1;
>> +
>> + while (ret != 0) {
>> + ret = vfu_run_ctx(o->vfu_ctx);
>> + if (ret < 0) {
>> + if (errno == EINTR) {
>> + continue;
>> + } else if (errno == ENOTCONN) {
>> + vfu_id = object_get_canonical_path_component(OBJECT(o));
>> + vfu_path = object_get_canonical_path(OBJECT(o));
> 
> Hmm. @vfu_id is always the last component of @vfu_path. Why do we need
> to send both?

vfu_id is the ID that the user/Orchestrator passed as a command-line option
during addition/creation. So it made sense to report back with the same ID
that they used. But I’m OK with dropping this if that’s what you prefer.

> 
>> + g_assert(o->pci_dev);
>> + pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
>> + qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
>> + o->device, pci_dev_path);
> 
> Where is o->device set? I'm asking because I it must not be null here,
> and that's not locally obvious.

Yeah, it’s not obvious from this patch that o->device is guaranteed to be
non-NULL. It’s set by vfu_object_set_device(). Please see the following
patches in the series:
vfio-user: define vfio-user-server object
vfio-user: instantiate vfio-user context

There’s already an assert for o->pci_dev here, but we could add one
for o->device too?

Thank you!
—
Jag

> 
>> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
>> + o->vfu_poll_fd = -1;
>> + object_unparent(OBJECT(o));
>> + g_free(vfu_path);
>> + g_free(pci_dev_path);
>> + break;
>> + } else {
>> + VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s",
>> + o->device, strerror(errno));
>> + break;
>> + }
>> + }
>> + }
>> +}
> 
> [...]
Markus Armbruster May 5, 2022, 7:44 a.m. UTC | #3
Jag Raman <jag.raman@oracle.com> writes:

>> On May 4, 2022, at 7:42 AM, Markus Armbruster <armbru@redhat.com> wrote:
>> 
>> Jagannathan Raman <jag.raman@oracle.com> writes:
>> 
>>> Setup a handler to run vfio-user context. The context is driven by
>>> messages to the file descriptor associated with it - get the fd for
>>> the context and hook up the handler with it
>>> 
>>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>>> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
>>> ---
>>> qapi/misc.json | 30 +++++++++++
>>> hw/remote/vfio-user-obj.c | 102 +++++++++++++++++++++++++++++++++++++-
>>> 2 files changed, 131 insertions(+), 1 deletion(-)
>>> 
>>> diff --git a/qapi/misc.json b/qapi/misc.json
>>> index b83cc39029..fa49f2876a 100644
>>> --- a/qapi/misc.json
>>> +++ b/qapi/misc.json
>>> @@ -553,3 +553,33 @@
>>> ##
>>> { 'event': 'RTC_CHANGE',
>>> 'data': { 'offset': 'int', 'qom-path': 'str' } }
>>> +
>>> +##
>>> +# @VFU_CLIENT_HANGUP:
>>> +#
>>> +# Emitted when the client of a TYPE_VFIO_USER_SERVER closes the
>>> +# communication channel
>>> +#
>>> +# @vfu-id: ID of the TYPE_VFIO_USER_SERVER object
>>> +#
>>> +# @vfu-qom-path: path to the TYPE_VFIO_USER_SERVER object in the QOM tree
>>> +#
>>> +# @dev-id: ID of attached PCI device
>>> +#
>>> +# @dev-qom-path: path to attached PCI device in the QOM tree
>> 
>> I'm still unsure what kind(s) of ID @vfu-id and @dev-id are. See below.
>
> I’m not sure what you mean by kind of ID - I thought of ID as a
> unique string. I’ll try my best to explain.

Okay, let me try to clarify.

We have many, many ID namespaces, each associated with a certain kind of
object: device IDs for TYPE_DEVICE, object IDs for TYPE_OBJECT
implementing TYPE_USER_CREATABLE), block backend node names for
BlockDriverState, ...

Aside: I believe a single namespace would have been a wiser design
choice, but that ship sailed long ago.

To which of these namespaces do these two IDs belong, respectively?

> dev-id and vfu-id are the “id" sub-options of “-device” and “-object” command-line
> options respectively.

This answers my question.

> "dev-id” is the “id” member of “DeviceState” which QEMU sets using
> qdev_set_id() when the device is added. 
>
> The Object ID (vfu-id in this case) is a bit tricky. It’s also the “id”
> command-line sub-option, but QEMU stores it as a child property
> of the parent object.
>
>> 
>>> +#
>>> +# Since: 7.1
>>> +#
>>> +# Example:
>>> +#
>>> +# <- { "event": "VFU_CLIENT_HANGUP",
>>> +# "data": { "vfu-id": "vfu1",
>>> +# "vfu-qom-path": "/objects/vfu1",
>>> +# "dev-id": "sas1",
>>> +# "dev-qom-path": "/machine/peripheral/sas1" },
>>> +# "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
>>> +#
>>> +##
>>> +{ 'event': 'VFU_CLIENT_HANGUP',
>>> + 'data': { 'vfu-id': 'str', 'vfu-qom-path': 'str',
>>> + 'dev-id': 'str', 'dev-qom-path': 'str' } }
>>> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
>>> index 3ca6aa2b45..3a4c6a9fa0 100644
>>> --- a/hw/remote/vfio-user-obj.c
>>> +++ b/hw/remote/vfio-user-obj.c
>>> @@ -27,6 +27,9 @@
>>> *
>>> * device - id of a device on the server, a required option. PCI devices
>>> * alone are supported presently.
>>> + *
>>> + * notes - x-vfio-user-server could block IO and monitor during the
>>> + * initialization phase.
>>> */
>>> 
>>> #include "qemu/osdep.h"
>>> @@ -40,11 +43,14 @@
>>> #include "hw/remote/machine.h"
>>> #include "qapi/error.h"
>>> #include "qapi/qapi-visit-sockets.h"
>>> +#include "qapi/qapi-events-misc.h"
>>> #include "qemu/notify.h"
>>> +#include "qemu/thread.h"
>>> #include "sysemu/sysemu.h"
>>> #include "libvfio-user.h"
>>> #include "hw/qdev-core.h"
>>> #include "hw/pci/pci.h"
>>> +#include "qemu/timer.h"
>>> 
>>> #define TYPE_VFU_OBJECT "x-vfio-user-server"
>>> OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
>>> @@ -86,6 +92,8 @@ struct VfuObject {
>>> PCIDevice *pci_dev;
>>> 
>>> Error *unplug_blocker;
>>> +
>>> + int vfu_poll_fd;
>>> };
>>> 
>>> static void vfu_object_init_ctx(VfuObject *o, Error **errp);
>>> @@ -164,6 +172,76 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
>>> vfu_object_init_ctx(o, errp);
>>> }
>>> 
>>> +static void vfu_object_ctx_run(void *opaque)
>>> +{
>>> + VfuObject *o = opaque;
>>> + const char *vfu_id;
>>> + char *vfu_path, *pci_dev_path;
>>> + int ret = -1;
>>> +
>>> + while (ret != 0) {
>>> + ret = vfu_run_ctx(o->vfu_ctx);
>>> + if (ret < 0) {
>>> + if (errno == EINTR) {
>>> + continue;
>>> + } else if (errno == ENOTCONN) {
>>> + vfu_id = object_get_canonical_path_component(OBJECT(o));
>>> + vfu_path = object_get_canonical_path(OBJECT(o));
>> 
>> Hmm. @vfu_id is always the last component of @vfu_path. Why do we need
>> to send both?
>
> vfu_id is the ID that the user/Orchestrator passed as a command-line option
> during addition/creation. So it made sense to report back with the same ID
> that they used. But I’m OK with dropping this if that’s what you prefer.

Matter of taste, I guess.  I'd drop it simply to saves us the trouble of
documenting it.

If we decide to keep it, then I think we should document it's always the
last component of @vfu_path.

>>> + g_assert(o->pci_dev);
>>> + pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
>>> + qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
>>> + o->device, pci_dev_path);
>> 
>> Where is o->device set? I'm asking because I it must not be null here,
>> and that's not locally obvious.
>
> Yeah, it’s not obvious from this patch that o->device is guaranteed to be
> non-NULL. It’s set by vfu_object_set_device(). Please see the following
> patches in the series:
> vfio-user: define vfio-user-server object
> vfio-user: instantiate vfio-user context

vfu_object_set_device() is a QOM property setter.  It gets called if and
only if the property is set.  If it's never set, ->device remains null.
What ensures it's always set?

> There’s already an assert for o->pci_dev here, but we could add one
> for o->device too?

I'll make up my mind when I'm convinced o->device can't be null here.

> Thank you!

You're welcome!
Jag Raman May 5, 2022, 1:39 p.m. UTC | #4
> On May 5, 2022, at 3:44 AM, Markus Armbruster <armbru@redhat.com> wrote:
> 
> Jag Raman <jag.raman@oracle.com> writes:
> 
>>> On May 4, 2022, at 7:42 AM, Markus Armbruster <armbru@redhat.com> wrote:
>>> 
>>> Jagannathan Raman <jag.raman@oracle.com> writes:
>>> 
>>>> Setup a handler to run vfio-user context. The context is driven by
>>>> messages to the file descriptor associated with it - get the fd for
>>>> the context and hook up the handler with it
>>>> 
>>>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>>>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>>>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>>>> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
>>>> ---
>>>> qapi/misc.json | 30 +++++++++++
>>>> hw/remote/vfio-user-obj.c | 102 +++++++++++++++++++++++++++++++++++++-
>>>> 2 files changed, 131 insertions(+), 1 deletion(-)
>>>> 
>>>> diff --git a/qapi/misc.json b/qapi/misc.json
>>>> index b83cc39029..fa49f2876a 100644
>>>> --- a/qapi/misc.json
>>>> +++ b/qapi/misc.json
>>>> @@ -553,3 +553,33 @@
>>>> ##
>>>> { 'event': 'RTC_CHANGE',
>>>> 'data': { 'offset': 'int', 'qom-path': 'str' } }
>>>> +
>>>> +##
>>>> +# @VFU_CLIENT_HANGUP:
>>>> +#
>>>> +# Emitted when the client of a TYPE_VFIO_USER_SERVER closes the
>>>> +# communication channel
>>>> +#
>>>> +# @vfu-id: ID of the TYPE_VFIO_USER_SERVER object
>>>> +#
>>>> +# @vfu-qom-path: path to the TYPE_VFIO_USER_SERVER object in the QOM tree
>>>> +#
>>>> +# @dev-id: ID of attached PCI device
>>>> +#
>>>> +# @dev-qom-path: path to attached PCI device in the QOM tree
>>> 
>>> I'm still unsure what kind(s) of ID @vfu-id and @dev-id are. See below.
>> 
>> I’m not sure what you mean by kind of ID - I thought of ID as a
>> unique string. I’ll try my best to explain.
> 
> Okay, let me try to clarify.
> 
> We have many, many ID namespaces, each associated with a certain kind of
> object: device IDs for TYPE_DEVICE, object IDs for TYPE_OBJECT
> implementing TYPE_USER_CREATABLE), block backend node names for
> BlockDriverState, ...
> 
> Aside: I believe a single namespace would have been a wiser design
> choice, but that ship sailed long ago.
> 
> To which of these namespaces do these two IDs belong, respectively?
> 
>> dev-id and vfu-id are the “id" sub-options of “-device” and “-object” command-line
>> options respectively.
> 
> This answers my question.
> 
>> "dev-id” is the “id” member of “DeviceState” which QEMU sets using
>> qdev_set_id() when the device is added. 
>> 
>> The Object ID (vfu-id in this case) is a bit tricky. It’s also the “id”
>> command-line sub-option, but QEMU stores it as a child property
>> of the parent object.
>> 
>>> 
>>>> +#
>>>> +# Since: 7.1
>>>> +#
>>>> +# Example:
>>>> +#
>>>> +# <- { "event": "VFU_CLIENT_HANGUP",
>>>> +# "data": { "vfu-id": "vfu1",
>>>> +# "vfu-qom-path": "/objects/vfu1",
>>>> +# "dev-id": "sas1",
>>>> +# "dev-qom-path": "/machine/peripheral/sas1" },
>>>> +# "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
>>>> +#
>>>> +##
>>>> +{ 'event': 'VFU_CLIENT_HANGUP',
>>>> + 'data': { 'vfu-id': 'str', 'vfu-qom-path': 'str',
>>>> + 'dev-id': 'str', 'dev-qom-path': 'str' } }
>>>> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
>>>> index 3ca6aa2b45..3a4c6a9fa0 100644
>>>> --- a/hw/remote/vfio-user-obj.c
>>>> +++ b/hw/remote/vfio-user-obj.c
>>>> @@ -27,6 +27,9 @@
>>>> *
>>>> * device - id of a device on the server, a required option. PCI devices
>>>> * alone are supported presently.
>>>> + *
>>>> + * notes - x-vfio-user-server could block IO and monitor during the
>>>> + * initialization phase.
>>>> */
>>>> 
>>>> #include "qemu/osdep.h"
>>>> @@ -40,11 +43,14 @@
>>>> #include "hw/remote/machine.h"
>>>> #include "qapi/error.h"
>>>> #include "qapi/qapi-visit-sockets.h"
>>>> +#include "qapi/qapi-events-misc.h"
>>>> #include "qemu/notify.h"
>>>> +#include "qemu/thread.h"
>>>> #include "sysemu/sysemu.h"
>>>> #include "libvfio-user.h"
>>>> #include "hw/qdev-core.h"
>>>> #include "hw/pci/pci.h"
>>>> +#include "qemu/timer.h"
>>>> 
>>>> #define TYPE_VFU_OBJECT "x-vfio-user-server"
>>>> OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
>>>> @@ -86,6 +92,8 @@ struct VfuObject {
>>>> PCIDevice *pci_dev;
>>>> 
>>>> Error *unplug_blocker;
>>>> +
>>>> + int vfu_poll_fd;
>>>> };
>>>> 
>>>> static void vfu_object_init_ctx(VfuObject *o, Error **errp);
>>>> @@ -164,6 +172,76 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
>>>> vfu_object_init_ctx(o, errp);
>>>> }
>>>> 
>>>> +static void vfu_object_ctx_run(void *opaque)
>>>> +{
>>>> + VfuObject *o = opaque;
>>>> + const char *vfu_id;
>>>> + char *vfu_path, *pci_dev_path;
>>>> + int ret = -1;
>>>> +
>>>> + while (ret != 0) {
>>>> + ret = vfu_run_ctx(o->vfu_ctx);
>>>> + if (ret < 0) {
>>>> + if (errno == EINTR) {
>>>> + continue;
>>>> + } else if (errno == ENOTCONN) {
>>>> + vfu_id = object_get_canonical_path_component(OBJECT(o));
>>>> + vfu_path = object_get_canonical_path(OBJECT(o));
>>> 
>>> Hmm. @vfu_id is always the last component of @vfu_path. Why do we need
>>> to send both?
>> 
>> vfu_id is the ID that the user/Orchestrator passed as a command-line option
>> during addition/creation. So it made sense to report back with the same ID
>> that they used. But I’m OK with dropping this if that’s what you prefer.
> 
> Matter of taste, I guess.  I'd drop it simply to saves us the trouble of
> documenting it.
> 
> If we decide to keep it, then I think we should document it's always the
> last component of @vfu_path.
> 
>>>> + g_assert(o->pci_dev);
>>>> + pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
>>>> + qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
>>>> + o->device, pci_dev_path);
>>> 
>>> Where is o->device set? I'm asking because I it must not be null here,
>>> and that's not locally obvious.
>> 
>> Yeah, it’s not obvious from this patch that o->device is guaranteed to be
>> non-NULL. It’s set by vfu_object_set_device(). Please see the following
>> patches in the series:
>> vfio-user: define vfio-user-server object
>> vfio-user: instantiate vfio-user context
> 
> vfu_object_set_device() is a QOM property setter.  It gets called if and
> only if the property is set.  If it's never set, ->device remains null.
> What ensures it's always set?

That’s a good question - it’s not obvious from this patch.

The code would not reach here if o->device is not set. If o->device is NULL,
vfu_object_init_ctx() would bail out early without setting up
vfu_object_attach_ctx() and vfu_object_ctx_run() (this function) handlers.

Also, device is a required parameter. QEMU would not initialize this object
without it. Please see the definition of VfioUserServerProperties in the
following patch - noting that optional parameters are prefixed with a ‘*’:
[PATCH v9 07/17] vfio-user: define vfio-user-server object.

May be we should add a comment here to explain why o->device
wouldn’t be NULL?

Thank you!

> 
>> There’s already an assert for o->pci_dev here, but we could add one
>> for o->device too?
> 
> I'll make up my mind when I'm convinced o->device can't be null here.
> 
>> Thank you!
> 
> You're welcome!
>
Markus Armbruster May 5, 2022, 2:42 p.m. UTC | #5
Jag Raman <jag.raman@oracle.com> writes:

>> On May 5, 2022, at 3:44 AM, Markus Armbruster <armbru@redhat.com> wrote:
>> 
>> Jag Raman <jag.raman@oracle.com> writes:
>> 
>>>> On May 4, 2022, at 7:42 AM, Markus Armbruster <armbru@redhat.com> wrote:
>>>> 
>>>> Jagannathan Raman <jag.raman@oracle.com> writes:
>>>> 
>>>>> Setup a handler to run vfio-user context. The context is driven by
>>>>> messages to the file descriptor associated with it - get the fd for
>>>>> the context and hook up the handler with it
>>>>> 
>>>>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>>>>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>>>>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>>>>> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

[...]

>>>>> @@ -164,6 +172,76 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
>>>>> vfu_object_init_ctx(o, errp);
>>>>> }
>>>>> 
>>>>> +static void vfu_object_ctx_run(void *opaque)
>>>>> +{
>>>>> + VfuObject *o = opaque;
>>>>> + const char *vfu_id;
>>>>> + char *vfu_path, *pci_dev_path;
>>>>> + int ret = -1;
>>>>> +
>>>>> + while (ret != 0) {
>>>>> + ret = vfu_run_ctx(o->vfu_ctx);
>>>>> + if (ret < 0) {
>>>>> + if (errno == EINTR) {
>>>>> + continue;
>>>>> + } else if (errno == ENOTCONN) {
>>>>> + vfu_id = object_get_canonical_path_component(OBJECT(o));
>>>>> + vfu_path = object_get_canonical_path(OBJECT(o));
>>>> 
>>>> Hmm. @vfu_id is always the last component of @vfu_path. Why do we need
>>>> to send both?
>>> 
>>> vfu_id is the ID that the user/Orchestrator passed as a command-line option
>>> during addition/creation. So it made sense to report back with the same ID
>>> that they used. But I’m OK with dropping this if that’s what you prefer.
>> 
>> Matter of taste, I guess.  I'd drop it simply to saves us the trouble of
>> documenting it.
>> 
>> If we decide to keep it, then I think we should document it's always the
>> last component of @vfu_path.
>> 
>>>>> + g_assert(o->pci_dev);
>>>>> + pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
>>>>> + qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
>>>>> + o->device, pci_dev_path);
>>>> 
>>>> Where is o->device set? I'm asking because I it must not be null here,
>>>> and that's not locally obvious.
>>> 
>>> Yeah, it’s not obvious from this patch that o->device is guaranteed to be
>>> non-NULL. It’s set by vfu_object_set_device(). Please see the following
>>> patches in the series:
>>> vfio-user: define vfio-user-server object
>>> vfio-user: instantiate vfio-user context
>> 
>> vfu_object_set_device() is a QOM property setter.  It gets called if and
>> only if the property is set.  If it's never set, ->device remains null.
>> What ensures it's always set?
>
> That’s a good question - it’s not obvious from this patch.
>
> The code would not reach here if o->device is not set. If o->device is NULL,
> vfu_object_init_ctx() would bail out early without setting up
> vfu_object_attach_ctx() and vfu_object_ctx_run() (this function)
> handlers.

Yes:

    static void vfu_object_init_ctx(VfuObject *o, Error **errp)
    {
        ERRP_GUARD();
        DeviceState *dev = NULL;
        vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
        int ret;

        if (o->vfu_ctx || !o->socket || !o->device ||
                !phase_check(PHASE_MACHINE_READY)) {
            return;
        }

Bails out without setting an error.  Sure that's appropriate?

> Also, device is a required parameter. QEMU would not initialize this object
> without it. Please see the definition of VfioUserServerProperties in the
> following patch - noting that optional parameters are prefixed with a ‘*’:
> [PATCH v9 07/17] vfio-user: define vfio-user-server object.
>
> May be we should add a comment here to explain why o->device
> wouldn’t be NULL?

Perhaps assertion with a comment explaining why it holds.

> Thank you!

You're welcome!
Jag Raman May 5, 2022, 3:36 p.m. UTC | #6
> On May 5, 2022, at 10:42 AM, Markus Armbruster <armbru@redhat.com> wrote:
> 
> Jag Raman <jag.raman@oracle.com> writes:
> 
>>> On May 5, 2022, at 3:44 AM, Markus Armbruster <armbru@redhat.com> wrote:
>>> 
>>> Jag Raman <jag.raman@oracle.com> writes:
>>> 
>>>>> On May 4, 2022, at 7:42 AM, Markus Armbruster <armbru@redhat.com> wrote:
>>>>> 
>>>>> Jagannathan Raman <jag.raman@oracle.com> writes:
>>>>> 
>>>>>> Setup a handler to run vfio-user context. The context is driven by
>>>>>> messages to the file descriptor associated with it - get the fd for
>>>>>> the context and hook up the handler with it
>>>>>> 
>>>>>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>>>>>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>>>>>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>>>>>> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> 
> [...]
> 
>>>>>> @@ -164,6 +172,76 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
>>>>>> vfu_object_init_ctx(o, errp);
>>>>>> }
>>>>>> 
>>>>>> +static void vfu_object_ctx_run(void *opaque)
>>>>>> +{
>>>>>> + VfuObject *o = opaque;
>>>>>> + const char *vfu_id;
>>>>>> + char *vfu_path, *pci_dev_path;
>>>>>> + int ret = -1;
>>>>>> +
>>>>>> + while (ret != 0) {
>>>>>> + ret = vfu_run_ctx(o->vfu_ctx);
>>>>>> + if (ret < 0) {
>>>>>> + if (errno == EINTR) {
>>>>>> + continue;
>>>>>> + } else if (errno == ENOTCONN) {
>>>>>> + vfu_id = object_get_canonical_path_component(OBJECT(o));
>>>>>> + vfu_path = object_get_canonical_path(OBJECT(o));
>>>>> 
>>>>> Hmm. @vfu_id is always the last component of @vfu_path. Why do we need
>>>>> to send both?
>>>> 
>>>> vfu_id is the ID that the user/Orchestrator passed as a command-line option
>>>> during addition/creation. So it made sense to report back with the same ID
>>>> that they used. But I’m OK with dropping this if that’s what you prefer.
>>> 
>>> Matter of taste, I guess.  I'd drop it simply to saves us the trouble of
>>> documenting it.
>>> 
>>> If we decide to keep it, then I think we should document it's always the
>>> last component of @vfu_path.
>>> 
>>>>>> + g_assert(o->pci_dev);
>>>>>> + pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
>>>>>> + qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
>>>>>> + o->device, pci_dev_path);
>>>>> 
>>>>> Where is o->device set? I'm asking because I it must not be null here,
>>>>> and that's not locally obvious.
>>>> 
>>>> Yeah, it’s not obvious from this patch that o->device is guaranteed to be
>>>> non-NULL. It’s set by vfu_object_set_device(). Please see the following
>>>> patches in the series:
>>>> vfio-user: define vfio-user-server object
>>>> vfio-user: instantiate vfio-user context
>>> 
>>> vfu_object_set_device() is a QOM property setter.  It gets called if and
>>> only if the property is set.  If it's never set, ->device remains null.
>>> What ensures it's always set?
>> 
>> That’s a good question - it’s not obvious from this patch.
>> 
>> The code would not reach here if o->device is not set. If o->device is NULL,
>> vfu_object_init_ctx() would bail out early without setting up
>> vfu_object_attach_ctx() and vfu_object_ctx_run() (this function)
>> handlers.
> 
> Yes:
> 
>    static void vfu_object_init_ctx(VfuObject *o, Error **errp)
>    {
>        ERRP_GUARD();
>        DeviceState *dev = NULL;
>        vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
>        int ret;
> 
>        if (o->vfu_ctx || !o->socket || !o->device ||
>                !phase_check(PHASE_MACHINE_READY)) {
>            return;
>        }
> 
> Bails out without setting an error.  Sure that's appropriate?

It’s not an error per se - vfu_object_init_ctx() doesn’t proceed
further if device/socket is not set or if the machine is not ready.

Both socket and device are required properties, so they would
eventually be set by vfu_object_set_socket() /
vfu_object_set_device() - and these setters eventually kick
vfu_object_init_ctx().

> 
>> Also, device is a required parameter. QEMU would not initialize this object
>> without it. Please see the definition of VfioUserServerProperties in the
>> following patch - noting that optional parameters are prefixed with a ‘*’:
>> [PATCH v9 07/17] vfio-user: define vfio-user-server object.
>> 
>> May be we should add a comment here to explain why o->device
>> wouldn’t be NULL?
> 
> Perhaps assertion with a comment explaining why it holds.

OK, will do.

--
Jag

> 
>> Thank you!
> 
> You're welcome!
>
Markus Armbruster May 6, 2022, 5:44 a.m. UTC | #7
Jag Raman <jag.raman@oracle.com> writes:

>> On May 5, 2022, at 10:42 AM, Markus Armbruster <armbru@redhat.com> wrote:
>> 
>> Jag Raman <jag.raman@oracle.com> writes:
>> 
>>>> On May 5, 2022, at 3:44 AM, Markus Armbruster <armbru@redhat.com> wrote:
>>>> 
>>>> Jag Raman <jag.raman@oracle.com> writes:
>>>> 
>>>>>> On May 4, 2022, at 7:42 AM, Markus Armbruster <armbru@redhat.com> wrote:
>>>>>> 
>>>>>> Jagannathan Raman <jag.raman@oracle.com> writes:
>>>>>> 
>>>>>>> Setup a handler to run vfio-user context. The context is driven by
>>>>>>> messages to the file descriptor associated with it - get the fd for
>>>>>>> the context and hook up the handler with it
>>>>>>> 
>>>>>>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>>>>>>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>>>>>>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>>>>>>> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
>> 
>> [...]
>> 
>>>>>>> @@ -164,6 +172,76 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
>>>>>>> vfu_object_init_ctx(o, errp);
>>>>>>> }
>>>>>>> 
>>>>>>> +static void vfu_object_ctx_run(void *opaque)
>>>>>>> +{
>>>>>>> + VfuObject *o = opaque;
>>>>>>> + const char *vfu_id;
>>>>>>> + char *vfu_path, *pci_dev_path;
>>>>>>> + int ret = -1;
>>>>>>> +
>>>>>>> + while (ret != 0) {
>>>>>>> + ret = vfu_run_ctx(o->vfu_ctx);
>>>>>>> + if (ret < 0) {
>>>>>>> + if (errno == EINTR) {
>>>>>>> + continue;
>>>>>>> + } else if (errno == ENOTCONN) {
>>>>>>> + vfu_id = object_get_canonical_path_component(OBJECT(o));
>>>>>>> + vfu_path = object_get_canonical_path(OBJECT(o));
>>>>>> 
>>>>>> Hmm. @vfu_id is always the last component of @vfu_path. Why do we need
>>>>>> to send both?
>>>>> 
>>>>> vfu_id is the ID that the user/Orchestrator passed as a command-line option
>>>>> during addition/creation. So it made sense to report back with the same ID
>>>>> that they used. But I’m OK with dropping this if that’s what you prefer.
>>>> 
>>>> Matter of taste, I guess.  I'd drop it simply to saves us the trouble of
>>>> documenting it.
>>>> 
>>>> If we decide to keep it, then I think we should document it's always the
>>>> last component of @vfu_path.
>>>> 
>>>>>>> + g_assert(o->pci_dev);
>>>>>>> + pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
>>>>>>> + qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
>>>>>>> + o->device, pci_dev_path);
>>>>>> 
>>>>>> Where is o->device set? I'm asking because I it must not be null here,
>>>>>> and that's not locally obvious.
>>>>> 
>>>>> Yeah, it’s not obvious from this patch that o->device is guaranteed to be
>>>>> non-NULL. It’s set by vfu_object_set_device(). Please see the following
>>>>> patches in the series:
>>>>> vfio-user: define vfio-user-server object
>>>>> vfio-user: instantiate vfio-user context
>>>> 
>>>> vfu_object_set_device() is a QOM property setter.  It gets called if and
>>>> only if the property is set.  If it's never set, ->device remains null.
>>>> What ensures it's always set?
>>> 
>>> That’s a good question - it’s not obvious from this patch.
>>> 
>>> The code would not reach here if o->device is not set. If o->device is NULL,
>>> vfu_object_init_ctx() would bail out early without setting up
>>> vfu_object_attach_ctx() and vfu_object_ctx_run() (this function)
>>> handlers.
>> 
>> Yes:
>> 
>>    static void vfu_object_init_ctx(VfuObject *o, Error **errp)
>>    {
>>        ERRP_GUARD();
>>        DeviceState *dev = NULL;
>>        vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
>>        int ret;
>> 
>>        if (o->vfu_ctx || !o->socket || !o->device ||
>>                !phase_check(PHASE_MACHINE_READY)) {
>>            return;
>>        }
>> 
>> Bails out without setting an error.  Sure that's appropriate?
>
> It’s not an error per se - vfu_object_init_ctx() doesn’t proceed
> further if device/socket is not set or if the machine is not ready.
>
> Both socket and device are required properties, so they would
> eventually be set by vfu_object_set_socket() /
> vfu_object_set_device() - and these setters eventually kick
> vfu_object_init_ctx().

Early returns from a function that sets error on failure triggers bug
spider sense, because forgetting to set an error on failure is a fairly
common mistake.

The root of the problem is of course that the function's contract is not
obvious.  The name vfu_object_init_ctx() suggests it initializes
something.  But it clearly doesn't unless certain conditions are met.
The reader is left to wonder whether that's a bug, or whether that's
what it is supposed to do.

A function contract spelling out when the function is supposed to do
what (including "nothing") would help.

[...]
Jag Raman May 7, 2022, 6:48 p.m. UTC | #8
> On May 6, 2022, at 1:44 AM, Markus Armbruster <armbru@redhat.com> wrote:
> 
> Jag Raman <jag.raman@oracle.com> writes:
> 
>>> On May 5, 2022, at 10:42 AM, Markus Armbruster <armbru@redhat.com> wrote:
>>> 
>>> Jag Raman <jag.raman@oracle.com> writes:
>>> 
>>>>> On May 5, 2022, at 3:44 AM, Markus Armbruster <armbru@redhat.com> wrote:
>>>>> 
>>>>> Jag Raman <jag.raman@oracle.com> writes:
>>>>> 
>>>>>>> On May 4, 2022, at 7:42 AM, Markus Armbruster <armbru@redhat.com> wrote:
>>>>>>> 
>>>>>>> Jagannathan Raman <jag.raman@oracle.com> writes:
>>>>>>> 
>>>>>>>> Setup a handler to run vfio-user context. The context is driven by
>>>>>>>> messages to the file descriptor associated with it - get the fd for
>>>>>>>> the context and hook up the handler with it
>>>>>>>> 
>>>>>>>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>>>>>>>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>>>>>>>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>>>>>>>> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
>>> 
>>> [...]
>>> 
>>>>>>>> @@ -164,6 +172,76 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
>>>>>>>> vfu_object_init_ctx(o, errp);
>>>>>>>> }
>>>>>>>> 
>>>>>>>> +static void vfu_object_ctx_run(void *opaque)
>>>>>>>> +{
>>>>>>>> + VfuObject *o = opaque;
>>>>>>>> + const char *vfu_id;
>>>>>>>> + char *vfu_path, *pci_dev_path;
>>>>>>>> + int ret = -1;
>>>>>>>> +
>>>>>>>> + while (ret != 0) {
>>>>>>>> + ret = vfu_run_ctx(o->vfu_ctx);
>>>>>>>> + if (ret < 0) {
>>>>>>>> + if (errno == EINTR) {
>>>>>>>> + continue;
>>>>>>>> + } else if (errno == ENOTCONN) {
>>>>>>>> + vfu_id = object_get_canonical_path_component(OBJECT(o));
>>>>>>>> + vfu_path = object_get_canonical_path(OBJECT(o));
>>>>>>> 
>>>>>>> Hmm. @vfu_id is always the last component of @vfu_path. Why do we need
>>>>>>> to send both?
>>>>>> 
>>>>>> vfu_id is the ID that the user/Orchestrator passed as a command-line option
>>>>>> during addition/creation. So it made sense to report back with the same ID
>>>>>> that they used. But I’m OK with dropping this if that’s what you prefer.
>>>>> 
>>>>> Matter of taste, I guess.  I'd drop it simply to saves us the trouble of
>>>>> documenting it.
>>>>> 
>>>>> If we decide to keep it, then I think we should document it's always the
>>>>> last component of @vfu_path.
>>>>> 
>>>>>>>> + g_assert(o->pci_dev);
>>>>>>>> + pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
>>>>>>>> + qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
>>>>>>>> + o->device, pci_dev_path);
>>>>>>> 
>>>>>>> Where is o->device set? I'm asking because I it must not be null here,
>>>>>>> and that's not locally obvious.
>>>>>> 
>>>>>> Yeah, it’s not obvious from this patch that o->device is guaranteed to be
>>>>>> non-NULL. It’s set by vfu_object_set_device(). Please see the following
>>>>>> patches in the series:
>>>>>> vfio-user: define vfio-user-server object
>>>>>> vfio-user: instantiate vfio-user context
>>>>> 
>>>>> vfu_object_set_device() is a QOM property setter.  It gets called if and
>>>>> only if the property is set.  If it's never set, ->device remains null.
>>>>> What ensures it's always set?
>>>> 
>>>> That’s a good question - it’s not obvious from this patch.
>>>> 
>>>> The code would not reach here if o->device is not set. If o->device is NULL,
>>>> vfu_object_init_ctx() would bail out early without setting up
>>>> vfu_object_attach_ctx() and vfu_object_ctx_run() (this function)
>>>> handlers.
>>> 
>>> Yes:
>>> 
>>>   static void vfu_object_init_ctx(VfuObject *o, Error **errp)
>>>   {
>>>       ERRP_GUARD();
>>>       DeviceState *dev = NULL;
>>>       vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
>>>       int ret;
>>> 
>>>       if (o->vfu_ctx || !o->socket || !o->device ||
>>>               !phase_check(PHASE_MACHINE_READY)) {
>>>           return;
>>>       }
>>> 
>>> Bails out without setting an error.  Sure that's appropriate?
>> 
>> It’s not an error per se - vfu_object_init_ctx() doesn’t proceed
>> further if device/socket is not set or if the machine is not ready.
>> 
>> Both socket and device are required properties, so they would
>> eventually be set by vfu_object_set_socket() /
>> vfu_object_set_device() - and these setters eventually kick
>> vfu_object_init_ctx().
> 
> Early returns from a function that sets error on failure triggers bug
> spider sense, because forgetting to set an error on failure is a fairly
> common mistake.
> 
> The root of the problem is of course that the function's contract is not
> obvious.  The name vfu_object_init_ctx() suggests it initializes
> something.  But it clearly doesn't unless certain conditions are met.
> The reader is left to wonder whether that's a bug, or whether that's
> what it is supposed to do.
> 
> A function contract spelling out when the function is supposed to do
> what (including "nothing") would help.

Sure, will add a comment explaining what this function is
supposed to do.

--
Jag

> 
> [...]
>
diff mbox series

Patch

diff --git a/qapi/misc.json b/qapi/misc.json
index b83cc39029..fa49f2876a 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -553,3 +553,33 @@ 
 ##
 { 'event': 'RTC_CHANGE',
   'data': { 'offset': 'int', 'qom-path': 'str' } }
+
+##
+# @VFU_CLIENT_HANGUP:
+#
+# Emitted when the client of a TYPE_VFIO_USER_SERVER closes the
+# communication channel
+#
+# @vfu-id: ID of the TYPE_VFIO_USER_SERVER object
+#
+# @vfu-qom-path: path to the TYPE_VFIO_USER_SERVER object in the QOM tree
+#
+# @dev-id: ID of attached PCI device
+#
+# @dev-qom-path: path to attached PCI device in the QOM tree
+#
+# Since: 7.1
+#
+# Example:
+#
+# <- { "event": "VFU_CLIENT_HANGUP",
+#      "data": { "vfu-id": "vfu1",
+#                "vfu-qom-path": "/objects/vfu1",
+#                "dev-id": "sas1",
+#                "dev-qom-path": "/machine/peripheral/sas1" },
+#      "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+#
+##
+{ 'event': 'VFU_CLIENT_HANGUP',
+  'data': { 'vfu-id': 'str', 'vfu-qom-path': 'str',
+            'dev-id': 'str', 'dev-qom-path': 'str' } }
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index 3ca6aa2b45..3a4c6a9fa0 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -27,6 +27,9 @@ 
  *
  * device - id of a device on the server, a required option. PCI devices
  *          alone are supported presently.
+ *
+ * notes - x-vfio-user-server could block IO and monitor during the
+ *         initialization phase.
  */
 
 #include "qemu/osdep.h"
@@ -40,11 +43,14 @@ 
 #include "hw/remote/machine.h"
 #include "qapi/error.h"
 #include "qapi/qapi-visit-sockets.h"
+#include "qapi/qapi-events-misc.h"
 #include "qemu/notify.h"
+#include "qemu/thread.h"
 #include "sysemu/sysemu.h"
 #include "libvfio-user.h"
 #include "hw/qdev-core.h"
 #include "hw/pci/pci.h"
+#include "qemu/timer.h"
 
 #define TYPE_VFU_OBJECT "x-vfio-user-server"
 OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
@@ -86,6 +92,8 @@  struct VfuObject {
     PCIDevice *pci_dev;
 
     Error *unplug_blocker;
+
+    int vfu_poll_fd;
 };
 
 static void vfu_object_init_ctx(VfuObject *o, Error **errp);
@@ -164,6 +172,76 @@  static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
     vfu_object_init_ctx(o, errp);
 }
 
+static void vfu_object_ctx_run(void *opaque)
+{
+    VfuObject *o = opaque;
+    const char *vfu_id;
+    char *vfu_path, *pci_dev_path;
+    int ret = -1;
+
+    while (ret != 0) {
+        ret = vfu_run_ctx(o->vfu_ctx);
+        if (ret < 0) {
+            if (errno == EINTR) {
+                continue;
+            } else if (errno == ENOTCONN) {
+                vfu_id = object_get_canonical_path_component(OBJECT(o));
+                vfu_path = object_get_canonical_path(OBJECT(o));
+                g_assert(o->pci_dev);
+                pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
+                qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
+                                                  o->device, pci_dev_path);
+                qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
+                o->vfu_poll_fd = -1;
+                object_unparent(OBJECT(o));
+                g_free(vfu_path);
+                g_free(pci_dev_path);
+                break;
+            } else {
+                VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s",
+                                 o->device, strerror(errno));
+                break;
+            }
+        }
+    }
+}
+
+static void vfu_object_attach_ctx(void *opaque)
+{
+    VfuObject *o = opaque;
+    GPollFD pfds[1];
+    int ret;
+
+    qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
+
+    pfds[0].fd = o->vfu_poll_fd;
+    pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
+
+retry_attach:
+    ret = vfu_attach_ctx(o->vfu_ctx);
+    if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
+        /**
+         * vfu_object_attach_ctx can block QEMU's main loop
+         * during attach - the monitor and other IO
+         * could be unresponsive during this time.
+         */
+        (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
+        goto retry_attach;
+    } else if (ret < 0) {
+        VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s",
+                         o->device, strerror(errno));
+        return;
+    }
+
+    o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
+    if (o->vfu_poll_fd < 0) {
+        VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device);
+        return;
+    }
+
+    qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o);
+}
+
 /*
  * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
  * properties. It also depends on devices instantiated in QEMU. These
@@ -202,7 +280,8 @@  static void vfu_object_init_ctx(VfuObject *o, Error **errp)
         return;
     }
 
-    o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path, 0,
+    o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path,
+                                LIBVFIO_USER_FLAG_ATTACH_NB,
                                 o, VFU_DEV_TYPE_PCI);
     if (o->vfu_ctx == NULL) {
         error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
@@ -241,6 +320,21 @@  static void vfu_object_init_ctx(VfuObject *o, Error **errp)
                TYPE_VFU_OBJECT, o->device);
     qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
 
+    ret = vfu_realize_ctx(o->vfu_ctx);
+    if (ret < 0) {
+        error_setg(errp, "vfu: Failed to realize device %s- %s",
+                   o->device, strerror(errno));
+        goto fail;
+    }
+
+    o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
+    if (o->vfu_poll_fd < 0) {
+        error_setg(errp, "vfu: Failed to get poll fd %s", o->device);
+        goto fail;
+    }
+
+    qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);
+
     return;
 
 fail:
@@ -275,6 +369,7 @@  static void vfu_object_init(Object *obj)
         qemu_add_machine_init_done_notifier(&o->machine_done);
     }
 
+    o->vfu_poll_fd = -1;
 }
 
 static void vfu_object_finalize(Object *obj)
@@ -288,6 +383,11 @@  static void vfu_object_finalize(Object *obj)
 
     o->socket = NULL;
 
+    if (o->vfu_poll_fd != -1) {
+        qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
+        o->vfu_poll_fd = -1;
+    }
+
     if (o->vfu_ctx) {
         vfu_destroy_ctx(o->vfu_ctx);
         o->vfu_ctx = NULL;