diff mbox series

net: tap: check if the file descriptor is valid before using it

Message ID 20200624190009.300069-1-lvivier@redhat.com (mailing list archive)
State New, archived
Headers show
Series net: tap: check if the file descriptor is valid before using it | expand

Commit Message

Laurent Vivier June 24, 2020, 7 p.m. UTC
qemu_set_nonblock() checks that the file descriptor can be used and, if
not, crashes QEMU. An assert() is used for that. The use of assert() is
used to detect programming error and the coredump will allow to debug
the problem.

But in the case of the tap device, this assert() can be triggered by
a misconfiguration by the user. At startup, it's not a real problem, but it
can also happen during the hot-plug of a new device, and here it's a
problem because we can crash a perfectly healthy system.

For instance:
 # ip link add link virbr0 name macvtap0 type macvtap mode bridge
 # ip link set macvtap0 up
 # TAP=/dev/tap$(ip -o link show macvtap0 | cut -d: -f1)
 # qemu-system-x86_64 -machine q35 -device pcie-root-port,id=pcie-root-port-0 -monitor stdio 9<> $TAP
 (qemu) netdev_add type=tap,id=hostnet0,vhost=on,fd=9
 (qemu) device_add driver=virtio-net-pci,netdev=hostnet0,id=net0,bus=pcie-root-port-0
 (qemu) device_del net0
 (qemu) netdev_del hostnet0
 (qemu) netdev_add type=tap,id=hostnet1,vhost=on,fd=9
 qemu-system-x86_64: .../util/oslib-posix.c:247: qemu_set_nonblock: Assertion `f != -1' failed.
 Aborted (core dumped)

To avoid that, check the file descriptor is valid before passing it to qemu_set_non_block() for
"fd=" and "fds=" parameters.

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 include/qemu/sockets.h |  1 +
 net/tap.c              | 13 +++++++++++++
 util/oslib-posix.c     |  5 +++++
 util/oslib-win32.c     |  6 ++++++
 4 files changed, 25 insertions(+)

Comments

Philippe Mathieu-Daudé June 25, 2020, 6:19 a.m. UTC | #1
On 6/24/20 9:00 PM, Laurent Vivier wrote:
> qemu_set_nonblock() checks that the file descriptor can be used and, if
> not, crashes QEMU. An assert() is used for that. The use of assert() is
> used to detect programming error and the coredump will allow to debug
> the problem.
> 
> But in the case of the tap device, this assert() can be triggered by
> a misconfiguration by the user. At startup, it's not a real problem, but it
> can also happen during the hot-plug of a new device, and here it's a
> problem because we can crash a perfectly healthy system.
> 
> For instance:
>  # ip link add link virbr0 name macvtap0 type macvtap mode bridge
>  # ip link set macvtap0 up
>  # TAP=/dev/tap$(ip -o link show macvtap0 | cut -d: -f1)
>  # qemu-system-x86_64 -machine q35 -device pcie-root-port,id=pcie-root-port-0 -monitor stdio 9<> $TAP
>  (qemu) netdev_add type=tap,id=hostnet0,vhost=on,fd=9
>  (qemu) device_add driver=virtio-net-pci,netdev=hostnet0,id=net0,bus=pcie-root-port-0
>  (qemu) device_del net0
>  (qemu) netdev_del hostnet0
>  (qemu) netdev_add type=tap,id=hostnet1,vhost=on,fd=9
>  qemu-system-x86_64: .../util/oslib-posix.c:247: qemu_set_nonblock: Assertion `f != -1' failed.
>  Aborted (core dumped)
> 
> To avoid that, check the file descriptor is valid before passing it to qemu_set_non_block() for
> "fd=" and "fds=" parameters.
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  include/qemu/sockets.h |  1 +
>  net/tap.c              | 13 +++++++++++++
>  util/oslib-posix.c     |  5 +++++
>  util/oslib-win32.c     |  6 ++++++
>  4 files changed, 25 insertions(+)
> 
> diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
> index 57cd049d6edd..5b0c2d77ddad 100644
> --- a/include/qemu/sockets.h
> +++ b/include/qemu/sockets.h
> @@ -17,6 +17,7 @@ int qemu_socket(int domain, int type, int protocol);
>  int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
>  int socket_set_cork(int fd, int v);
>  int socket_set_nodelay(int fd);
> +bool qemu_fd_is_valid(int fd);
>  void qemu_set_block(int fd);
>  void qemu_set_nonblock(int fd);
>  int socket_set_fast_reuse(int fd);
> diff --git a/net/tap.c b/net/tap.c
> index 6207f61f84ab..f65966aaccd8 100644
> --- a/net/tap.c
> +++ b/net/tap.c
> @@ -795,6 +795,12 @@ int net_init_tap(const Netdev *netdev, const char *name,
>              return -1;
>          }
>  
> +        /* Check if fd is valid */
> +        if (!qemu_fd_is_valid(fd)) {
> +            error_setg(errp, "Invalid file descriptor %d", fd);
> +            return -1;
> +        }
> +
>          qemu_set_nonblock(fd);
>  
>          vnet_hdr = tap_probe_vnet_hdr(fd);
> @@ -843,6 +849,13 @@ int net_init_tap(const Netdev *netdev, const char *name,
>                  goto free_fail;
>              }
>  
> +            /* Check if fd is valid */
> +            if (!qemu_fd_is_valid(fd)) {
> +                error_setg(errp, "Invalid file descriptor %d", fd);
> +                ret = -1;
> +                goto free_fail;
> +            }
> +
>              qemu_set_nonblock(fd);
>  
>              if (i == 0) {
> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> index 916f1be2243a..8d5705f598d3 100644
> --- a/util/oslib-posix.c
> +++ b/util/oslib-posix.c
> @@ -244,6 +244,11 @@ void qemu_anon_ram_free(void *ptr, size_t size)
>      qemu_ram_munmap(-1, ptr, size);
>  }
>  
> +bool qemu_fd_is_valid(int fd)
> +{
> +    return fcntl(fd, F_GETFL) != -1;
> +}
> +
>  void qemu_set_block(int fd)
>  {
>      int f;
> diff --git a/util/oslib-win32.c b/util/oslib-win32.c
> index e9b14ab17847..a6be9445cfdb 100644
> --- a/util/oslib-win32.c
> +++ b/util/oslib-win32.c
> @@ -132,6 +132,12 @@ struct tm *localtime_r(const time_t *timep, struct tm *result)
>  }
>  #endif /* CONFIG_LOCALTIME_R */
>  
> +bool qemu_fd_is_valid(int fd)
> +{
> +    /* FIXME: how to check if fd is valid? */
> +    return true;
> +}

Maybe:

  bool qemu_fd_is_valid(int fd)
  {
      unsigned long res; /* ignored */

      return ioctlsocket(fd, FIONREAD, &res) == NO_ERROR;
  }

See:

https://docs.microsoft.com/en-us/windows/win32/winsock/winsock-ioctls

> +
>  void qemu_set_block(int fd)
>  {
>      unsigned long opt = 0;
>
Laurent Vivier June 25, 2020, 7:38 a.m. UTC | #2
On 25/06/2020 08:19, Philippe Mathieu-Daudé wrote:
> On 6/24/20 9:00 PM, Laurent Vivier wrote:
>> qemu_set_nonblock() checks that the file descriptor can be used and, if
>> not, crashes QEMU. An assert() is used for that. The use of assert() is
>> used to detect programming error and the coredump will allow to debug
>> the problem.
>>
>> But in the case of the tap device, this assert() can be triggered by
>> a misconfiguration by the user. At startup, it's not a real problem, but it
>> can also happen during the hot-plug of a new device, and here it's a
>> problem because we can crash a perfectly healthy system.
>>
>> For instance:
>>  # ip link add link virbr0 name macvtap0 type macvtap mode bridge
>>  # ip link set macvtap0 up
>>  # TAP=/dev/tap$(ip -o link show macvtap0 | cut -d: -f1)
>>  # qemu-system-x86_64 -machine q35 -device pcie-root-port,id=pcie-root-port-0 -monitor stdio 9<> $TAP
>>  (qemu) netdev_add type=tap,id=hostnet0,vhost=on,fd=9
>>  (qemu) device_add driver=virtio-net-pci,netdev=hostnet0,id=net0,bus=pcie-root-port-0
>>  (qemu) device_del net0
>>  (qemu) netdev_del hostnet0
>>  (qemu) netdev_add type=tap,id=hostnet1,vhost=on,fd=9
>>  qemu-system-x86_64: .../util/oslib-posix.c:247: qemu_set_nonblock: Assertion `f != -1' failed.
>>  Aborted (core dumped)
>>
>> To avoid that, check the file descriptor is valid before passing it to qemu_set_non_block() for
>> "fd=" and "fds=" parameters.
>>
>> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
>> ---
>>  include/qemu/sockets.h |  1 +
>>  net/tap.c              | 13 +++++++++++++
>>  util/oslib-posix.c     |  5 +++++
>>  util/oslib-win32.c     |  6 ++++++
>>  4 files changed, 25 insertions(+)
>>
>> diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
>> index 57cd049d6edd..5b0c2d77ddad 100644
>> --- a/include/qemu/sockets.h
>> +++ b/include/qemu/sockets.h
>> @@ -17,6 +17,7 @@ int qemu_socket(int domain, int type, int protocol);
>>  int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
>>  int socket_set_cork(int fd, int v);
>>  int socket_set_nodelay(int fd);
>> +bool qemu_fd_is_valid(int fd);
>>  void qemu_set_block(int fd);
>>  void qemu_set_nonblock(int fd);
>>  int socket_set_fast_reuse(int fd);
>> diff --git a/net/tap.c b/net/tap.c
>> index 6207f61f84ab..f65966aaccd8 100644
>> --- a/net/tap.c
>> +++ b/net/tap.c
>> @@ -795,6 +795,12 @@ int net_init_tap(const Netdev *netdev, const char *name,
>>              return -1;
>>          }
>>  
>> +        /* Check if fd is valid */
>> +        if (!qemu_fd_is_valid(fd)) {
>> +            error_setg(errp, "Invalid file descriptor %d", fd);
>> +            return -1;
>> +        }
>> +
>>          qemu_set_nonblock(fd);
>>  
>>          vnet_hdr = tap_probe_vnet_hdr(fd);
>> @@ -843,6 +849,13 @@ int net_init_tap(const Netdev *netdev, const char *name,
>>                  goto free_fail;
>>              }
>>  
>> +            /* Check if fd is valid */
>> +            if (!qemu_fd_is_valid(fd)) {
>> +                error_setg(errp, "Invalid file descriptor %d", fd);
>> +                ret = -1;
>> +                goto free_fail;
>> +            }
>> +
>>              qemu_set_nonblock(fd);
>>  
>>              if (i == 0) {
>> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
>> index 916f1be2243a..8d5705f598d3 100644
>> --- a/util/oslib-posix.c
>> +++ b/util/oslib-posix.c
>> @@ -244,6 +244,11 @@ void qemu_anon_ram_free(void *ptr, size_t size)
>>      qemu_ram_munmap(-1, ptr, size);
>>  }
>>  
>> +bool qemu_fd_is_valid(int fd)
>> +{
>> +    return fcntl(fd, F_GETFL) != -1;
>> +}
>> +
>>  void qemu_set_block(int fd)
>>  {
>>      int f;
>> diff --git a/util/oslib-win32.c b/util/oslib-win32.c
>> index e9b14ab17847..a6be9445cfdb 100644
>> --- a/util/oslib-win32.c
>> +++ b/util/oslib-win32.c
>> @@ -132,6 +132,12 @@ struct tm *localtime_r(const time_t *timep, struct tm *result)
>>  }
>>  #endif /* CONFIG_LOCALTIME_R */
>>  
>> +bool qemu_fd_is_valid(int fd)
>> +{
>> +    /* FIXME: how to check if fd is valid? */
>> +    return true;
>> +}
> 
> Maybe:
> 
>   bool qemu_fd_is_valid(int fd)
>   {
>       unsigned long res; /* ignored */
> 
>       return ioctlsocket(fd, FIONREAD, &res) == NO_ERROR;
>   }

I can do that, but I have no way to test the change doesn't break
anything... whereas always returning true ensures me it continues to
work as before.

Thanks,
Laurent
Philippe Mathieu-Daudé June 25, 2020, 7:40 a.m. UTC | #3
On 6/25/20 9:38 AM, Laurent Vivier wrote:
> On 25/06/2020 08:19, Philippe Mathieu-Daudé wrote:
>> On 6/24/20 9:00 PM, Laurent Vivier wrote:
>>> qemu_set_nonblock() checks that the file descriptor can be used and, if
>>> not, crashes QEMU. An assert() is used for that. The use of assert() is
>>> used to detect programming error and the coredump will allow to debug
>>> the problem.
>>>
>>> But in the case of the tap device, this assert() can be triggered by
>>> a misconfiguration by the user. At startup, it's not a real problem, but it
>>> can also happen during the hot-plug of a new device, and here it's a
>>> problem because we can crash a perfectly healthy system.
>>>
>>> For instance:
>>>  # ip link add link virbr0 name macvtap0 type macvtap mode bridge
>>>  # ip link set macvtap0 up
>>>  # TAP=/dev/tap$(ip -o link show macvtap0 | cut -d: -f1)
>>>  # qemu-system-x86_64 -machine q35 -device pcie-root-port,id=pcie-root-port-0 -monitor stdio 9<> $TAP
>>>  (qemu) netdev_add type=tap,id=hostnet0,vhost=on,fd=9
>>>  (qemu) device_add driver=virtio-net-pci,netdev=hostnet0,id=net0,bus=pcie-root-port-0
>>>  (qemu) device_del net0
>>>  (qemu) netdev_del hostnet0
>>>  (qemu) netdev_add type=tap,id=hostnet1,vhost=on,fd=9
>>>  qemu-system-x86_64: .../util/oslib-posix.c:247: qemu_set_nonblock: Assertion `f != -1' failed.
>>>  Aborted (core dumped)
>>>
>>> To avoid that, check the file descriptor is valid before passing it to qemu_set_non_block() for
>>> "fd=" and "fds=" parameters.
>>>
>>> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
>>> ---
>>>  include/qemu/sockets.h |  1 +
>>>  net/tap.c              | 13 +++++++++++++
>>>  util/oslib-posix.c     |  5 +++++
>>>  util/oslib-win32.c     |  6 ++++++
>>>  4 files changed, 25 insertions(+)
>>>
>>> diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
>>> index 57cd049d6edd..5b0c2d77ddad 100644
>>> --- a/include/qemu/sockets.h
>>> +++ b/include/qemu/sockets.h
>>> @@ -17,6 +17,7 @@ int qemu_socket(int domain, int type, int protocol);
>>>  int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
>>>  int socket_set_cork(int fd, int v);
>>>  int socket_set_nodelay(int fd);
>>> +bool qemu_fd_is_valid(int fd);
>>>  void qemu_set_block(int fd);
>>>  void qemu_set_nonblock(int fd);
>>>  int socket_set_fast_reuse(int fd);
>>> diff --git a/net/tap.c b/net/tap.c
>>> index 6207f61f84ab..f65966aaccd8 100644
>>> --- a/net/tap.c
>>> +++ b/net/tap.c
>>> @@ -795,6 +795,12 @@ int net_init_tap(const Netdev *netdev, const char *name,
>>>              return -1;
>>>          }
>>>  
>>> +        /* Check if fd is valid */
>>> +        if (!qemu_fd_is_valid(fd)) {
>>> +            error_setg(errp, "Invalid file descriptor %d", fd);
>>> +            return -1;
>>> +        }
>>> +
>>>          qemu_set_nonblock(fd);
>>>  
>>>          vnet_hdr = tap_probe_vnet_hdr(fd);
>>> @@ -843,6 +849,13 @@ int net_init_tap(const Netdev *netdev, const char *name,
>>>                  goto free_fail;
>>>              }
>>>  
>>> +            /* Check if fd is valid */
>>> +            if (!qemu_fd_is_valid(fd)) {
>>> +                error_setg(errp, "Invalid file descriptor %d", fd);
>>> +                ret = -1;
>>> +                goto free_fail;
>>> +            }
>>> +
>>>              qemu_set_nonblock(fd);
>>>  
>>>              if (i == 0) {
>>> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
>>> index 916f1be2243a..8d5705f598d3 100644
>>> --- a/util/oslib-posix.c
>>> +++ b/util/oslib-posix.c
>>> @@ -244,6 +244,11 @@ void qemu_anon_ram_free(void *ptr, size_t size)
>>>      qemu_ram_munmap(-1, ptr, size);
>>>  }
>>>  
>>> +bool qemu_fd_is_valid(int fd)
>>> +{
>>> +    return fcntl(fd, F_GETFL) != -1;
>>> +}
>>> +
>>>  void qemu_set_block(int fd)
>>>  {
>>>      int f;
>>> diff --git a/util/oslib-win32.c b/util/oslib-win32.c
>>> index e9b14ab17847..a6be9445cfdb 100644
>>> --- a/util/oslib-win32.c
>>> +++ b/util/oslib-win32.c
>>> @@ -132,6 +132,12 @@ struct tm *localtime_r(const time_t *timep, struct tm *result)
>>>  }
>>>  #endif /* CONFIG_LOCALTIME_R */
>>>  
>>> +bool qemu_fd_is_valid(int fd)
>>> +{
>>> +    /* FIXME: how to check if fd is valid? */
>>> +    return true;
>>> +}
>>
>> Maybe:
>>
>>   bool qemu_fd_is_valid(int fd)
>>   {
>>       unsigned long res; /* ignored */
>>
>>       return ioctlsocket(fd, FIONREAD, &res) == NO_ERROR;
>>   }
> 
> I can do that, but I have no way to test the change doesn't break
> anything... whereas always returning true ensures me it continues to
> work as before.

I'm only suggesting in case someone has a clue and way to test ;)

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Daniel P. Berrangé June 25, 2020, 8:48 a.m. UTC | #4
On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
> qemu_set_nonblock() checks that the file descriptor can be used and, if
> not, crashes QEMU. An assert() is used for that. The use of assert() is
> used to detect programming error and the coredump will allow to debug
> the problem.
> 
> But in the case of the tap device, this assert() can be triggered by
> a misconfiguration by the user. At startup, it's not a real problem, but it
> can also happen during the hot-plug of a new device, and here it's a
> problem because we can crash a perfectly healthy system.

If the user/mgmt app is not correctly passing FDs, then there's a whole
pile of bad stuff that can happen. Checking whether the FD is valid is
only going to catch a small subset. eg consider if fd=9 refers to the
FD that is associated with the root disk QEMU has open. We'll fail to
setup the TAP device and close this FD, breaking the healthy system
again. 

I'm not saying we can't check if the FD is valid, but lets be clear that
this is not offering very much protection against a broken mgmt apps
passing bad FDs.

> For instance:
>  # ip link add link virbr0 name macvtap0 type macvtap mode bridge
>  # ip link set macvtap0 up
>  # TAP=/dev/tap$(ip -o link show macvtap0 | cut -d: -f1)
>  # qemu-system-x86_64 -machine q35 -device pcie-root-port,id=pcie-root-port-0 -monitor stdio 9<> $TAP
>  (qemu) netdev_add type=tap,id=hostnet0,vhost=on,fd=9
>  (qemu) device_add driver=virtio-net-pci,netdev=hostnet0,id=net0,bus=pcie-root-port-0
>  (qemu) device_del net0
>  (qemu) netdev_del hostnet0
>  (qemu) netdev_add type=tap,id=hostnet1,vhost=on,fd=9
>  qemu-system-x86_64: .../util/oslib-posix.c:247: qemu_set_nonblock: Assertion `f != -1' failed.
>  Aborted (core dumped)
> 
> To avoid that, check the file descriptor is valid before passing it to qemu_set_non_block() for
> "fd=" and "fds=" parameters.
> 
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>
> ---
>  include/qemu/sockets.h |  1 +
>  net/tap.c              | 13 +++++++++++++
>  util/oslib-posix.c     |  5 +++++
>  util/oslib-win32.c     |  6 ++++++
>  4 files changed, 25 insertions(+)
> 
> diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
> index 57cd049d6edd..5b0c2d77ddad 100644
> --- a/include/qemu/sockets.h
> +++ b/include/qemu/sockets.h
> @@ -17,6 +17,7 @@ int qemu_socket(int domain, int type, int protocol);
>  int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
>  int socket_set_cork(int fd, int v);
>  int socket_set_nodelay(int fd);
> +bool qemu_fd_is_valid(int fd);
>  void qemu_set_block(int fd);
>  void qemu_set_nonblock(int fd);
>  int socket_set_fast_reuse(int fd);
> diff --git a/net/tap.c b/net/tap.c
> index 6207f61f84ab..f65966aaccd8 100644
> --- a/net/tap.c
> +++ b/net/tap.c
> @@ -795,6 +795,12 @@ int net_init_tap(const Netdev *netdev, const char *name,
>              return -1;
>          }
>  
> +        /* Check if fd is valid */
> +        if (!qemu_fd_is_valid(fd)) {
> +            error_setg(errp, "Invalid file descriptor %d", fd);
> +            return -1;
> +        }
> +
>          qemu_set_nonblock(fd);
>  
>          vnet_hdr = tap_probe_vnet_hdr(fd);
> @@ -843,6 +849,13 @@ int net_init_tap(const Netdev *netdev, const char *name,
>                  goto free_fail;
>              }
>  
> +            /* Check if fd is valid */
> +            if (!qemu_fd_is_valid(fd)) {
> +                error_setg(errp, "Invalid file descriptor %d", fd);
> +                ret = -1;
> +                goto free_fail;
> +            }
> +
>              qemu_set_nonblock(fd);
>  
>              if (i == 0) {
> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> index 916f1be2243a..8d5705f598d3 100644
> --- a/util/oslib-posix.c
> +++ b/util/oslib-posix.c
> @@ -244,6 +244,11 @@ void qemu_anon_ram_free(void *ptr, size_t size)
>      qemu_ram_munmap(-1, ptr, size);
>  }
>  
> +bool qemu_fd_is_valid(int fd)
> +{
> +    return fcntl(fd, F_GETFL) != -1;
> +}
> +
>  void qemu_set_block(int fd)
>  {
>      int f;
> diff --git a/util/oslib-win32.c b/util/oslib-win32.c
> index e9b14ab17847..a6be9445cfdb 100644
> --- a/util/oslib-win32.c
> +++ b/util/oslib-win32.c
> @@ -132,6 +132,12 @@ struct tm *localtime_r(const time_t *timep, struct tm *result)
>  }
>  #endif /* CONFIG_LOCALTIME_R */
>  
> +bool qemu_fd_is_valid(int fd)
> +{
> +    /* FIXME: how to check if fd is valid? */
> +    return true;
> +}
> +
>  void qemu_set_block(int fd)
>  {
>      unsigned long opt = 0;
> -- 
> 2.26.2
> 
> 

Regards,
Daniel
Laurent Vivier June 25, 2020, 11:56 a.m. UTC | #5
On 25/06/2020 10:48, Daniel P. Berrangé wrote:
> On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
>> qemu_set_nonblock() checks that the file descriptor can be used and, if
>> not, crashes QEMU. An assert() is used for that. The use of assert() is
>> used to detect programming error and the coredump will allow to debug
>> the problem.
>>
>> But in the case of the tap device, this assert() can be triggered by
>> a misconfiguration by the user. At startup, it's not a real problem, but it
>> can also happen during the hot-plug of a new device, and here it's a
>> problem because we can crash a perfectly healthy system.
> 
> If the user/mgmt app is not correctly passing FDs, then there's a whole
> pile of bad stuff that can happen. Checking whether the FD is valid is
> only going to catch a small subset. eg consider if fd=9 refers to the
> FD that is associated with the root disk QEMU has open. We'll fail to
> setup the TAP device and close this FD, breaking the healthy system
> again. 
> 
> I'm not saying we can't check if the FD is valid, but lets be clear that
> this is not offering very much protection against a broken mgmt apps
> passing bad FDs.
> 

I agree with you, but my only goal here is to avoid the crash in this
particular case.

The punishment should fit the crime.

The user can think the netdev_del doesn't close the fd, and he can try
to reuse it. Sending back an error is better than crashing his system.
After that, if the system crashes, it will be for the good reasons, not
because of an assert.

Thanks,
Laurent
Jason Wang June 28, 2020, 6:31 a.m. UTC | #6
On 2020/6/25 下午7:56, Laurent Vivier wrote:
> On 25/06/2020 10:48, Daniel P. Berrangé wrote:
>> On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
>>> qemu_set_nonblock() checks that the file descriptor can be used and, if
>>> not, crashes QEMU. An assert() is used for that. The use of assert() is
>>> used to detect programming error and the coredump will allow to debug
>>> the problem.
>>>
>>> But in the case of the tap device, this assert() can be triggered by
>>> a misconfiguration by the user. At startup, it's not a real problem, but it
>>> can also happen during the hot-plug of a new device, and here it's a
>>> problem because we can crash a perfectly healthy system.
>> If the user/mgmt app is not correctly passing FDs, then there's a whole
>> pile of bad stuff that can happen. Checking whether the FD is valid is
>> only going to catch a small subset. eg consider if fd=9 refers to the
>> FD that is associated with the root disk QEMU has open. We'll fail to
>> setup the TAP device and close this FD, breaking the healthy system
>> again.
>>
>> I'm not saying we can't check if the FD is valid, but lets be clear that
>> this is not offering very much protection against a broken mgmt apps
>> passing bad FDs.
>>
> I agree with you, but my only goal here is to avoid the crash in this
> particular case.
>
> The punishment should fit the crime.
>
> The user can think the netdev_del doesn't close the fd, and he can try
> to reuse it. Sending back an error is better than crashing his system.
> After that, if the system crashes, it will be for the good reasons, not
> because of an assert.


Yes. And on top of this we may try to validate the TAP via st_dev 
through fstat[1].

Thanks

[1] https://patchwork.kernel.org/patch/10029443/


>
> Thanks,
> Laurent
Laurent Vivier June 29, 2020, 7:30 p.m. UTC | #7
On 28/06/2020 08:31, Jason Wang wrote:
> 
> On 2020/6/25 下午7:56, Laurent Vivier wrote:
>> On 25/06/2020 10:48, Daniel P. Berrangé wrote:
>>> On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
>>>> qemu_set_nonblock() checks that the file descriptor can be used and, if
>>>> not, crashes QEMU. An assert() is used for that. The use of assert() is
>>>> used to detect programming error and the coredump will allow to debug
>>>> the problem.
>>>>
>>>> But in the case of the tap device, this assert() can be triggered by
>>>> a misconfiguration by the user. At startup, it's not a real problem,
>>>> but it
>>>> can also happen during the hot-plug of a new device, and here it's a
>>>> problem because we can crash a perfectly healthy system.
>>> If the user/mgmt app is not correctly passing FDs, then there's a whole
>>> pile of bad stuff that can happen. Checking whether the FD is valid is
>>> only going to catch a small subset. eg consider if fd=9 refers to the
>>> FD that is associated with the root disk QEMU has open. We'll fail to
>>> setup the TAP device and close this FD, breaking the healthy system
>>> again.
>>>
>>> I'm not saying we can't check if the FD is valid, but lets be clear that
>>> this is not offering very much protection against a broken mgmt apps
>>> passing bad FDs.
>>>
>> I agree with you, but my only goal here is to avoid the crash in this
>> particular case.
>>
>> The punishment should fit the crime.
>>
>> The user can think the netdev_del doesn't close the fd, and he can try
>> to reuse it. Sending back an error is better than crashing his system.
>> After that, if the system crashes, it will be for the good reasons, not
>> because of an assert.
> 
> 
> Yes. And on top of this we may try to validate the TAP via st_dev
> through fstat[1].

I agree, but the problem I have is to know which major(st_dev) we can
allow to use.

Do we allow only macvtap major number?
How to know the macvtap major number at user level?
[it is allocated dynamically: do we need to parse /proc/devices?]

Thanks,
Laurent
Jason Wang June 30, 2020, 9:21 a.m. UTC | #8
On 2020/6/30 上午3:30, Laurent Vivier wrote:
> On 28/06/2020 08:31, Jason Wang wrote:
>> On 2020/6/25 下午7:56, Laurent Vivier wrote:
>>> On 25/06/2020 10:48, Daniel P. Berrangé wrote:
>>>> On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
>>>>> qemu_set_nonblock() checks that the file descriptor can be used and, if
>>>>> not, crashes QEMU. An assert() is used for that. The use of assert() is
>>>>> used to detect programming error and the coredump will allow to debug
>>>>> the problem.
>>>>>
>>>>> But in the case of the tap device, this assert() can be triggered by
>>>>> a misconfiguration by the user. At startup, it's not a real problem,
>>>>> but it
>>>>> can also happen during the hot-plug of a new device, and here it's a
>>>>> problem because we can crash a perfectly healthy system.
>>>> If the user/mgmt app is not correctly passing FDs, then there's a whole
>>>> pile of bad stuff that can happen. Checking whether the FD is valid is
>>>> only going to catch a small subset. eg consider if fd=9 refers to the
>>>> FD that is associated with the root disk QEMU has open. We'll fail to
>>>> setup the TAP device and close this FD, breaking the healthy system
>>>> again.
>>>>
>>>> I'm not saying we can't check if the FD is valid, but lets be clear that
>>>> this is not offering very much protection against a broken mgmt apps
>>>> passing bad FDs.
>>>>
>>> I agree with you, but my only goal here is to avoid the crash in this
>>> particular case.
>>>
>>> The punishment should fit the crime.
>>>
>>> The user can think the netdev_del doesn't close the fd, and he can try
>>> to reuse it. Sending back an error is better than crashing his system.
>>> After that, if the system crashes, it will be for the good reasons, not
>>> because of an assert.
>>
>> Yes. And on top of this we may try to validate the TAP via st_dev
>> through fstat[1].
> I agree, but the problem I have is to know which major(st_dev) we can
> allow to use.
>
> Do we allow only macvtap major number?


Macvtap and tuntap.


> How to know the macvtap major number at user level?
> [it is allocated dynamically: do we need to parse /proc/devices?]


I think we can get them through fstat for /dev/net/tun and /dev/macvtapX.

Thanks


>
> Thanks,
> Laurent
>
>
Daniel P. Berrangé June 30, 2020, 9:23 a.m. UTC | #9
On Tue, Jun 30, 2020 at 05:21:49PM +0800, Jason Wang wrote:
> 
> On 2020/6/30 上午3:30, Laurent Vivier wrote:
> > On 28/06/2020 08:31, Jason Wang wrote:
> > > On 2020/6/25 下午7:56, Laurent Vivier wrote:
> > > > On 25/06/2020 10:48, Daniel P. Berrangé wrote:
> > > > > On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
> > > > > > qemu_set_nonblock() checks that the file descriptor can be used and, if
> > > > > > not, crashes QEMU. An assert() is used for that. The use of assert() is
> > > > > > used to detect programming error and the coredump will allow to debug
> > > > > > the problem.
> > > > > > 
> > > > > > But in the case of the tap device, this assert() can be triggered by
> > > > > > a misconfiguration by the user. At startup, it's not a real problem,
> > > > > > but it
> > > > > > can also happen during the hot-plug of a new device, and here it's a
> > > > > > problem because we can crash a perfectly healthy system.
> > > > > If the user/mgmt app is not correctly passing FDs, then there's a whole
> > > > > pile of bad stuff that can happen. Checking whether the FD is valid is
> > > > > only going to catch a small subset. eg consider if fd=9 refers to the
> > > > > FD that is associated with the root disk QEMU has open. We'll fail to
> > > > > setup the TAP device and close this FD, breaking the healthy system
> > > > > again.
> > > > > 
> > > > > I'm not saying we can't check if the FD is valid, but lets be clear that
> > > > > this is not offering very much protection against a broken mgmt apps
> > > > > passing bad FDs.
> > > > > 
> > > > I agree with you, but my only goal here is to avoid the crash in this
> > > > particular case.
> > > > 
> > > > The punishment should fit the crime.
> > > > 
> > > > The user can think the netdev_del doesn't close the fd, and he can try
> > > > to reuse it. Sending back an error is better than crashing his system.
> > > > After that, if the system crashes, it will be for the good reasons, not
> > > > because of an assert.
> > > 
> > > Yes. And on top of this we may try to validate the TAP via st_dev
> > > through fstat[1].
> > I agree, but the problem I have is to know which major(st_dev) we can
> > allow to use.
> > 
> > Do we allow only macvtap major number?
> 
> 
> Macvtap and tuntap.
> 
> 
> > How to know the macvtap major number at user level?
> > [it is allocated dynamically: do we need to parse /proc/devices?]
> 
> 
> I think we can get them through fstat for /dev/net/tun and /dev/macvtapX.

Don't assume QEMU has any permission to access to these device nodes,
only the pre-opened FDs it is given by libvirt.

Regards,
Daniel
Daniel P. Berrangé June 30, 2020, 9:31 a.m. UTC | #10
On Tue, Jun 30, 2020 at 10:23:18AM +0100, Daniel P. Berrangé wrote:
> On Tue, Jun 30, 2020 at 05:21:49PM +0800, Jason Wang wrote:
> > 
> > On 2020/6/30 上午3:30, Laurent Vivier wrote:
> > > On 28/06/2020 08:31, Jason Wang wrote:
> > > > On 2020/6/25 下午7:56, Laurent Vivier wrote:
> > > > > On 25/06/2020 10:48, Daniel P. Berrangé wrote:
> > > > > > On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
> > > > > > > qemu_set_nonblock() checks that the file descriptor can be used and, if
> > > > > > > not, crashes QEMU. An assert() is used for that. The use of assert() is
> > > > > > > used to detect programming error and the coredump will allow to debug
> > > > > > > the problem.
> > > > > > > 
> > > > > > > But in the case of the tap device, this assert() can be triggered by
> > > > > > > a misconfiguration by the user. At startup, it's not a real problem,
> > > > > > > but it
> > > > > > > can also happen during the hot-plug of a new device, and here it's a
> > > > > > > problem because we can crash a perfectly healthy system.
> > > > > > If the user/mgmt app is not correctly passing FDs, then there's a whole
> > > > > > pile of bad stuff that can happen. Checking whether the FD is valid is
> > > > > > only going to catch a small subset. eg consider if fd=9 refers to the
> > > > > > FD that is associated with the root disk QEMU has open. We'll fail to
> > > > > > setup the TAP device and close this FD, breaking the healthy system
> > > > > > again.
> > > > > > 
> > > > > > I'm not saying we can't check if the FD is valid, but lets be clear that
> > > > > > this is not offering very much protection against a broken mgmt apps
> > > > > > passing bad FDs.
> > > > > > 
> > > > > I agree with you, but my only goal here is to avoid the crash in this
> > > > > particular case.
> > > > > 
> > > > > The punishment should fit the crime.
> > > > > 
> > > > > The user can think the netdev_del doesn't close the fd, and he can try
> > > > > to reuse it. Sending back an error is better than crashing his system.
> > > > > After that, if the system crashes, it will be for the good reasons, not
> > > > > because of an assert.
> > > > 
> > > > Yes. And on top of this we may try to validate the TAP via st_dev
> > > > through fstat[1].
> > > I agree, but the problem I have is to know which major(st_dev) we can
> > > allow to use.
> > > 
> > > Do we allow only macvtap major number?
> > 
> > 
> > Macvtap and tuntap.
> > 
> > 
> > > How to know the macvtap major number at user level?
> > > [it is allocated dynamically: do we need to parse /proc/devices?]
> > 
> > 
> > I think we can get them through fstat for /dev/net/tun and /dev/macvtapX.
> 
> Don't assume QEMU has any permission to access to these device nodes,
> only the pre-opened FDs it is given by libvirt.

Actually permissions are the least of the problem - the device nodes
won't even exist, because QEMU's almost certainly running in a private
mount namespace with a minimal /dev populated

Regards,
Daniel
Laurent Vivier June 30, 2020, 9:45 a.m. UTC | #11
On 30/06/2020 11:31, Daniel P. Berrangé wrote:
> On Tue, Jun 30, 2020 at 10:23:18AM +0100, Daniel P. Berrangé wrote:
>> On Tue, Jun 30, 2020 at 05:21:49PM +0800, Jason Wang wrote:
>>>
>>> On 2020/6/30 上午3:30, Laurent Vivier wrote:
>>>> On 28/06/2020 08:31, Jason Wang wrote:
>>>>> On 2020/6/25 下午7:56, Laurent Vivier wrote:
>>>>>> On 25/06/2020 10:48, Daniel P. Berrangé wrote:
>>>>>>> On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
>>>>>>>> qemu_set_nonblock() checks that the file descriptor can be used and, if
>>>>>>>> not, crashes QEMU. An assert() is used for that. The use of assert() is
>>>>>>>> used to detect programming error and the coredump will allow to debug
>>>>>>>> the problem.
>>>>>>>>
>>>>>>>> But in the case of the tap device, this assert() can be triggered by
>>>>>>>> a misconfiguration by the user. At startup, it's not a real problem,
>>>>>>>> but it
>>>>>>>> can also happen during the hot-plug of a new device, and here it's a
>>>>>>>> problem because we can crash a perfectly healthy system.
>>>>>>> If the user/mgmt app is not correctly passing FDs, then there's a whole
>>>>>>> pile of bad stuff that can happen. Checking whether the FD is valid is
>>>>>>> only going to catch a small subset. eg consider if fd=9 refers to the
>>>>>>> FD that is associated with the root disk QEMU has open. We'll fail to
>>>>>>> setup the TAP device and close this FD, breaking the healthy system
>>>>>>> again.
>>>>>>>
>>>>>>> I'm not saying we can't check if the FD is valid, but lets be clear that
>>>>>>> this is not offering very much protection against a broken mgmt apps
>>>>>>> passing bad FDs.
>>>>>>>
>>>>>> I agree with you, but my only goal here is to avoid the crash in this
>>>>>> particular case.
>>>>>>
>>>>>> The punishment should fit the crime.
>>>>>>
>>>>>> The user can think the netdev_del doesn't close the fd, and he can try
>>>>>> to reuse it. Sending back an error is better than crashing his system.
>>>>>> After that, if the system crashes, it will be for the good reasons, not
>>>>>> because of an assert.
>>>>>
>>>>> Yes. And on top of this we may try to validate the TAP via st_dev
>>>>> through fstat[1].
>>>> I agree, but the problem I have is to know which major(st_dev) we can
>>>> allow to use.
>>>>
>>>> Do we allow only macvtap major number?
>>>
>>>
>>> Macvtap and tuntap.
>>>
>>>
>>>> How to know the macvtap major number at user level?
>>>> [it is allocated dynamically: do we need to parse /proc/devices?]
>>>
>>>
>>> I think we can get them through fstat for /dev/net/tun and /dev/macvtapX.
>>
>> Don't assume QEMU has any permission to access to these device nodes,
>> only the pre-opened FDs it is given by libvirt.
> 
> Actually permissions are the least of the problem - the device nodes
> won't even exist, because QEMU's almost certainly running in a private
> mount namespace with a minimal /dev populated
> 

I'm working on a solution using /proc/devices.
macvtap has its own major number, but tuntap use "misc" (10) major number.

Thanks,
Laurent
Jason Wang June 30, 2020, 9:56 a.m. UTC | #12
On 2020/6/30 下午5:31, Daniel P. Berrangé wrote:
> On Tue, Jun 30, 2020 at 10:23:18AM +0100, Daniel P. Berrangé wrote:
>> On Tue, Jun 30, 2020 at 05:21:49PM +0800, Jason Wang wrote:
>>> On 2020/6/30 上午3:30, Laurent Vivier wrote:
>>>> On 28/06/2020 08:31, Jason Wang wrote:
>>>>> On 2020/6/25 下午7:56, Laurent Vivier wrote:
>>>>>> On 25/06/2020 10:48, Daniel P. Berrangé wrote:
>>>>>>> On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
>>>>>>>> qemu_set_nonblock() checks that the file descriptor can be used and, if
>>>>>>>> not, crashes QEMU. An assert() is used for that. The use of assert() is
>>>>>>>> used to detect programming error and the coredump will allow to debug
>>>>>>>> the problem.
>>>>>>>>
>>>>>>>> But in the case of the tap device, this assert() can be triggered by
>>>>>>>> a misconfiguration by the user. At startup, it's not a real problem,
>>>>>>>> but it
>>>>>>>> can also happen during the hot-plug of a new device, and here it's a
>>>>>>>> problem because we can crash a perfectly healthy system.
>>>>>>> If the user/mgmt app is not correctly passing FDs, then there's a whole
>>>>>>> pile of bad stuff that can happen. Checking whether the FD is valid is
>>>>>>> only going to catch a small subset. eg consider if fd=9 refers to the
>>>>>>> FD that is associated with the root disk QEMU has open. We'll fail to
>>>>>>> setup the TAP device and close this FD, breaking the healthy system
>>>>>>> again.
>>>>>>>
>>>>>>> I'm not saying we can't check if the FD is valid, but lets be clear that
>>>>>>> this is not offering very much protection against a broken mgmt apps
>>>>>>> passing bad FDs.
>>>>>>>
>>>>>> I agree with you, but my only goal here is to avoid the crash in this
>>>>>> particular case.
>>>>>>
>>>>>> The punishment should fit the crime.
>>>>>>
>>>>>> The user can think the netdev_del doesn't close the fd, and he can try
>>>>>> to reuse it. Sending back an error is better than crashing his system.
>>>>>> After that, if the system crashes, it will be for the good reasons, not
>>>>>> because of an assert.
>>>>> Yes. And on top of this we may try to validate the TAP via st_dev
>>>>> through fstat[1].
>>>> I agree, but the problem I have is to know which major(st_dev) we can
>>>> allow to use.
>>>>
>>>> Do we allow only macvtap major number?
>>>
>>> Macvtap and tuntap.
>>>
>>>
>>>> How to know the macvtap major number at user level?
>>>> [it is allocated dynamically: do we need to parse /proc/devices?]
>>>
>>> I think we can get them through fstat for /dev/net/tun and /dev/macvtapX.
>> Don't assume QEMU has any permission to access to these device nodes,
>> only the pre-opened FDs it is given by libvirt.
> Actually permissions are the least of the problem - the device nodes
> won't even exist, because QEMU's almost certainly running in a private
> mount namespace with a minimal /dev populated


Yes, it's just a kind of best effort, we can pass the check if we can't 
access them.

Thanks


>
> Regards,
> Daniel
Jason Wang June 30, 2020, 10:03 a.m. UTC | #13
On 2020/6/30 下午5:45, Laurent Vivier wrote:
> On 30/06/2020 11:31, Daniel P. Berrangé wrote:
>> On Tue, Jun 30, 2020 at 10:23:18AM +0100, Daniel P. Berrangé wrote:
>>> On Tue, Jun 30, 2020 at 05:21:49PM +0800, Jason Wang wrote:
>>>> On 2020/6/30 上午3:30, Laurent Vivier wrote:
>>>>> On 28/06/2020 08:31, Jason Wang wrote:
>>>>>> On 2020/6/25 下午7:56, Laurent Vivier wrote:
>>>>>>> On 25/06/2020 10:48, Daniel P. Berrangé wrote:
>>>>>>>> On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
>>>>>>>>> qemu_set_nonblock() checks that the file descriptor can be used and, if
>>>>>>>>> not, crashes QEMU. An assert() is used for that. The use of assert() is
>>>>>>>>> used to detect programming error and the coredump will allow to debug
>>>>>>>>> the problem.
>>>>>>>>>
>>>>>>>>> But in the case of the tap device, this assert() can be triggered by
>>>>>>>>> a misconfiguration by the user. At startup, it's not a real problem,
>>>>>>>>> but it
>>>>>>>>> can also happen during the hot-plug of a new device, and here it's a
>>>>>>>>> problem because we can crash a perfectly healthy system.
>>>>>>>> If the user/mgmt app is not correctly passing FDs, then there's a whole
>>>>>>>> pile of bad stuff that can happen. Checking whether the FD is valid is
>>>>>>>> only going to catch a small subset. eg consider if fd=9 refers to the
>>>>>>>> FD that is associated with the root disk QEMU has open. We'll fail to
>>>>>>>> setup the TAP device and close this FD, breaking the healthy system
>>>>>>>> again.
>>>>>>>>
>>>>>>>> I'm not saying we can't check if the FD is valid, but lets be clear that
>>>>>>>> this is not offering very much protection against a broken mgmt apps
>>>>>>>> passing bad FDs.
>>>>>>>>
>>>>>>> I agree with you, but my only goal here is to avoid the crash in this
>>>>>>> particular case.
>>>>>>>
>>>>>>> The punishment should fit the crime.
>>>>>>>
>>>>>>> The user can think the netdev_del doesn't close the fd, and he can try
>>>>>>> to reuse it. Sending back an error is better than crashing his system.
>>>>>>> After that, if the system crashes, it will be for the good reasons, not
>>>>>>> because of an assert.
>>>>>> Yes. And on top of this we may try to validate the TAP via st_dev
>>>>>> through fstat[1].
>>>>> I agree, but the problem I have is to know which major(st_dev) we can
>>>>> allow to use.
>>>>>
>>>>> Do we allow only macvtap major number?
>>>>
>>>> Macvtap and tuntap.
>>>>
>>>>
>>>>> How to know the macvtap major number at user level?
>>>>> [it is allocated dynamically: do we need to parse /proc/devices?]
>>>>
>>>> I think we can get them through fstat for /dev/net/tun and /dev/macvtapX.
>>> Don't assume QEMU has any permission to access to these device nodes,
>>> only the pre-opened FDs it is given by libvirt.
>> Actually permissions are the least of the problem - the device nodes
>> won't even exist, because QEMU's almost certainly running in a private
>> mount namespace with a minimal /dev populated
>>
> I'm working on a solution using /proc/devices.


Similar issue with /dev. There's no guarantee that qemu can access 
/proc/devices or it may not exist (CONFIG_PROCFS).


> macvtap has its own major number, but tuntap use "misc" (10) major number.


Yes.

Another possible solution is to validate the result of TUNGETIFF, but it 
requires libvirt to tell us the ifname.

Thanks



>
> Thanks,
> Laurent
>
>
Laurent Vivier June 30, 2020, 10:35 a.m. UTC | #14
On 30/06/2020 12:03, Jason Wang wrote:
> 
> On 2020/6/30 下午5:45, Laurent Vivier wrote:
>> On 30/06/2020 11:31, Daniel P. Berrangé wrote:
>>> On Tue, Jun 30, 2020 at 10:23:18AM +0100, Daniel P. Berrangé wrote:
>>>> On Tue, Jun 30, 2020 at 05:21:49PM +0800, Jason Wang wrote:
>>>>> On 2020/6/30 上午3:30, Laurent Vivier wrote:
>>>>>> On 28/06/2020 08:31, Jason Wang wrote:
>>>>>>> On 2020/6/25 下午7:56, Laurent Vivier wrote:
>>>>>>>> On 25/06/2020 10:48, Daniel P. Berrangé wrote:
>>>>>>>>> On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
>>>>>>>>>> qemu_set_nonblock() checks that the file descriptor can be
>>>>>>>>>> used and, if
>>>>>>>>>> not, crashes QEMU. An assert() is used for that. The use of
>>>>>>>>>> assert() is
>>>>>>>>>> used to detect programming error and the coredump will allow
>>>>>>>>>> to debug
>>>>>>>>>> the problem.
>>>>>>>>>>
>>>>>>>>>> But in the case of the tap device, this assert() can be
>>>>>>>>>> triggered by
>>>>>>>>>> a misconfiguration by the user. At startup, it's not a real
>>>>>>>>>> problem,
>>>>>>>>>> but it
>>>>>>>>>> can also happen during the hot-plug of a new device, and here
>>>>>>>>>> it's a
>>>>>>>>>> problem because we can crash a perfectly healthy system.
>>>>>>>>> If the user/mgmt app is not correctly passing FDs, then there's
>>>>>>>>> a whole
>>>>>>>>> pile of bad stuff that can happen. Checking whether the FD is
>>>>>>>>> valid is
>>>>>>>>> only going to catch a small subset. eg consider if fd=9 refers
>>>>>>>>> to the
>>>>>>>>> FD that is associated with the root disk QEMU has open. We'll
>>>>>>>>> fail to
>>>>>>>>> setup the TAP device and close this FD, breaking the healthy
>>>>>>>>> system
>>>>>>>>> again.
>>>>>>>>>
>>>>>>>>> I'm not saying we can't check if the FD is valid, but lets be
>>>>>>>>> clear that
>>>>>>>>> this is not offering very much protection against a broken mgmt
>>>>>>>>> apps
>>>>>>>>> passing bad FDs.
>>>>>>>>>
>>>>>>>> I agree with you, but my only goal here is to avoid the crash in
>>>>>>>> this
>>>>>>>> particular case.
>>>>>>>>
>>>>>>>> The punishment should fit the crime.
>>>>>>>>
>>>>>>>> The user can think the netdev_del doesn't close the fd, and he
>>>>>>>> can try
>>>>>>>> to reuse it. Sending back an error is better than crashing his
>>>>>>>> system.
>>>>>>>> After that, if the system crashes, it will be for the good
>>>>>>>> reasons, not
>>>>>>>> because of an assert.
>>>>>>> Yes. And on top of this we may try to validate the TAP via st_dev
>>>>>>> through fstat[1].
>>>>>> I agree, but the problem I have is to know which major(st_dev) we can
>>>>>> allow to use.
>>>>>>
>>>>>> Do we allow only macvtap major number?
>>>>>
>>>>> Macvtap and tuntap.
>>>>>
>>>>>
>>>>>> How to know the macvtap major number at user level?
>>>>>> [it is allocated dynamically: do we need to parse /proc/devices?]
>>>>>
>>>>> I think we can get them through fstat for /dev/net/tun and
>>>>> /dev/macvtapX.
>>>> Don't assume QEMU has any permission to access to these device nodes,
>>>> only the pre-opened FDs it is given by libvirt.
>>> Actually permissions are the least of the problem - the device nodes
>>> won't even exist, because QEMU's almost certainly running in a private
>>> mount namespace with a minimal /dev populated
>>>
>> I'm working on a solution using /proc/devices.
> 
> 
> Similar issue with /dev. There's no guarantee that qemu can access
> /proc/devices or it may not exist (CONFIG_PROCFS).

There is a lot of things that will not work without /proc (several tools
rely on /proc, like ps, top, lsof, mount, ...). Some information are
only available from /proc, and if /proc is there, I think /proc/devices
is always readable by everyone. Moreover /proc is already used by qemu
in several places.

It can also a best effort check.

The problem with fstat() on /dev files is to guess the /dev/macvtapX as
X varies (the same with /dev/tapY)..

> 
>> macvtap has its own major number, but tuntap use "misc" (10) major
>> number.

Another question: it is possible to use the "fd=" parameter with macvtap
as macvtap creates a /dev/tapY device, but how to do that with tuntap
that does not create a /dev/tapY device?

Thanks,
Laurent
Jason Wang June 30, 2020, 10:57 a.m. UTC | #15
On 2020/6/30 下午6:35, Laurent Vivier wrote:
> On 30/06/2020 12:03, Jason Wang wrote:
>> On 2020/6/30 下午5:45, Laurent Vivier wrote:
>>> On 30/06/2020 11:31, Daniel P. Berrangé wrote:
>>>> On Tue, Jun 30, 2020 at 10:23:18AM +0100, Daniel P. Berrangé wrote:
>>>>> On Tue, Jun 30, 2020 at 05:21:49PM +0800, Jason Wang wrote:
>>>>>> On 2020/6/30 上午3:30, Laurent Vivier wrote:
>>>>>>> On 28/06/2020 08:31, Jason Wang wrote:
>>>>>>>> On 2020/6/25 下午7:56, Laurent Vivier wrote:
>>>>>>>>> On 25/06/2020 10:48, Daniel P. Berrangé wrote:
>>>>>>>>>> On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
>>>>>>>>>>> qemu_set_nonblock() checks that the file descriptor can be
>>>>>>>>>>> used and, if
>>>>>>>>>>> not, crashes QEMU. An assert() is used for that. The use of
>>>>>>>>>>> assert() is
>>>>>>>>>>> used to detect programming error and the coredump will allow
>>>>>>>>>>> to debug
>>>>>>>>>>> the problem.
>>>>>>>>>>>
>>>>>>>>>>> But in the case of the tap device, this assert() can be
>>>>>>>>>>> triggered by
>>>>>>>>>>> a misconfiguration by the user. At startup, it's not a real
>>>>>>>>>>> problem,
>>>>>>>>>>> but it
>>>>>>>>>>> can also happen during the hot-plug of a new device, and here
>>>>>>>>>>> it's a
>>>>>>>>>>> problem because we can crash a perfectly healthy system.
>>>>>>>>>> If the user/mgmt app is not correctly passing FDs, then there's
>>>>>>>>>> a whole
>>>>>>>>>> pile of bad stuff that can happen. Checking whether the FD is
>>>>>>>>>> valid is
>>>>>>>>>> only going to catch a small subset. eg consider if fd=9 refers
>>>>>>>>>> to the
>>>>>>>>>> FD that is associated with the root disk QEMU has open. We'll
>>>>>>>>>> fail to
>>>>>>>>>> setup the TAP device and close this FD, breaking the healthy
>>>>>>>>>> system
>>>>>>>>>> again.
>>>>>>>>>>
>>>>>>>>>> I'm not saying we can't check if the FD is valid, but lets be
>>>>>>>>>> clear that
>>>>>>>>>> this is not offering very much protection against a broken mgmt
>>>>>>>>>> apps
>>>>>>>>>> passing bad FDs.
>>>>>>>>>>
>>>>>>>>> I agree with you, but my only goal here is to avoid the crash in
>>>>>>>>> this
>>>>>>>>> particular case.
>>>>>>>>>
>>>>>>>>> The punishment should fit the crime.
>>>>>>>>>
>>>>>>>>> The user can think the netdev_del doesn't close the fd, and he
>>>>>>>>> can try
>>>>>>>>> to reuse it. Sending back an error is better than crashing his
>>>>>>>>> system.
>>>>>>>>> After that, if the system crashes, it will be for the good
>>>>>>>>> reasons, not
>>>>>>>>> because of an assert.
>>>>>>>> Yes. And on top of this we may try to validate the TAP via st_dev
>>>>>>>> through fstat[1].
>>>>>>> I agree, but the problem I have is to know which major(st_dev) we can
>>>>>>> allow to use.
>>>>>>>
>>>>>>> Do we allow only macvtap major number?
>>>>>> Macvtap and tuntap.
>>>>>>
>>>>>>
>>>>>>> How to know the macvtap major number at user level?
>>>>>>> [it is allocated dynamically: do we need to parse /proc/devices?]
>>>>>> I think we can get them through fstat for /dev/net/tun and
>>>>>> /dev/macvtapX.
>>>>> Don't assume QEMU has any permission to access to these device nodes,
>>>>> only the pre-opened FDs it is given by libvirt.
>>>> Actually permissions are the least of the problem - the device nodes
>>>> won't even exist, because QEMU's almost certainly running in a private
>>>> mount namespace with a minimal /dev populated
>>>>
>>> I'm working on a solution using /proc/devices.
>>
>> Similar issue with /dev. There's no guarantee that qemu can access
>> /proc/devices or it may not exist (CONFIG_PROCFS).
> There is a lot of things that will not work without /proc (several tools
> rely on /proc, like ps, top, lsof, mount, ...). Some information are
> only available from /proc, and if /proc is there, I think /proc/devices
> is always readable by everyone. Moreover /proc is already used by qemu
> in several places.
>
> It can also a best effort check.


Right.


>
> The problem with fstat() on /dev files is to guess the /dev/macvtapX as
> X varies (the same with /dev/tapY)..
>
>>> macvtap has its own major number, but tuntap use "misc" (10) major
>>> number.
> Another question: it is possible to use the "fd=" parameter with macvtap
> as macvtap creates a /dev/tapY device,


Yes.


> but how to do that with tuntap
> that does not create a /dev/tapY device?


I think there's no specific reason, it's just because it was wrote like 
that since the first version which is about 20 years ago.

Thanks


>
> Thanks,
> Laurent
>
>
Daniel P. Berrangé June 30, 2020, 11:03 a.m. UTC | #16
On Tue, Jun 30, 2020 at 12:35:46PM +0200, Laurent Vivier wrote:
> On 30/06/2020 12:03, Jason Wang wrote:
> > 
> > On 2020/6/30 下午5:45, Laurent Vivier wrote:
> >> On 30/06/2020 11:31, Daniel P. Berrangé wrote:
> >>> On Tue, Jun 30, 2020 at 10:23:18AM +0100, Daniel P. Berrangé wrote:
> >>>> On Tue, Jun 30, 2020 at 05:21:49PM +0800, Jason Wang wrote:
> >>>>> On 2020/6/30 上午3:30, Laurent Vivier wrote:
> >>>>>> On 28/06/2020 08:31, Jason Wang wrote:
> >>>>>>> On 2020/6/25 下午7:56, Laurent Vivier wrote:
> >>>>>>>> On 25/06/2020 10:48, Daniel P. Berrangé wrote:
> >>>>>>>>> On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
> >>>>>>>>>> qemu_set_nonblock() checks that the file descriptor can be
> >>>>>>>>>> used and, if
> >>>>>>>>>> not, crashes QEMU. An assert() is used for that. The use of
> >>>>>>>>>> assert() is
> >>>>>>>>>> used to detect programming error and the coredump will allow
> >>>>>>>>>> to debug
> >>>>>>>>>> the problem.
> >>>>>>>>>>
> >>>>>>>>>> But in the case of the tap device, this assert() can be
> >>>>>>>>>> triggered by
> >>>>>>>>>> a misconfiguration by the user. At startup, it's not a real
> >>>>>>>>>> problem,
> >>>>>>>>>> but it
> >>>>>>>>>> can also happen during the hot-plug of a new device, and here
> >>>>>>>>>> it's a
> >>>>>>>>>> problem because we can crash a perfectly healthy system.
> >>>>>>>>> If the user/mgmt app is not correctly passing FDs, then there's
> >>>>>>>>> a whole
> >>>>>>>>> pile of bad stuff that can happen. Checking whether the FD is
> >>>>>>>>> valid is
> >>>>>>>>> only going to catch a small subset. eg consider if fd=9 refers
> >>>>>>>>> to the
> >>>>>>>>> FD that is associated with the root disk QEMU has open. We'll
> >>>>>>>>> fail to
> >>>>>>>>> setup the TAP device and close this FD, breaking the healthy
> >>>>>>>>> system
> >>>>>>>>> again.
> >>>>>>>>>
> >>>>>>>>> I'm not saying we can't check if the FD is valid, but lets be
> >>>>>>>>> clear that
> >>>>>>>>> this is not offering very much protection against a broken mgmt
> >>>>>>>>> apps
> >>>>>>>>> passing bad FDs.
> >>>>>>>>>
> >>>>>>>> I agree with you, but my only goal here is to avoid the crash in
> >>>>>>>> this
> >>>>>>>> particular case.
> >>>>>>>>
> >>>>>>>> The punishment should fit the crime.
> >>>>>>>>
> >>>>>>>> The user can think the netdev_del doesn't close the fd, and he
> >>>>>>>> can try
> >>>>>>>> to reuse it. Sending back an error is better than crashing his
> >>>>>>>> system.
> >>>>>>>> After that, if the system crashes, it will be for the good
> >>>>>>>> reasons, not
> >>>>>>>> because of an assert.
> >>>>>>> Yes. And on top of this we may try to validate the TAP via st_dev
> >>>>>>> through fstat[1].
> >>>>>> I agree, but the problem I have is to know which major(st_dev) we can
> >>>>>> allow to use.
> >>>>>>
> >>>>>> Do we allow only macvtap major number?
> >>>>>
> >>>>> Macvtap and tuntap.
> >>>>>
> >>>>>
> >>>>>> How to know the macvtap major number at user level?
> >>>>>> [it is allocated dynamically: do we need to parse /proc/devices?]
> >>>>>
> >>>>> I think we can get them through fstat for /dev/net/tun and
> >>>>> /dev/macvtapX.
> >>>> Don't assume QEMU has any permission to access to these device nodes,
> >>>> only the pre-opened FDs it is given by libvirt.
> >>> Actually permissions are the least of the problem - the device nodes
> >>> won't even exist, because QEMU's almost certainly running in a private
> >>> mount namespace with a minimal /dev populated
> >>>
> >> I'm working on a solution using /proc/devices.
> > 
> > 
> > Similar issue with /dev. There's no guarantee that qemu can access
> > /proc/devices or it may not exist (CONFIG_PROCFS).
> 
> There is a lot of things that will not work without /proc (several tools
> rely on /proc, like ps, top, lsof, mount, ...). Some information are
> only available from /proc, and if /proc is there, I think /proc/devices
> is always readable by everyone. Moreover /proc is already used by qemu
> in several places.
> 
> It can also a best effort check.
> 
> The problem with fstat() on /dev files is to guess the /dev/macvtapX as
> X varies (the same with /dev/tapY)..
> 
> > 
> >> macvtap has its own major number, but tuntap use "misc" (10) major
> >> number.
> 
> Another question: it is possible to use the "fd=" parameter with macvtap
> as macvtap creates a /dev/tapY device, but how to do that with tuntap
> that does not create a /dev/tapY device?


I think we should step back and ask why we need to check this at all.

IMHO, if the passed-in FD works with the syscalls that tap-linux.c
is executing, then that shows the FD is suitable for QEMU. The problem
is that many of the tap APIs don't use "Error **errp" parameters to
report errors, so we can't catch the failures. IOW, instead of checking
the FD major/minor number, we should make the existing code be better
at reporting errors, so they can be fed back to the QMP console
gracefully.


Regards,
Daniel
Laurent Vivier June 30, 2020, noon UTC | #17
On 30/06/2020 13:03, Daniel P. Berrangé wrote:
> On Tue, Jun 30, 2020 at 12:35:46PM +0200, Laurent Vivier wrote:
>> On 30/06/2020 12:03, Jason Wang wrote:
>>>
>>> On 2020/6/30 下午5:45, Laurent Vivier wrote:
>>>> On 30/06/2020 11:31, Daniel P. Berrangé wrote:
>>>>> On Tue, Jun 30, 2020 at 10:23:18AM +0100, Daniel P. Berrangé wrote:
>>>>>> On Tue, Jun 30, 2020 at 05:21:49PM +0800, Jason Wang wrote:
>>>>>>> On 2020/6/30 上午3:30, Laurent Vivier wrote:
>>>>>>>> On 28/06/2020 08:31, Jason Wang wrote:
>>>>>>>>> On 2020/6/25 下午7:56, Laurent Vivier wrote:
>>>>>>>>>> On 25/06/2020 10:48, Daniel P. Berrangé wrote:
>>>>>>>>>>> On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
>>>>>>>>>>>> qemu_set_nonblock() checks that the file descriptor can be
>>>>>>>>>>>> used and, if
>>>>>>>>>>>> not, crashes QEMU. An assert() is used for that. The use of
>>>>>>>>>>>> assert() is
>>>>>>>>>>>> used to detect programming error and the coredump will allow
>>>>>>>>>>>> to debug
>>>>>>>>>>>> the problem.
>>>>>>>>>>>>
>>>>>>>>>>>> But in the case of the tap device, this assert() can be
>>>>>>>>>>>> triggered by
>>>>>>>>>>>> a misconfiguration by the user. At startup, it's not a real
>>>>>>>>>>>> problem,
>>>>>>>>>>>> but it
>>>>>>>>>>>> can also happen during the hot-plug of a new device, and here
>>>>>>>>>>>> it's a
>>>>>>>>>>>> problem because we can crash a perfectly healthy system.
>>>>>>>>>>> If the user/mgmt app is not correctly passing FDs, then there's
>>>>>>>>>>> a whole
>>>>>>>>>>> pile of bad stuff that can happen. Checking whether the FD is
>>>>>>>>>>> valid is
>>>>>>>>>>> only going to catch a small subset. eg consider if fd=9 refers
>>>>>>>>>>> to the
>>>>>>>>>>> FD that is associated with the root disk QEMU has open. We'll
>>>>>>>>>>> fail to
>>>>>>>>>>> setup the TAP device and close this FD, breaking the healthy
>>>>>>>>>>> system
>>>>>>>>>>> again.
>>>>>>>>>>>
>>>>>>>>>>> I'm not saying we can't check if the FD is valid, but lets be
>>>>>>>>>>> clear that
>>>>>>>>>>> this is not offering very much protection against a broken mgmt
>>>>>>>>>>> apps
>>>>>>>>>>> passing bad FDs.
>>>>>>>>>>>
>>>>>>>>>> I agree with you, but my only goal here is to avoid the crash in
>>>>>>>>>> this
>>>>>>>>>> particular case.
>>>>>>>>>>
>>>>>>>>>> The punishment should fit the crime.
>>>>>>>>>>
>>>>>>>>>> The user can think the netdev_del doesn't close the fd, and he
>>>>>>>>>> can try
>>>>>>>>>> to reuse it. Sending back an error is better than crashing his
>>>>>>>>>> system.
>>>>>>>>>> After that, if the system crashes, it will be for the good
>>>>>>>>>> reasons, not
>>>>>>>>>> because of an assert.
>>>>>>>>> Yes. And on top of this we may try to validate the TAP via st_dev
>>>>>>>>> through fstat[1].
>>>>>>>> I agree, but the problem I have is to know which major(st_dev) we can
>>>>>>>> allow to use.
>>>>>>>>
>>>>>>>> Do we allow only macvtap major number?
>>>>>>>
>>>>>>> Macvtap and tuntap.
>>>>>>>
>>>>>>>
>>>>>>>> How to know the macvtap major number at user level?
>>>>>>>> [it is allocated dynamically: do we need to parse /proc/devices?]
>>>>>>>
>>>>>>> I think we can get them through fstat for /dev/net/tun and
>>>>>>> /dev/macvtapX.
>>>>>> Don't assume QEMU has any permission to access to these device nodes,
>>>>>> only the pre-opened FDs it is given by libvirt.
>>>>> Actually permissions are the least of the problem - the device nodes
>>>>> won't even exist, because QEMU's almost certainly running in a private
>>>>> mount namespace with a minimal /dev populated
>>>>>
>>>> I'm working on a solution using /proc/devices.
>>>
>>>
>>> Similar issue with /dev. There's no guarantee that qemu can access
>>> /proc/devices or it may not exist (CONFIG_PROCFS).
>>
>> There is a lot of things that will not work without /proc (several tools
>> rely on /proc, like ps, top, lsof, mount, ...). Some information are
>> only available from /proc, and if /proc is there, I think /proc/devices
>> is always readable by everyone. Moreover /proc is already used by qemu
>> in several places.
>>
>> It can also a best effort check.
>>
>> The problem with fstat() on /dev files is to guess the /dev/macvtapX as
>> X varies (the same with /dev/tapY)..
>>
>>>
>>>> macvtap has its own major number, but tuntap use "misc" (10) major
>>>> number.
>>
>> Another question: it is possible to use the "fd=" parameter with macvtap
>> as macvtap creates a /dev/tapY device, but how to do that with tuntap
>> that does not create a /dev/tapY device?
> 
> 
> I think we should step back and ask why we need to check this at all.
> 
> IMHO, if the passed-in FD works with the syscalls that tap-linux.c
> is executing, then that shows the FD is suitable for QEMU. The problem
> is that many of the tap APIs don't use "Error **errp" parameters to
> report errors, so we can't catch the failures. IOW, instead of checking
> the FD major/minor number, we should make the existing code be better
> at reporting errors, so they can be fed back to the QMP console
> gracefully.

The problem here is the very first operation of net_init_tap() is a
qemu_set_nonblock() that has an assert() and crashes QEMU.

It's why I was only checking for the validity of the file descriptor,
not if it is a tap device or not.

I was adding a check before this function to be sure to not assert and
to report the error correctly to the QMP interface.

I think it is a step in the good direction.

Thanks,
Laurent
Daniel P. Berrangé June 30, 2020, 12:35 p.m. UTC | #18
On Tue, Jun 30, 2020 at 02:00:06PM +0200, Laurent Vivier wrote:
> On 30/06/2020 13:03, Daniel P. Berrangé wrote:
> > On Tue, Jun 30, 2020 at 12:35:46PM +0200, Laurent Vivier wrote:
> >> On 30/06/2020 12:03, Jason Wang wrote:
> >>>
> >>> On 2020/6/30 下午5:45, Laurent Vivier wrote:
> >>>> On 30/06/2020 11:31, Daniel P. Berrangé wrote:
> >>>>> On Tue, Jun 30, 2020 at 10:23:18AM +0100, Daniel P. Berrangé wrote:
> >>>>>> On Tue, Jun 30, 2020 at 05:21:49PM +0800, Jason Wang wrote:
> >>>>>>> On 2020/6/30 上午3:30, Laurent Vivier wrote:
> >>>>>>>> On 28/06/2020 08:31, Jason Wang wrote:
> >>>>>>>>> On 2020/6/25 下午7:56, Laurent Vivier wrote:
> >>>>>>>>>> On 25/06/2020 10:48, Daniel P. Berrangé wrote:
> >>>>>>>>>>> On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
> >>>>>>>>>>>> qemu_set_nonblock() checks that the file descriptor can be
> >>>>>>>>>>>> used and, if
> >>>>>>>>>>>> not, crashes QEMU. An assert() is used for that. The use of
> >>>>>>>>>>>> assert() is
> >>>>>>>>>>>> used to detect programming error and the coredump will allow
> >>>>>>>>>>>> to debug
> >>>>>>>>>>>> the problem.
> >>>>>>>>>>>>
> >>>>>>>>>>>> But in the case of the tap device, this assert() can be
> >>>>>>>>>>>> triggered by
> >>>>>>>>>>>> a misconfiguration by the user. At startup, it's not a real
> >>>>>>>>>>>> problem,
> >>>>>>>>>>>> but it
> >>>>>>>>>>>> can also happen during the hot-plug of a new device, and here
> >>>>>>>>>>>> it's a
> >>>>>>>>>>>> problem because we can crash a perfectly healthy system.
> >>>>>>>>>>> If the user/mgmt app is not correctly passing FDs, then there's
> >>>>>>>>>>> a whole
> >>>>>>>>>>> pile of bad stuff that can happen. Checking whether the FD is
> >>>>>>>>>>> valid is
> >>>>>>>>>>> only going to catch a small subset. eg consider if fd=9 refers
> >>>>>>>>>>> to the
> >>>>>>>>>>> FD that is associated with the root disk QEMU has open. We'll
> >>>>>>>>>>> fail to
> >>>>>>>>>>> setup the TAP device and close this FD, breaking the healthy
> >>>>>>>>>>> system
> >>>>>>>>>>> again.
> >>>>>>>>>>>
> >>>>>>>>>>> I'm not saying we can't check if the FD is valid, but lets be
> >>>>>>>>>>> clear that
> >>>>>>>>>>> this is not offering very much protection against a broken mgmt
> >>>>>>>>>>> apps
> >>>>>>>>>>> passing bad FDs.
> >>>>>>>>>>>
> >>>>>>>>>> I agree with you, but my only goal here is to avoid the crash in
> >>>>>>>>>> this
> >>>>>>>>>> particular case.
> >>>>>>>>>>
> >>>>>>>>>> The punishment should fit the crime.
> >>>>>>>>>>
> >>>>>>>>>> The user can think the netdev_del doesn't close the fd, and he
> >>>>>>>>>> can try
> >>>>>>>>>> to reuse it. Sending back an error is better than crashing his
> >>>>>>>>>> system.
> >>>>>>>>>> After that, if the system crashes, it will be for the good
> >>>>>>>>>> reasons, not
> >>>>>>>>>> because of an assert.
> >>>>>>>>> Yes. And on top of this we may try to validate the TAP via st_dev
> >>>>>>>>> through fstat[1].
> >>>>>>>> I agree, but the problem I have is to know which major(st_dev) we can
> >>>>>>>> allow to use.
> >>>>>>>>
> >>>>>>>> Do we allow only macvtap major number?
> >>>>>>>
> >>>>>>> Macvtap and tuntap.
> >>>>>>>
> >>>>>>>
> >>>>>>>> How to know the macvtap major number at user level?
> >>>>>>>> [it is allocated dynamically: do we need to parse /proc/devices?]
> >>>>>>>
> >>>>>>> I think we can get them through fstat for /dev/net/tun and
> >>>>>>> /dev/macvtapX.
> >>>>>> Don't assume QEMU has any permission to access to these device nodes,
> >>>>>> only the pre-opened FDs it is given by libvirt.
> >>>>> Actually permissions are the least of the problem - the device nodes
> >>>>> won't even exist, because QEMU's almost certainly running in a private
> >>>>> mount namespace with a minimal /dev populated
> >>>>>
> >>>> I'm working on a solution using /proc/devices.
> >>>
> >>>
> >>> Similar issue with /dev. There's no guarantee that qemu can access
> >>> /proc/devices or it may not exist (CONFIG_PROCFS).
> >>
> >> There is a lot of things that will not work without /proc (several tools
> >> rely on /proc, like ps, top, lsof, mount, ...). Some information are
> >> only available from /proc, and if /proc is there, I think /proc/devices
> >> is always readable by everyone. Moreover /proc is already used by qemu
> >> in several places.
> >>
> >> It can also a best effort check.
> >>
> >> The problem with fstat() on /dev files is to guess the /dev/macvtapX as
> >> X varies (the same with /dev/tapY)..
> >>
> >>>
> >>>> macvtap has its own major number, but tuntap use "misc" (10) major
> >>>> number.
> >>
> >> Another question: it is possible to use the "fd=" parameter with macvtap
> >> as macvtap creates a /dev/tapY device, but how to do that with tuntap
> >> that does not create a /dev/tapY device?
> > 
> > 
> > I think we should step back and ask why we need to check this at all.
> > 
> > IMHO, if the passed-in FD works with the syscalls that tap-linux.c
> > is executing, then that shows the FD is suitable for QEMU. The problem
> > is that many of the tap APIs don't use "Error **errp" parameters to
> > report errors, so we can't catch the failures. IOW, instead of checking
> > the FD major/minor number, we should make the existing code be better
> > at reporting errors, so they can be fed back to the QMP console
> > gracefully.
> 
> The problem here is the very first operation of net_init_tap() is a
> qemu_set_nonblock() that has an assert() and crashes QEMU.
> 
> It's why I was only checking for the validity of the file descriptor,
> not if it is a tap device or not.

Yep, checking that it is really a FD is sufficient to avoid the
assert in nonblock.

As for whether it is really a tap device, I think we just need to
improve error reporting of the functions that come later, instead
of doing a literal "is it a tap" check.

That's what I'd tried in my old patch from a few years back

   https://patchwork.kernel.org/patch/10029443/

I can't remember why we didn't merge this back then

Regards,
Daniel
Laurent Vivier June 30, 2020, 12:42 p.m. UTC | #19
On 30/06/2020 14:35, Daniel P. Berrangé wrote:
> On Tue, Jun 30, 2020 at 02:00:06PM +0200, Laurent Vivier wrote:
>> On 30/06/2020 13:03, Daniel P. Berrangé wrote:
>>> On Tue, Jun 30, 2020 at 12:35:46PM +0200, Laurent Vivier wrote:
>>>> On 30/06/2020 12:03, Jason Wang wrote:
>>>>>
>>>>> On 2020/6/30 下午5:45, Laurent Vivier wrote:
>>>>>> On 30/06/2020 11:31, Daniel P. Berrangé wrote:
>>>>>>> On Tue, Jun 30, 2020 at 10:23:18AM +0100, Daniel P. Berrangé wrote:
>>>>>>>> On Tue, Jun 30, 2020 at 05:21:49PM +0800, Jason Wang wrote:
>>>>>>>>> On 2020/6/30 上午3:30, Laurent Vivier wrote:
>>>>>>>>>> On 28/06/2020 08:31, Jason Wang wrote:
>>>>>>>>>>> On 2020/6/25 下午7:56, Laurent Vivier wrote:
>>>>>>>>>>>> On 25/06/2020 10:48, Daniel P. Berrangé wrote:
>>>>>>>>>>>>> On Wed, Jun 24, 2020 at 09:00:09PM +0200, Laurent Vivier wrote:
>>>>>>>>>>>>>> qemu_set_nonblock() checks that the file descriptor can be
>>>>>>>>>>>>>> used and, if
>>>>>>>>>>>>>> not, crashes QEMU. An assert() is used for that. The use of
>>>>>>>>>>>>>> assert() is
>>>>>>>>>>>>>> used to detect programming error and the coredump will allow
>>>>>>>>>>>>>> to debug
>>>>>>>>>>>>>> the problem.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> But in the case of the tap device, this assert() can be
>>>>>>>>>>>>>> triggered by
>>>>>>>>>>>>>> a misconfiguration by the user. At startup, it's not a real
>>>>>>>>>>>>>> problem,
>>>>>>>>>>>>>> but it
>>>>>>>>>>>>>> can also happen during the hot-plug of a new device, and here
>>>>>>>>>>>>>> it's a
>>>>>>>>>>>>>> problem because we can crash a perfectly healthy system.
>>>>>>>>>>>>> If the user/mgmt app is not correctly passing FDs, then there's
>>>>>>>>>>>>> a whole
>>>>>>>>>>>>> pile of bad stuff that can happen. Checking whether the FD is
>>>>>>>>>>>>> valid is
>>>>>>>>>>>>> only going to catch a small subset. eg consider if fd=9 refers
>>>>>>>>>>>>> to the
>>>>>>>>>>>>> FD that is associated with the root disk QEMU has open. We'll
>>>>>>>>>>>>> fail to
>>>>>>>>>>>>> setup the TAP device and close this FD, breaking the healthy
>>>>>>>>>>>>> system
>>>>>>>>>>>>> again.
>>>>>>>>>>>>>
>>>>>>>>>>>>> I'm not saying we can't check if the FD is valid, but lets be
>>>>>>>>>>>>> clear that
>>>>>>>>>>>>> this is not offering very much protection against a broken mgmt
>>>>>>>>>>>>> apps
>>>>>>>>>>>>> passing bad FDs.
>>>>>>>>>>>>>
>>>>>>>>>>>> I agree with you, but my only goal here is to avoid the crash in
>>>>>>>>>>>> this
>>>>>>>>>>>> particular case.
>>>>>>>>>>>>
>>>>>>>>>>>> The punishment should fit the crime.
>>>>>>>>>>>>
>>>>>>>>>>>> The user can think the netdev_del doesn't close the fd, and he
>>>>>>>>>>>> can try
>>>>>>>>>>>> to reuse it. Sending back an error is better than crashing his
>>>>>>>>>>>> system.
>>>>>>>>>>>> After that, if the system crashes, it will be for the good
>>>>>>>>>>>> reasons, not
>>>>>>>>>>>> because of an assert.
>>>>>>>>>>> Yes. And on top of this we may try to validate the TAP via st_dev
>>>>>>>>>>> through fstat[1].
>>>>>>>>>> I agree, but the problem I have is to know which major(st_dev) we can
>>>>>>>>>> allow to use.
>>>>>>>>>>
>>>>>>>>>> Do we allow only macvtap major number?
>>>>>>>>>
>>>>>>>>> Macvtap and tuntap.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>> How to know the macvtap major number at user level?
>>>>>>>>>> [it is allocated dynamically: do we need to parse /proc/devices?]
>>>>>>>>>
>>>>>>>>> I think we can get them through fstat for /dev/net/tun and
>>>>>>>>> /dev/macvtapX.
>>>>>>>> Don't assume QEMU has any permission to access to these device nodes,
>>>>>>>> only the pre-opened FDs it is given by libvirt.
>>>>>>> Actually permissions are the least of the problem - the device nodes
>>>>>>> won't even exist, because QEMU's almost certainly running in a private
>>>>>>> mount namespace with a minimal /dev populated
>>>>>>>
>>>>>> I'm working on a solution using /proc/devices.
>>>>>
>>>>>
>>>>> Similar issue with /dev. There's no guarantee that qemu can access
>>>>> /proc/devices or it may not exist (CONFIG_PROCFS).
>>>>
>>>> There is a lot of things that will not work without /proc (several tools
>>>> rely on /proc, like ps, top, lsof, mount, ...). Some information are
>>>> only available from /proc, and if /proc is there, I think /proc/devices
>>>> is always readable by everyone. Moreover /proc is already used by qemu
>>>> in several places.
>>>>
>>>> It can also a best effort check.
>>>>
>>>> The problem with fstat() on /dev files is to guess the /dev/macvtapX as
>>>> X varies (the same with /dev/tapY)..
>>>>
>>>>>
>>>>>> macvtap has its own major number, but tuntap use "misc" (10) major
>>>>>> number.
>>>>
>>>> Another question: it is possible to use the "fd=" parameter with macvtap
>>>> as macvtap creates a /dev/tapY device, but how to do that with tuntap
>>>> that does not create a /dev/tapY device?
>>>
>>>
>>> I think we should step back and ask why we need to check this at all.
>>>
>>> IMHO, if the passed-in FD works with the syscalls that tap-linux.c
>>> is executing, then that shows the FD is suitable for QEMU. The problem
>>> is that many of the tap APIs don't use "Error **errp" parameters to
>>> report errors, so we can't catch the failures. IOW, instead of checking
>>> the FD major/minor number, we should make the existing code be better
>>> at reporting errors, so they can be fed back to the QMP console
>>> gracefully.
>>
>> The problem here is the very first operation of net_init_tap() is a
>> qemu_set_nonblock() that has an assert() and crashes QEMU.
>>
>> It's why I was only checking for the validity of the file descriptor,
>> not if it is a tap device or not.
> 
> Yep, checking that it is really a FD is sufficient to avoid the
> assert in nonblock.
> 
> As for whether it is really a tap device, I think we just need to
> improve error reporting of the functions that come later, instead
> of doing a literal "is it a tap" check.

I agree. I will update my patches to have a series with my patch
checking for the validity of fd and another patch to return the errors
to QMP from the tap functions.

> That's what I'd tried in my old patch from a few years back
> 
>    https://patchwork.kernel.org/patch/10029443/
> 
> I can't remember why we didn't merge this back then

Jason already gave the link in the thread.
I'm going to try to use your patch in my series.

Thanks,
Laurent
diff mbox series

Patch

diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
index 57cd049d6edd..5b0c2d77ddad 100644
--- a/include/qemu/sockets.h
+++ b/include/qemu/sockets.h
@@ -17,6 +17,7 @@  int qemu_socket(int domain, int type, int protocol);
 int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
 int socket_set_cork(int fd, int v);
 int socket_set_nodelay(int fd);
+bool qemu_fd_is_valid(int fd);
 void qemu_set_block(int fd);
 void qemu_set_nonblock(int fd);
 int socket_set_fast_reuse(int fd);
diff --git a/net/tap.c b/net/tap.c
index 6207f61f84ab..f65966aaccd8 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -795,6 +795,12 @@  int net_init_tap(const Netdev *netdev, const char *name,
             return -1;
         }
 
+        /* Check if fd is valid */
+        if (!qemu_fd_is_valid(fd)) {
+            error_setg(errp, "Invalid file descriptor %d", fd);
+            return -1;
+        }
+
         qemu_set_nonblock(fd);
 
         vnet_hdr = tap_probe_vnet_hdr(fd);
@@ -843,6 +849,13 @@  int net_init_tap(const Netdev *netdev, const char *name,
                 goto free_fail;
             }
 
+            /* Check if fd is valid */
+            if (!qemu_fd_is_valid(fd)) {
+                error_setg(errp, "Invalid file descriptor %d", fd);
+                ret = -1;
+                goto free_fail;
+            }
+
             qemu_set_nonblock(fd);
 
             if (i == 0) {
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 916f1be2243a..8d5705f598d3 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -244,6 +244,11 @@  void qemu_anon_ram_free(void *ptr, size_t size)
     qemu_ram_munmap(-1, ptr, size);
 }
 
+bool qemu_fd_is_valid(int fd)
+{
+    return fcntl(fd, F_GETFL) != -1;
+}
+
 void qemu_set_block(int fd)
 {
     int f;
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index e9b14ab17847..a6be9445cfdb 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -132,6 +132,12 @@  struct tm *localtime_r(const time_t *timep, struct tm *result)
 }
 #endif /* CONFIG_LOCALTIME_R */
 
+bool qemu_fd_is_valid(int fd)
+{
+    /* FIXME: how to check if fd is valid? */
+    return true;
+}
+
 void qemu_set_block(int fd)
 {
     unsigned long opt = 0;