
[v2,2/5] drm/virtio: Add a helper to map and note the dma addrs and lengths

Message ID 20240813035509.3360760-3-vivek.kasireddy@intel.com (mailing list archive)
State New, archived
Series drm/virtio: Import scanout buffers from other devices

Commit Message

Kasireddy, Vivek Aug. 13, 2024, 3:49 a.m. UTC
This helper would be used both when first initializing the object as
part of import and when updating the plane, where we need to ensure
that the imported object's backing is valid.
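
As a rough illustration only (not part of this patch; the surrounding
code and the ents/nents/bo/attach variables are assumed to be set up
by the caller, which also holds the dma-buf's reservation lock), the
helper would be invoked from both places roughly like this:

	/* Import path: after creating the object, note the dma addrs
	 * and lengths of the imported buffer's backing. */
	ret = virtgpu_dma_buf_import_sgt(&ents, &nents, bo, attach);
	if (ret)
		return ret;

	/* Plane update path: if the backing was invalidated in the
	 * meantime (e.g. via move_notify), remap it before scanout
	 * (hypothetical check, for illustration). */
	if (!bo->sgt)
		ret = virtgpu_dma_buf_import_sgt(&ents, &nents, bo, attach);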

Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Gurchetan Singh <gurchetansingh@chromium.org>
Cc: Chia-I Wu <olvaffe@gmail.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
---
 drivers/gpu/drm/virtio/virtgpu_drv.h   |  5 +++
 drivers/gpu/drm/virtio/virtgpu_prime.c | 42 ++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

Comments

Dmitry Osipenko Oct. 21, 2024, 3:37 a.m. UTC | #1
On 8/13/24 06:49, Vivek Kasireddy wrote:
> +long virtgpu_dma_buf_import_sgt(struct virtio_gpu_mem_entry **ents,
> +				unsigned int *nents,
> +				struct virtio_gpu_object *bo,
> +				struct dma_buf_attachment *attach)
> +{
> +	struct scatterlist *sl;
> +	struct sg_table *sgt;
> +	long i, ret;
> +
> +	dma_resv_assert_held(attach->dmabuf->resv);
> +
> +	ret = dma_resv_wait_timeout(attach->dmabuf->resv,
> +				    DMA_RESV_USAGE_KERNEL,
> +				    false, MAX_SCHEDULE_TIMEOUT);

Why this wait is needed?
Kasireddy, Vivek Oct. 22, 2024, 4:51 a.m. UTC | #2
Hi Dmitry,

> 
> On 8/13/24 06:49, Vivek Kasireddy wrote:
> > +long virtgpu_dma_buf_import_sgt(struct virtio_gpu_mem_entry **ents,
> > +				unsigned int *nents,
> > +				struct virtio_gpu_object *bo,
> > +				struct dma_buf_attachment *attach)
> > +{
> > +	struct scatterlist *sl;
> > +	struct sg_table *sgt;
> > +	long i, ret;
> > +
> > +	dma_resv_assert_held(attach->dmabuf->resv);
> > +
> > +	ret = dma_resv_wait_timeout(attach->dmabuf->resv,
> > +				    DMA_RESV_USAGE_KERNEL,
> > +				    false, MAX_SCHEDULE_TIMEOUT);
> 
> Why this wait is needed?
The intention was to wait for any pending operations on the backing object
to finish and let it become idle before mapping and accessing the underlying
memory. But I suspect this wait may not be necessary given that we would
have already called dma_buf_pin() at this point, which would have had the
desired effect?

Thanks,
Vivek

> 
> --
> Best regards,
> Dmitry
Dmitry Osipenko Oct. 23, 2024, 3:45 a.m. UTC | #3
On 10/22/24 07:51, Kasireddy, Vivek wrote:
> Hi Dmitry,
> 
>>
>> On 8/13/24 06:49, Vivek Kasireddy wrote:
>>> +long virtgpu_dma_buf_import_sgt(struct virtio_gpu_mem_entry **ents,
>>> +				unsigned int *nents,
>>> +				struct virtio_gpu_object *bo,
>>> +				struct dma_buf_attachment *attach)
>>> +{
>>> +	struct scatterlist *sl;
>>> +	struct sg_table *sgt;
>>> +	long i, ret;
>>> +
>>> +	dma_resv_assert_held(attach->dmabuf->resv);
>>> +
>>> +	ret = dma_resv_wait_timeout(attach->dmabuf->resv,
>>> +				    DMA_RESV_USAGE_KERNEL,
>>> +				    false, MAX_SCHEDULE_TIMEOUT);
>>
>> Why this wait is needed?
> The intention was to wait for any pending operations on the backing object
> to finish and let it become idle before mapping and accessing the underlying
> memory. But I suspect this wait may not be necessary given that we would
> have already called dma_buf_pin() at this point, which would have had the
> desired effect?

Looking at the dma_buf_map_attachment() code, I see that it does both
the pinning and the waiting for the fence by itself. Hence it should be
safe to drop both dma_buf_pin() and dma_resv_wait_timeout(); please test.
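
For reference, a minimal sketch of the trimmed-down helper being
suggested here (assuming dma_buf_map_attachment() takes care of the
pinning and fence waiting where applicable, which the follow-up
replies qualify for dynamic importers):

	long virtgpu_dma_buf_import_sgt(struct virtio_gpu_mem_entry **ents,
					unsigned int *nents,
					struct virtio_gpu_object *bo,
					struct dma_buf_attachment *attach)
	{
		struct sg_table *sgt;

		dma_resv_assert_held(attach->dmabuf->resv);

		/* No explicit dma_buf_pin()/dma_resv_wait_timeout() here;
		 * rely on the dma-buf core doing both where it applies. */
		sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
		if (IS_ERR(sgt))
			return PTR_ERR(sgt);

		/* ... fill *ents/*nents from sgt as in the patch below ... */

		bo->sgt = sgt;
		return 0;
	}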

BTW, is any DG2 GPU suitable for testing of this patchset? Will I be
able to test it using a regular consumer A750 card?
Kasireddy, Vivek Oct. 24, 2024, 5:49 a.m. UTC | #4
Hi Dmitry,

> Subject: Re: [PATCH v2 2/5] drm/virtio: Add a helper to map and note the
> dma addrs and lengths
> 
> On 10/22/24 07:51, Kasireddy, Vivek wrote:
> > Hi Dmitry,
> >
> >>
> >> On 8/13/24 06:49, Vivek Kasireddy wrote:
> >>> +long virtgpu_dma_buf_import_sgt(struct virtio_gpu_mem_entry
> **ents,
> >>> +				unsigned int *nents,
> >>> +				struct virtio_gpu_object *bo,
> >>> +				struct dma_buf_attachment *attach)
> >>> +{
> >>> +	struct scatterlist *sl;
> >>> +	struct sg_table *sgt;
> >>> +	long i, ret;
> >>> +
> >>> +	dma_resv_assert_held(attach->dmabuf->resv);
> >>> +
> >>> +	ret = dma_resv_wait_timeout(attach->dmabuf->resv,
> >>> +				    DMA_RESV_USAGE_KERNEL,
> >>> +				    false, MAX_SCHEDULE_TIMEOUT);
> >>
> >> Why this wait is needed?
> > The intention was to wait for any pending operations on the backing object
> > to finish and let it become idle before mapping and accessing the
> underlying
> > memory. But I suspect this wait may not be necessary given that we would
> > have already called dma_buf_pin() at this point, which would have had the
> > desired effect?
> 
> Looking at the dma_buf_map_attachment() code, I see that it does both of
> pinning and waiting for the fence by itself. Hence should be safe to
> drop both dma_buf_pin() and dma_resv_wait_timeout(), please test.
Sure, I'll retest again, but it looks like dma_buf_map_attachment() pins and/or
waits for the fence only in specific situations. That is, it pins only if the exporter
is dynamic and CONFIG_DMABUF_MOVE_NOTIFY is not enabled. And, it waits for
the fence only if the importer is non-dynamic. Since I have only tested
with a dynamic exporter (Xe driver), I did not encounter any issues, but it makes
sense to augment the code to account for non-dynamic exporters as well.

> 
> BTW, is any DG2 GPU suitable for testing of this patchset? Will I be
> able to test it using a regular consumer A750 card?
Yes, you can test with any DG2 dGPU as long as you can pass it through to the
Guest VM. And, if there is an iGPU available on the Host, you can use the GTK/SDL UI
for local display; if there is no iGPU on the Host, you can use the Spice UI for
remote display. This is exactly how I started testing this patch series, but I am now
predominantly testing this series with SRIOV-enabled iGPUs and dGPUs.

Thanks,
Vivek

> 
> --
> Best regards,
> Dmitry
Dmitry Osipenko Oct. 25, 2024, 10:36 p.m. UTC | #5
On 10/24/24 08:49, Kasireddy, Vivek wrote:
> Hi Dmitry,
> 
>> Subject: Re: [PATCH v2 2/5] drm/virtio: Add a helper to map and note the
>> dma addrs and lengths
>>
>> On 10/22/24 07:51, Kasireddy, Vivek wrote:
>>> Hi Dmitry,
>>>
>>>>
>>>> On 8/13/24 06:49, Vivek Kasireddy wrote:
>>>>> +long virtgpu_dma_buf_import_sgt(struct virtio_gpu_mem_entry
>> **ents,
>>>>> +				unsigned int *nents,
>>>>> +				struct virtio_gpu_object *bo,
>>>>> +				struct dma_buf_attachment *attach)
>>>>> +{
>>>>> +	struct scatterlist *sl;
>>>>> +	struct sg_table *sgt;
>>>>> +	long i, ret;
>>>>> +
>>>>> +	dma_resv_assert_held(attach->dmabuf->resv);
>>>>> +
>>>>> +	ret = dma_resv_wait_timeout(attach->dmabuf->resv,
>>>>> +				    DMA_RESV_USAGE_KERNEL,
>>>>> +				    false, MAX_SCHEDULE_TIMEOUT);
>>>>
>>>> Why this wait is needed?
>>> The intention was to wait for any pending operations on the backing object
>>> to finish and let it become idle before mapping and accessing the
>> underlying
>>> memory. But I suspect this wait may not be necessary given that we would
>>> have already called dma_buf_pin() at this point, which would have had the
>>> desired effect?
>>
>> Looking at the dma_buf_map_attachment() code, I see that it does both of
>> pinning and waiting for the fence by itself. Hence should be safe to
>> drop both dma_buf_pin() and dma_resv_wait_timeout(), please test.
> Sure, I'll retest again but it looks like dma_buf_map_attachment() pins and
> or waits for fence only in specific situations. That is, it pins only if the exporter
> is non-dynamic and if CONFIG_DMABUF_MOVE_NOTIFY is not enabled. And,
> it waits for the fence only if the importer is non-dynamic. Since I have only tested
> with a dynamic exporter (Xe driver), I did not encounter any issues but it makes
> sense to augment the code to account for non-dynamic exporters as well.

If the exporter or importer is not dynamic, then the dma-buf core pins the
buffer at import time; see dma_buf_attach(). Hence, I expect that
everything should work fine.

>> BTW, is any DG2 GPU suitable for testing of this patchset? Will I be
>> able to test it using a regular consumer A750 card?
> Yes, you can test with any DG2 dGPU as long as you can passthrough it to the
> Guest VM. And, if there is an iGPU available on the Host, you can use GTK/SDL UI
> for local display or Spice UI for remote display if there is no iGPU on the Host.
> This is exactly how I started testing this patch series but I am now predominantly
> testing this series with SRIOV enabled iGPUs and dGPUs.

Was hoping to try out SR-IOV on the A750 if it's even possible at all. But
passthrough is indeed another option; will try with passthrough, thanks.
Kasireddy, Vivek Oct. 29, 2024, 6:18 a.m. UTC | #6
Hi Dmitry,

> Subject: Re: [PATCH v2 2/5] drm/virtio: Add a helper to map and note the
> dma addrs and lengths
> >>>>> +long virtgpu_dma_buf_import_sgt(struct virtio_gpu_mem_entry
> >> **ents,
> >>>>> +				unsigned int *nents,
> >>>>> +				struct virtio_gpu_object *bo,
> >>>>> +				struct dma_buf_attachment *attach)
> >>>>> +{
> >>>>> +	struct scatterlist *sl;
> >>>>> +	struct sg_table *sgt;
> >>>>> +	long i, ret;
> >>>>> +
> >>>>> +	dma_resv_assert_held(attach->dmabuf->resv);
> >>>>> +
> >>>>> +	ret = dma_resv_wait_timeout(attach->dmabuf->resv,
> >>>>> +				    DMA_RESV_USAGE_KERNEL,
> >>>>> +				    false, MAX_SCHEDULE_TIMEOUT);
> >>>>
> >>>> Why this wait is needed?
> >>> The intention was to wait for any pending operations on the backing
> object
> >>> to finish and let it become idle before mapping and accessing the
> >> underlying
> >>> memory. But I suspect this wait may not be necessary given that we
> would
> >>> have already called dma_buf_pin() at this point, which would have had
> the
> >>> desired effect?
> >>
> >> Looking at the dma_buf_map_attachment() code, I see that it does both
> of
> >> pinning and waiting for the fence by itself. Hence should be safe to
> >> drop both dma_buf_pin() and dma_resv_wait_timeout(), please test.
> > Sure, I'll retest again but it looks like dma_buf_map_attachment() pins and
> > or waits for fence only in specific situations. That is, it pins only if the
> exporter
> > is non-dynamic and if CONFIG_DMABUF_MOVE_NOTIFY is not enabled.
> And,
> > it waits for the fence only if the importer is non-dynamic. Since I have only
> tested
> > with a dynamic exporter (Xe driver), I did not encounter any issues but it
> makes
> > sense to augment the code to account for non-dynamic exporters as well.
> 
> If exporter or importer is not dynamic, then dma-buf core pins buffer at
> the buffer importing time, see dma_buf_attach(). Hence, I expect that
> everything should work fine.
Given that virtio-gpu would always be a dynamic importer (in other words,
dma_buf_attachment_is_dynamic() is true) as proposed in this series, the
question really is about whether the exporter is dynamic or not.
I tested with both Xe driver (dynamic exporter) and i915 (non-dynamic)
and noted the following:

For Xe (dma_buf_is_dynamic() is true):
- dma-buf core calls pin only if CONFIG_DMABUF_MOVE_NOTIFY is not
  enabled, and extracts the sgt as part of dma_buf_map_attachment()
- It does not wait for the fences as part of map

For i915 (dma_buf_is_dynamic() is false):
- dma-buf core does not call pin anywhere as it is a no-op for non-dynamic
  exporters, but maps/extracts the sgt as part of dma_buf_dynamic_attach()
- It does not wait for fences anywhere

And, the docs mention the following rules for dynamic importers:
"DYNAMIC IMPORTER RULES:

Dynamic importers, see dma_buf_attachment_is_dynamic(), have additional
constraints on how they set up fences:

Dynamic importers must obey the write fences and wait for them to signal
before allowing access to the buffer’s underlying storage through the device.

Dynamic importers should set fences for any access that they can’t disable
immediately from their dma_buf_attach_ops.move_notify callback."

So, I believe we need to call pin and/or dma_resv_wait_timeout(resv,
DMA_RESV_USAGE_WRITE,....) at some point, as part of import, given
these rules.
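
As a minimal sketch under those rules (an assumption on my part, not
the final patch), the dynamic-importer import path could explicitly
pin the backing and wait on the exporter's write fences before the
device touches the underlying storage:

	/* Keep the imported backing resident while it is used for scanout. */
	ret = dma_buf_pin(attach);
	if (ret)
		return ret;

	/* Dynamic importers must obey the write fences before allowing
	 * device access to the buffer's underlying storage. */
	ret = dma_resv_wait_timeout(attach->dmabuf->resv,
				    DMA_RESV_USAGE_WRITE,
				    false, MAX_SCHEDULE_TIMEOUT);
	if (ret <= 0)
		return ret < 0 ? ret : -ETIMEDOUT;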

> 
> >> BTW, is any DG2 GPU suitable for testing of this patchset? Will I be
> >> able to test it using a regular consumer A750 card?
> > Yes, you can test with any DG2 dGPU as long as you can passthrough it to
> the
> > Guest VM. And, if there is an iGPU available on the Host, you can use
> GTK/SDL UI
> > for local display or Spice UI for remote display if there is no iGPU on the
> Host.
> > This is exactly how I started testing this patch series but I am now
> predominantly
> > testing this series with SRIOV enabled iGPUs and dGPUs.
> 
> Was hoping to try out SR-IOV on A750 if it's even possible at all.
AFAIK, SRIOV is not supported on any versions of DG2 including A750.

Thanks,
Vivek

> But passthrough indeed is another an option, will try with passthrough, thanks.
> 
> --
> Best regards,
> Dmitry
Dmitry Osipenko Nov. 17, 2024, 10:03 a.m. UTC | #7
On 10/29/24 09:18, Kasireddy, Vivek wrote:
>>>> BTW, is any DG2 GPU suitable for testing of this patchset? Will I be
>>>> able to test it using a regular consumer A750 card?
>>> Yes, you can test with any DG2 dGPU as long as you can passthrough it to
>> the
>>> Guest VM. And, if there is an iGPU available on the Host, you can use
>> GTK/SDL UI
>>> for local display or Spice UI for remote display if there is no iGPU on the
>> Host.
>>> This is exactly how I started testing this patch series but I am now
>> predominantly
>>> testing this series with SRIOV enabled iGPUs and dGPUs.
>> Was hoping to try out SR-IOV on A750 if it's even possible at all.
> AFAIK, SRIOV is not supported on any versions of DG2 including A750.

I'm having trouble with getting it to work.

My testing setup:

1. Passthroughed A750 that uses XE driver
2. RaptorLake iGPU on host used for virtio-gpu, uses i915 driver
3. QEMU latest master branch + your QEMU vfio_dmabuf_2 patches applied
on top
4. Latest linux-next kernel on host
5. Latest linux-next kernel on guest + this v2 applied

In guest I'm running this:

  seatd-launch -- weston --drm-device=card1 --additional-devices=card0

where card1 is A750 and card0 is virtio-gpu.

I added printk's and can see that virtio-gpu imports the A750 dma-buf and gets
XE's SGT, but nothing shows on the QEMU display. I tried both the GTK and
SPICE displays. If I connect an HDMI display to the passthroughed A750 while
running the weston command above, then weston works on the A750 HDMI
display, but still nothing is shown on the virtio-gpu display.

I also had to force virtio-gpu driver to always probe before XE,
otherwise virtio-gpu gets PCI read errors and fails to probe because it
fails to detect virtio features.

Am I doing anything wrong? Suggestions are welcome.
Kasireddy, Vivek Nov. 18, 2024, 7:57 a.m. UTC | #8
Hi Dmitry,

> Subject: Re: [PATCH v2 2/5] drm/virtio: Add a helper to map and note the
> dma addrs and lengths
> 
> On 10/29/24 09:18, Kasireddy, Vivek wrote:
> >>>> BTW, is any DG2 GPU suitable for testing of this patchset? Will I be
> >>>> able to test it using a regular consumer A750 card?
> >>> Yes, you can test with any DG2 dGPU as long as you can passthrough it to
> >> the
> >>> Guest VM. And, if there is an iGPU available on the Host, you can use
> >> GTK/SDL UI
> >>> for local display or Spice UI for remote display if there is no iGPU on the
> >> Host.
> >>> This is exactly how I started testing this patch series but I am now
> >> predominantly
> >>> testing this series with SRIOV enabled iGPUs and dGPUs.
> >> Was hoping to try out SR-IOV on A750 if it's even possible at all.
> > AFAIK, SRIOV is not supported on any versions of DG2 including A750.
> 
> I'm having trouble with getting it to work.
> 
> My testing setup:
> 
> 1. Passthroughed A750 that uses XE driver
I tested successfully with both i915 and Xe but DG2 is officially supported
only with i915. Xe officially supports LNL and newer platforms. For older platforms,
Mesa throws the following warning:
MESA: warning: Support for this platform is experimental with Xe KMD, bug reports may be ignored.

> 2. RaptorLake iGPU on host used for virtio-gpu, uses i915 driver
I am testing with a similar setup: RPL-P iGPU + DG2

> 3. QEMU latest master branch + your QEMU vfio_dmabuf_2 patches applied
> on top
I just tried with the latest Qemu master and it appears to be very sluggish in my
environment (Fedora 39 Guest and Fedora 39 Host). But a few-months-old
Qemu master + vfio_dmabuf_2 patches (actually, these patches are not needed
to test this patch series with GTK/SDL) seems to work OK for me. And, here are
the relevant Qemu launch options I am using:
qemu-system-x86_64 -m 4096m -enable-kvm -cpu host,host-phys-bits=on,host-phys-bits-limit=39....
-device vfio-pci,host=0000:03:00.0 -device virtio-vga,max_outputs=1,xres=1920,yres=1080,blob=true
-display gtk,gl=on -object memory-backend-memfd,id=mem1,size=4096M -machine memory-backend=mem1.......

> 4. Latest linux-next kernel on host
> 5. Latest linux-next kernel on guest + this v2 applied
I just tested successfully with today's drm-tip (6.12) + v2 of this series.

> 
> In guest I'm running this:
> 
>   seatd-launch -- weston --drm-device=card1 --additional-devices=card0
> 
> where card1 is A750 and card0 is virtio-gpu.
> 
> I added printk's and see that virtio-gpu imports A750 dma-buf and gets
> XE's SGT, but nothing shows on the QEMU display.
Could you please try this test again with i915 instead of Xe? Also, are there
any errors seen in Guest or Host logs?

> I tried both GTK and SPICE displays.
GTK should be simpler given that there are other components (Gstreamer, etc)
needed for SPICE that have their own set of dependencies.

> If I connect HDMI display to passthroughed A750 while
> running weston command above, then I get weston working on the A750
> HDMI
> display and still nothing is shown on virtio-gpu display.
I tried this exact same test and it seems to work OK for me. Could you please also test
Gnome + Wayland:
XDG_SESSION_TYPE=wayland dbus-run-session -- /usr/bin/gnome-shell --wayland --no-x11 &

Note that, for Weston, the primary device (drm-device=) needs to support KMS and
have at least one connector connected. For Gnome + Wayland, there is no such
requirement, as the primary device can work with a dummy backend and be used
only for rendering. Therefore, to test Gnome with DG2 in the Guest, I typically do
modprobe i915 enable_guc=3 disable_display=true

> 
> I also had to force virtio-gpu driver to always probe before XE,
> otherwise virtio-gpu gets PCI read errors and fails to probe because it
> fails to detect virtio features.
That is concerning. I guess we'll need to investigate how to fix this properly.
As part of my tests, I typically blacklist Xe and i915 and load them later
after booting to the cmd line and before launching the compositors.

> 
> Am I doing anything wrong? Suggestions are welcome.
Could you please share your Qemu launch parameters? I'll try to recreate the
issue you are seeing.

Thanks,
Vivek

> 
> --
> Best regards,
> Dmitry
Kasireddy, Vivek Nov. 19, 2024, 6:01 a.m. UTC | #9
Hi Dmitry,

> > Subject: Re: [PATCH v2 2/5] drm/virtio: Add a helper to map and note the
> > dma addrs and lengths
> >
> > On 10/29/24 09:18, Kasireddy, Vivek wrote:
> > >>>> BTW, is any DG2 GPU suitable for testing of this patchset? Will I be
> > >>>> able to test it using a regular consumer A750 card?
> > >>> Yes, you can test with any DG2 dGPU as long as you can passthrough it
> to
> > >> the
> > >>> Guest VM. And, if there is an iGPU available on the Host, you can use
> > >> GTK/SDL UI
> > >>> for local display or Spice UI for remote display if there is no iGPU on the
> > >> Host.
> > >>> This is exactly how I started testing this patch series but I am now
> > >> predominantly
> > >>> testing this series with SRIOV enabled iGPUs and dGPUs.
> > >> Was hoping to try out SR-IOV on A750 if it's even possible at all.
> > > AFAIK, SRIOV is not supported on any versions of DG2 including A750.
> >
> > I'm having trouble with getting it to work.
> >
> > My testing setup:
> >
> > 1. Passthroughed A750 that uses XE driver
> I tested successfully with both i915 and Xe but DG2 is officially supported
> only with i915. Xe officially supports LNL and newer platforms. For older
> platforms,
> Mesa throws the following warning:
> MESA: warning: Support for this platform is experimental with Xe KMD, bug
> reports may be ignored.
> 
> > 2. RaptorLake iGPU on host used for virtio-gpu, uses i915 driver
> I am testing with a similar setup: RPL-P iGPU + DG2
> 
> > 3. QEMU latest master branch + your QEMU vfio_dmabuf_2 patches
> applied
> > on top
> I just tried with latest Qemu master and it appears to be very sluggish in my
Turns out, the sluggishness is the result of a busy loop triggered by this patch in Qemu:
commit 640f9149c3dcafbfeb495a10f6105c6f71f24d1d
Author: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Date:   Fri Oct 25 00:03:08 2024 +0300

    virtio-gpu: Support suspension of commands processing

    Check whether command processing has been finished; otherwise, stop
    processing commands and retry the command again next time. This allows
    us to support asynchronous execution of non-fenced commands needed for
    unmapping host blobs safely.

The above patch exposes an issue that occurs in very specific cases that
make use of a single FB (such as fbcon and dirtyfb), where a fence is not added
to the (resource_flush) cmd. However, this issue can be fixed in the Guest
virtio drm driver by including the following patch:
commit cef51cbd7ac70d225eaa0d4d47e6e29f66ebcedf
Author: Dongwon Kim <dongwon.kim@intel.com>
Date:   Mon Oct 21 02:08:03 2024 +0300

    drm/virtio: New fence for every plane update

    Having a fence linked to a virtio_gpu_framebuffer in the plane update
    sequence would cause conflict when several planes referencing the same
    framebuffer (e.g. Xorg screen covering multi-displays configured for an
    extended mode) and those planes are updated concurrently. So it is needed
    to allocate a fence for every plane state instead of the framebuffer.

After rebasing v2 of this patch series on top of the above patch, I see that
this use-case works as expected with Qemu master. Let me send out v3,
which would be a rebase of v2 on top of the above patch.

Thanks,
Vivek

> environment (Fedora 39 Guest and Fedora 39 Host). But, few months old
> Qemu master + vfio_dmabuf_2 patches (actually, these patches are not
> needed
> to test this patch series with GTK/SDL) seems to work OK for me. And, here
> are
> the relevant Qemu launch options I am using:
> qemu-system-x86_64 -m 4096m -enable-kvm -cpu host,host-phys-
> bits=on,host-phys-bits-limit=39....
> -device vfio-pci,host=0000:03:00.0 -device virtio-
> vga,max_outputs=1,xres=1920,yres=1080,blob=true
> -display gtk,gl=on -object memory-backend-memfd,id=mem1,size=4096M -
> machine memory-backend=mem1.......
> 
> > 4. Latest linux-next kernel on host
> > 5. Latest linux-next kernel on guest + this v2 applied
> I just tested successfully with today's drm-tip (6.12) + v2 of this series.
> 
> >
> > In guest I'm running this:
> >
> >   seatd-launch -- weston --drm-device=card1 --additional-devices=card0
> >
> > where card1 is A750 and card0 is virtio-gpu.
> >
> > I added printk's and see that virtio-gpu imports A750 dma-buf and gets
> > XE's SGT, but nothing shows on the QEMU display.
> Could you please try this test again with i915 instead of Xe? Also, are there
> any errors seen in Guest or Host logs?
> 
> > I tried both GTK and SPICE displays.
> GTK should be simpler given that there are other components (Gstreamer,
> etc)
> needed for SPICE that have their own set of dependencies.
> 
> > If I connect HDMI display to passthroughed A750 while
> > running weston command above, then I get weston working on the A750
> > HDMI
> > display and still nothing is shown on virtio-gpu display.
> I tried this exact same test and it seems to work OK for me. Could you please
> also test
> Gnome + Wayland:
> XDG_SESSION_TYPE=wayland dbus-run-session -- /usr/bin/gnome-shell --
> wayland --no-x11 &
> 
> Note that, for Weston, the primary device (drm-device=) needs to support
> KMS and
> have at-least one connector connected. For Gnome + Wayland, there is no
> such
> requirement as the primary device can work with dummy backend and be
> used
> only for rendering. Therefore, to test Gnome with DG2 in Guest, I typically do
> modprobe i915 enable_guc=3 disable_display=true
> 
> >
> > I also had to force virtio-gpu driver to always probe before XE,
> > otherwise virtio-gpu gets PCI read errors and fails to probe because it
> > fails to detect virtio features.
> That is concerning. I guess we'll need to investigate how to fix this properly.
> As part of my tests, I typically blacklist Xe and i915 and load them later
> after booting to the cmd line and before launching the compositors.
> 
> >
> > Am I doing anything wrong? Suggestions are welcome.
> Could you please share your Qemu launch parameters? I'll try to recreate the
> issue you are seeing.
> 
> Thanks,
> Vivek
> 
> >
> > --
> > Best regards,
> > Dmitry
Dmitry Osipenko Nov. 19, 2024, 10:17 a.m. UTC | #10
Hi, Vivek

On 11/19/24 09:01, Kasireddy, Vivek wrote:
...
> After rebasing v2 of this patch series on top of the above patch, I see that
> this use-case works as expected with Qemu master. Let me send out v3,
> which would be a rebase of v2 on top of the above patch.
...
>>> Am I doing anything wrong? Suggestions are welcome.
>> Could you please share your Qemu launch parameters? I'll try to recreate the
>> issue you are seeing.

Thanks a lot for sharing your Qemu command. I haven't used the
`host-phys-bits` flags; I will try again soon and then also check host
errors if it still won't work.

The `new fence for every plane` patchset is now applied to drm-misc-next.
Kasireddy, Vivek Nov. 20, 2024, 3:44 a.m. UTC | #11
Hi Dmitry,

> Subject: Re: [PATCH v2 2/5] drm/virtio: Add a helper to map and note the
> dma addrs and lengths
> 
> ...
> > After rebasing v2 of this patch series on top of the above patch, I see that
> > this use-case works as expected with Qemu master. Let me send out v3,
> > which would be a rebase of v2 on top of the above patch.
> ...
> >>> Am I doing anything wrong? Suggestions are welcome.
> >> Could you please share your Qemu launch parameters? I'll try to recreate
> the
> >> issue you are seeing.
> 
> Thanks a lot for sharing your Qemu command. I haven't used the
> `host-phys-bits` flags, will try again soon and then also check host
> errors if it still won't work.
Any chance you would be able to finish reviewing this patch series before
your upcoming vacation? 

> 
> The `new fence for every plane` patchset is now applied to drm-misc-next.
Thank you,
Vivek

> 
> --
> Best regards,
> Dmitry

Patch

diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 9fb09486c08c..6b3b91968298 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -89,6 +89,7 @@  struct virtio_gpu_object_params {
 
 struct virtio_gpu_object {
 	struct drm_gem_shmem_object base;
+	struct sg_table *sgt;
 	uint32_t hw_res_handle;
 	bool dumb;
 	bool created;
@@ -473,6 +474,10 @@  struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev,
 struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
 	struct drm_device *dev, struct dma_buf_attachment *attach,
 	struct sg_table *sgt);
+long virtgpu_dma_buf_import_sgt(struct virtio_gpu_mem_entry **ents,
+				unsigned int *nents,
+				struct virtio_gpu_object *bo,
+				struct dma_buf_attachment *attach);
 
 /* virtgpu_debugfs.c */
 void virtio_gpu_debugfs_init(struct drm_minor *minor);
diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c
index 44425f20d91a..c2ae59146ac9 100644
--- a/drivers/gpu/drm/virtio/virtgpu_prime.c
+++ b/drivers/gpu/drm/virtio/virtgpu_prime.c
@@ -27,6 +27,8 @@ 
 
 #include "virtgpu_drv.h"
 
+MODULE_IMPORT_NS(DMA_BUF);
+
 static int virtgpu_virtio_get_uuid(struct dma_buf *buf,
 				   uuid_t *uuid)
 {
@@ -142,6 +144,46 @@  struct dma_buf *virtgpu_gem_prime_export(struct drm_gem_object *obj,
 	return buf;
 }
 
+long virtgpu_dma_buf_import_sgt(struct virtio_gpu_mem_entry **ents,
+				unsigned int *nents,
+				struct virtio_gpu_object *bo,
+				struct dma_buf_attachment *attach)
+{
+	struct scatterlist *sl;
+	struct sg_table *sgt;
+	long i, ret;
+
+	dma_resv_assert_held(attach->dmabuf->resv);
+
+	ret = dma_resv_wait_timeout(attach->dmabuf->resv,
+				    DMA_RESV_USAGE_KERNEL,
+				    false, MAX_SCHEDULE_TIMEOUT);
+	if (ret < 0)
+		return ret;
+
+	sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+	if (IS_ERR(sgt))
+		return PTR_ERR(sgt);
+
+	*ents = kvmalloc_array(sgt->nents,
+			       sizeof(struct virtio_gpu_mem_entry),
+			       GFP_KERNEL);
+	if (!(*ents)) {
+		dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
+		return -ENOMEM;
+	}
+
+	*nents = sgt->nents;
+	for_each_sgtable_dma_sg(sgt, sl, i) {
+		(*ents)[i].addr = cpu_to_le64(sg_dma_address(sl));
+		(*ents)[i].length = cpu_to_le32(sg_dma_len(sl));
+		(*ents)[i].padding = 0;
+	}
+
+	bo->sgt = sgt;
+	return 0;
+}
+
 struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev,
 						struct dma_buf *buf)
 {