drm: enable render nodes wherever buffer sharing is supported

Message ID 20200427200513.36328-1-pcc@google.com (mailing list archive)
State New, archived
Series drm: enable render nodes wherever buffer sharing is supported

Commit Message

Peter Collingbourne April 27, 2020, 8:05 p.m. UTC
Render nodes are not just useful for devices supporting GPU hardware
acceleration. Even on devices that only support dumb frame buffers,
they are useful in situations where composition (using software
rasterization) and KMS are done in different processes with buffer
sharing being used to send frame buffers between them. This is the
situation on Android, where surfaceflinger is the compositor and the
composer HAL uses KMS to display the buffers. Thus it is beneficial
to expose render nodes on all devices that support buffer sharing.

Because all drivers that currently support render nodes also support
buffer sharing, the DRIVER_RENDER flag is no longer necessary to mark
devices as supporting render nodes, so remove it and just rely on
the presence of a prime_handle_to_fd function pointer to determine
whether buffer sharing is supported.

Signed-off-by: Peter Collingbourne <pcc@google.com>
---
 Documentation/gpu/drm-uapi.rst          | 9 +++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +-
 drivers/gpu/drm/drm_drv.c               | 2 +-
 drivers/gpu/drm/etnaviv/etnaviv_drv.c   | 2 +-
 drivers/gpu/drm/exynos/exynos_drm_drv.c | 2 +-
 drivers/gpu/drm/i915/i915_drv.c         | 2 +-
 drivers/gpu/drm/lima/lima_drv.c         | 2 +-
 drivers/gpu/drm/msm/msm_drv.c           | 1 -
 drivers/gpu/drm/nouveau/nouveau_drm.c   | 2 +-
 drivers/gpu/drm/omapdrm/omap_drv.c      | 2 +-
 drivers/gpu/drm/panfrost/panfrost_drv.c | 2 +-
 drivers/gpu/drm/radeon/radeon_drv.c     | 3 +--
 drivers/gpu/drm/tegra/drm.c             | 2 +-
 drivers/gpu/drm/v3d/v3d_drv.c           | 1 -
 drivers/gpu/drm/vc4/vc4_drv.c           | 8 --------
 drivers/gpu/drm/vgem/vgem_drv.c         | 2 +-
 drivers/gpu/drm/virtio/virtgpu_drv.c    | 2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c     | 2 +-
 include/drm/drm_drv.h                   | 7 -------
 19 files changed, 19 insertions(+), 36 deletions(-)
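
To make the flow concrete, here is a minimal userspace sketch of the
buffer-sharing arrangement the commit message describes, using libdrm's
PRIME helpers. It is an illustration only: the buffer dimensions, the
process roles and the function names are assumptions, not part of the
patch.

#include <stdint.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

/* Process A: allocate a dumb buffer on the primary node and export it
 * as a PRIME fd that can be passed to another process over a socket. */
int export_dumb_buffer(int card_fd, uint32_t *handle_out)
{
    struct drm_mode_create_dumb create = {
        .width = 1920, .height = 1080, .bpp = 32,
    };
    int prime_fd = -1;

    if (drmIoctl(card_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create) < 0)
        return -1;
    if (drmPrimeHandleToFD(card_fd, create.handle, DRM_CLOEXEC,
                           &prime_fd) < 0)
        return -1;
    *handle_out = create.handle;
    return prime_fd;
}

/* Process B: import the fd into its own GEM handle space. With this
 * patch, a render node is sufficient on the import side, so no
 * drmAuth dance against the primary node is needed. */
int import_buffer(int render_fd, int prime_fd, uint32_t *handle_out)
{
    return drmPrimeFDToHandle(render_fd, prime_fd, handle_out);
}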

Comments

Peter Collingbourne April 28, 2020, 2:22 a.m. UTC | #1
On Mon, Apr 27, 2020 at 1:47 PM Eric Anholt <eric@anholt.net> wrote:
>
> On Mon, Apr 27, 2020 at 1:05 PM Peter Collingbourne <pcc@google.com> wrote:
> >
> > Render nodes are not just useful for devices supporting GPU hardware
> > acceleration. Even on devices that only support dumb frame buffers,
> > they are useful in situations where composition (using software
> > rasterization) and KMS are done in different processes with buffer
> > sharing being used to send frame buffers between them. This is the
> > situation on Android, where surfaceflinger is the compositor and the
> > composer HAL uses KMS to display the buffers. Thus it is beneficial
> > to expose render nodes on all devices that support buffer sharing.
> >
> > Because all drivers that currently support render nodes also support
> > buffer sharing, the DRIVER_RENDER flag is no longer necessary to mark
> > devices as supporting render nodes, so remove it and just rely on
> > the presence of a prime_handle_to_fd function pointer to determine
> > whether buffer sharing is supported.
>
> I'm definitely interested in seeing a patch like this land, as I think
> the current state is an ugly historical artifact.  We just have to be
> careful.
>
> Were there any instances of drivers with render engines exposing PRIME
> but not RENDER?  We should be careful to make sure that we're not
> exposing new privileges for those through adding render nodes.

These are the drivers that we'd be adding render nodes for with this change:

$ git grep -l prime_handle_to_fd $(git grep -L DRIVER_RENDER $(git grep -l '\.driver_features'))
drivers/gpu/drm/arc/arcpgu_drv.c
drivers/gpu/drm/arm/display/komeda/komeda_kms.c
drivers/gpu/drm/arm/hdlcd_drv.c
drivers/gpu/drm/arm/malidp_drv.c
drivers/gpu/drm/armada/armada_drv.c
drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
drivers/gpu/drm/imx/imx-drm-core.c
drivers/gpu/drm/ingenic/ingenic-drm.c
drivers/gpu/drm/mcde/mcde_drv.c
drivers/gpu/drm/mediatek/mtk_drm_drv.c
drivers/gpu/drm/meson/meson_drv.c
drivers/gpu/drm/mxsfb/mxsfb_drv.c
drivers/gpu/drm/pl111/pl111_drv.c
drivers/gpu/drm/qxl/qxl_drv.c
drivers/gpu/drm/rcar-du/rcar_du_drv.c
drivers/gpu/drm/rockchip/rockchip_drm_drv.c
drivers/gpu/drm/shmobile/shmob_drm_drv.c
drivers/gpu/drm/sti/sti_drv.c
drivers/gpu/drm/stm/drv.c
drivers/gpu/drm/tilcdc/tilcdc_drv.c
drivers/gpu/drm/tve200/tve200_drv.c
drivers/gpu/drm/xen/xen_drm_front.c
drivers/gpu/drm/zte/zx_drm_drv.c

Some of the drivers provide custom ioctls but they are already
protected from render nodes by not setting DRM_RENDER_ALLOW. Another
thing to check for is drivers providing custom fops that might expose
something undesirable in the render node:

$ git grep -L 'DEFINE_DRM_GEM_CMA_FOPS\|DEFINE_DRM_GEM_FOPS' $(git grep -l prime_handle_to_fd $(git grep -L DRIVER_RENDER $(git grep -l '\.driver_features')))
drivers/gpu/drm/mediatek/mtk_drm_drv.c
drivers/gpu/drm/rockchip/rockchip_drm_drv.c
drivers/gpu/drm/xen/xen_drm_front.c

But looking at those drivers in detail, I see that each of them is
using the standard DRM fops handlers (which presumably already handle
render nodes correctly) with the exception of mmap, which they provide
wrappers for that mostly just wrap drm_gem_mmap.
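
That pattern looks roughly like the following hypothetical sketch
(foo_gem_mmap is an invented name standing in for those drivers'
wrappers):

#include <drm/drm_gem.h>

static int foo_gem_mmap(struct file *filp, struct vm_area_struct *vma)
{
    int ret;

    /* Defer to the core GEM handler, which performs the usual object
     * lookup and offset checks (and already works on render nodes). */
    ret = drm_gem_mmap(filp, vma);
    if (ret)
        return ret;

    /* Driver-specific vma fixups (e.g. caching attributes) go here. */
    return 0;
}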

So unless I'm missing something significant (which seems likely -- I'm
not a DRM expert), I don't see a problem so far.

> There's a UAPI risk I see here.  Right now, on a system with a single
> renderer GPU, we can just open /dev/dri/renderD128 and get the GPU for
> rendering, and various things are relying on that (such as libwaffle,
used in piglit among other things).  Adding render nodes for kms-only
> drivers could displace the actual GPU to 129, and the question is
> whether this will be disruptive.  For Mesa, I think this works out,
> because kmsro should load on the kms device's node and then share
> buffers over to the real GPU that it digs around to find at init time.
> Just saying, I'm not sure I know all of the userspace well enough to
> say "this should be safe despite that"
>
> (And, maybe, if we decide that it's not safe enough, we could punt
> kms-only drivers to a higher starting number?)

Android (minigbm) similarly tries to open /dev/dri/renderD$N in a loop
with 128 <= N < 192 and assumes that the first non-blacklisted (i.e.
not vgem) one that it can open corresponds to the real GPU [1]. I
think that the risk of breaking something on Android is low since
Android's architecture basically already depends on there being a
render node, and it seems unlikely for a device to have more than one
GPU, one of which would be non-functional.
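
For reference, a rough sketch of that probing strategy (the loop
bounds follow the cited cros_gralloc code; everything else is an
approximation, not a copy):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <xf86drm.h>

int open_first_render_node(void)
{
    char path[64];
    int node, fd;

    for (node = 128; node < 192; node++) {
        snprintf(path, sizeof(path), "/dev/dri/renderD%d", node);
        fd = open(path, O_RDWR | O_CLOEXEC);
        if (fd < 0)
            continue;

        /* Skip blacklisted drivers (vgem) and assume the first
         * remaining node is the real GPU. */
        drmVersionPtr version = drmGetVersion(fd);
        if (version && strcmp(version->name, "vgem") != 0) {
            drmFreeVersion(version);
            return fd;
        }
        if (version)
            drmFreeVersion(version);
        close(fd);
    }
    return -1;
}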

It's also worth bearing in mind that render nodes were added to vgem
in commit 3a6eb795 from 2018. To the extent that exposing additional
render nodes would lead to widespread breakage, this would seem to me
to be a likely way in which it could have happened (since I would
expect that it could cause many machines to go from having one render
node to having more than one), so perhaps the argument can be made
that if we hadn't seen widespread breakage as a result of that change,
we'd be unlikely to see it as a result of this one.

This would be conditional on the userspace code not blacklisting the
vgem render node like minigbm does -- at a glance I couldn't find such
code in Mesa (there does appear to be some code that looks for the
vgem driver name, but it seems to only be used on primary nodes, not
render nodes) or libwaffle.

Peter

[1] https://cs.android.com/android/platform/superproject/+/master:external/minigbm/cros_gralloc/cros_gralloc_driver.cc;l=48
Peter Collingbourne April 28, 2020, 4:47 p.m. UTC | #2
On Tue, Apr 28, 2020 at 4:48 AM Liviu Dudau <Liviu.Dudau@arm.com> wrote:
>
> On Mon, Apr 27, 2020 at 07:22:02PM -0700, Peter Collingbourne wrote:
> > On Mon, Apr 27, 2020 at 1:47 PM Eric Anholt <eric@anholt.net> wrote:
> > >
> > > On Mon, Apr 27, 2020 at 1:05 PM Peter Collingbourne <pcc@google.com> wrote:
> > > >
> > > > Render nodes are not just useful for devices supporting GPU hardware
> > > > acceleration. Even on devices that only support dumb frame buffers,
> > > > they are useful in situations where composition (using software
> > > > rasterization) and KMS are done in different processes with buffer
> > > > sharing being used to send frame buffers between them. This is the
> > > > situation on Android, where surfaceflinger is the compositor and the
> > > > composer HAL uses KMS to display the buffers. Thus it is beneficial
> > > > to expose render nodes on all devices that support buffer sharing.
> > > >
> > > > Because all drivers that currently support render nodes also support
> > > > buffer sharing, the DRIVER_RENDER flag is no longer necessary to mark
> > > > devices as supporting render nodes, so remove it and just rely on
> > > > the presence of a prime_handle_to_fd function pointer to determine
> > > > whether buffer sharing is supported.
> > >
> > > I'm definitely interested in seeing a patch like this land, as I think
> > > the current state is an ugly historical artifact.  We just have to be
> > > careful.
> > >
> > > Were there any instances of drivers with render engines exposing PRIME
> > > but not RENDER?  We should be careful to make sure that we're not
> > > exposing new privileges for those through adding render nodes.
> >
> > These are the drivers that we'd be adding render nodes for with this change:
> >
> > $ git grep -l prime_handle_to_fd $(git grep -L DRIVER_RENDER $(git grep -l '\.driver_features'))
> > drivers/gpu/drm/arc/arcpgu_drv.c
> > drivers/gpu/drm/arm/display/komeda/komeda_kms.c
> > drivers/gpu/drm/arm/hdlcd_drv.c
> > drivers/gpu/drm/arm/malidp_drv.c
> > drivers/gpu/drm/armada/armada_drv.c
> > drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
> > drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
> > drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
> > drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
> > drivers/gpu/drm/imx/imx-drm-core.c
> > drivers/gpu/drm/ingenic/ingenic-drm.c
> > drivers/gpu/drm/mcde/mcde_drv.c
> > drivers/gpu/drm/mediatek/mtk_drm_drv.c
> > drivers/gpu/drm/meson/meson_drv.c
> > drivers/gpu/drm/mxsfb/mxsfb_drv.c
> > drivers/gpu/drm/pl111/pl111_drv.c
> > drivers/gpu/drm/qxl/qxl_drv.c
> > drivers/gpu/drm/rcar-du/rcar_du_drv.c
> > drivers/gpu/drm/rockchip/rockchip_drm_drv.c
> > drivers/gpu/drm/shmobile/shmob_drm_drv.c
> > drivers/gpu/drm/sti/sti_drv.c
> > drivers/gpu/drm/stm/drv.c
> > drivers/gpu/drm/tilcdc/tilcdc_drv.c
> > drivers/gpu/drm/tve200/tve200_drv.c
> > drivers/gpu/drm/xen/xen_drm_front.c
> > drivers/gpu/drm/zte/zx_drm_drv.c
> >
> > Some of the drivers provide custom ioctls but they are already
> > protected from render nodes by not setting DRM_RENDER_ALLOW. Another
> > thing to check for is drivers providing custom fops that might expose
> > something undesirable in the render node:
> >
> > $ git grep -L 'DEFINE_DRM_GEM_CMA_FOPS\|DEFINE_DRM_GEM_FOPS' $(git grep -l prime_handle_to_fd $(git grep -L DRIVER_RENDER $(git grep -l '\.driver_features')))
> > drivers/gpu/drm/mediatek/mtk_drm_drv.c
> > drivers/gpu/drm/rockchip/rockchip_drm_drv.c
> > drivers/gpu/drm/xen/xen_drm_front.c
> >
> > But looking at those drivers in detail, I see that each of them is
> > using the standard DRM fops handlers (which presumably already handle
> > render nodes correctly) with the exception of mmap, which they provide
> > wrappers for that mostly just wrap drm_gem_mmap.
> >
> > So unless I'm missing something significant (which seems likely -- I'm
> > not a DRM expert), I don't see a problem so far.
> >
> > > There's a UAPI risk I see here.  Right now, on a system with a single
> > > renderer GPU, we can just open /dev/dri/renderD128 and get the GPU for
> > > rendering, and various things are relying on that (such as libwaffle,
> > > used in piglit among other things).  Adding render nodes for kms-only
> > > drivers could displace the actual GPU to 129, and the question is
> > > whether this will be disruptive.  For Mesa, I think this works out,
> > > because kmsro should load on the kms device's node and then share
> > > buffers over to the real GPU that it digs around to find at init time.
> > > Just saying, I'm not sure I know all of the userspace well enough to
> > > say "this should be safe despite that"
> > >
> > > (And, maybe, if we decide that it's not safe enough, we could punt
> > > kms-only drivers to a higher starting number?)
> >
> > Android (minigbm) similarly tries to open /dev/dri/renderD$N in a loop
> > with 128 <= N < 192 and assumes that the first non-blacklisted (i.e.
> > not vgem) one that it can open corresponds to the real GPU [1]. I
> > think that the risk of breaking something on Android is low since
> > Android's architecture basically already depends on there being a
> > render node, and it seems unlikely for a device to have more than one
> > GPU, one of which would be non-functional.
>
> Would Juno suffer from this issue? It has 2 HDLCD drivers (both can be
> active at the same time if you want) and you could run panfrost for the
> actual GPU. Depending on the probing order, HDLCD render nodes would be
> registered before the GPU's.

If I'm reading the Mesa code [1] correctly, it will loop through the
render nodes and attempt to match the driver name against its list of
known devices. So if there were two hdlcd render nodes and one
panfrost render node it should find the latter regardless of probing
order, because it doesn't know about hdlcd.

Peter

[1] https://cs.android.com/android/platform/superproject/+/master:external/mesa3d/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c;l=214
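
A sketch of that matching logic (the driver list here is illustrative,
not Mesa's actual table):

#include <stdbool.h>
#include <string.h>
#include <xf86drm.h>

static const char * const known_drivers[] = { "panfrost", "msm", "i915" };

bool node_has_known_driver(int render_fd)
{
    drmVersionPtr v = drmGetVersion(render_fd);
    bool match = false;
    size_t i;

    if (!v)
        return false;
    /* Compare the kernel driver name against the drivers userspace
     * knows how to drive; an unknown KMS-only node (e.g. hdlcd) is
     * simply skipped. */
    for (i = 0; i < sizeof(known_drivers) / sizeof(known_drivers[0]); i++) {
        if (strcmp(v->name, known_drivers[i]) == 0) {
            match = true;
            break;
        }
    }
    drmFreeVersion(v);
    return match;
}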

>
> Best regards,
> Liviu
>
> >
> > It's also worth bearing in mind that render nodes were added to vgem
> > in commit 3a6eb795 from 2018. To the extent that exposing additional
> > render nodes would lead to widespread breakage, this would seem to me
> > to be a likely way in which it could have happened (since I would
> > expect that it could cause many machines to go from having one render
> > node to having more than one), so perhaps the argument can be made
> > that if we hadn't seen widespread breakage as a result of that change,
> > we'd be unlikely to see it as a result of this one.
> >
> > This would be conditional on the userspace code not blacklisting the
> > vgem render node like minigbm does -- at a glance I couldn't find such
> > code in Mesa (there does appear to be some code that looks for the
> > vgem driver name, but it seems to only be used on primary nodes, not
> > render nodes) or libwaffle.
> >
> > Peter
> >
> > [1] https://cs.android.com/android/platform/superproject/+/master:external/minigbm/cros_gralloc/cros_gralloc_driver.cc;l=48
>
> --
> ====================
> | I would like to |
> | fix the world,  |
> | but they're not |
> | giving me the   |
>  \ source code!  /
>   ---------------
>     ¯\_(ツ)_/¯
Brian Starkey April 29, 2020, 4:16 p.m. UTC | #3
Hi Peter,

On Mon, Apr 27, 2020 at 01:05:13PM -0700, Peter Collingbourne wrote:
> Render nodes are not just useful for devices supporting GPU hardware
> acceleration. Even on devices that only support dumb frame buffers,
> they are useful in situations where composition (using software
> rasterization) and KMS are done in different processes with buffer
> sharing being used to send frame buffers between them. This is the
> situation on Android, where surfaceflinger is the compositor and the
> composer HAL uses KMS to display the buffers. Thus it is beneficial
> to expose render nodes on all devices that support buffer sharing.

If I understand your problem right, the issue is that you're getting
your buffers in minigbm via pl111, which means you want a render node
just for buffer allocation? Then HWComposer will import those on the
primary node for displaying.

I'm not at all familiar with how minigbm sits in Android, but on our
platforms where the Display and GPU devices are different, we allocate
via ion in Gralloc, and import those into both the GPU and Display.
I think that should work on pl111 too - if you can allocate contiguous
memory from ion (or dma-buf heaps) in minigbm, then you don't need the
render node.

Cheers,
-Brian
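
A hedged sketch of the allocation path suggested above, using the
dma-buf heaps UAPI; the heap node name ("linux,cma") is
system-dependent and assumed here:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/dma-heap.h>

/* Allocate a physically contiguous buffer from the CMA heap and return
 * the dma-buf fd, which can then be imported into both the display and
 * GPU devices without touching a render node. */
int alloc_contiguous_dmabuf(size_t len)
{
    struct dma_heap_allocation_data data;
    int heap_fd, ret;

    heap_fd = open("/dev/dma_heap/linux,cma", O_RDONLY | O_CLOEXEC);
    if (heap_fd < 0)
        return -1;

    memset(&data, 0, sizeof(data));
    data.len = len;
    data.fd_flags = O_RDWR | O_CLOEXEC;

    ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data);
    close(heap_fd);
    return ret < 0 ? -1 : (int)data.fd;
}
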
Peter Collingbourne April 29, 2020, 4:51 p.m. UTC | #4
On Wed, Apr 29, 2020 at 9:17 AM Brian Starkey <brian.starkey@arm.com> wrote:
>
> Hi Peter,
>
> On Mon, Apr 27, 2020 at 01:05:13PM -0700, Peter Collingbourne wrote:
> > Render nodes are not just useful for devices supporting GPU hardware
> > acceleration. Even on devices that only support dumb frame buffers,
> > they are useful in situations where composition (using software
> > rasterization) and KMS are done in different processes with buffer
> > sharing being used to send frame buffers between them. This is the
> > situation on Android, where surfaceflinger is the compositor and the
> > composer HAL uses KMS to display the buffers. Thus it is beneficial
> > to expose render nodes on all devices that support buffer sharing.
>
> If I understand your problem right, the issue is that you're getting
> your buffers in minigbm via pl111, which means you want a render node
> just for buffer allocation? Then HWComposer will import those on the
> primary node for displaying.

Correct.

> I'm not at all familiar with how minigbm sits in Android, but on our
> platforms where the Display and GPU devices are different, we allocate
> via ion in Gralloc, and import those into both the GPU and Display.
> I think that should work on pl111 too - if you can allocate contiguous
> memory from ion (or dma-buf heaps) in minigbm, then you don't need the
> render node.

Those contiguous memory regions would not necessarily be compatible
with the pl111 device as far as I know -- according to [1], on some
devices, a designated memory region must be used for the framebuffer
and therefore the memory region allocated in CMA would not be
compatible. That being said, FVP doesn't appear to be one of those
devices, so in principle that might work for FVP (provided that the
user provides a sufficiently large cma=X kernel command line
argument), but not for those other devices.

Peter

[1] https://www.kernel.org/doc/Documentation/devicetree/bindings/display/arm%2Cpl11x.txt
Liviu Dudau April 29, 2020, 5:31 p.m. UTC | #5
On Wed, Apr 29, 2020 at 09:51:19AM -0700, Peter Collingbourne wrote:
> On Wed, Apr 29, 2020 at 9:17 AM Brian Starkey <brian.starkey@arm.com> wrote:
> >
> > Hi Peter,
> >
> > On Mon, Apr 27, 2020 at 01:05:13PM -0700, Peter Collingbourne wrote:
> > > Render nodes are not just useful for devices supporting GPU hardware
> > > acceleration. Even on devices that only support dumb frame buffers,
> > > they are useful in situations where composition (using software
> > > rasterization) and KMS are done in different processes with buffer
> > > sharing being used to send frame buffers between them. This is the
> > > situation on Android, where surfaceflinger is the compositor and the
> > > composer HAL uses KMS to display the buffers. Thus it is beneficial
> > > to expose render nodes on all devices that support buffer sharing.
> >
> > If I understand your problem right, the issue is that you're getting
> > your buffers in minigbm via pl111, which means you want a render node
> > just for buffer allocation? Then HWComposer will import those on the
> > primary node for displaying.
> 
> Correct.
> 
> > I'm not at all familiar with how minigbm sits in Android, but on our
> > platforms where the Display and GPU devices are different, we allocate
> > via ion in Gralloc, and import those into both the GPU and Display.
> > I think that should work on pl111 too - if you can allocate contiguous
> > memory from ion (or dma-buf heaps) in minigbm, then you don't need the
> > render node.
> 
> Those contiguous memory regions would not necessarily be compatible
> with the pl111 device as far as I know -- according to [1], on some
> devices, a designated memory region must be used for the framebuffer
> and therefore the memory region allocated in CMA would not be
> compatible. That being said, FVP doesn't appear to be one of those
> devices, so in principle that might work for FVP (provided that the
> user provides a sufficiently large cma=X kernel command line
> argument), but not for those other devices.

We have been doing that with mali-dp drivers for years. Because most of
our dev environments are on FPGAs, we needed to use the local RAM on
those boards so we've had to assign a memory region to the driver that
in turn was using CMA. We've made heavy use of 'reserved-memory' and
'memory-region' nodes in the DT to link the two together, but things
worked out quite well. Because the 'reserved-memory' sub-node was marked
as 'compatible="shared-dma-pool"', gralloc and ION could be used to
allocate memory from it.

Best regards,
Liviu

> 
> Peter
> 
> [1] https://www.kernel.org/doc/Documentation/devicetree/bindings/display/arm%2Cpl11x.txt
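
The probe-time wiring described above can be sketched as follows, under
the assumption that the driver's DT node carries a 'memory-region'
phandle to a 'shared-dma-pool' reserved-memory node (foo_probe is an
invented name):

#include <linux/of_reserved_mem.h>
#include <linux/platform_device.h>

static int foo_probe(struct platform_device *pdev)
{
    int ret;

    /* Attach the 'memory-region' from the DT node so that subsequent
     * DMA/CMA allocations for this device come from the shared pool. */
    ret = of_reserved_mem_device_init(&pdev->dev);
    if (ret && ret != -ENODEV)
        return ret;

    /* ... normal driver setup; dma_alloc_*() now uses the region. */
    return 0;
}
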
Brian Starkey April 30, 2020, 10:30 a.m. UTC | #6
On Wed, Apr 29, 2020 at 06:31:01PM +0100, Liviu Dudau wrote:
> On Wed, Apr 29, 2020 at 09:51:19AM -0700, Peter Collingbourne wrote:
> > On Wed, Apr 29, 2020 at 9:17 AM Brian Starkey <brian.starkey@arm.com> wrote:
> > >
> > > Hi Peter,
> > >
> > > On Mon, Apr 27, 2020 at 01:05:13PM -0700, Peter Collingbourne wrote:
> > > > Render nodes are not just useful for devices supporting GPU hardware
> > > > acceleration. Even on devices that only support dumb frame buffers,
> > > > they are useful in situations where composition (using software
> > > > rasterization) and KMS are done in different processes with buffer
> > > > sharing being used to send frame buffers between them. This is the
> > > > situation on Android, where surfaceflinger is the compositor and the
> > > > composer HAL uses KMS to display the buffers. Thus it is beneficial
> > > > to expose render nodes on all devices that support buffer sharing.
> > >
> > > If I understand your problem right, the issue is that you're getting
> > > your buffers in minigbm via pl111, which means you want a render node
> > > just for buffer allocation? Then HWComposer will import those on the
> > > primary node for displaying.
> > 
> > Correct.
> > 
> > > I'm not at all familiar with how minigbm sits in Android, but on our
> > > platforms where the Display and GPU devices are different, we allocate
> > > via ion in Gralloc, and import those into both the GPU and Display.
> > > I think that should work on pl111 too - if you can allocate contiguous
> > > memory from ion (or dma-buf heaps) in minigbm, then you don't need the
> > > render node.
> > 
> > Those contiguous memory regions would not necessarily be compatible
> > with the pl111 device as far as I know -- according to [1], on some
> > devices, a designated memory region must be used for the framebuffer
> > and therefore the memory region allocated in CMA would not be
> > compatible. That being said, FVP doesn't appear to be one of those
> > devices, so in principle that might work for FVP (provided that the
> > user provides a sufficiently large cma=X kernel command line
> > argument), but not for those other devices.

Yeah I'd be surprised if FVP cares about anything other than it being
contiguous.

My understanding of how "most" Android devices implement this is to
have ion heaps representing whatever dedicated memory regions there
are. Gralloc can then pick the appropriate one based on the gralloc
usage flags. So allocations wouldn't go via the DRM driver.

It looks like the checks in pl111 can't support that usage model if
use_device_memory is set (though that wouldn't matter on FVP).

> 
> We have been doing that with mali-dp drivers for years. Because most of
> our dev environments are on FPGAs, we needed to use the local RAM on
> those boards so we've had to assign a memory region to the driver that
> in turn was using CMA. We've made heavy use of 'reserved-memory' and
> 'memory-region' nodes in the DT to link the two together, but things
> worked out quite well. Because the 'reserved-memory' sub-node was marked
> as 'compatible="shared-dma-pool"', gralloc and ION could be used to
> allocate memory from it.

This is a little imperfect because ion doesn't have a way to access
the `dev` pointer needed to allocate from device-attached CMA regions,
so some hackery is required.

I think dma-heaps would let you expose device-specific CMA regions.

Even if you did that and allocated from the right place, the check
in pl111 would reject any attempt to import it if it's set to
use_device_memory.

I don't know if there's a better way to tell if the memory is
compatible, but that check seems like a bit of a sledgehammer. It
looks like it not only forces the memory to be from the right place,
it also forces it to have been allocated via pl111.

On FVP though, I reckon any old CMA memory should be fine.

Cheers,
-Brian

> 
> Best regards,
> Liviu
> 
> > 
> > Peter
> > 
> > [1] https://www.kernel.org/doc/Documentation/devicetree/bindings/display/arm%2Cpl11x.txt
> 
> -- 
> ====================
> | I would like to |
> | fix the world,  |
> | but they're not |
> | giving me the   |
>  \ source code!  /
>   ---------------
>     ¯\_(ツ)_/¯
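
For reference, the pl111 import hook under discussion looks
approximately like this (paraphrased from
drivers/gpu/drm/pl111/pl111_drv.c, so treat the details as approximate
rather than authoritative):

static struct drm_gem_object *
pl111_gem_import_sg_table(struct drm_device *dev,
                          struct dma_buf_attachment *attach,
                          struct sg_table *sgt)
{
    struct pl111_drm_dev_private *priv = dev->dev_private;

    /* The "sledgehammer": once the device is wired to dedicated
     * framebuffer memory, refuse to import any external dma-buf
     * rather than checking where the buffer actually lives. */
    if (priv->use_device_memory)
        return ERR_PTR(-EINVAL);

    return drm_gem_cma_prime_import_sg_table(dev, attach, sgt);
}
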
Daniel Vetter April 30, 2020, 10:44 a.m. UTC | #7
On Thu, Apr 30, 2020 at 12:30 PM Brian Starkey <brian.starkey@arm.com> wrote:
>
> On Wed, Apr 29, 2020 at 06:31:01PM +0100, Liviu Dudau wrote:
> > On Wed, Apr 29, 2020 at 09:51:19AM -0700, Peter Collingbourne wrote:
> > > On Wed, Apr 29, 2020 at 9:17 AM Brian Starkey <brian.starkey@arm.com> wrote:
> > > >
> > > > Hi Peter,
> > > >
> > > > On Mon, Apr 27, 2020 at 01:05:13PM -0700, Peter Collingbourne wrote:
> > > > > Render nodes are not just useful for devices supporting GPU hardware
> > > > > acceleration. Even on devices that only support dumb frame buffers,
> > > > > they are useful in situations where composition (using software
> > > > > rasterization) and KMS are done in different processes with buffer
> > > > > sharing being used to send frame buffers between them. This is the
> > > > > situation on Android, where surfaceflinger is the compositor and the
> > > > > composer HAL uses KMS to display the buffers. Thus it is beneficial
> > > > > to expose render nodes on all devices that support buffer sharing.
> > > >
> > > > If I understand your problem right, the issue is that you're getting
> > > > your buffers in minigbm via pl111, which means you want a render node
> > > > just for buffer allocation? Then HWComposer will import those on the
> > > > primary node for displaying.
> > >
> > > Correct.
> > >
> > > > I'm not at all familiar with how minigbm sits in Android, but on our
> > > > platforms where the Display and GPU devices are different, we allocate
> > > > via ion in Gralloc, and import those into both the GPU and Display.
> > > > I think that should work on pl111 too - if you can allocate contiguous
> > > > memory from ion (or dma-buf heaps) in minigbm, then you don't need the
> > > > render node.
> > >
> > > Those contiguous memory regions would not necessarily be compatible
> > > with the pl111 device as far as I know -- according to [1], on some
> > > devices, a designated memory region must be used for the framebuffer
> > > and therefore the memory region allocated in CMA would not be
> > > compatible. That being said, FVP doesn't appear to be one of those
> > > devices, so in principle that might work for FVP (provided that the
> > > user provides a sufficiently large cma=X kernel command line
> > > argument), but not for those other devices.
>
> Yeah I'd be surprised if FVP cares about anything other than it being
> contiguous.
>
> My understanding of how "most" Android devices implement this is to
> have ion heaps representing whatever dedicated memory regions there
> are. Gralloc can then pick the appropriate one based on the gralloc
> usage flags. So allocations wouldn't go via the DRM driver.
>
> It looks like the checks in pl111 can't support that usage model if
> use_device_memory is set (though that wouldn't matter on FVP).
>
> >
> > We have been doing that with mali-dp drivers for years. Because most of
> > our dev environments are on FPGAs, we needed to use the local RAM on
> > those boards so we've had to assign a memory region to the driver that
> > in turn was using CMA. We've made heavy use of 'reserved-memory' and
> > 'memory-region' nodes in the DT to link the two together, but things
> > worked out quite well. Because the 'reserved-memory' sub-node was marked
> > as 'compatible="shared-dma-pool"', gralloc and ION could be used to
> > allocate memory from it.
>
> This is a little imperfect because ion doesn't have a way to access
> the `dev` pointer needed to allocate from device-attached CMA regions,
> so some hackery is required.
>
> I think dma-heaps would let you expose device-specific CMA regions.

The plan was that each device in sysfs which needs special dma memory
would have a pointer to the corresponding dma-heap. That way userspace
knows where to allocate. Just need some userspace to use these, and a
kernel patch to expose those links. I think it's de facto already there
through the dma pointers of struct device.
-Daniel

>
> Even if you did that and allocated from the right place, the check
> in pl111 would reject any attempt to import it if it's set to
> use_device_memory.
>
> I don't know if there's a better way to tell if the memory is
> compatible, but that check seems like a bit of a sledgehammer. It
> looks like it not only forces the memory to be from the right place,
> it also forces it to have been allocated via pl111.
>
> On FVP though, I reckon any old CMA memory should be fine.
>
> Cheers,
> -Brian
>
> >
> > Best regards,
> > Liviu
> >
> > >
> > > Peter
> > >
> > > [1] https://www.kernel.org/doc/Documentation/devicetree/bindings/display/arm%2Cpl11x.txt
> >
> > --
> > ====================
> > | I would like to |
> > | fix the world,  |
> > | but they're not |
> > | giving me the   |
> >  \ source code!  /
> >   ---------------
> >     ¯\_(ツ)_/¯
Daniel Vetter April 30, 2020, 10:44 a.m. UTC | #8
On Tue, Apr 28, 2020 at 2:46 PM Peter Collingbourne <pcc@google.com> wrote:
>
> Render nodes are not just useful for devices supporting GPU hardware
> acceleration. Even on devices that only support dumb frame buffers,
> they are useful in situations where composition (using software
> rasterization) and KMS are done in different processes with buffer
> sharing being used to send frame buffers between them. This is the
> situation on Android, where surfaceflinger is the compositor and the
> composer HAL uses KMS to display the buffers. Thus it is beneficial
> to expose render nodes on all devices that support buffer sharing.
>
> Because all drivers that currently support render nodes also support
> buffer sharing, the DRIVER_RENDER flag is no longer necessary to mark
> devices as supporting render nodes, so remove it and just rely on
> the presence of a prime_handle_to_fd function pointer to determine
> whether buffer sharing is supported.

The idea behind render nodes is that you can freely pass these to
unpriviledged users, and nothing bad happens. That's why we have gpu
reset code in drivers, proper pagetables, and also (in at least the
solid drivers) ban code so that repeat offenders from userspace who
constantly submit endless batch buffers and funny stuff like that
can't DOS the gpu.

Ofc in practice there's hw issues and fun stuff like that sometimes,
and driver bugs, and all that. But that's the aspiration.

Now many of these display-only drivers need contiguous buffers, and
there's not endless amounts of that around. So if you allow random
clients to allocate buffers, they can easily exhaust that, and not
just upset the render side of the gpu, but essentially make it
impossible for a compositor to allocate more framebuffers. I don't
think that's a good idea.

I know there's hw like vc4 which needs contiguous buffers for
everything, but that's kinda the places where aspiration falls a bit
short.

So from that pov I'm rather worried about handing out render rights
to everyone for these display-only buffers. It's not entirely
harmless.
-Daniel


>
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> ---
>  Documentation/gpu/drm-uapi.rst          | 9 +++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +-
>  drivers/gpu/drm/drm_drv.c               | 2 +-
>  drivers/gpu/drm/etnaviv/etnaviv_drv.c   | 2 +-
>  drivers/gpu/drm/exynos/exynos_drm_drv.c | 2 +-
>  drivers/gpu/drm/i915/i915_drv.c         | 2 +-
>  drivers/gpu/drm/lima/lima_drv.c         | 2 +-
>  drivers/gpu/drm/msm/msm_drv.c           | 1 -
>  drivers/gpu/drm/nouveau/nouveau_drm.c   | 2 +-
>  drivers/gpu/drm/omapdrm/omap_drv.c      | 2 +-
>  drivers/gpu/drm/panfrost/panfrost_drv.c | 2 +-
>  drivers/gpu/drm/radeon/radeon_drv.c     | 3 +--
>  drivers/gpu/drm/tegra/drm.c             | 2 +-
>  drivers/gpu/drm/v3d/v3d_drv.c           | 1 -
>  drivers/gpu/drm/vc4/vc4_drv.c           | 8 --------
>  drivers/gpu/drm/vgem/vgem_drv.c         | 2 +-
>  drivers/gpu/drm/virtio/virtgpu_drv.c    | 2 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c     | 2 +-
>  include/drm/drm_drv.h                   | 7 -------
>  19 files changed, 19 insertions(+), 36 deletions(-)
>
> diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst
> index 56fec6ed1ad8..2769714ae75a 100644
> --- a/Documentation/gpu/drm-uapi.rst
> +++ b/Documentation/gpu/drm-uapi.rst
> @@ -129,10 +129,11 @@ authenticate to a DRM-Master prior to getting GPU access. To avoid this
>  step and to grant clients GPU access without authenticating, render
>  nodes were introduced. Render nodes solely serve render clients, that
>  is, no modesetting or privileged ioctls can be issued on render nodes.
> -Only non-global rendering commands are allowed. If a driver supports
> -render nodes, it must advertise it via the DRIVER_RENDER DRM driver
> -capability. If not supported, the primary node must be used for render
> -clients together with the legacy drmAuth authentication procedure.
> +Only non-global rendering commands are allowed. Drivers that support
> +:ref:`PRIME buffer sharing <prime_buffer_sharing>` automatically
> +support render nodes. If a driver does not support render nodes,
> +the primary node must be used for render clients together with the
> +legacy drmAuth authentication procedure.
>
>  If a driver advertises render node support, DRM core will create a
>  separate render node called renderD<num>. There will be one render node
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 8ea86ffdea0d..46460620bc37 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -1426,7 +1426,7 @@ static struct drm_driver kms_driver = {
>         .driver_features =
>             DRIVER_ATOMIC |
>             DRIVER_GEM |
> -           DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
> +           DRIVER_MODESET | DRIVER_SYNCOBJ |
>             DRIVER_SYNCOBJ_TIMELINE,
>         .open = amdgpu_driver_open_kms,
>         .postclose = amdgpu_driver_postclose_kms,
> diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
> index 7b1a628d1f6e..8861a30920e5 100644
> --- a/drivers/gpu/drm/drm_drv.c
> +++ b/drivers/gpu/drm/drm_drv.c
> @@ -651,7 +651,7 @@ int drm_dev_init(struct drm_device *dev,
>                 goto err_free;
>         }
>
> -       if (drm_core_check_feature(dev, DRIVER_RENDER)) {
> +       if (driver->prime_handle_to_fd) {
>                 ret = drm_minor_alloc(dev, DRM_MINOR_RENDER);
>                 if (ret)
>                         goto err_minors;
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
> index a8685b2e1803..abfb143334ac 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
> @@ -497,7 +497,7 @@ static const struct file_operations fops = {
>  };
>
>  static struct drm_driver etnaviv_drm_driver = {
> -       .driver_features    = DRIVER_GEM | DRIVER_RENDER,
> +       .driver_features    = DRIVER_GEM,
>         .open               = etnaviv_open,
>         .postclose           = etnaviv_postclose,
>         .gem_free_object_unlocked = etnaviv_gem_free_object,
> diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
> index 57defeb44522..834eb5713fe4 100644
> --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
> +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
> @@ -121,7 +121,7 @@ static const struct file_operations exynos_drm_driver_fops = {
>
>  static struct drm_driver exynos_drm_driver = {
>         .driver_features        = DRIVER_MODESET | DRIVER_GEM
> -                                 | DRIVER_ATOMIC | DRIVER_RENDER,
> +                                 | DRIVER_ATOMIC,
>         .open                   = exynos_drm_open,
>         .lastclose              = drm_fb_helper_lastclose,
>         .postclose              = exynos_drm_postclose,
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 81a4621853db..b028a32fcac5 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1834,7 +1834,7 @@ static struct drm_driver driver = {
>          */
>         .driver_features =
>             DRIVER_GEM |
> -           DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
> +           DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
>         .release = i915_driver_release,
>         .open = i915_driver_open,
>         .lastclose = i915_driver_lastclose,
> diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
> index 2daac64d8955..bd8b6ad25ac0 100644
> --- a/drivers/gpu/drm/lima/lima_drv.c
> +++ b/drivers/gpu/drm/lima/lima_drv.c
> @@ -252,7 +252,7 @@ DEFINE_DRM_GEM_FOPS(lima_drm_driver_fops);
>   */
>
>  static struct drm_driver lima_drm_driver = {
> -       .driver_features    = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
> +       .driver_features    = DRIVER_GEM | DRIVER_SYNCOBJ,
>         .open               = lima_drm_driver_open,
>         .postclose          = lima_drm_driver_postclose,
>         .ioctls             = lima_drm_driver_ioctls,
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index 29295dee2a2e..061e62d4b691 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -1000,7 +1000,6 @@ static const struct file_operations fops = {
>
>  static struct drm_driver msm_driver = {
>         .driver_features    = DRIVER_GEM |
> -                               DRIVER_RENDER |
>                                 DRIVER_ATOMIC |
>                                 DRIVER_MODESET,
>         .open               = msm_open,
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
> index ca4087f5a15b..3ba8c9789292 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> @@ -1169,7 +1169,7 @@ nouveau_driver_fops = {
>  static struct drm_driver
>  driver_stub = {
>         .driver_features =
> -               DRIVER_GEM | DRIVER_MODESET | DRIVER_RENDER
> +               DRIVER_GEM | DRIVER_MODESET
>  #if defined(CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT)
>                 | DRIVER_KMS_LEGACY_CONTEXT
>  #endif
> diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
> index cdafd7ef1c32..fe25b352a755 100644
> --- a/drivers/gpu/drm/omapdrm/omap_drv.c
> +++ b/drivers/gpu/drm/omapdrm/omap_drv.c
> @@ -558,7 +558,7 @@ static const struct file_operations omapdriver_fops = {
>
>  static struct drm_driver omap_drm_driver = {
>         .driver_features = DRIVER_MODESET | DRIVER_GEM  |
> -               DRIVER_ATOMIC | DRIVER_RENDER,
> +               DRIVER_ATOMIC,
>         .open = dev_open,
>         .lastclose = drm_fb_helper_lastclose,
>  #ifdef CONFIG_DEBUG_FS
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index 882fecc33fdb..058765d5d5d5 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -550,7 +550,7 @@ DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops);
>   * - 1.1 - adds HEAP and NOEXEC flags for CREATE_BO
>   */
>  static struct drm_driver panfrost_drm_driver = {
> -       .driver_features        = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
> +       .driver_features        = DRIVER_GEM | DRIVER_SYNCOBJ,
>         .open                   = panfrost_open,
>         .postclose              = panfrost_postclose,
>         .ioctls                 = panfrost_drm_driver_ioctls,
> diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
> index 59f8186a2415..9457b2f8f81e 100644
> --- a/drivers/gpu/drm/radeon/radeon_drv.c
> +++ b/drivers/gpu/drm/radeon/radeon_drv.c
> @@ -600,8 +600,7 @@ static const struct file_operations radeon_driver_kms_fops = {
>  };
>
>  static struct drm_driver kms_driver = {
> -       .driver_features =
> -           DRIVER_GEM | DRIVER_RENDER,
> +       .driver_features = DRIVER_GEM,
>         .load = radeon_driver_load_kms,
>         .open = radeon_driver_open_kms,
>         .postclose = radeon_driver_postclose_kms,
> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
> index bd268028fb3d..55190c582946 100644
> --- a/drivers/gpu/drm/tegra/drm.c
> +++ b/drivers/gpu/drm/tegra/drm.c
> @@ -849,7 +849,7 @@ static int tegra_debugfs_init(struct drm_minor *minor)
>
>  static struct drm_driver tegra_drm_driver = {
>         .driver_features = DRIVER_MODESET | DRIVER_GEM |
> -                          DRIVER_ATOMIC | DRIVER_RENDER,
> +                          DRIVER_ATOMIC,
>         .open = tegra_drm_open,
>         .postclose = tegra_drm_postclose,
>         .lastclose = drm_fb_helper_lastclose,
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
> index eaa8e9682373..96db702fd648 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.c
> +++ b/drivers/gpu/drm/v3d/v3d_drv.c
> @@ -195,7 +195,6 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
>
>  static struct drm_driver v3d_drm_driver = {
>         .driver_features = (DRIVER_GEM |
> -                           DRIVER_RENDER |
>                             DRIVER_SYNCOBJ),
>
>         .open = v3d_open,
> diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
> index 76f93b662766..bf143d1e0d2e 100644
> --- a/drivers/gpu/drm/vc4/vc4_drv.c
> +++ b/drivers/gpu/drm/vc4/vc4_drv.c
> @@ -181,7 +181,6 @@ static struct drm_driver vc4_drm_driver = {
>         .driver_features = (DRIVER_MODESET |
>                             DRIVER_ATOMIC |
>                             DRIVER_GEM |
> -                           DRIVER_RENDER |
>                             DRIVER_SYNCOBJ),
>         .open = vc4_open,
>         .postclose = vc4_close,
> @@ -251,7 +250,6 @@ static int vc4_drm_bind(struct device *dev)
>         struct platform_device *pdev = to_platform_device(dev);
>         struct drm_device *drm;
>         struct vc4_dev *vc4;
> -       struct device_node *node;
>         int ret = 0;
>
>         dev->coherent_dma_mask = DMA_BIT_MASK(32);
> @@ -260,12 +258,6 @@ static int vc4_drm_bind(struct device *dev)
>         if (!vc4)
>                 return -ENOMEM;
>
> -       /* If VC4 V3D is missing, don't advertise render nodes. */
> -       node = of_find_matching_node_and_match(NULL, vc4_v3d_dt_match, NULL);
> -       if (!node || !of_device_is_available(node))
> -               vc4_drm_driver.driver_features &= ~DRIVER_RENDER;
> -       of_node_put(node);
> -
>         drm = drm_dev_alloc(&vc4_drm_driver, dev);
>         if (IS_ERR(drm))
>                 return PTR_ERR(drm);
> diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
> index 909eba43664a..abc7345a2e96 100644
> --- a/drivers/gpu/drm/vgem/vgem_drv.c
> +++ b/drivers/gpu/drm/vgem/vgem_drv.c
> @@ -437,7 +437,7 @@ static void vgem_release(struct drm_device *dev)
>  }
>
>  static struct drm_driver vgem_driver = {
> -       .driver_features                = DRIVER_GEM | DRIVER_RENDER,
> +       .driver_features                = DRIVER_GEM,
>         .release                        = vgem_release,
>         .open                           = vgem_open,
>         .postclose                      = vgem_postclose,
> diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
> index ab4bed78e656..22195e008f0d 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_drv.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
> @@ -189,7 +189,7 @@ MODULE_AUTHOR("Alon Levy");
>  DEFINE_DRM_GEM_FOPS(virtio_gpu_driver_fops);
>
>  static struct drm_driver driver = {
> -       .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC,
> +       .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC,
>         .open = virtio_gpu_driver_open,
>         .postclose = virtio_gpu_driver_postclose,
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index c2247a893ed4..415c3f8ea907 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -1435,7 +1435,7 @@ static const struct file_operations vmwgfx_driver_fops = {
>
>  static struct drm_driver driver = {
>         .driver_features =
> -       DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC,
> +       DRIVER_MODESET | DRIVER_ATOMIC,
>         .ioctls = vmw_ioctls,
>         .num_ioctls = ARRAY_SIZE(vmw_ioctls),
>         .master_set = vmw_master_set,
> diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
> index 97109df5beac..f0277d3f89fe 100644
> --- a/include/drm/drm_drv.h
> +++ b/include/drm/drm_drv.h
> @@ -61,13 +61,6 @@ enum drm_driver_feature {
>          * Driver supports mode setting interfaces (KMS).
>          */
>         DRIVER_MODESET                  = BIT(1),
> -       /**
> -        * @DRIVER_RENDER:
> -        *
> -        * Driver supports dedicated render nodes. See also the :ref:`section on
> -        * render nodes <drm_render_node>` for details.
> -        */
> -       DRIVER_RENDER                   = BIT(3),
>         /**
>          * @DRIVER_ATOMIC:
>          *
> --
> 2.26.2.303.gf8c07b1a785-goog
>



--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
Eric Anholt April 30, 2020, 7:57 p.m. UTC | #9
On Thu, Apr 30, 2020 at 3:44 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Tue, Apr 28, 2020 at 2:46 PM Peter Collingbourne <pcc@google.com> wrote:
> >
> > Render nodes are not just useful for devices supporting GPU hardware
> > acceleration. Even on devices that only support dumb frame buffers,
> > they are useful in situations where composition (using software
> > rasterization) and KMS are done in different processes with buffer
> > sharing being used to send frame buffers between them. This is the
> > situation on Android, where surfaceflinger is the compositor and the
> > composer HAL uses KMS to display the buffers. Thus it is beneficial
> > to expose render nodes on all devices that support buffer sharing.
> >
> > Because all drivers that currently support render nodes also support
> > buffer sharing, the DRIVER_RENDER flag is no longer necessary to mark
> > devices as supporting render nodes, so remove it and just rely on
> > the presence of a prime_handle_to_fd function pointer to determine
> > whether buffer sharing is supported.
>
> The idea behind render nodes is that you can freely pass these to
> unpriviledged users, and nothing bad happens. That's why we have gpu
> reset code in drivers, proper pagetables, and also (in at least the
> solid drivers) ban code so that repeat offenders from userspace who
> constantly submit endless batch buffers and funny stuff like that
> can't DOS the gpu.
>
> Ofc in practice there's hw issues and fun stuff like that sometimes,
> and driver bugs, and all that. But that's the aspiration.
>
> Now many of these display-only drivers need contiguous buffers, and
> there's not endless amounts of that around. So if you allow random
> clients to allocate buffers, they can easily exhaust that, and not
> just upset the render side of the gpu, but essentially make it
> impossible for a compositor to allocate more framebuffers. I don't
> think that's a good idea.
>
> I know there's hw like vc4 which needs contiguous buffers for
> everything, but that's kinda the places where aspiration falls a bit
> short.
>
> So from that pov I'm rather worried about handing out render rights
> to everyone for these display-only buffers. It's not entirely
> harmless.

This doesn't feel like a contiguous-mem-specific concern to me.  We
don't have resource limits on renderer GPU nodes today, you can
allocate memory there to fill up and DOS the system, and unless
something changed since last time I was looking, we don't even tell
the OOM killer about our allocations so they can kill the right app!
(my compositor always got picked, in my experience)

And, keep in mind what we're fixing at a system level here: the
current workaround is you either get your compositor to hand you a GPU
fd, at which point you can DOS the system, or you open the master node
yourself and try to drop master before the compositor comes up, then
DOS the system. "But there are access controls for the compositor or
the card node!" you say: yes, but there are for render nodes too.  I
know my distro doesn't default to open access to /dev/dri/render*.
Daniel Vetter May 4, 2020, 11:51 a.m. UTC | #10
On Thu, Apr 30, 2020 at 12:57:40PM -0700, Eric Anholt wrote:
> On Thu, Apr 30, 2020 at 3:44 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> >
> > On Tue, Apr 28, 2020 at 2:46 PM Peter Collingbourne <pcc@google.com> wrote:
> > >
> > > Render nodes are not just useful for devices supporting GPU hardware
> > > acceleration. Even on devices that only support dumb frame buffers,
> > > they are useful in situations where composition (using software
> > > rasterization) and KMS are done in different processes with buffer
> > > sharing being used to send frame buffers between them. This is the
> > > situation on Android, where surfaceflinger is the compositor and the
> > > composer HAL uses KMS to display the buffers. Thus it is beneficial
> > > to expose render nodes on all devices that support buffer sharing.
> > >
> > > Because all drivers that currently support render nodes also support
> > > buffer sharing, the DRIVER_RENDER flag is no longer necessary to mark
> > > devices as supporting render nodes, so remove it and just rely on
> > > the presence of a prime_handle_to_fd function pointer to determine
> > > whether buffer sharing is supported.
> >
> > The idea behind render nodes is that you can freely pass these to
> > unpriviledged users, and nothing bad happens. That's why we have gpu
> > reset code in drivers, proper pagetables, and also (in at least the
> > solid drivers) ban code so that repeat offenders from userspace who
> > constantly submit endless batch buffers and funny stuff like that
> > can't DOS the gpu.
> >
> > Ofc in practice there's hw issues and fun stuff like that sometimes,
> > and driver bugs, and all that. But that's the aspiration.
> >
> > Now many of these display-only drivers need contiguous buffers, and
> > there's not endless amounts of that around. So if you allow random
> > clients to allocate buffers, they can easily exhaust that, and not
> > just upset the render side of the gpu, but essentially make it
> > impossible for a compositor to allocate more framebuffers. I don't
> > think that's a good idea.
> >
> > I know there's hw like vc4 which needs contiguous buffers for
> > everything, but that's kinda the places where aspiration falls a bit
> > short.
> >
> > So from that pov I'm rather worried about handing out render rights
> > to everyone for these display-only buffers. It's not entirely
> > harmless.
> 
> This doesn't feel like a contiguous-mem-specific concern to me.  We
> don't have resource limits on renderer GPU nodes today, you can
> allocate memory there to fill up and DOS the system, and unless
> something changed since last time I was looking, we don't even tell
> the OOM killer about our allocations so they can kill the right app!
> (my compositor always got picked, in my experience)
> 
> And, keep in mind what we're fixing at a system level here: the
> current workaround is you either get your compositor to hand you a GPU
> fd, at which point you can DOS the system, or you open the master node
> yourself and try to drop master before the compositor comes up, then
> DOS the system. "But there are access controls for the compositor or
> the card node!" you say: yes, but there are for render nodes too.  I
> know my distro doesn't default to open access to /dev/dri/render*

Hm indeed, I thought we've limited dumb buffer creation to master only on
the primary device.

But that kinda leaves me wondering, how do you allocate buffers on these
then? Exposing dumb on render nodes gets the Dave "it's not for rendering"
Airlie nack, at least for drivers which do have real userspace sitting on
the same drm driver. And exposing dumb only for these display-only devices
feels somewhat silly and inconsistent too.

So not sure.
-Daniel

Patch

diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst
index 56fec6ed1ad8..2769714ae75a 100644
--- a/Documentation/gpu/drm-uapi.rst
+++ b/Documentation/gpu/drm-uapi.rst
@@ -129,10 +129,11 @@  authenticate to a DRM-Master prior to getting GPU access. To avoid this
 step and to grant clients GPU access without authenticating, render
 nodes were introduced. Render nodes solely serve render clients, that
 is, no modesetting or privileged ioctls can be issued on render nodes.
-Only non-global rendering commands are allowed. If a driver supports
-render nodes, it must advertise it via the DRIVER_RENDER DRM driver
-capability. If not supported, the primary node must be used for render
-clients together with the legacy drmAuth authentication procedure.
+Only non-global rendering commands are allowed. Drivers that support
+:ref:`PRIME buffer sharing <prime_buffer_sharing>` automatically
+support render nodes. If a driver does not support render nodes,
+the primary node must be used for render clients together with the
+legacy drmAuth authentication procedure.
 
 If a driver advertises render node support, DRM core will create a
 separate render node called renderD<num>. There will be one render node
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8ea86ffdea0d..46460620bc37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1426,7 +1426,7 @@  static struct drm_driver kms_driver = {
 	.driver_features =
 	    DRIVER_ATOMIC |
 	    DRIVER_GEM |
-	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ |
+	    DRIVER_MODESET | DRIVER_SYNCOBJ |
 	    DRIVER_SYNCOBJ_TIMELINE,
 	.open = amdgpu_driver_open_kms,
 	.postclose = amdgpu_driver_postclose_kms,
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 7b1a628d1f6e..8861a30920e5 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -651,7 +651,7 @@  int drm_dev_init(struct drm_device *dev,
 		goto err_free;
 	}
 
-	if (drm_core_check_feature(dev, DRIVER_RENDER)) {
+	if (driver->prime_handle_to_fd) {
 		ret = drm_minor_alloc(dev, DRM_MINOR_RENDER);
 		if (ret)
 			goto err_minors;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index a8685b2e1803..abfb143334ac 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -497,7 +497,7 @@  static const struct file_operations fops = {
 };
 
 static struct drm_driver etnaviv_drm_driver = {
-	.driver_features    = DRIVER_GEM | DRIVER_RENDER,
+	.driver_features    = DRIVER_GEM,
 	.open               = etnaviv_open,
 	.postclose           = etnaviv_postclose,
 	.gem_free_object_unlocked = etnaviv_gem_free_object,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 57defeb44522..834eb5713fe4 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -121,7 +121,7 @@  static const struct file_operations exynos_drm_driver_fops = {
 
 static struct drm_driver exynos_drm_driver = {
 	.driver_features	= DRIVER_MODESET | DRIVER_GEM
-				  | DRIVER_ATOMIC | DRIVER_RENDER,
+				  | DRIVER_ATOMIC,
 	.open			= exynos_drm_open,
 	.lastclose		= drm_fb_helper_lastclose,
 	.postclose		= exynos_drm_postclose,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 81a4621853db..b028a32fcac5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1834,7 +1834,7 @@  static struct drm_driver driver = {
 	 */
 	.driver_features =
 	    DRIVER_GEM |
-	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
+	    DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
 	.release = i915_driver_release,
 	.open = i915_driver_open,
 	.lastclose = i915_driver_lastclose,
diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
index 2daac64d8955..bd8b6ad25ac0 100644
--- a/drivers/gpu/drm/lima/lima_drv.c
+++ b/drivers/gpu/drm/lima/lima_drv.c
@@ -252,7 +252,7 @@  DEFINE_DRM_GEM_FOPS(lima_drm_driver_fops);
  */
 
 static struct drm_driver lima_drm_driver = {
-	.driver_features    = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
+	.driver_features    = DRIVER_GEM | DRIVER_SYNCOBJ,
 	.open               = lima_drm_driver_open,
 	.postclose          = lima_drm_driver_postclose,
 	.ioctls             = lima_drm_driver_ioctls,
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 29295dee2a2e..061e62d4b691 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -1000,7 +1000,6 @@  static const struct file_operations fops = {
 
 static struct drm_driver msm_driver = {
 	.driver_features    = DRIVER_GEM |
-				DRIVER_RENDER |
 				DRIVER_ATOMIC |
 				DRIVER_MODESET,
 	.open               = msm_open,
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index ca4087f5a15b..3ba8c9789292 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -1169,7 +1169,7 @@  nouveau_driver_fops = {
 static struct drm_driver
 driver_stub = {
 	.driver_features =
-		DRIVER_GEM | DRIVER_MODESET | DRIVER_RENDER
+		DRIVER_GEM | DRIVER_MODESET
 #if defined(CONFIG_NOUVEAU_LEGACY_CTX_SUPPORT)
 		| DRIVER_KMS_LEGACY_CONTEXT
 #endif
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
index cdafd7ef1c32..fe25b352a755 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.c
+++ b/drivers/gpu/drm/omapdrm/omap_drv.c
@@ -558,7 +558,7 @@  static const struct file_operations omapdriver_fops = {
 
 static struct drm_driver omap_drm_driver = {
 	.driver_features = DRIVER_MODESET | DRIVER_GEM  |
-		DRIVER_ATOMIC | DRIVER_RENDER,
+		DRIVER_ATOMIC,
 	.open = dev_open,
 	.lastclose = drm_fb_helper_lastclose,
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 882fecc33fdb..058765d5d5d5 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -550,7 +550,7 @@  DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops);
  * - 1.1 - adds HEAP and NOEXEC flags for CREATE_BO
  */
 static struct drm_driver panfrost_drm_driver = {
-	.driver_features	= DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
+	.driver_features	= DRIVER_GEM | DRIVER_SYNCOBJ,
 	.open			= panfrost_open,
 	.postclose		= panfrost_postclose,
 	.ioctls			= panfrost_drm_driver_ioctls,
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 59f8186a2415..9457b2f8f81e 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -600,8 +600,7 @@  static const struct file_operations radeon_driver_kms_fops = {
 };
 
 static struct drm_driver kms_driver = {
-	.driver_features =
-	    DRIVER_GEM | DRIVER_RENDER,
+	.driver_features = DRIVER_GEM,
 	.load = radeon_driver_load_kms,
 	.open = radeon_driver_open_kms,
 	.postclose = radeon_driver_postclose_kms,
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index bd268028fb3d..55190c582946 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -849,7 +849,7 @@  static int tegra_debugfs_init(struct drm_minor *minor)
 
 static struct drm_driver tegra_drm_driver = {
 	.driver_features = DRIVER_MODESET | DRIVER_GEM |
-			   DRIVER_ATOMIC | DRIVER_RENDER,
+			   DRIVER_ATOMIC,
 	.open = tegra_drm_open,
 	.postclose = tegra_drm_postclose,
 	.lastclose = drm_fb_helper_lastclose,
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index eaa8e9682373..96db702fd648 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -195,7 +195,6 @@  static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
 
 static struct drm_driver v3d_drm_driver = {
 	.driver_features = (DRIVER_GEM |
-			    DRIVER_RENDER |
 			    DRIVER_SYNCOBJ),
 
 	.open = v3d_open,
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 76f93b662766..bf143d1e0d2e 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -181,7 +181,6 @@  static struct drm_driver vc4_drm_driver = {
 	.driver_features = (DRIVER_MODESET |
 			    DRIVER_ATOMIC |
 			    DRIVER_GEM |
-			    DRIVER_RENDER |
 			    DRIVER_SYNCOBJ),
 	.open = vc4_open,
 	.postclose = vc4_close,
@@ -251,7 +250,6 @@  static int vc4_drm_bind(struct device *dev)
 	struct platform_device *pdev = to_platform_device(dev);
 	struct drm_device *drm;
 	struct vc4_dev *vc4;
-	struct device_node *node;
 	int ret = 0;
 
 	dev->coherent_dma_mask = DMA_BIT_MASK(32);
@@ -260,12 +258,6 @@  static int vc4_drm_bind(struct device *dev)
 	if (!vc4)
 		return -ENOMEM;
 
-	/* If VC4 V3D is missing, don't advertise render nodes. */
-	node = of_find_matching_node_and_match(NULL, vc4_v3d_dt_match, NULL);
-	if (!node || !of_device_is_available(node))
-		vc4_drm_driver.driver_features &= ~DRIVER_RENDER;
-	of_node_put(node);
-
 	drm = drm_dev_alloc(&vc4_drm_driver, dev);
 	if (IS_ERR(drm))
 		return PTR_ERR(drm);
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 909eba43664a..abc7345a2e96 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -437,7 +437,7 @@  static void vgem_release(struct drm_device *dev)
 }
 
 static struct drm_driver vgem_driver = {
-	.driver_features		= DRIVER_GEM | DRIVER_RENDER,
+	.driver_features		= DRIVER_GEM,
 	.release			= vgem_release,
 	.open				= vgem_open,
 	.postclose			= vgem_postclose,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index ab4bed78e656..22195e008f0d 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -189,7 +189,7 @@  MODULE_AUTHOR("Alon Levy");
 DEFINE_DRM_GEM_FOPS(virtio_gpu_driver_fops);
 
 static struct drm_driver driver = {
-	.driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC,
+	.driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC,
 	.open = virtio_gpu_driver_open,
 	.postclose = virtio_gpu_driver_postclose,
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index c2247a893ed4..415c3f8ea907 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1435,7 +1435,7 @@  static const struct file_operations vmwgfx_driver_fops = {
 
 static struct drm_driver driver = {
 	.driver_features =
-	DRIVER_MODESET | DRIVER_RENDER | DRIVER_ATOMIC,
+	DRIVER_MODESET | DRIVER_ATOMIC,
 	.ioctls = vmw_ioctls,
 	.num_ioctls = ARRAY_SIZE(vmw_ioctls),
 	.master_set = vmw_master_set,
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 97109df5beac..f0277d3f89fe 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -61,13 +61,6 @@  enum drm_driver_feature {
 	 * Driver supports mode setting interfaces (KMS).
 	 */
 	DRIVER_MODESET			= BIT(1),
-	/**
-	 * @DRIVER_RENDER:
-	 *
-	 * Driver supports dedicated render nodes. See also the :ref:`section on
-	 * render nodes <drm_render_node>` for details.
-	 */
-	DRIVER_RENDER			= BIT(3),
 	/**
 	 * @DRIVER_ATOMIC:
 	 *