diff mbox series

[v2,3/3] drm/nouveau: Add drm_panic support for nv50+

Message ID 20240906100434.1171093-4-jfalempe@redhat.com (mailing list archive)
State New, archived
Headers show
Series drm/nouveau: Add drm_panic support for nv50+ | expand

Commit Message

Jocelyn Falempe Sept. 6, 2024, 10:03 a.m. UTC
Add drm_panic support, for nv50+ cards.
It's enough to get the panic screen while running Gnome/Wayland on a
GTX 1650.
It doesn't support multi-plane or compressed format.
Support for other formats and older cards will come later.
Tiling is only tested on GTX1650, and might be wrong for other cards.

Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com>
---
v2:
 * Rebase and drop already merged patches.
 * Rework the tiling algorithm, using "swizzle" to compute the offset
   inside the block.

 drivers/gpu/drm/nouveau/dispnv50/wndw.c | 107 +++++++++++++++++++++++-
 1 file changed, 105 insertions(+), 2 deletions(-)

Comments

Ilia Mirkin Sept. 6, 2024, 12:53 p.m. UTC | #1
On Fri, Sep 6, 2024 at 6:05 AM Jocelyn Falempe <jfalempe@redhat.com> wrote:

> Add drm_panic support, for nv50+ cards.
> It's enough to get the panic screen while running Gnome/Wayland on a
> GTX 1650.
> It doesn't support multi-plane or compressed format.
> Support for other formats and older cards will come later.
> Tiling is only tested on GTX1650, and might be wrong for other cards.
>

I'm moderately sure that nv50 and nvc0 tile differently (the general algo
is the same, but height is different):

https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces

That said, I don't know that nv50 supports scanout of tiled surfaces (nor
was I aware that nvc0+ did, perhaps it's a recent feature, or perhaps I'm
just forgetful).

Cheers,

  -ilia


>
> Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com>
> ---
> v2:
>  * Rebase and drop already merged patches.
>  * Rework the tiling algorithm, using "swizzle" to compute the offset
>    inside the block.
>
>  drivers/gpu/drm/nouveau/dispnv50/wndw.c | 107 +++++++++++++++++++++++-
>  1 file changed, 105 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
> b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
> index 7a2cceaee6e9..50ecf6f12b81 100644
> --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
> +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
> @@ -30,11 +30,16 @@
>  #include <nvhw/class/cl507e.h>
>  #include <nvhw/class/clc37e.h>
>
> +#include <linux/iosys-map.h>
> +
>  #include <drm/drm_atomic.h>
>  #include <drm/drm_atomic_helper.h>
>  #include <drm/drm_blend.h>
> -#include <drm/drm_gem_atomic_helper.h>
>  #include <drm/drm_fourcc.h>
> +#include <drm/drm_framebuffer.h>
> +#include <drm/drm_gem_atomic_helper.h>
> +#include <drm/drm_panic.h>
> +#include <drm/ttm/ttm_bo.h>
>
>  #include "nouveau_bo.h"
>  #include "nouveau_gem.h"
> @@ -577,6 +582,93 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct
> drm_plane_state *state)
>         return 0;
>  }
>
> +#define NV_TILE_BLK_BASE_HEIGHT 8      /* In pixel */
> +#define NV_TILE_GOB_SIZE 64    /* In bytes */
> +#define NV_TILE_BLK_WIDTH (NV_TILE_GOB_SIZE / 4) /* For 32 bits pixel */
> +
> +/* get the offset in bytes inside the framebuffer, after taking tiling
> into account */
> +static unsigned int nv50_get_tiled_offset(struct drm_scanout_buffer *sb,
> unsigned int blk_h,
> +                                         unsigned int x, unsigned int y)
> +{
> +       u32 blk_x, blk_y, blk_sz, blk_off, pitch;
> +       u32 swizzle;
> +
> +       blk_sz = NV_TILE_GOB_SIZE * blk_h;
> +       pitch = DIV_ROUND_UP(sb->width, NV_TILE_BLK_WIDTH);
> +
> +       /* block coordinate */
> +       blk_x = x / NV_TILE_BLK_WIDTH;
> +       blk_y = y / blk_h;
> +
> +       blk_off = ((blk_y * pitch) + blk_x) * blk_sz;
> +
> +       y = y % blk_h;
> +
> +       /* Inside the block, use the fast address swizzle to compute the
> offset
> +        * For nvidia blocklinear, bit order is yn..y3 x3 y2 x2 y1 y0 x1 x0
> +        */
> +       swizzle = (x & 3) | (y & 3) << 2 | (x & 4) << 2 | (y & 4) << 3;
> +       swizzle |= (x & 8) << 3 | (y >> 3) << 7;
> +
> +       return blk_off + swizzle * 4;
> +}
> +
> +static void nv50_set_pixel(struct drm_scanout_buffer *sb, unsigned int x,
> unsigned int y, u32 color)
> +{
> +       struct drm_framebuffer *fb = sb->private;
> +       unsigned int off;
> +       /* According to DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D
> documentation,
> +        * the last 4 bits of the modifier is log2(blk_height /
> NV_TILE_BLK_BASE_HEIGHT)
> +        */
> +       unsigned int blk_h = NV_TILE_BLK_BASE_HEIGHT * (1 << (fb->modifier
> & 0xf));
> +
> +       off = nv50_get_tiled_offset(sb, blk_h, x, y);
> +       iosys_map_wr(&sb->map[0], off, u32, color);
> +}
> +
> +static int
> +nv50_wndw_get_scanout_buffer(struct drm_plane *plane, struct
> drm_scanout_buffer *sb)
> +{
> +       struct drm_framebuffer *fb;
> +       struct nouveau_bo *nvbo;
> +
> +       if (!plane->state || !plane->state->fb)
> +               return -EINVAL;
> +
> +       fb = plane->state->fb;
> +       nvbo = nouveau_gem_object(fb->obj[0]);
> +
> +       /* Don't support compressed format, or multiplane yet. */
> +       if (nvbo->comp || fb->format->num_planes != 1)
> +               return -EOPNOTSUPP;
> +
> +       if (nouveau_bo_map(nvbo)) {
> +               pr_warn("nouveau bo map failed, panic won't be
> displayed\n");
> +               return -ENOMEM;
> +       }
> +
> +       if (nvbo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
> +               iosys_map_set_vaddr_iomem(&sb->map[0], nvbo->kmap.virtual);
> +       else
> +               iosys_map_set_vaddr(&sb->map[0], nvbo->kmap.virtual);
> +
> +       sb->height = fb->height;
> +       sb->width = fb->width;
> +       sb->pitch[0] = fb->pitches[0];
> +       sb->format = fb->format;
> +
> +       /* If tiling is enabled, use the set_pixel() to display correctly.
> +        * Only handle 32bits format for now.
> +        */
> +       if (fb->modifier & 0xf) {
> +               if (fb->format->cpp[0] != 4)
> +                       return -EOPNOTSUPP;
> +               sb->private = (void *) fb;
> +               sb->set_pixel = nv50_set_pixel;
> +       }
> +       return 0;
> +}
> +
>  static const struct drm_plane_helper_funcs
>  nv50_wndw_helper = {
>         .prepare_fb = nv50_wndw_prepare_fb,
> @@ -584,6 +676,14 @@ nv50_wndw_helper = {
>         .atomic_check = nv50_wndw_atomic_check,
>  };
>
> +static const struct drm_plane_helper_funcs
> +nv50_wndw_primary_helper = {
> +       .prepare_fb = nv50_wndw_prepare_fb,
> +       .cleanup_fb = nv50_wndw_cleanup_fb,
> +       .atomic_check = nv50_wndw_atomic_check,
> +       .get_scanout_buffer = nv50_wndw_get_scanout_buffer,
> +};
> +
>  static void
>  nv50_wndw_atomic_destroy_state(struct drm_plane *plane,
>                                struct drm_plane_state *state)
> @@ -732,7 +832,10 @@ nv50_wndw_new_(const struct nv50_wndw_func *func,
> struct drm_device *dev,
>                 return ret;
>         }
>
> -       drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper);
> +       if (type == DRM_PLANE_TYPE_PRIMARY)
> +               drm_plane_helper_add(&wndw->plane,
> &nv50_wndw_primary_helper);
> +       else
> +               drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper);
>
>         if (wndw->func->ilut) {
>                 ret = nv50_lut_init(disp, mmu, &wndw->ilut);
> --
> 2.46.0
>
>
Jocelyn Falempe Sept. 6, 2024, 1:10 p.m. UTC | #2
On 06/09/2024 14:53, Ilia Mirkin wrote:
> On Fri, Sep 6, 2024 at 6:05 AM Jocelyn Falempe <jfalempe@redhat.com 
> <mailto:jfalempe@redhat.com>> wrote:
> 
>     Add drm_panic support, for nv50+ cards.
>     It's enough to get the panic screen while running Gnome/Wayland on a
>     GTX 1650.
>     It doesn't support multi-plane or compressed format.
>     Support for other formats and older cards will come later.
>     Tiling is only tested on GTX1650, and might be wrong for other cards.
> 
> 
> I'm moderately sure that nv50 and nvc0 tile differently (the general 
> algo is the same, but height is different):
> 
> https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces <https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces>

Thanks, it looks like it needs a small adjustment, as in the Doc, GF100 
uses a default height of 4, and GF100+ default to 8 (and I've hardcoded 
it to NV_TILE_BLK_BASE_HEIGHT 8).
GF100 is still nv50, so it should use this code.

> 
> That said, I don't know that nv50 supports scanout of tiled surfaces 
> (nor was I aware that nvc0+ did, perhaps it's a recent feature, or 
> perhaps I'm just forgetful).

What I know is that when using Gnome/Wayland, the framebuffer is tiled, 
and without this tiling code, the panic screen is unreadable.
When using the VT console, the framebuffer is linear, and it's easier.

> 
> Cheers,
> 
>    -ilia
> 
> 
>     Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com
>     <mailto:jfalempe@redhat.com>>
>     ---
>     v2:
>       * Rebase and drop already merged patches.
>       * Rework the tiling algorithm, using "swizzle" to compute the offset
>         inside the block.
> 
>       drivers/gpu/drm/nouveau/dispnv50/wndw.c | 107 +++++++++++++++++++++++-
>       1 file changed, 105 insertions(+), 2 deletions(-)
> 
>     diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
>     b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
>     index 7a2cceaee6e9..50ecf6f12b81 100644
>     --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
>     +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
>     @@ -30,11 +30,16 @@
>       #include <nvhw/class/cl507e.h>
>       #include <nvhw/class/clc37e.h>
> 
>     +#include <linux/iosys-map.h>
>     +
>       #include <drm/drm_atomic.h>
>       #include <drm/drm_atomic_helper.h>
>       #include <drm/drm_blend.h>
>     -#include <drm/drm_gem_atomic_helper.h>
>       #include <drm/drm_fourcc.h>
>     +#include <drm/drm_framebuffer.h>
>     +#include <drm/drm_gem_atomic_helper.h>
>     +#include <drm/drm_panic.h>
>     +#include <drm/ttm/ttm_bo.h>
> 
>       #include "nouveau_bo.h"
>       #include "nouveau_gem.h"
>     @@ -577,6 +582,93 @@ nv50_wndw_prepare_fb(struct drm_plane *plane,
>     struct drm_plane_state *state)
>              return 0;
>       }
> 
>     +#define NV_TILE_BLK_BASE_HEIGHT 8      /* In pixel */
>     +#define NV_TILE_GOB_SIZE 64    /* In bytes */
>     +#define NV_TILE_BLK_WIDTH (NV_TILE_GOB_SIZE / 4) /* For 32 bits
>     pixel */
>     +
>     +/* get the offset in bytes inside the framebuffer, after taking
>     tiling into account */
>     +static unsigned int nv50_get_tiled_offset(struct drm_scanout_buffer
>     *sb, unsigned int blk_h,
>     +                                         unsigned int x, unsigned
>     int y)
>     +{
>     +       u32 blk_x, blk_y, blk_sz, blk_off, pitch;
>     +       u32 swizzle;
>     +
>     +       blk_sz = NV_TILE_GOB_SIZE * blk_h;
>     +       pitch = DIV_ROUND_UP(sb->width, NV_TILE_BLK_WIDTH);
>     +
>     +       /* block coordinate */
>     +       blk_x = x / NV_TILE_BLK_WIDTH;
>     +       blk_y = y / blk_h;
>     +
>     +       blk_off = ((blk_y * pitch) + blk_x) * blk_sz;
>     +
>     +       y = y % blk_h;
>     +
>     +       /* Inside the block, use the fast address swizzle to compute
>     the offset
>     +        * For nvidia blocklinear, bit order is yn..y3 x3 y2 x2 y1
>     y0 x1 x0
>     +        */
>     +       swizzle = (x & 3) | (y & 3) << 2 | (x & 4) << 2 | (y & 4) << 3;
>     +       swizzle |= (x & 8) << 3 | (y >> 3) << 7;
>     +
>     +       return blk_off + swizzle * 4;
>     +}
>     +
>     +static void nv50_set_pixel(struct drm_scanout_buffer *sb, unsigned
>     int x, unsigned int y, u32 color)
>     +{
>     +       struct drm_framebuffer *fb = sb->private;
>     +       unsigned int off;
>     +       /* According to DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D
>     documentation,
>     +        * the last 4 bits of the modifier is log2(blk_height /
>     NV_TILE_BLK_BASE_HEIGHT)
>     +        */
>     +       unsigned int blk_h = NV_TILE_BLK_BASE_HEIGHT * (1 <<
>     (fb->modifier & 0xf));
>     +
>     +       off = nv50_get_tiled_offset(sb, blk_h, x, y);
>     +       iosys_map_wr(&sb->map[0], off, u32, color);
>     +}
>     +
>     +static int
>     +nv50_wndw_get_scanout_buffer(struct drm_plane *plane, struct
>     drm_scanout_buffer *sb)
>     +{
>     +       struct drm_framebuffer *fb;
>     +       struct nouveau_bo *nvbo;
>     +
>     +       if (!plane->state || !plane->state->fb)
>     +               return -EINVAL;
>     +
>     +       fb = plane->state->fb;
>     +       nvbo = nouveau_gem_object(fb->obj[0]);
>     +
>     +       /* Don't support compressed format, or multiplane yet. */
>     +       if (nvbo->comp || fb->format->num_planes != 1)
>     +               return -EOPNOTSUPP;
>     +
>     +       if (nouveau_bo_map(nvbo)) {
>     +               pr_warn("nouveau bo map failed, panic won't be
>     displayed\n");
>     +               return -ENOMEM;
>     +       }
>     +
>     +       if (nvbo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
>     +               iosys_map_set_vaddr_iomem(&sb->map[0],
>     nvbo->kmap.virtual);
>     +       else
>     +               iosys_map_set_vaddr(&sb->map[0], nvbo->kmap.virtual);
>     +
>     +       sb->height = fb->height;
>     +       sb->width = fb->width;
>     +       sb->pitch[0] = fb->pitches[0];
>     +       sb->format = fb->format;
>     +
>     +       /* If tiling is enabled, use the set_pixel() to display
>     correctly.
>     +        * Only handle 32bits format for now.
>     +        */
>     +       if (fb->modifier & 0xf) {
>     +               if (fb->format->cpp[0] != 4)
>     +                       return -EOPNOTSUPP;
>     +               sb->private = (void *) fb;
>     +               sb->set_pixel = nv50_set_pixel;
>     +       }
>     +       return 0;
>     +}
>     +
>       static const struct drm_plane_helper_funcs
>       nv50_wndw_helper = {
>              .prepare_fb = nv50_wndw_prepare_fb,
>     @@ -584,6 +676,14 @@ nv50_wndw_helper = {
>              .atomic_check = nv50_wndw_atomic_check,
>       };
> 
>     +static const struct drm_plane_helper_funcs
>     +nv50_wndw_primary_helper = {
>     +       .prepare_fb = nv50_wndw_prepare_fb,
>     +       .cleanup_fb = nv50_wndw_cleanup_fb,
>     +       .atomic_check = nv50_wndw_atomic_check,
>     +       .get_scanout_buffer = nv50_wndw_get_scanout_buffer,
>     +};
>     +
>       static void
>       nv50_wndw_atomic_destroy_state(struct drm_plane *plane,
>                                     struct drm_plane_state *state)
>     @@ -732,7 +832,10 @@ nv50_wndw_new_(const struct nv50_wndw_func
>     *func, struct drm_device *dev,
>                      return ret;
>              }
> 
>     -       drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper);
>     +       if (type == DRM_PLANE_TYPE_PRIMARY)
>     +               drm_plane_helper_add(&wndw->plane,
>     &nv50_wndw_primary_helper);
>     +       else
>     +               drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper);
> 
>              if (wndw->func->ilut) {
>                      ret = nv50_lut_init(disp, mmu, &wndw->ilut);
>     -- 
>     2.46.0
>
Ilia Mirkin Sept. 6, 2024, 1:22 p.m. UTC | #3
On Fri, Sep 6, 2024 at 9:10 AM Jocelyn Falempe <jfalempe@redhat.com> wrote:

> On 06/09/2024 14:53, Ilia Mirkin wrote:
> > On Fri, Sep 6, 2024 at 6:05 AM Jocelyn Falempe <jfalempe@redhat.com
> > <mailto:jfalempe@redhat.com>> wrote:
> >
> >     Add drm_panic support, for nv50+ cards.
> >     It's enough to get the panic screen while running Gnome/Wayland on a
> >     GTX 1650.
> >     It doesn't support multi-plane or compressed format.
> >     Support for other formats and older cards will come later.
> >     Tiling is only tested on GTX1650, and might be wrong for other cards.
> >
> >
> > I'm moderately sure that nv50 and nvc0 tile differently (the general
> > algo is the same, but height is different):
> >
> >
> https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces
> <
> https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces
> >
>
> Thanks, it looks like it needs a small adjustment, as in the Doc, GF100
> uses a default height of 4, and GF100+ default to 8 (and I've hardcoded
> it to NV_TILE_BLK_BASE_HEIGHT 8).
> GF100 is still nv50, so it should use this code.
>

Just as a point of clarification -- nv50 = G80. nvc0 = GF100. (There are
many other chips, but those are the big generational changes as far as this
code is concerned. https://nouveau.freedesktop.org/CodeNames.html for a
fuller list.)


>
> >
> > That said, I don't know that nv50 supports scanout of tiled surfaces
> > (nor was I aware that nvc0+ did, perhaps it's a recent feature, or
> > perhaps I'm just forgetful).
>
> What I know is that when using Gnome/Wayland, the framebuffer is tiled,
> and without this tiling code, the panic screen is unreadable.
> When using the VT console, the framebuffer is linear, and it's easier.
>

OK. Then clearly it's supported :) I'm sure Ben or Lyude will know offhand
when this support came in.

Cheers,

  -ilia
James Jones Sept. 6, 2024, 7:36 p.m. UTC | #4
Right, there are 3 iterations of block linear tiling actually. NV50 does 
support scanout of block linear surfaces. All block-linear-capable GPUs 
do. The 3 generations are:

NV5x/G8x/GTXXX line: Original block size.
GFXXX(nvc0 I believe in nouveau terms)-GV100: double the block height I 
believe.
TUXXX+ (Turing and later): Same block size, but the layout within a block is subtly 
different, at least as visible in CPU mappings.

If there isn't enough info on the layouts in the reverse engineered 
documentation, I believe this is somewhat documented in drm_fourcc.h, and 
it has a link to a technical reference manual describing the details of 
the 2nd-gen layout (Pre-turing), though I believe it's behind a 
developer account login wall that's free to sign up for but may require 
agreeing to a EULA of some sort regarding the content. Once you get the 
pre-turing layout working, just halve the block height to get it working 
on NV5x. If that isn't sufficient, reach out and we'll see what we can 
do to improve documentation or answer specific questions.

Thanks,
-James

On 9/6/24 05:53, Ilia Mirkin wrote:
> On Fri, Sep 6, 2024 at 6:05 AM Jocelyn Falempe <jfalempe@redhat.com 
> <mailto:jfalempe@redhat.com>> wrote:
> 
>     Add drm_panic support, for nv50+ cards.
>     It's enough to get the panic screen while running Gnome/Wayland on a
>     GTX 1650.
>     It doesn't support multi-plane or compressed format.
>     Support for other formats and older cards will come later.
>     Tiling is only tested on GTX1650, and might be wrong for other cards.
> 
> 
> I'm moderately sure that nv50 and nvc0 tile differently (the general 
> algo is the same, but height is different):
> 
> https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces <https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces>
> 
> That said, I don't know that nv50 supports scanout of tiled surfaces 
> (nor was I aware that nvc0+ did, perhaps it's a recent feature, or 
> perhaps I'm just forgetful).
> 
> Cheers,
> 
>    -ilia
> 
> 
>     Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com
>     <mailto:jfalempe@redhat.com>>
>     ---
>     v2:
>       * Rebase and drop already merged patches.
>       * Rework the tiling algorithm, using "swizzle" to compute the offset
>         inside the block.
> 
>       drivers/gpu/drm/nouveau/dispnv50/wndw.c | 107 +++++++++++++++++++++++-
>       1 file changed, 105 insertions(+), 2 deletions(-)
> 
>     diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
>     b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
>     index 7a2cceaee6e9..50ecf6f12b81 100644
>     --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
>     +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
>     @@ -30,11 +30,16 @@
>       #include <nvhw/class/cl507e.h>
>       #include <nvhw/class/clc37e.h>
> 
>     +#include <linux/iosys-map.h>
>     +
>       #include <drm/drm_atomic.h>
>       #include <drm/drm_atomic_helper.h>
>       #include <drm/drm_blend.h>
>     -#include <drm/drm_gem_atomic_helper.h>
>       #include <drm/drm_fourcc.h>
>     +#include <drm/drm_framebuffer.h>
>     +#include <drm/drm_gem_atomic_helper.h>
>     +#include <drm/drm_panic.h>
>     +#include <drm/ttm/ttm_bo.h>
> 
>       #include "nouveau_bo.h"
>       #include "nouveau_gem.h"
>     @@ -577,6 +582,93 @@ nv50_wndw_prepare_fb(struct drm_plane *plane,
>     struct drm_plane_state *state)
>              return 0;
>       }
> 
>     +#define NV_TILE_BLK_BASE_HEIGHT 8      /* In pixel */
>     +#define NV_TILE_GOB_SIZE 64    /* In bytes */
>     +#define NV_TILE_BLK_WIDTH (NV_TILE_GOB_SIZE / 4) /* For 32 bits
>     pixel */
>     +
>     +/* get the offset in bytes inside the framebuffer, after taking
>     tiling into account */
>     +static unsigned int nv50_get_tiled_offset(struct drm_scanout_buffer
>     *sb, unsigned int blk_h,
>     +                                         unsigned int x, unsigned
>     int y)
>     +{
>     +       u32 blk_x, blk_y, blk_sz, blk_off, pitch;
>     +       u32 swizzle;
>     +
>     +       blk_sz = NV_TILE_GOB_SIZE * blk_h;
>     +       pitch = DIV_ROUND_UP(sb->width, NV_TILE_BLK_WIDTH);
>     +
>     +       /* block coordinate */
>     +       blk_x = x / NV_TILE_BLK_WIDTH;
>     +       blk_y = y / blk_h;
>     +
>     +       blk_off = ((blk_y * pitch) + blk_x) * blk_sz;
>     +
>     +       y = y % blk_h;
>     +
>     +       /* Inside the block, use the fast address swizzle to compute
>     the offset
>     +        * For nvidia blocklinear, bit order is yn..y3 x3 y2 x2 y1
>     y0 x1 x0
>     +        */
>     +       swizzle = (x & 3) | (y & 3) << 2 | (x & 4) << 2 | (y & 4) << 3;
>     +       swizzle |= (x & 8) << 3 | (y >> 3) << 7;
>     +
>     +       return blk_off + swizzle * 4;
>     +}
>     +
>     +static void nv50_set_pixel(struct drm_scanout_buffer *sb, unsigned
>     int x, unsigned int y, u32 color)
>     +{
>     +       struct drm_framebuffer *fb = sb->private;
>     +       unsigned int off;
>     +       /* According to DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D
>     documentation,
>     +        * the last 4 bits of the modifier is log2(blk_height /
>     NV_TILE_BLK_BASE_HEIGHT)
>     +        */
>     +       unsigned int blk_h = NV_TILE_BLK_BASE_HEIGHT * (1 <<
>     (fb->modifier & 0xf));
>     +
>     +       off = nv50_get_tiled_offset(sb, blk_h, x, y);
>     +       iosys_map_wr(&sb->map[0], off, u32, color);
>     +}
>     +
>     +static int
>     +nv50_wndw_get_scanout_buffer(struct drm_plane *plane, struct
>     drm_scanout_buffer *sb)
>     +{
>     +       struct drm_framebuffer *fb;
>     +       struct nouveau_bo *nvbo;
>     +
>     +       if (!plane->state || !plane->state->fb)
>     +               return -EINVAL;
>     +
>     +       fb = plane->state->fb;
>     +       nvbo = nouveau_gem_object(fb->obj[0]);
>     +
>     +       /* Don't support compressed format, or multiplane yet. */
>     +       if (nvbo->comp || fb->format->num_planes != 1)
>     +               return -EOPNOTSUPP;
>     +
>     +       if (nouveau_bo_map(nvbo)) {
>     +               pr_warn("nouveau bo map failed, panic won't be
>     displayed\n");
>     +               return -ENOMEM;
>     +       }
>     +
>     +       if (nvbo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
>     +               iosys_map_set_vaddr_iomem(&sb->map[0],
>     nvbo->kmap.virtual);
>     +       else
>     +               iosys_map_set_vaddr(&sb->map[0], nvbo->kmap.virtual);
>     +
>     +       sb->height = fb->height;
>     +       sb->width = fb->width;
>     +       sb->pitch[0] = fb->pitches[0];
>     +       sb->format = fb->format;
>     +
>     +       /* If tiling is enabled, use the set_pixel() to display
>     correctly.
>     +        * Only handle 32bits format for now.
>     +        */
>     +       if (fb->modifier & 0xf) {
>     +               if (fb->format->cpp[0] != 4)
>     +                       return -EOPNOTSUPP;
>     +               sb->private = (void *) fb;
>     +               sb->set_pixel = nv50_set_pixel;
>     +       }
>     +       return 0;
>     +}
>     +
>       static const struct drm_plane_helper_funcs
>       nv50_wndw_helper = {
>              .prepare_fb = nv50_wndw_prepare_fb,
>     @@ -584,6 +676,14 @@ nv50_wndw_helper = {
>              .atomic_check = nv50_wndw_atomic_check,
>       };
> 
>     +static const struct drm_plane_helper_funcs
>     +nv50_wndw_primary_helper = {
>     +       .prepare_fb = nv50_wndw_prepare_fb,
>     +       .cleanup_fb = nv50_wndw_cleanup_fb,
>     +       .atomic_check = nv50_wndw_atomic_check,
>     +       .get_scanout_buffer = nv50_wndw_get_scanout_buffer,
>     +};
>     +
>       static void
>       nv50_wndw_atomic_destroy_state(struct drm_plane *plane,
>                                     struct drm_plane_state *state)
>     @@ -732,7 +832,10 @@ nv50_wndw_new_(const struct nv50_wndw_func
>     *func, struct drm_device *dev,
>                      return ret;
>              }
> 
>     -       drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper);
>     +       if (type == DRM_PLANE_TYPE_PRIMARY)
>     +               drm_plane_helper_add(&wndw->plane,
>     &nv50_wndw_primary_helper);
>     +       else
>     +               drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper);
> 
>              if (wndw->func->ilut) {
>                      ret = nv50_lut_init(disp, mmu, &wndw->ilut);
>     -- 
>     2.46.0
>
kernel test robot Sept. 9, 2024, 11:42 a.m. UTC | #5
Hi Jocelyn,

kernel test robot noticed the following build warnings:

[auto build test WARNING on e8653e63e834e4c7de60b81b8b24deb7bdd3bf56]

url:    https://github.com/intel-lab-lkp/linux/commits/Jocelyn-Falempe/drm-panic-Add-ABGR2101010-support/20240906-180717
base:   e8653e63e834e4c7de60b81b8b24deb7bdd3bf56
patch link:    https://lore.kernel.org/r/20240906100434.1171093-4-jfalempe%40redhat.com
patch subject: [PATCH v2 3/3] drm/nouveau: Add drm_panic support for nv50+
config: x86_64-randconfig-121-20240909 (https://download.01.org/0day-ci/archive/20240909/202409091805.3PzxoAY6-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240909/202409091805.3PzxoAY6-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409091805.3PzxoAY6-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> drivers/gpu/drm/nouveau/dispnv50/wndw.c:651:66: sparse: sparse: incorrect type in argument 2 (different address spaces) @@     expected void [noderef] __iomem *vaddr_iomem @@     got void *virtual @@
   drivers/gpu/drm/nouveau/dispnv50/wndw.c:651:66: sparse:     expected void [noderef] __iomem *vaddr_iomem
   drivers/gpu/drm/nouveau/dispnv50/wndw.c:651:66: sparse:     got void *virtual

vim +651 drivers/gpu/drm/nouveau/dispnv50/wndw.c

   628	
   629	static int
   630	nv50_wndw_get_scanout_buffer(struct drm_plane *plane, struct drm_scanout_buffer *sb)
   631	{
   632		struct drm_framebuffer *fb;
   633		struct nouveau_bo *nvbo;
   634	
   635		if (!plane->state || !plane->state->fb)
   636			return -EINVAL;
   637	
   638		fb = plane->state->fb;
   639		nvbo = nouveau_gem_object(fb->obj[0]);
   640	
   641		/* Don't support compressed format, or multiplane yet. */
   642		if (nvbo->comp || fb->format->num_planes != 1)
   643			return -EOPNOTSUPP;
   644	
   645		if (nouveau_bo_map(nvbo)) {
   646			pr_warn("nouveau bo map failed, panic won't be displayed\n");
   647			return -ENOMEM;
   648		}
   649	
   650		if (nvbo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
 > 651			iosys_map_set_vaddr_iomem(&sb->map[0], nvbo->kmap.virtual);
   652		else
   653			iosys_map_set_vaddr(&sb->map[0], nvbo->kmap.virtual);
   654	
   655		sb->height = fb->height;
   656		sb->width = fb->width;
   657		sb->pitch[0] = fb->pitches[0];
   658		sb->format = fb->format;
   659	
   660		/* If tiling is enabled, use the set_pixel() to display correctly.
   661		 * Only handle 32bits format for now.
   662		 */
   663		if (fb->modifier & 0xf) {
   664			if (fb->format->cpp[0] != 4)
   665				return -EOPNOTSUPP;
   666			sb->private = (void *) fb;
   667			sb->set_pixel = nv50_set_pixel;
   668		}
   669		return 0;
   670	}
   671
Jocelyn Falempe Sept. 11, 2024, 2:19 p.m. UTC | #6
On 06/09/2024 21:36, James Jones wrote:
> Right, there are 3 iterations of block linear tiling actually. NV50 does 
> support scanout of block linear surfaces. All block-linear-capable GPUs 
> do. The 3 generations are:
> 
> NV5x/G8x/GTXXX line: Original block size.
> GFXXX(nvc0 I believe in nouveau terms)-GV100: double the block height I 
> believe.
> GTXXX+: Same block size, but the layout within a block is subtly 
> different, at least as visible in CPU mappings.
> 

I'm looking at how to check for a specific chip in nouveau, and fix the 
tiling for cards other than Turing.
It looks like in most cases nouveau uses device->info.chipset, with 
hardcoded hex values. So for nvc0+ I should check device->info.chipset >= 
0xc0 ?

chipset < c0 : block_height 4, "old layout"
chipset >= c0 : block_height 8, "old layout"
chipset >= ?? : block_height 8, "new layout"

For testing, I have at hand a GTX1650 (Turing) and an old Geforce 
8800GTS (Tesla?), so it's a NV92, and still uses this nv50+ code ?

So I should be able to figure out if there is a tiling layout difference 
on my 8800GTS.

> If there isn't enough info on the layouts in the reverse engineered 
> documentation, believe this is somewhat documented in drm_fourcc.h, and 
> it has a link to a technical reference manual describing the details of 
> the 2nd-gen layout (Pre-turing), though I believe it's behind a 
> developer account login wall that's free to sign up for but may require 
> agreeing to a EULA of some sort regarding the content. Once you get the 
> pre-turing layout working, just halve the block height to get it working 
> on NV5x. If that isn't sufficient, reach out and we'll see what we can 
> do to improve documentation or answer specific questions.

I didn't find the link, but I think I have enough information, thanks 
for your support.

Best regards,
Ilia Mirkin Sept. 11, 2024, 2:39 p.m. UTC | #7
On Wed, Sep 11, 2024 at 10:19 AM Jocelyn Falempe <jfalempe@redhat.com>
wrote:

> On 06/09/2024 21:36, James Jones wrote:
> > Right, there are 3 iterations of block linear tiling actually. NV50 does
> > support scanout of block linear surfaces. All block-linear-capable GPUs
> > do. The 3 generations are:
> >
> > NV5x/G8x/GTXXX line: Original block size.
> > GFXXX(nvc0 I believe in nouveau terms)-GV100: double the block height I
> > believe.
> > GTXXX+: Same block size, but the layout within a block is subtly
> > different, at least as visible in CPU mappings.
> >
>
> I'm looking at how to check for specific chip in nouveau, and fix the
> tiling for other cards than Turing.
> It looks like in most case nouveau uses device->info.chipset, with
> hardcoded hex value. so for nvc0+ I should check device->info.chipset >=
> 0xc0 ?
>
> chipset < c0 : block_height 4, "old layout"
> chipset >= c0 : block_height 8, "old layout"
> chipset >= ?? : block_height 8, "new layout"
>
> For testing, I have at hand a GTX1650 (Turing) and an old Geforce
> 8800GTS (Tesla?), so it's a NV92, and still uses this nv50+ code ?
>

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c#n2399

So probably >= 0x160

There should also be a device->card_type which is an enum that you can use
too, depending on what you have available? Set here:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c#n3177

Once you have a patch tested, ideally someone with the hardware should test
on a >= fermi && < turing card.

Hope this helps,

  -ilia
Jocelyn Falempe Sept. 12, 2024, 10:58 a.m. UTC | #8
On 11/09/2024 16:39, Ilia Mirkin wrote:
> On Wed, Sep 11, 2024 at 10:19 AM Jocelyn Falempe <jfalempe@redhat.com 
> <mailto:jfalempe@redhat.com>> wrote:
> 
>     On 06/09/2024 21:36, James Jones wrote:
>      > Right, there are 3 iterations of block linear tiling actually.
>     NV50 does
>      > support scanout of block linear surfaces. All block-linear-
>     capable GPUs
>      > do. The 3 generations are:
>      >
>      > NV5x/G8x/GTXXX line: Original block size.
>      > GFXXX(nvc0 I believe in nouveau terms)-GV100: double the block
>     height I
>      > believe.
>      > GTXXX+: Same block size, but the layout within a block is subtly
>      > different, at least as visible in CPU mappings.
>      >
> 
>     I'm looking at how to check for specific chip in nouveau, and fix the
>     tiling for other cards than Turing.
>     It looks like in most case nouveau uses device->info.chipset, with
>     hardcoded hex value. so for nvc0+ I should check
>     device->info.chipset >= 0xc0 ?
> 
>     chipset < c0 : block_height 4, "old layout"
>     chipset >= c0 : block_height 8, "old layout"
>     chipset >= ?? : block_height 8, "new layout"
> 
>     For testing, I have at hand a GTX1650 (Turing) and an old Geforce
>     8800GTS (Tesla?), so it's a NV92, and still uses this nv50+ code ?
> 
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c#n2399
> <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c#n2399>
> 
> So probably >= 0x160
> 
> There should also be a device->card_type which is an enum that you can 
> use too, depending on what you have available? Set here:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c#n3177
> <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c#n3177>
> 
> Once you have a patch tested, ideally someone with the hardware should 
> test on a >= fermi && < turing card.

I'm testing on my 8800 GTS.

The problem I have is that fb->modifier is 0, but the buffer is still 
tiled. I found there is a nouveau_framebuffer_get_layout() which gives 
tile_mode and kind, and works on my Tesla.

So if I understand correctly, if kind != 0, there is tiling, and the 
block size is 8 * (1 << tile_mode). (and half this on Tesla).

At least tiling is much easier on Tesla, there is no tiling inside the 
block. In my test, block size is 4K, 16x64 pixels, tile_mode is 4, and 
kind is 122.

I will send a v3 which works on Tesla and Turing soon.

Thanks for your help.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index 7a2cceaee6e9..50ecf6f12b81 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -30,11 +30,16 @@ 
 #include <nvhw/class/cl507e.h>
 #include <nvhw/class/clc37e.h>
 
+#include <linux/iosys-map.h>
+
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_blend.h>
-#include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_fourcc.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_gem_atomic_helper.h>
+#include <drm/drm_panic.h>
+#include <drm/ttm/ttm_bo.h>
 
 #include "nouveau_bo.h"
 #include "nouveau_gem.h"
@@ -577,6 +582,93 @@  nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 	return 0;
 }
 
+#define NV_TILE_BLK_BASE_HEIGHT 8	/* In pixels */
+#define NV_TILE_GOB_SIZE 64	/* In bytes */
+#define NV_TILE_BLK_WIDTH (NV_TILE_GOB_SIZE / 4) /* In pixels, for 32-bit pixels */
+
+/* Return the byte offset of 32-bit pixel (x, y) in the tiled buffer; blk_h is the block height in rows */
+static unsigned int nv50_get_tiled_offset(struct drm_scanout_buffer *sb, unsigned int blk_h,
+					  unsigned int x, unsigned int y)
+{
+	u32 blk_x, blk_y, blk_sz, blk_off, pitch;
+	u32 swizzle;
+
+	blk_sz = NV_TILE_GOB_SIZE * blk_h;
+	pitch = DIV_ROUND_UP(sb->width, NV_TILE_BLK_WIDTH);
+
+	/* Block coordinates of the pixel; pitch is the number of blocks per row of blocks */
+	blk_x = x / NV_TILE_BLK_WIDTH;
+	blk_y = y / blk_h;
+
+	blk_off = ((blk_y * pitch) + blk_x) * blk_sz;
+
+	y = y % blk_h;
+
+	/* Inside the block, use the fast address swizzle to compute the pixel index.
+	 * For nvidia blocklinear, bit order is yn..y3 x3 y2 x2 y1 y0 x1 x0 (scaled by 4 bytes below)
+	 */
+	swizzle = (x & 3) | (y & 3) << 2 | (x & 4) << 2 | (y & 4) << 3;
+	swizzle |= (x & 8) << 3 | (y >> 3) << 7;
+
+	return blk_off + swizzle * 4;
+}
+
+static void nv50_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, unsigned int y, u32 color)
+{
+	struct drm_framebuffer *fb = sb->private;
+	unsigned int off;
+	/* According to the DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D documentation,
+	 * the low 4 bits of the modifier are log2(blk_height / NV_TILE_BLK_BASE_HEIGHT)
+	 */
+	unsigned int blk_h = NV_TILE_BLK_BASE_HEIGHT * (1 << (fb->modifier & 0xf));
+
+	off = nv50_get_tiled_offset(sb, blk_h, x, y);
+	iosys_map_wr(&sb->map[0], off, u32, color);
+}
+
+static int
+nv50_wndw_get_scanout_buffer(struct drm_plane *plane, struct drm_scanout_buffer *sb)
+{
+	struct drm_framebuffer *fb;
+	struct nouveau_bo *nvbo;
+
+	if (!plane->state || !plane->state->fb)
+		return -EINVAL;
+
+	fb = plane->state->fb;
+	nvbo = nouveau_gem_object(fb->obj[0]);
+
+	/* Compressed and multi-plane framebuffers are not supported yet. */
+	if (nvbo->comp || fb->format->num_planes != 1)
+		return -EOPNOTSUPP;
+
+	if (nouveau_bo_map(nvbo)) {
+		pr_warn("nouveau bo map failed, panic won't be displayed\n");
+		return -ENOMEM;
+	}
+
+	if (nvbo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK)
+		iosys_map_set_vaddr_iomem(&sb->map[0], nvbo->kmap.virtual);
+	else
+		iosys_map_set_vaddr(&sb->map[0], nvbo->kmap.virtual);
+
+	sb->height = fb->height;
+	sb->width = fb->width;
+	sb->pitch[0] = fb->pitches[0];
+	sb->format = fb->format;
+
+	/* If tiling is enabled, draw through set_pixel(). Only 32-bit formats are handled for now.
+	 * NOTE(review): a tiled fb whose low 4 modifier bits are 0 takes the linear path; confirm.
+	 */
+	if (fb->modifier & 0xf) {
+		if (fb->format->cpp[0] != 4)
+			return -EOPNOTSUPP;
+		sb->private = (void *) fb;
+		sb->set_pixel = nv50_set_pixel;
+	}
+	return 0;
+}
+
 static const struct drm_plane_helper_funcs
 nv50_wndw_helper = {
 	.prepare_fb = nv50_wndw_prepare_fb,
@@ -584,6 +676,14 @@  nv50_wndw_helper = {
 	.atomic_check = nv50_wndw_atomic_check,
 };
 
+static const struct drm_plane_helper_funcs
+nv50_wndw_primary_helper = {
+	.prepare_fb = nv50_wndw_prepare_fb,
+	.cleanup_fb = nv50_wndw_cleanup_fb,
+	.atomic_check = nv50_wndw_atomic_check,
+	.get_scanout_buffer = nv50_wndw_get_scanout_buffer,	/* scanout buffer for drm_panic */
+};
+
 static void
 nv50_wndw_atomic_destroy_state(struct drm_plane *plane,
 			       struct drm_plane_state *state)
@@ -732,7 +832,10 @@  nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev,
 		return ret;
 	}
 
-	drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper);
+	if (type == DRM_PLANE_TYPE_PRIMARY)
+		drm_plane_helper_add(&wndw->plane, &nv50_wndw_primary_helper);
+	else
+		drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper);
 
 	if (wndw->func->ilut) {
 		ret = nv50_lut_init(disp, mmu, &wndw->ilut);