Message ID | 20240906100434.1171093-4-jfalempe@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/nouveau: Add drm_panic support for nv50+ | expand |
On Fri, Sep 6, 2024 at 6:05 AM Jocelyn Falempe <jfalempe@redhat.com> wrote: > Add drm_panic support, for nv50+ cards. > It's enough to get the panic screen while running Gnome/Wayland on a > GTX 1650. > It doesn't support multi-plane or compressed format. > Support for other formats and older cards will come later. > Tiling is only tested on GTX1650, and might be wrong for other cards. > I'm moderately sure that nv50 and nvc0 tile differently (the general algo is the same, but height is different): https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces That said, I don't know that nv50 supports scanout of tiled surfaces (nor was I aware that nvc0+ did, perhaps it's a recent feature, or perhaps I'm just forgetful). Cheers, -ilia > > Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com> > --- > v2: > * Rebase and drop already merged patches. > * Rework the tiling algorithm, using "swizzle" to compute the offset > inside the block. > > drivers/gpu/drm/nouveau/dispnv50/wndw.c | 107 +++++++++++++++++++++++- > 1 file changed, 105 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c > b/drivers/gpu/drm/nouveau/dispnv50/wndw.c > index 7a2cceaee6e9..50ecf6f12b81 100644 > --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c > +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c > @@ -30,11 +30,16 @@ > #include <nvhw/class/cl507e.h> > #include <nvhw/class/clc37e.h> > > +#include <linux/iosys-map.h> > + > #include <drm/drm_atomic.h> > #include <drm/drm_atomic_helper.h> > #include <drm/drm_blend.h> > -#include <drm/drm_gem_atomic_helper.h> > #include <drm/drm_fourcc.h> > +#include <drm/drm_framebuffer.h> > +#include <drm/drm_gem_atomic_helper.h> > +#include <drm/drm_panic.h> > +#include <drm/ttm/ttm_bo.h> > > #include "nouveau_bo.h" > #include "nouveau_gem.h" > @@ -577,6 +582,93 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct > drm_plane_state *state) > return 0; > } > > +#define 
NV_TILE_BLK_BASE_HEIGHT 8 /* In pixel */ > +#define NV_TILE_GOB_SIZE 64 /* In bytes */ > +#define NV_TILE_BLK_WIDTH (NV_TILE_GOB_SIZE / 4) /* For 32 bits pixel */ > + > +/* get the offset in bytes inside the framebuffer, after taking tiling > into account */ > +static unsigned int nv50_get_tiled_offset(struct drm_scanout_buffer *sb, > unsigned int blk_h, > + unsigned int x, unsigned int y) > +{ > + u32 blk_x, blk_y, blk_sz, blk_off, pitch; > + u32 swizzle; > + > + blk_sz = NV_TILE_GOB_SIZE * blk_h; > + pitch = DIV_ROUND_UP(sb->width, NV_TILE_BLK_WIDTH); > + > + /* block coordinate */ > + blk_x = x / NV_TILE_BLK_WIDTH; > + blk_y = y / blk_h; > + > + blk_off = ((blk_y * pitch) + blk_x) * blk_sz; > + > + y = y % blk_h; > + > + /* Inside the block, use the fast address swizzle to compute the > offset > + * For nvidia blocklinear, bit order is yn..y3 x3 y2 x2 y1 y0 x1 x0 > + */ > + swizzle = (x & 3) | (y & 3) << 2 | (x & 4) << 2 | (y & 4) << 3; > + swizzle |= (x & 8) << 3 | (y >> 3) << 7; > + > + return blk_off + swizzle * 4; > +} > + > +static void nv50_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, > unsigned int y, u32 color) > +{ > + struct drm_framebuffer *fb = sb->private; > + unsigned int off; > + /* According to DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D > documentation, > + * the last 4 bits of the modifier is log2(blk_height / > NV_TILE_BLK_BASE_HEIGHT) > + */ > + unsigned int blk_h = NV_TILE_BLK_BASE_HEIGHT * (1 << (fb->modifier > & 0xf)); > + > + off = nv50_get_tiled_offset(sb, blk_h, x, y); > + iosys_map_wr(&sb->map[0], off, u32, color); > +} > + > +static int > +nv50_wndw_get_scanout_buffer(struct drm_plane *plane, struct > drm_scanout_buffer *sb) > +{ > + struct drm_framebuffer *fb; > + struct nouveau_bo *nvbo; > + > + if (!plane->state || !plane->state->fb) > + return -EINVAL; > + > + fb = plane->state->fb; > + nvbo = nouveau_gem_object(fb->obj[0]); > + > + /* Don't support compressed format, or multiplane yet. 
*/ > + if (nvbo->comp || fb->format->num_planes != 1) > + return -EOPNOTSUPP; > + > + if (nouveau_bo_map(nvbo)) { > + pr_warn("nouveau bo map failed, panic won't be > displayed\n"); > + return -ENOMEM; > + } > + > + if (nvbo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK) > + iosys_map_set_vaddr_iomem(&sb->map[0], nvbo->kmap.virtual); > + else > + iosys_map_set_vaddr(&sb->map[0], nvbo->kmap.virtual); > + > + sb->height = fb->height; > + sb->width = fb->width; > + sb->pitch[0] = fb->pitches[0]; > + sb->format = fb->format; > + > + /* If tiling is enabled, use the set_pixel() to display correctly. > + * Only handle 32bits format for now. > + */ > + if (fb->modifier & 0xf) { > + if (fb->format->cpp[0] != 4) > + return -EOPNOTSUPP; > + sb->private = (void *) fb; > + sb->set_pixel = nv50_set_pixel; > + } > + return 0; > +} > + > static const struct drm_plane_helper_funcs > nv50_wndw_helper = { > .prepare_fb = nv50_wndw_prepare_fb, > @@ -584,6 +676,14 @@ nv50_wndw_helper = { > .atomic_check = nv50_wndw_atomic_check, > }; > > +static const struct drm_plane_helper_funcs > +nv50_wndw_primary_helper = { > + .prepare_fb = nv50_wndw_prepare_fb, > + .cleanup_fb = nv50_wndw_cleanup_fb, > + .atomic_check = nv50_wndw_atomic_check, > + .get_scanout_buffer = nv50_wndw_get_scanout_buffer, > +}; > + > static void > nv50_wndw_atomic_destroy_state(struct drm_plane *plane, > struct drm_plane_state *state) > @@ -732,7 +832,10 @@ nv50_wndw_new_(const struct nv50_wndw_func *func, > struct drm_device *dev, > return ret; > } > > - drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper); > + if (type == DRM_PLANE_TYPE_PRIMARY) > + drm_plane_helper_add(&wndw->plane, > &nv50_wndw_primary_helper); > + else > + drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper); > > if (wndw->func->ilut) { > ret = nv50_lut_init(disp, mmu, &wndw->ilut); > -- > 2.46.0 > >
On 06/09/2024 14:53, Ilia Mirkin wrote: > On Fri, Sep 6, 2024 at 6:05 AM Jocelyn Falempe <jfalempe@redhat.com > <mailto:jfalempe@redhat.com>> wrote: > > Add drm_panic support, for nv50+ cards. > It's enough to get the panic screen while running Gnome/Wayland on a > GTX 1650. > It doesn't support multi-plane or compressed format. > Support for other formats and older cards will come later. > Tiling is only tested on GTX1650, and might be wrong for other cards. > > > I'm moderately sure that nv50 and nvc0 tile differently (the general > algo is the same, but height is different): > > https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces <https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces> Thanks, it looks like it needs a small adjustment, as in the Doc, GF100 uses a default height of 4, and GF100+ default to 8 (and I've hardcoded it to NV_TILE_BLK_BASE_HEIGHT 8). GF100 is still nv50, so it should use this code. > > That said, I don't know that nv50 supports scanout of tiled surfaces > (nor was I aware that nvc0+ did, perhaps it's a recent feature, or > perhaps I'm just forgetful). What I know is that when using Gnome/Wayland, the framebuffer is tiled, and without this tiling code, the panic screen is unreadable. When using the VT console, the framebuffer is linear, and it's easier. > > Cheers, > > -ilia > > > Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com > <mailto:jfalempe@redhat.com>> > --- > v2: > * Rebase and drop already merged patches. > * Rework the tiling algorithm, using "swizzle" to compute the offset > inside the block. 
> > drivers/gpu/drm/nouveau/dispnv50/wndw.c | 107 +++++++++++++++++++++++- > 1 file changed, 105 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c > b/drivers/gpu/drm/nouveau/dispnv50/wndw.c > index 7a2cceaee6e9..50ecf6f12b81 100644 > --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c > +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c > @@ -30,11 +30,16 @@ > #include <nvhw/class/cl507e.h> > #include <nvhw/class/clc37e.h> > > +#include <linux/iosys-map.h> > + > #include <drm/drm_atomic.h> > #include <drm/drm_atomic_helper.h> > #include <drm/drm_blend.h> > -#include <drm/drm_gem_atomic_helper.h> > #include <drm/drm_fourcc.h> > +#include <drm/drm_framebuffer.h> > +#include <drm/drm_gem_atomic_helper.h> > +#include <drm/drm_panic.h> > +#include <drm/ttm/ttm_bo.h> > > #include "nouveau_bo.h" > #include "nouveau_gem.h" > @@ -577,6 +582,93 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, > struct drm_plane_state *state) > return 0; > } > > +#define NV_TILE_BLK_BASE_HEIGHT 8 /* In pixel */ > +#define NV_TILE_GOB_SIZE 64 /* In bytes */ > +#define NV_TILE_BLK_WIDTH (NV_TILE_GOB_SIZE / 4) /* For 32 bits > pixel */ > + > +/* get the offset in bytes inside the framebuffer, after taking > tiling into account */ > +static unsigned int nv50_get_tiled_offset(struct drm_scanout_buffer > *sb, unsigned int blk_h, > + unsigned int x, unsigned > int y) > +{ > + u32 blk_x, blk_y, blk_sz, blk_off, pitch; > + u32 swizzle; > + > + blk_sz = NV_TILE_GOB_SIZE * blk_h; > + pitch = DIV_ROUND_UP(sb->width, NV_TILE_BLK_WIDTH); > + > + /* block coordinate */ > + blk_x = x / NV_TILE_BLK_WIDTH; > + blk_y = y / blk_h; > + > + blk_off = ((blk_y * pitch) + blk_x) * blk_sz; > + > + y = y % blk_h; > + > + /* Inside the block, use the fast address swizzle to compute > the offset > + * For nvidia blocklinear, bit order is yn..y3 x3 y2 x2 y1 > y0 x1 x0 > + */ > + swizzle = (x & 3) | (y & 3) << 2 | (x & 4) << 2 | (y & 4) << 3; > + swizzle |= (x & 8) << 3 | (y >> 3) << 7; > + > + 
return blk_off + swizzle * 4; > +} > + > +static void nv50_set_pixel(struct drm_scanout_buffer *sb, unsigned > int x, unsigned int y, u32 color) > +{ > + struct drm_framebuffer *fb = sb->private; > + unsigned int off; > + /* According to DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D > documentation, > + * the last 4 bits of the modifier is log2(blk_height / > NV_TILE_BLK_BASE_HEIGHT) > + */ > + unsigned int blk_h = NV_TILE_BLK_BASE_HEIGHT * (1 << > (fb->modifier & 0xf)); > + > + off = nv50_get_tiled_offset(sb, blk_h, x, y); > + iosys_map_wr(&sb->map[0], off, u32, color); > +} > + > +static int > +nv50_wndw_get_scanout_buffer(struct drm_plane *plane, struct > drm_scanout_buffer *sb) > +{ > + struct drm_framebuffer *fb; > + struct nouveau_bo *nvbo; > + > + if (!plane->state || !plane->state->fb) > + return -EINVAL; > + > + fb = plane->state->fb; > + nvbo = nouveau_gem_object(fb->obj[0]); > + > + /* Don't support compressed format, or multiplane yet. */ > + if (nvbo->comp || fb->format->num_planes != 1) > + return -EOPNOTSUPP; > + > + if (nouveau_bo_map(nvbo)) { > + pr_warn("nouveau bo map failed, panic won't be > displayed\n"); > + return -ENOMEM; > + } > + > + if (nvbo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK) > + iosys_map_set_vaddr_iomem(&sb->map[0], > nvbo->kmap.virtual); > + else > + iosys_map_set_vaddr(&sb->map[0], nvbo->kmap.virtual); > + > + sb->height = fb->height; > + sb->width = fb->width; > + sb->pitch[0] = fb->pitches[0]; > + sb->format = fb->format; > + > + /* If tiling is enabled, use the set_pixel() to display > correctly. > + * Only handle 32bits format for now. 
> + */ > + if (fb->modifier & 0xf) { > + if (fb->format->cpp[0] != 4) > + return -EOPNOTSUPP; > + sb->private = (void *) fb; > + sb->set_pixel = nv50_set_pixel; > + } > + return 0; > +} > + > static const struct drm_plane_helper_funcs > nv50_wndw_helper = { > .prepare_fb = nv50_wndw_prepare_fb, > @@ -584,6 +676,14 @@ nv50_wndw_helper = { > .atomic_check = nv50_wndw_atomic_check, > }; > > +static const struct drm_plane_helper_funcs > +nv50_wndw_primary_helper = { > + .prepare_fb = nv50_wndw_prepare_fb, > + .cleanup_fb = nv50_wndw_cleanup_fb, > + .atomic_check = nv50_wndw_atomic_check, > + .get_scanout_buffer = nv50_wndw_get_scanout_buffer, > +}; > + > static void > nv50_wndw_atomic_destroy_state(struct drm_plane *plane, > struct drm_plane_state *state) > @@ -732,7 +832,10 @@ nv50_wndw_new_(const struct nv50_wndw_func > *func, struct drm_device *dev, > return ret; > } > > - drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper); > + if (type == DRM_PLANE_TYPE_PRIMARY) > + drm_plane_helper_add(&wndw->plane, > &nv50_wndw_primary_helper); > + else > + drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper); > > if (wndw->func->ilut) { > ret = nv50_lut_init(disp, mmu, &wndw->ilut); > -- > 2.46.0 >
On Fri, Sep 6, 2024 at 9:10 AM Jocelyn Falempe <jfalempe@redhat.com> wrote: > On 06/09/2024 14:53, Ilia Mirkin wrote: > > On Fri, Sep 6, 2024 at 6:05 AM Jocelyn Falempe <jfalempe@redhat.com > > <mailto:jfalempe@redhat.com>> wrote: > > > > Add drm_panic support, for nv50+ cards. > > It's enough to get the panic screen while running Gnome/Wayland on a > > GTX 1650. > > It doesn't support multi-plane or compressed format. > > Support for other formats and older cards will come later. > > Tiling is only tested on GTX1650, and might be wrong for other cards. > > > > > > I'm moderately sure that nv50 and nvc0 tile differently (the general > > algo is the same, but height is different): > > > > > https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces > < > https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces > > > > Thanks, it looks like it needs a small adjustment, as in the Doc, GF100 > uses a default height of 4, and GF100+ default to 8 (and I've hardcoded > it to NV_TILE_BLK_BASE_HEIGHT 8). > GF100 is still nv50, so it should use this code. > Just as a point of clarification -- nv50 = G80. nvc0 = GF100. (There are many other chips, but those are the big generational changes as far as this code is concerned. https://nouveau.freedesktop.org/CodeNames.html for a fuller list.) > > > > > That said, I don't know that nv50 supports scanout of tiled surfaces > > (nor was I aware that nvc0+ did, perhaps it's a recent feature, or > > perhaps I'm just forgetful). > > What I know is that when using Gnome/Wayland, the framebuffer is tiled, > and without this tiling code, the panic screen is unreadable. > When using the VT console, the framebuffer is linear, and it's easier. > OK. Then clearly it's supported :) I'm sure Ben or Lyude will know offhand when this support came in. Cheers, -ilia
Right, there are 3 iterations of block linear tiling actually. NV50 does support scanout of block linear surfaces. All block-linear-capable GPUs do. The 3 generations are: NV5x/G8x/GTXXX line: Original block size. GFXXX(nvc0 I believe in nouveau terms)-GV100: double the block height I believe. GTXXX+: Same block size, but the layout within a block is subtly different, at least as visible in CPU mappings. If there isn't enough info on the layouts in the reverse-engineered documentation, I believe this is somewhat documented in drm_fourcc.h, and it has a link to a technical reference manual describing the details of the 2nd-gen layout (Pre-turing), though I believe it's behind a developer account login wall that's free to sign up for but may require agreeing to a EULA of some sort regarding the content. Once you get the pre-turing layout working, just halve the block height to get it working on NV5x. If that isn't sufficient, reach out and we'll see what we can do to improve documentation or answer specific questions. Thanks, -James On 9/6/24 05:53, Ilia Mirkin wrote: > On Fri, Sep 6, 2024 at 6:05 AM Jocelyn Falempe <jfalempe@redhat.com > <mailto:jfalempe@redhat.com>> wrote: > > Add drm_panic support, for nv50+ cards. > It's enough to get the panic screen while running Gnome/Wayland on a > GTX 1650. > It doesn't support multi-plane or compressed format. > Support for other formats and older cards will come later. > Tiling is only tested on GTX1650, and might be wrong for other cards.
> > > I'm moderately sure that nv50 and nvc0 tile differently (the general > algo is the same, but height is different): > > https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces <https://envytools.readthedocs.io/en/latest/hw/memory/g80-surface.html?highlight=tiling#blocklinear-surfaces> > > That said, I don't know that nv50 supports scanout of tiled surfaces > (nor was I aware that nvc0+ did, perhaps it's a recent feature, or > perhaps I'm just forgetful). > > Cheers, > > -ilia > > > Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com > <mailto:jfalempe@redhat.com>> > --- > v2: > * Rebase and drop already merged patches. > * Rework the tiling algorithm, using "swizzle" to compute the offset > inside the block. > > drivers/gpu/drm/nouveau/dispnv50/wndw.c | 107 +++++++++++++++++++++++- > 1 file changed, 105 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c > b/drivers/gpu/drm/nouveau/dispnv50/wndw.c > index 7a2cceaee6e9..50ecf6f12b81 100644 > --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c > +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c > @@ -30,11 +30,16 @@ > #include <nvhw/class/cl507e.h> > #include <nvhw/class/clc37e.h> > > +#include <linux/iosys-map.h> > + > #include <drm/drm_atomic.h> > #include <drm/drm_atomic_helper.h> > #include <drm/drm_blend.h> > -#include <drm/drm_gem_atomic_helper.h> > #include <drm/drm_fourcc.h> > +#include <drm/drm_framebuffer.h> > +#include <drm/drm_gem_atomic_helper.h> > +#include <drm/drm_panic.h> > +#include <drm/ttm/ttm_bo.h> > > #include "nouveau_bo.h" > #include "nouveau_gem.h" > @@ -577,6 +582,93 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, > struct drm_plane_state *state) > return 0; > } > > +#define NV_TILE_BLK_BASE_HEIGHT 8 /* In pixel */ > +#define NV_TILE_GOB_SIZE 64 /* In bytes */ > +#define NV_TILE_BLK_WIDTH (NV_TILE_GOB_SIZE / 4) /* For 32 bits > pixel */ > + > +/* get the offset in bytes inside the framebuffer, after taking > 
tiling into account */ > +static unsigned int nv50_get_tiled_offset(struct drm_scanout_buffer > *sb, unsigned int blk_h, > + unsigned int x, unsigned > int y) > +{ > + u32 blk_x, blk_y, blk_sz, blk_off, pitch; > + u32 swizzle; > + > + blk_sz = NV_TILE_GOB_SIZE * blk_h; > + pitch = DIV_ROUND_UP(sb->width, NV_TILE_BLK_WIDTH); > + > + /* block coordinate */ > + blk_x = x / NV_TILE_BLK_WIDTH; > + blk_y = y / blk_h; > + > + blk_off = ((blk_y * pitch) + blk_x) * blk_sz; > + > + y = y % blk_h; > + > + /* Inside the block, use the fast address swizzle to compute > the offset > + * For nvidia blocklinear, bit order is yn..y3 x3 y2 x2 y1 > y0 x1 x0 > + */ > + swizzle = (x & 3) | (y & 3) << 2 | (x & 4) << 2 | (y & 4) << 3; > + swizzle |= (x & 8) << 3 | (y >> 3) << 7; > + > + return blk_off + swizzle * 4; > +} > + > +static void nv50_set_pixel(struct drm_scanout_buffer *sb, unsigned > int x, unsigned int y, u32 color) > +{ > + struct drm_framebuffer *fb = sb->private; > + unsigned int off; > + /* According to DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D > documentation, > + * the last 4 bits of the modifier is log2(blk_height / > NV_TILE_BLK_BASE_HEIGHT) > + */ > + unsigned int blk_h = NV_TILE_BLK_BASE_HEIGHT * (1 << > (fb->modifier & 0xf)); > + > + off = nv50_get_tiled_offset(sb, blk_h, x, y); > + iosys_map_wr(&sb->map[0], off, u32, color); > +} > + > +static int > +nv50_wndw_get_scanout_buffer(struct drm_plane *plane, struct > drm_scanout_buffer *sb) > +{ > + struct drm_framebuffer *fb; > + struct nouveau_bo *nvbo; > + > + if (!plane->state || !plane->state->fb) > + return -EINVAL; > + > + fb = plane->state->fb; > + nvbo = nouveau_gem_object(fb->obj[0]); > + > + /* Don't support compressed format, or multiplane yet. 
*/ > + if (nvbo->comp || fb->format->num_planes != 1) > + return -EOPNOTSUPP; > + > + if (nouveau_bo_map(nvbo)) { > + pr_warn("nouveau bo map failed, panic won't be > displayed\n"); > + return -ENOMEM; > + } > + > + if (nvbo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK) > + iosys_map_set_vaddr_iomem(&sb->map[0], > nvbo->kmap.virtual); > + else > + iosys_map_set_vaddr(&sb->map[0], nvbo->kmap.virtual); > + > + sb->height = fb->height; > + sb->width = fb->width; > + sb->pitch[0] = fb->pitches[0]; > + sb->format = fb->format; > + > + /* If tiling is enabled, use the set_pixel() to display > correctly. > + * Only handle 32bits format for now. > + */ > + if (fb->modifier & 0xf) { > + if (fb->format->cpp[0] != 4) > + return -EOPNOTSUPP; > + sb->private = (void *) fb; > + sb->set_pixel = nv50_set_pixel; > + } > + return 0; > +} > + > static const struct drm_plane_helper_funcs > nv50_wndw_helper = { > .prepare_fb = nv50_wndw_prepare_fb, > @@ -584,6 +676,14 @@ nv50_wndw_helper = { > .atomic_check = nv50_wndw_atomic_check, > }; > > +static const struct drm_plane_helper_funcs > +nv50_wndw_primary_helper = { > + .prepare_fb = nv50_wndw_prepare_fb, > + .cleanup_fb = nv50_wndw_cleanup_fb, > + .atomic_check = nv50_wndw_atomic_check, > + .get_scanout_buffer = nv50_wndw_get_scanout_buffer, > +}; > + > static void > nv50_wndw_atomic_destroy_state(struct drm_plane *plane, > struct drm_plane_state *state) > @@ -732,7 +832,10 @@ nv50_wndw_new_(const struct nv50_wndw_func > *func, struct drm_device *dev, > return ret; > } > > - drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper); > + if (type == DRM_PLANE_TYPE_PRIMARY) > + drm_plane_helper_add(&wndw->plane, > &nv50_wndw_primary_helper); > + else > + drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper); > > if (wndw->func->ilut) { > ret = nv50_lut_init(disp, mmu, &wndw->ilut); > -- > 2.46.0 >
Hi Jocelyn, kernel test robot noticed the following build warnings: [auto build test WARNING on e8653e63e834e4c7de60b81b8b24deb7bdd3bf56] url: https://github.com/intel-lab-lkp/linux/commits/Jocelyn-Falempe/drm-panic-Add-ABGR2101010-support/20240906-180717 base: e8653e63e834e4c7de60b81b8b24deb7bdd3bf56 patch link: https://lore.kernel.org/r/20240906100434.1171093-4-jfalempe%40redhat.com patch subject: [PATCH v2 3/3] drm/nouveau: Add drm_panic support for nv50+ config: x86_64-randconfig-121-20240909 (https://download.01.org/0day-ci/archive/20240909/202409091805.3PzxoAY6-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240909/202409091805.3PzxoAY6-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202409091805.3PzxoAY6-lkp@intel.com/ sparse warnings: (new ones prefixed by >>) >> drivers/gpu/drm/nouveau/dispnv50/wndw.c:651:66: sparse: sparse: incorrect type in argument 2 (different address spaces) @@ expected void [noderef] __iomem *vaddr_iomem @@ got void *virtual @@ drivers/gpu/drm/nouveau/dispnv50/wndw.c:651:66: sparse: expected void [noderef] __iomem *vaddr_iomem drivers/gpu/drm/nouveau/dispnv50/wndw.c:651:66: sparse: got void *virtual vim +651 drivers/gpu/drm/nouveau/dispnv50/wndw.c 628 629 static int 630 nv50_wndw_get_scanout_buffer(struct drm_plane *plane, struct drm_scanout_buffer *sb) 631 { 632 struct drm_framebuffer *fb; 633 struct nouveau_bo *nvbo; 634 635 if (!plane->state || !plane->state->fb) 636 return -EINVAL; 637 638 fb = plane->state->fb; 639 nvbo = nouveau_gem_object(fb->obj[0]); 640 641 /* Don't support compressed format, or multiplane yet. 
*/ 642 if (nvbo->comp || fb->format->num_planes != 1) 643 return -EOPNOTSUPP; 644 645 if (nouveau_bo_map(nvbo)) { 646 pr_warn("nouveau bo map failed, panic won't be displayed\n"); 647 return -ENOMEM; 648 } 649 650 if (nvbo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK) > 651 iosys_map_set_vaddr_iomem(&sb->map[0], nvbo->kmap.virtual); 652 else 653 iosys_map_set_vaddr(&sb->map[0], nvbo->kmap.virtual); 654 655 sb->height = fb->height; 656 sb->width = fb->width; 657 sb->pitch[0] = fb->pitches[0]; 658 sb->format = fb->format; 659 660 /* If tiling is enabled, use the set_pixel() to display correctly. 661 * Only handle 32bits format for now. 662 */ 663 if (fb->modifier & 0xf) { 664 if (fb->format->cpp[0] != 4) 665 return -EOPNOTSUPP; 666 sb->private = (void *) fb; 667 sb->set_pixel = nv50_set_pixel; 668 } 669 return 0; 670 } 671
On 06/09/2024 21:36, James Jones wrote: > Right, there are 3 iterations of block linear tiling actually. NV50 does > support scanout of block linear surfaces. All block-linear-capable GPUs > do. The 3 generations are: > > NV5x/G8x/GTXXX line: Original block size. > GFXXX(nvc0 I believe in nouveau terms)-GV100: double the block height I > believe. > GTXXX+: Same block size, but the layout within a block is subtly > different, at least as visible in CPU mappings. > I'm looking at how to check for a specific chip in nouveau, and fix the tiling for other cards than Turing. It looks like in most cases nouveau uses device->info.chipset, with hardcoded hex values. So for nvc0+ I should check device->info.chipset >= 0xc0 ? chipset < c0 : block_height 4, "old layout" chipset >= c0 : block_height 8, "old layout" chipset >= ?? : block_height 8, "new layout" For testing, I have at hand a GTX1650 (Turing) and an old Geforce 8800GTS (Tesla?), so it's a NV92, and still uses this nv50+ code ? So I should be able to figure out if there is a tiling layout difference on my 8800GTS. > If there isn't enough info on the layouts in the reverse engineered > documentation, believe this is somewhat documented in drm_fourcc.h, and > it has a link to a technical reference manual describing the details of > the 2nd-gen layout (Pre-turing), though I believe it's behind a > developer account login wall that's free to sign up for but may require > agreeing to a EULA of some sort regarding the content. Once you get the > pre-turing layout working, just halve the block height to get it working > on NV5x. If that isn't sufficient, reach out and we'll see what we can > do to improve documentation or answer specific questions. I didn't find the link, but I think I have enough information, thanks for your support. Best regards,
On Wed, Sep 11, 2024 at 10:19 AM Jocelyn Falempe <jfalempe@redhat.com> wrote: > On 06/09/2024 21:36, James Jones wrote: > > Right, there are 3 iterations of block linear tiling actually. NV50 does > > support scanout of block linear surfaces. All block-linear-capable GPUs > > do. The 3 generations are: > > > > NV5x/G8x/GTXXX line: Original block size. > > GFXXX(nvc0 I believe in nouveau terms)-GV100: double the block height I > > believe. > > GTXXX+: Same block size, but the layout within a block is subtly > > different, at least as visible in CPU mappings. > > > > I'm looking at how to check for specific chip in nouveau, and fix the > tiling for other cards than Turing. > It looks like in most case nouveau uses device->info.chipset, with > hardcoded hex value. so for nvc0+ I should check device->info.chipset >= > 0xc0 ? > > chipset < c0 : block_height 4, "old layout" > chipset >= c0 : block_height 8, "old layout" > chispet >= ?? : block_height 8, "new layout" > > For testing, I have at hand a GTX1650 (Turing) and an old Geforce > 8800GTS (Tesla?), so it's a NV92, and still uses this nv50+ code ? > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c#n2399 So probably >= 0x160 There should also be a device->card_type which is an enum that you can use too, depending on what you have available? Set here: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c#n3177 Once you have a patch tested, ideally someone with the hardware should test on a >= fermi && < turing card. Hope this helps, -ilia
On 11/09/2024 16:39, Ilia Mirkin wrote: > On Wed, Sep 11, 2024 at 10:19 AM Jocelyn Falempe <jfalempe@redhat.com > <mailto:jfalempe@redhat.com>> wrote: > > On 06/09/2024 21:36, James Jones wrote: > > Right, there are 3 iterations of block linear tiling actually. > NV50 does > > support scanout of block linear surfaces. All block-linear- > capable GPUs > > do. The 3 generations are: > > > > NV5x/G8x/GTXXX line: Original block size. > > GFXXX(nvc0 I believe in nouveau terms)-GV100: double the block > height I > > believe. > > GTXXX+: Same block size, but the layout within a block is subtly > > different, at least as visible in CPU mappings. > > > > I'm looking at how to check for specific chip in nouveau, and fix the > tiling for other cards than Turing. > It looks like in most case nouveau uses device->info.chipset, with > hardcoded hex value. so for nvc0+ I should check device- > >info.chipset >= > 0xc0 ? > > chipset < c0 : block_height 4, "old layout" > chipset >= c0 : block_height 8, "old layout" > chispet >= ?? : block_height 8, "new layout" > > For testing, I have at hand a GTX1650 (Turing) and an old Geforce > 8800GTS (Tesla?), so it's a NV92, and still uses this nv50+ code ? > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/ > drivers/gpu/drm/nouveau/nvkm/engine/device/base.c#n2399 <https:// > git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/ > gpu/drm/nouveau/nvkm/engine/device/base.c#n2399> > > So probably >= 0x160 > > There should also be a device->card_type which is an enum that you can > use too, depending on what you have available? 
Set here: > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/ > drivers/gpu/drm/nouveau/nvkm/engine/device/base.c#n3177 <https:// > git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/ > gpu/drm/nouveau/nvkm/engine/device/base.c#n3177> > > Once you have a patch tested, ideally someone with the hardware should > test on a >= fermi && < turing card. I'm testing on my 8800 GTS. The problem I have is that fb->modifier is 0, but the buffer is still tiled. I found there is a nouveau_framebuffer_get_layout() which gives tile_mode and kind, and works on my Tesla. So if I understand correctly, if kind != 0, there is tiling, and the block size is 8 * (1 << tile_mode). (and half this on Tesla). At least tiling is much easier on Tesla, there is no tiling inside the block. In my test, block size is 4K, 16x64 pixels, tile_mode is 4, and kind is 122. I will send a v3 which works on Tesla and Turing soon. Thanks for your help.
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 7a2cceaee6e9..50ecf6f12b81 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -30,11 +30,16 @@ #include <nvhw/class/cl507e.h> #include <nvhw/class/clc37e.h> +#include <linux/iosys-map.h> + #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_blend.h> -#include <drm/drm_gem_atomic_helper.h> #include <drm/drm_fourcc.h> +#include <drm/drm_framebuffer.h> +#include <drm/drm_gem_atomic_helper.h> +#include <drm/drm_panic.h> +#include <drm/ttm/ttm_bo.h> #include "nouveau_bo.h" #include "nouveau_gem.h" @@ -577,6 +582,93 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) return 0; } +#define NV_TILE_BLK_BASE_HEIGHT 8 /* In pixel */ +#define NV_TILE_GOB_SIZE 64 /* In bytes */ +#define NV_TILE_BLK_WIDTH (NV_TILE_GOB_SIZE / 4) /* For 32 bits pixel */ + +/* get the offset in bytes inside the framebuffer, after taking tiling into account */ +static unsigned int nv50_get_tiled_offset(struct drm_scanout_buffer *sb, unsigned int blk_h, + unsigned int x, unsigned int y) +{ + u32 blk_x, blk_y, blk_sz, blk_off, pitch; + u32 swizzle; + + blk_sz = NV_TILE_GOB_SIZE * blk_h; + pitch = DIV_ROUND_UP(sb->width, NV_TILE_BLK_WIDTH); + + /* block coordinate */ + blk_x = x / NV_TILE_BLK_WIDTH; + blk_y = y / blk_h; + + blk_off = ((blk_y * pitch) + blk_x) * blk_sz; + + y = y % blk_h; + + /* Inside the block, use the fast address swizzle to compute the offset + * For nvidia blocklinear, bit order is yn..y3 x3 y2 x2 y1 y0 x1 x0 + */ + swizzle = (x & 3) | (y & 3) << 2 | (x & 4) << 2 | (y & 4) << 3; + swizzle |= (x & 8) << 3 | (y >> 3) << 7; + + return blk_off + swizzle * 4; +} + +static void nv50_set_pixel(struct drm_scanout_buffer *sb, unsigned int x, unsigned int y, u32 color) +{ + struct drm_framebuffer *fb = sb->private; + unsigned int off; + /* According to 
DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D documentation, + * the last 4 bits of the modifier is log2(blk_height / NV_TILE_BLK_BASE_HEIGHT) + */ + unsigned int blk_h = NV_TILE_BLK_BASE_HEIGHT * (1 << (fb->modifier & 0xf)); + + off = nv50_get_tiled_offset(sb, blk_h, x, y); + iosys_map_wr(&sb->map[0], off, u32, color); +} + +static int +nv50_wndw_get_scanout_buffer(struct drm_plane *plane, struct drm_scanout_buffer *sb) +{ + struct drm_framebuffer *fb; + struct nouveau_bo *nvbo; + + if (!plane->state || !plane->state->fb) + return -EINVAL; + + fb = plane->state->fb; + nvbo = nouveau_gem_object(fb->obj[0]); + + /* Don't support compressed format, or multiplane yet. */ + if (nvbo->comp || fb->format->num_planes != 1) + return -EOPNOTSUPP; + + if (nouveau_bo_map(nvbo)) { + pr_warn("nouveau bo map failed, panic won't be displayed\n"); + return -ENOMEM; + } + + if (nvbo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK) + iosys_map_set_vaddr_iomem(&sb->map[0], nvbo->kmap.virtual); + else + iosys_map_set_vaddr(&sb->map[0], nvbo->kmap.virtual); + + sb->height = fb->height; + sb->width = fb->width; + sb->pitch[0] = fb->pitches[0]; + sb->format = fb->format; + + /* If tiling is enabled, use the set_pixel() to display correctly. + * Only handle 32bits format for now. 
+ */ + if (fb->modifier & 0xf) { + if (fb->format->cpp[0] != 4) + return -EOPNOTSUPP; + sb->private = (void *) fb; + sb->set_pixel = nv50_set_pixel; + } + return 0; +} + static const struct drm_plane_helper_funcs nv50_wndw_helper = { .prepare_fb = nv50_wndw_prepare_fb, @@ -584,6 +676,14 @@ nv50_wndw_helper = { .atomic_check = nv50_wndw_atomic_check, }; +static const struct drm_plane_helper_funcs +nv50_wndw_primary_helper = { + .prepare_fb = nv50_wndw_prepare_fb, + .cleanup_fb = nv50_wndw_cleanup_fb, + .atomic_check = nv50_wndw_atomic_check, + .get_scanout_buffer = nv50_wndw_get_scanout_buffer, +}; + static void nv50_wndw_atomic_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) @@ -732,7 +832,10 @@ nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev, return ret; } - drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper); + if (type == DRM_PLANE_TYPE_PRIMARY) + drm_plane_helper_add(&wndw->plane, &nv50_wndw_primary_helper); + else + drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper); if (wndw->func->ilut) { ret = nv50_lut_init(disp, mmu, &wndw->ilut);
Add drm_panic support for nv50+ cards. It's enough to get the panic screen while running Gnome/Wayland on a GTX 1650. It doesn't support multi-plane or compressed formats. Support for other formats and older cards will come later. Tiling is only tested on GTX1650, and might be wrong for other cards. Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com> --- v2: * Rebase and drop already merged patches. * Rework the tiling algorithm, using "swizzle" to compute the offset inside the block. drivers/gpu/drm/nouveau/dispnv50/wndw.c | 107 +++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-)