@@ -946,8 +946,7 @@ gm204_grctx_generate_rop_active_fbps(struct gf100_gr_priv *priv)
static void
gm204_grctx_generate_405b60(struct gf100_gr_priv *priv)
{
- const u32 dist_nr = DIV_ROUND_UP(priv->tpc_total, 4);
- u32 dist[TPC_MAX] = {};
+ u32 dist = 0;
u32 gpcs[GPC_MAX] = {};
u8 tpcnr[GPC_MAX];
int tpc, gpc, i;
@@ -964,12 +963,16 @@ gm204_grctx_generate_405b60(struct gf100_gr_priv *priv)
} while(!tpcnr[gpc]);
tpc = priv->tpc_nr[gpc] - tpcnr[gpc]--;
- dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
+ dist |= ((gpc << 4) | tpc) << ((i % 4) * 8);
+ if ((i % 4) == 3) {
+ nv_wr32(priv, 0x405b60 + i - 3, dist);
+ dist = 0;
+ }
gpcs[gpc] |= i << (tpc * 8);
}
+ if (i % 4)
+ nv_wr32(priv, 0x405b60 + round_down(i, 4), dist);
- for (i = 0; i < dist_nr; i++)
- nv_wr32(priv, 0x405b60 + (i * 4), dist[i]);
for (i = 0; i < priv->gpc_nr; i++)
nv_wr32(priv, 0x405ba0 + (i * 4), gpcs[i]);
}
This function keeps causing the kernel build to whine at me about its 1200-byte stack frame. (TPC_MAX = 8 * GPC_MAX = 8 * 32 = 256) The following generates the same nv_wr32() operations in the same order as the original, but without the 256-word dist[] array. (I'm not sure if 4-bit fields inside the dist bytes can accommodate priv->gpc_nr greater than 16, so the other arrays could have halved in size, too, but that's a separate issue.) I don't have the necessary hardware to test this, but would someone mind doing so? (If you want to declare *priv const while you're at it, I'm not likely to object.) Signed-off-by: George Spelvin <linux@horizon.com> --- drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm204.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-)