Message ID | 20241113-topic-sm8x50-gpu-bw-vote-v1-4-3b8d39737a9b@linaro.org
---|---
State | Superseded
Series | drm/msm: adreno: add support for DDR bandwidth scaling via GMU
On Wed, Nov 13, 2024 at 04:48:30PM +0100, Neil Armstrong wrote:
> The Adreno GPU Management Unit (GMU) can also scale the ddr
> bandwidth along with the frequency and power domain level, but for
> now we statically fill the bw_table with values from the
> downstream driver.
> 
> Only the first entry is used, which is a disable vote, so we
> currently rely on scaling via the linux interconnect paths.
> 
> Let's dynamically generate the bw_table with the vote values
> previously calculated from the OPPs.

Nice to see this being worked upon. I hope the code is generic enough
so that we can use it from other adreno_foo_build_bw_table()
functions.

> 
> Those entries will then be used by the GMU when passing the
> appropriate bandwidth level when voting for a gpu frequency.
> 
> Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
> ---
>  drivers/gpu/drm/msm/adreno/a6xx_hfi.c | 48 +++++++++++++++++++++++++++--------
>  1 file changed, 37 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
> index cb8844ed46b29c4569d05eb7a24f7b27e173190f..9a89ba95843e7805d78f0e5ddbe328677b6431dd 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
> @@ -596,22 +596,48 @@ static void a730_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
>  	msg->cnoc_cmds_data[1][0] = 0x60000001;
>  }
>  
> -static void a740_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
> +static void a740_generate_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
> +				   struct a6xx_hfi_msg_bw_table *msg)
>  {
> -	msg->bw_level_num = 1;
> +	const struct a6xx_info *info = adreno_gpu->info->a6xx;
> +	unsigned int i, j;
>  
> -	msg->ddr_cmds_num = 3;
>  	msg->ddr_wait_bitmask = 0x7;
>  
> -	msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
> -	msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
> -	msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
> +	for (i = 0; i < 3; i++) {
> +		if (!info->bcm[i].name)
> +			break;
> +		msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcm[i].name);
> +	}
> +	msg->ddr_cmds_num = i;
>  
> -	msg->ddr_cmds_data[0][0] = 0x40000000;
> -	msg->ddr_cmds_data[0][1] = 0x40000000;
> -	msg->ddr_cmds_data[0][2] = 0x40000000;
> +	for (i = 0; i < gmu->nr_gpu_bws; ++i)
> +		for (j = 0; j < msg->ddr_cmds_num; j++)
> +			msg->ddr_cmds_data[i][j] = gmu->gpu_bw_votes[i][j];
> +	msg->bw_level_num = gmu->nr_gpu_bws;
> +}
> +
> +static void a740_build_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
> +				struct a6xx_hfi_msg_bw_table *msg)
> +{
> +	if ((adreno_gpu->info->quirks & ADRENO_QUIRK_GMU_BW_VOTE) && gmu->nr_gpu_bws) {
> +		a740_generate_bw_table(adreno_gpu, gmu, msg);
> +	} else {

Why do we need a fallback code here?

> +		msg->bw_level_num = 1;
>  
> -	/* TODO: add a proper dvfs table */
> +		msg->ddr_cmds_num = 3;
> +		msg->ddr_wait_bitmask = 0x7;
> +
> +		msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
> +		msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
> +		msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
> +
> +		msg->ddr_cmds_data[0][0] = 0x40000000;
> +		msg->ddr_cmds_data[0][1] = 0x40000000;
> +		msg->ddr_cmds_data[0][2] = 0x40000000;
> +
> +		/* TODO: add a proper dvfs table */

I think the TODO is not applicable anymore.

> +	}
>  
>  	msg->cnoc_cmds_num = 1;
>  	msg->cnoc_wait_bitmask = 0x1;
> @@ -691,7 +717,7 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu)
>  	else if (adreno_is_a730(adreno_gpu))
>  		a730_build_bw_table(msg);
>  	else if (adreno_is_a740_family(adreno_gpu))
> -		a740_build_bw_table(msg);
> +		a740_build_bw_table(adreno_gpu, gmu, msg);
>  	else
>  		a6xx_build_bw_table(msg);
>  
> 
> -- 
> 2.34.1
> 
On 15/11/2024 08:24, Dmitry Baryshkov wrote:
> On Wed, Nov 13, 2024 at 04:48:30PM +0100, Neil Armstrong wrote:
>> The Adreno GPU Management Unit (GMU) can also scale the ddr
>> bandwidth along with the frequency and power domain level, but for
>> now we statically fill the bw_table with values from the
>> downstream driver.
>>
>> Only the first entry is used, which is a disable vote, so we
>> currently rely on scaling via the linux interconnect paths.
>>
>> Let's dynamically generate the bw_table with the vote values
>> previously calculated from the OPPs.
> 
> Nice to see this being worked upon. I hope the code is generic
> enough so that we can use it from other adreno_foo_build_bw_table()
> functions.

I would hope so, but I don't have the HW to properly test it on those
platforms.

> 
>>
>> Those entries will then be used by the GMU when passing the
>> appropriate bandwidth level when voting for a gpu frequency.
>>
>> Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
>> ---
>>  drivers/gpu/drm/msm/adreno/a6xx_hfi.c | 48 +++++++++++++++++++++++++++--------
>>  1 file changed, 37 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
>> index cb8844ed46b29c4569d05eb7a24f7b27e173190f..9a89ba95843e7805d78f0e5ddbe328677b6431dd 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
>> @@ -596,22 +596,48 @@ static void a730_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
>>  	msg->cnoc_cmds_data[1][0] = 0x60000001;
>>  }
>>  
>> -static void a740_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
>> +static void a740_generate_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
>> +				   struct a6xx_hfi_msg_bw_table *msg)
>>  {
>> -	msg->bw_level_num = 1;
>> +	const struct a6xx_info *info = adreno_gpu->info->a6xx;
>> +	unsigned int i, j;
>>  
>> -	msg->ddr_cmds_num = 3;
>>  	msg->ddr_wait_bitmask = 0x7;
>>  
>> -	msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
>> -	msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
>> -	msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
>> +	for (i = 0; i < 3; i++) {
>> +		if (!info->bcm[i].name)
>> +			break;
>> +		msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcm[i].name);
>> +	}
>> +	msg->ddr_cmds_num = i;
>>  
>> -	msg->ddr_cmds_data[0][0] = 0x40000000;
>> -	msg->ddr_cmds_data[0][1] = 0x40000000;
>> -	msg->ddr_cmds_data[0][2] = 0x40000000;
>> +	for (i = 0; i < gmu->nr_gpu_bws; ++i)
>> +		for (j = 0; j < msg->ddr_cmds_num; j++)
>> +			msg->ddr_cmds_data[i][j] = gmu->gpu_bw_votes[i][j];
>> +	msg->bw_level_num = gmu->nr_gpu_bws;
>> +}
>> +
>> +static void a740_build_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
>> +				struct a6xx_hfi_msg_bw_table *msg)
>> +{
>> +	if ((adreno_gpu->info->quirks & ADRENO_QUIRK_GMU_BW_VOTE) && gmu->nr_gpu_bws) {
>> +		a740_generate_bw_table(adreno_gpu, gmu, msg);
>> +	} else {
> 
> Why do we need a fallback code here?

Because at this particular commit, it would generate an invalid table,
I should probably remove the fallback at the end.

> 
>> +		msg->bw_level_num = 1;
>>  
>> -	/* TODO: add a proper dvfs table */
>> +		msg->ddr_cmds_num = 3;
>> +		msg->ddr_wait_bitmask = 0x7;
>> +
>> +		msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
>> +		msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
>> +		msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
>> +
>> +		msg->ddr_cmds_data[0][0] = 0x40000000;
>> +		msg->ddr_cmds_data[0][1] = 0x40000000;
>> +		msg->ddr_cmds_data[0][2] = 0x40000000;
>> +
>> +		/* TODO: add a proper dvfs table */
> 
> I think the TODO is not applicable anymore.
> 
>> +	}
>>  
>>  	msg->cnoc_cmds_num = 1;
>>  	msg->cnoc_wait_bitmask = 0x1;
>> @@ -691,7 +717,7 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu)
>>  	else if (adreno_is_a730(adreno_gpu))
>>  		a730_build_bw_table(msg);
>>  	else if (adreno_is_a740_family(adreno_gpu))
>> -		a740_build_bw_table(msg);
>> +		a740_build_bw_table(adreno_gpu, gmu, msg);
>>  	else
>>  		a6xx_build_bw_table(msg);
>>  
>>
>> -- 
>> 2.34.1
>>
> 
On Fri, Nov 15, 2024 at 10:11:09AM +0100, Neil Armstrong wrote:
> On 15/11/2024 08:24, Dmitry Baryshkov wrote:
> > On Wed, Nov 13, 2024 at 04:48:30PM +0100, Neil Armstrong wrote:
> > > The Adreno GPU Management Unit (GMU) can also scale the ddr
> > > bandwidth along with the frequency and power domain level, but for
> > > now we statically fill the bw_table with values from the
> > > downstream driver.
> > > 
> > > Only the first entry is used, which is a disable vote, so we
> > > currently rely on scaling via the linux interconnect paths.
> > > 
> > > Let's dynamically generate the bw_table with the vote values
> > > previously calculated from the OPPs.
> > 
> > Nice to see this being worked upon. I hope the code is generic
> > enough so that we can use it from other adreno_foo_build_bw_table()
> > functions.
> 
> I would hope so, but I don't have the HW to properly test it on those
> platforms.

Welcome to the club^W Lab.

> > > Those entries will then be used by the GMU when passing the
> > > appropriate bandwidth level when voting for a gpu frequency.
> > > 
> > > Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
> > > ---
> > >  drivers/gpu/drm/msm/adreno/a6xx_hfi.c | 48 +++++++++++++++++++++++++++--------
> > >  1 file changed, 37 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
> > > index cb8844ed46b29c4569d05eb7a24f7b27e173190f..9a89ba95843e7805d78f0e5ddbe328677b6431dd 100644
> > > --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
> > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
> > > @@ -596,22 +596,48 @@ static void a730_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
> > >  	msg->cnoc_cmds_data[1][0] = 0x60000001;
> > >  }
> > >  
> > > -static void a740_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
> > > +static void a740_generate_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
> > > +				   struct a6xx_hfi_msg_bw_table *msg)
> > >  {
> > > -	msg->bw_level_num = 1;
> > > +	const struct a6xx_info *info = adreno_gpu->info->a6xx;
> > > +	unsigned int i, j;
> > >  
> > > -	msg->ddr_cmds_num = 3;
> > >  	msg->ddr_wait_bitmask = 0x7;
> > >  
> > > -	msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
> > > -	msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
> > > -	msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
> > > +	for (i = 0; i < 3; i++) {
> > > +		if (!info->bcm[i].name)
> > > +			break;
> > > +		msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcm[i].name);
> > > +	}
> > > +	msg->ddr_cmds_num = i;
> > >  
> > > -	msg->ddr_cmds_data[0][0] = 0x40000000;
> > > -	msg->ddr_cmds_data[0][1] = 0x40000000;
> > > -	msg->ddr_cmds_data[0][2] = 0x40000000;
> > > +	for (i = 0; i < gmu->nr_gpu_bws; ++i)
> > > +		for (j = 0; j < msg->ddr_cmds_num; j++)
> > > +			msg->ddr_cmds_data[i][j] = gmu->gpu_bw_votes[i][j];
> > > +	msg->bw_level_num = gmu->nr_gpu_bws;
> > > +}
> > > +
> > > +static void a740_build_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
> > > +				struct a6xx_hfi_msg_bw_table *msg)
> > > +{
> > > +	if ((adreno_gpu->info->quirks & ADRENO_QUIRK_GMU_BW_VOTE) && gmu->nr_gpu_bws) {
> > > +		a740_generate_bw_table(adreno_gpu, gmu, msg);
> > > +	} else {
> > 
> > Why do we need a fallback code here?
> 
> Because at this particular commit, it would generate an invalid table,
> I should probably remove the fallback at the end.

Or move this to generic code that generates a table if there is no bw
data (like there is none for older platforms with the current DTs).

> > 
> > > +		msg->bw_level_num = 1;
> > >  
> > > -	/* TODO: add a proper dvfs table */
> > > +		msg->ddr_cmds_num = 3;
> > > +		msg->ddr_wait_bitmask = 0x7;
> > > +
> > > +		msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
> > > +		msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
> > > +		msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
> > > +
> > > +		msg->ddr_cmds_data[0][0] = 0x40000000;
> > > +		msg->ddr_cmds_data[0][1] = 0x40000000;
> > > +		msg->ddr_cmds_data[0][2] = 0x40000000;
> > > +
> > > +		/* TODO: add a proper dvfs table */
> > 
> > I think the TODO is not applicable anymore.
> > 
> > > +	}
> > >  
> > >  	msg->cnoc_cmds_num = 1;
> > >  	msg->cnoc_wait_bitmask = 0x1;
> > > @@ -691,7 +717,7 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu)
> > >  	else if (adreno_is_a730(adreno_gpu))
> > >  		a730_build_bw_table(msg);
> > >  	else if (adreno_is_a740_family(adreno_gpu))
> > > -		a740_build_bw_table(msg);
> > > +		a740_build_bw_table(adreno_gpu, gmu, msg);
> > >  	else
> > >  		a6xx_build_bw_table(msg);
> > > 
> > > 
> > > -- 
> > > 2.34.1
> > > 
> > 
> 
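As a rough sketch of the generic fallback Dmitry suggests above, a shared helper could emit a single disable-vote level from a list of BCM names whenever no per-OPP bandwidth data is available (as on older platforms with current DTs). The helper name, parameters and placement below are assumptions made for illustration only and are not part of the posted series.

```c
/* Illustrative sketch only -- not part of the posted patch. */
static void a6xx_build_default_bw_table(struct a6xx_hfi_msg_bw_table *msg,
					const char * const *bcm_names,
					unsigned int count)
{
	unsigned int i;

	/* Single level: the static "disable" vote taken from the downstream driver */
	msg->bw_level_num = 1;
	msg->ddr_cmds_num = count;
	msg->ddr_wait_bitmask = (1 << count) - 1;

	for (i = 0; i < count; i++) {
		msg->ddr_cmds_addrs[i] = cmd_db_read_addr(bcm_names[i]);
		msg->ddr_cmds_data[0][i] = 0x40000000;
	}
}
```

Each aNxx_build_bw_table() could then pass its own BCM name list (e.g. "SH0", "MC0", "ACV" for the A740 case) instead of open-coding the fallback.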
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
index cb8844ed46b29c4569d05eb7a24f7b27e173190f..9a89ba95843e7805d78f0e5ddbe328677b6431dd 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
@@ -596,22 +596,48 @@ static void a730_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
 	msg->cnoc_cmds_data[1][0] = 0x60000001;
 }
 
-static void a740_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
+static void a740_generate_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
+				   struct a6xx_hfi_msg_bw_table *msg)
 {
-	msg->bw_level_num = 1;
+	const struct a6xx_info *info = adreno_gpu->info->a6xx;
+	unsigned int i, j;
 
-	msg->ddr_cmds_num = 3;
 	msg->ddr_wait_bitmask = 0x7;
 
-	msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
-	msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
-	msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
+	for (i = 0; i < 3; i++) {
+		if (!info->bcm[i].name)
+			break;
+		msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcm[i].name);
+	}
+	msg->ddr_cmds_num = i;
 
-	msg->ddr_cmds_data[0][0] = 0x40000000;
-	msg->ddr_cmds_data[0][1] = 0x40000000;
-	msg->ddr_cmds_data[0][2] = 0x40000000;
+	for (i = 0; i < gmu->nr_gpu_bws; ++i)
+		for (j = 0; j < msg->ddr_cmds_num; j++)
+			msg->ddr_cmds_data[i][j] = gmu->gpu_bw_votes[i][j];
+	msg->bw_level_num = gmu->nr_gpu_bws;
+}
+
+static void a740_build_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
+				struct a6xx_hfi_msg_bw_table *msg)
+{
+	if ((adreno_gpu->info->quirks & ADRENO_QUIRK_GMU_BW_VOTE) && gmu->nr_gpu_bws) {
+		a740_generate_bw_table(adreno_gpu, gmu, msg);
+	} else {
+		msg->bw_level_num = 1;
 
-	/* TODO: add a proper dvfs table */
+		msg->ddr_cmds_num = 3;
+		msg->ddr_wait_bitmask = 0x7;
+
+		msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
+		msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
+		msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
+
+		msg->ddr_cmds_data[0][0] = 0x40000000;
+		msg->ddr_cmds_data[0][1] = 0x40000000;
+		msg->ddr_cmds_data[0][2] = 0x40000000;
+
+		/* TODO: add a proper dvfs table */
+	}
 
 	msg->cnoc_cmds_num = 1;
 	msg->cnoc_wait_bitmask = 0x1;
@@ -691,7 +717,7 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu)
 	else if (adreno_is_a730(adreno_gpu))
 		a730_build_bw_table(msg);
 	else if (adreno_is_a740_family(adreno_gpu))
-		a740_build_bw_table(msg);
+		a740_build_bw_table(adreno_gpu, gmu, msg);
 	else
 		a6xx_build_bw_table(msg);
 
The Adreno GPU Management Unit (GMU) can also scale the ddr
bandwidth along with the frequency and power domain level, but for
now we statically fill the bw_table with values from the
downstream driver.

Only the first entry is used, which is a disable vote, so we
currently rely on scaling via the linux interconnect paths.

Let's dynamically generate the bw_table with the vote values
previously calculated from the OPPs.

Those entries will then be used by the GMU when passing the
appropriate bandwidth level when voting for a gpu frequency.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_hfi.c | 48 +++++++++++++++++++++++++++--------
 1 file changed, 37 insertions(+), 11 deletions(-)
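To make the last paragraph concrete, below is a minimal sketch of how one of the pre-computed vote levels could be selected for a requested DDR bandwidth. The helper name and the gpu_bw_kbps[] companion array (the bandwidth each vote level corresponds to) are assumptions for illustration and are not defined by this patch.

```c
/* Illustrative sketch only -- the helper and gpu_bw_kbps[] are assumed. */
static unsigned int a6xx_gmu_bw_level(const struct a6xx_gmu *gmu,
				      const u32 *gpu_bw_kbps, u32 req_kbps)
{
	unsigned int i, level = 0;

	/* Pick the highest generated level whose bandwidth fits the request */
	for (i = 0; i < gmu->nr_gpu_bws; i++)
		if (gpu_bw_kbps[i] <= req_kbps)
			level = i;

	return level;
}
```

The resulting index would then accompany the frequency vote sent to the GMU, which is what the dynamically generated bw_table in this patch makes possible.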