[RFC,4/8] drm/msm: adreno: dynamically generate GMU bw table

Message ID: 20241113-topic-sm8x50-gpu-bw-vote-v1-4-3b8d39737a9b@linaro.org
State: Superseded
Series: drm/msm: adreno: add support for DDR bandwidth scaling via GMU

Commit Message

Neil Armstrong Nov. 13, 2024, 3:48 p.m. UTC
The Adreno GPU Management Unit (GMU) can also scale the DDR
bandwidth along with the frequency and power domain levels, but for
now we statically fill the bw_table with values from the
downstream driver.

Only the first entry is used, which is a disable vote, so we
currently rely on scaling via the Linux interconnect paths.

Let's dynamically generate the bw_table with the vote values
previously calculated from the OPPs.

Those entries will then be used by the GMU to pass the
appropriate bandwidth level when voting for a GPU frequency.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_hfi.c | 48 +++++++++++++++++++++++++++--------
 1 file changed, 37 insertions(+), 11 deletions(-)
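
For reference, the 0x40000000 value programmed by the static tables is,
if I read the encoding in <soc/qcom/tcs.h> right, a BCM TCS command with
only the commit bit set and a zero bandwidth vote, i.e. the disable vote
mentioned above:

	#include <soc/qcom/tcs.h>

	/* Illustration only, not part of the patch: a committed,
	 * zero-bandwidth (i.e. disable) BCM vote. */
	u32 disable_vote = BCM_TCS_CMD(1, 0, 0, 0);	/* == 0x40000000 */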

Comments

Dmitry Baryshkov Nov. 15, 2024, 7:24 a.m. UTC | #1
On Wed, Nov 13, 2024 at 04:48:30PM +0100, Neil Armstrong wrote:
> The Adreno GPU Management Unit (GMU) can also scale the DDR
> bandwidth along with the frequency and power domain levels, but for
> now we statically fill the bw_table with values from the
> downstream driver.
> 
> Only the first entry is used, which is a disable vote, so we
> currently rely on scaling via the Linux interconnect paths.
> 
> Let's dynamically generate the bw_table with the vote values
> previously calculated from the OPPs.

Nice to see this being worked on. I hope the code is generic
enough that we can use it from the other adreno_foo_build_bw_table()
functions.

> 
> Those entries will then be used by the GMU to pass the
> appropriate bandwidth level when voting for a GPU frequency.
> 
> Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
> ---
>  drivers/gpu/drm/msm/adreno/a6xx_hfi.c | 48 +++++++++++++++++++++++++++--------
>  1 file changed, 37 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
> index cb8844ed46b29c4569d05eb7a24f7b27e173190f..9a89ba95843e7805d78f0e5ddbe328677b6431dd 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
> @@ -596,22 +596,48 @@ static void a730_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
>  	msg->cnoc_cmds_data[1][0] = 0x60000001;
>  }
>  
> -static void a740_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
> +static void a740_generate_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
> +				   struct a6xx_hfi_msg_bw_table *msg)
>  {
> -	msg->bw_level_num = 1;
> +	const struct a6xx_info *info = adreno_gpu->info->a6xx;
> +	unsigned int i, j;
>  
> -	msg->ddr_cmds_num = 3;
>  	msg->ddr_wait_bitmask = 0x7;
>  
> -	msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
> -	msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
> -	msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
> +	for (i = 0; i < 3; i++) {
> +		if (!info->bcm[i].name)
> +			break;
> +		msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcm[i].name);
> +	}
> +	msg->ddr_cmds_num = i;
>  
> -	msg->ddr_cmds_data[0][0] = 0x40000000;
> -	msg->ddr_cmds_data[0][1] = 0x40000000;
> -	msg->ddr_cmds_data[0][2] = 0x40000000;
> +	for (i = 0; i < gmu->nr_gpu_bws; ++i)
> +		for (j = 0; j < msg->ddr_cmds_num; j++)
> +			msg->ddr_cmds_data[i][j] = gmu->gpu_bw_votes[i][j];
> +	msg->bw_level_num = gmu->nr_gpu_bws;
> +}
> +
> +static void a740_build_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
> +				struct a6xx_hfi_msg_bw_table *msg)
> +{
> +	if ((adreno_gpu->info->quirks & ADRENO_QUIRK_GMU_BW_VOTE) && gmu->nr_gpu_bws) {
> +		a740_generate_bw_table(adreno_gpu, gmu, msg);
> +	} else {

Why do we need the fallback code here?

> +		msg->bw_level_num = 1;
>  
> -	/* TODO: add a proper dvfs table */
> +		msg->ddr_cmds_num = 3;
> +		msg->ddr_wait_bitmask = 0x7;
> +
> +		msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
> +		msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
> +		msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
> +
> +		msg->ddr_cmds_data[0][0] = 0x40000000;
> +		msg->ddr_cmds_data[0][1] = 0x40000000;
> +		msg->ddr_cmds_data[0][2] = 0x40000000;
> +
> +		/* TODO: add a proper dvfs table */

I think the TODO is no longer applicable.

> +	}
>  
>  	msg->cnoc_cmds_num = 1;
>  	msg->cnoc_wait_bitmask = 0x1;
> @@ -691,7 +717,7 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu)
>  	else if (adreno_is_a730(adreno_gpu))
>  		a730_build_bw_table(msg);
>  	else if (adreno_is_a740_family(adreno_gpu))
> -		a740_build_bw_table(msg);
> +		a740_build_bw_table(adreno_gpu, gmu, msg);
>  	else
>  		a6xx_build_bw_table(msg);
>  
> 
> -- 
> 2.34.1
>
Neil Armstrong Nov. 15, 2024, 9:11 a.m. UTC | #2
On 15/11/2024 08:24, Dmitry Baryshkov wrote:
> On Wed, Nov 13, 2024 at 04:48:30PM +0100, Neil Armstrong wrote:
>> The Adreno GPU Management Unit (GMU) can also scale the DDR
>> bandwidth along with the frequency and power domain levels, but for
>> now we statically fill the bw_table with values from the
>> downstream driver.
>>
>> Only the first entry is used, which is a disable vote, so we
>> currently rely on scaling via the Linux interconnect paths.
>>
>> Let's dynamically generate the bw_table with the vote values
>> previously calculated from the OPPs.
> 
> Nice to see this being worked on. I hope the code is generic
> enough that we can use it from the other adreno_foo_build_bw_table()
> functions.

I would hope so, but I don't have the HW to properly test it on those
platforms.
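
In principle the other platforms should mostly need their BCM names
wired up in the catalog, along these lines (hypothetical sketch: the
bcm field layout is only inferred from the info->bcm[i].name usage in
this patch, and the names below are simply the A740 ones):

	/* Hypothetical catalog entry for another GMU platform; each
	 * SoC would list the BCMs from its downstream bw table instead. */
	static const struct a6xx_info some_gpu_info = {
		.bcm = {
			{ .name = "SH0" },
			{ .name = "MC0" },
			{ .name = "ACV" },
		},
	};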

> 
>>
>> Those entries will then be used by the GMU to pass the
>> appropriate bandwidth level when voting for a GPU frequency.
>>
>> Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
>> ---
>>   drivers/gpu/drm/msm/adreno/a6xx_hfi.c | 48 +++++++++++++++++++++++++++--------
>>   1 file changed, 37 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
>> index cb8844ed46b29c4569d05eb7a24f7b27e173190f..9a89ba95843e7805d78f0e5ddbe328677b6431dd 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
>> @@ -596,22 +596,48 @@ static void a730_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
>>   	msg->cnoc_cmds_data[1][0] = 0x60000001;
>>   }
>>   
>> -static void a740_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
>> +static void a740_generate_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
>> +				   struct a6xx_hfi_msg_bw_table *msg)
>>   {
>> -	msg->bw_level_num = 1;
>> +	const struct a6xx_info *info = adreno_gpu->info->a6xx;
>> +	unsigned int i, j;
>>   
>> -	msg->ddr_cmds_num = 3;
>>   	msg->ddr_wait_bitmask = 0x7;
>>   
>> -	msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
>> -	msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
>> -	msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
>> +	for (i = 0; i < 3; i++) {
>> +		if (!info->bcm[i].name)
>> +			break;
>> +		msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcm[i].name);
>> +	}
>> +	msg->ddr_cmds_num = i;
>>   
>> -	msg->ddr_cmds_data[0][0] = 0x40000000;
>> -	msg->ddr_cmds_data[0][1] = 0x40000000;
>> -	msg->ddr_cmds_data[0][2] = 0x40000000;
>> +	for (i = 0; i < gmu->nr_gpu_bws; ++i)
>> +		for (j = 0; j < msg->ddr_cmds_num; j++)
>> +			msg->ddr_cmds_data[i][j] = gmu->gpu_bw_votes[i][j];
>> +	msg->bw_level_num = gmu->nr_gpu_bws;
>> +}
>> +
>> +static void a740_build_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
>> +				struct a6xx_hfi_msg_bw_table *msg)
>> +{
>> +	if ((adreno_gpu->info->quirks & ADRENO_QUIRK_GMU_BW_VOTE) && gmu->nr_gpu_bws) {
>> +		a740_generate_bw_table(adreno_gpu, gmu, msg);
>> +	} else {
> 
> Why do we need the fallback code here?

Because at this particular commit it would generate an invalid table. I should probably remove the fallback at the end of the series.

> 
>> +		msg->bw_level_num = 1;
>>   
>> -	/* TODO: add a proper dvfs table */
>> +		msg->ddr_cmds_num = 3;
>> +		msg->ddr_wait_bitmask = 0x7;
>> +
>> +		msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
>> +		msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
>> +		msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
>> +
>> +		msg->ddr_cmds_data[0][0] = 0x40000000;
>> +		msg->ddr_cmds_data[0][1] = 0x40000000;
>> +		msg->ddr_cmds_data[0][2] = 0x40000000;
>> +
>> +		/* TODO: add a proper dvfs table */
> 
> I think the TODO is no longer applicable.
> 
>> +	}
>>   
>>   	msg->cnoc_cmds_num = 1;
>>   	msg->cnoc_wait_bitmask = 0x1;
>> @@ -691,7 +717,7 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu)
>>   	else if (adreno_is_a730(adreno_gpu))
>>   		a730_build_bw_table(msg);
>>   	else if (adreno_is_a740_family(adreno_gpu))
>> -		a740_build_bw_table(msg);
>> +		a740_build_bw_table(adreno_gpu, gmu, msg);
>>   	else
>>   		a6xx_build_bw_table(msg);
>>   
>>
>> -- 
>> 2.34.1
>>
>
Dmitry Baryshkov Nov. 15, 2024, 2:35 p.m. UTC | #3
On Fri, Nov 15, 2024 at 10:11:09AM +0100, Neil Armstrong wrote:
> On 15/11/2024 08:24, Dmitry Baryshkov wrote:
> > On Wed, Nov 13, 2024 at 04:48:30PM +0100, Neil Armstrong wrote:
> > > The Adreno GPU Management Unit (GMU) can also scale the DDR
> > > bandwidth along with the frequency and power domain levels, but for
> > > now we statically fill the bw_table with values from the
> > > downstream driver.
> > > 
> > > Only the first entry is used, which is a disable vote, so we
> > > currently rely on scaling via the Linux interconnect paths.
> > > 
> > > Let's dynamically generate the bw_table with the vote values
> > > previously calculated from the OPPs.
> > 
> > Nice to see this being worked on. I hope the code is generic
> > enough that we can use it from the other adreno_foo_build_bw_table()
> > functions.
> 
> I would hope so, but I don't have the HW to properly test it on those
> platforms.

Welcome to the club^W Lab.

> > > Those entries will then be used by the GMU to pass the
> > > appropriate bandwidth level when voting for a GPU frequency.
> > > 
> > > Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
> > > ---
> > >   drivers/gpu/drm/msm/adreno/a6xx_hfi.c | 48 +++++++++++++++++++++++++++--------
> > >   1 file changed, 37 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
> > > index cb8844ed46b29c4569d05eb7a24f7b27e173190f..9a89ba95843e7805d78f0e5ddbe328677b6431dd 100644
> > > --- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
> > > +++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
> > > @@ -596,22 +596,48 @@ static void a730_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
> > >   	msg->cnoc_cmds_data[1][0] = 0x60000001;
> > >   }
> > > -static void a740_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
> > > +static void a740_generate_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
> > > +				   struct a6xx_hfi_msg_bw_table *msg)
> > >   {
> > > -	msg->bw_level_num = 1;
> > > +	const struct a6xx_info *info = adreno_gpu->info->a6xx;
> > > +	unsigned int i, j;
> > > -	msg->ddr_cmds_num = 3;
> > >   	msg->ddr_wait_bitmask = 0x7;
> > > -	msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
> > > -	msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
> > > -	msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
> > > +	for (i = 0; i < 3; i++) {
> > > +		if (!info->bcm[i].name)
> > > +			break;
> > > +		msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcm[i].name);
> > > +	}
> > > +	msg->ddr_cmds_num = i;
> > > -	msg->ddr_cmds_data[0][0] = 0x40000000;
> > > -	msg->ddr_cmds_data[0][1] = 0x40000000;
> > > -	msg->ddr_cmds_data[0][2] = 0x40000000;
> > > +	for (i = 0; i < gmu->nr_gpu_bws; ++i)
> > > +		for (j = 0; j < msg->ddr_cmds_num; j++)
> > > +			msg->ddr_cmds_data[i][j] = gmu->gpu_bw_votes[i][j];
> > > +	msg->bw_level_num = gmu->nr_gpu_bws;
> > > +}
> > > +
> > > +static void a740_build_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
> > > +				struct a6xx_hfi_msg_bw_table *msg)
> > > +{
> > > +	if ((adreno_gpu->info->quirks & ADRENO_QUIRK_GMU_BW_VOTE) && gmu->nr_gpu_bws) {
> > > +		a740_generate_bw_table(adreno_gpu, gmu, msg);
> > > +	} else {
> > 
> > Why do we need the fallback code here?
> 
> Because at this particular commit it would generate an invalid table. I should probably remove the fallback at the end of the series.

Or move this to generic code that generates a table when there is no bw
data (like there is none for older platforms with the current DTs).
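
Something like this completely untested sketch, reusing the legacy
single-level table as the shared fallback (whether the SH0/MC0/ACV set
is valid for every GMU platform is an open question):

	static void a6xx_build_fallback_bw_table(struct a6xx_hfi_msg_bw_table *msg)
	{
		/* Single level carrying a committed zero ("disable") vote,
		 * so DDR scaling stays on the interconnect path. */
		msg->bw_level_num = 1;

		msg->ddr_cmds_num = 3;
		msg->ddr_wait_bitmask = 0x7;

		msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
		msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
		msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");

		msg->ddr_cmds_data[0][0] = 0x40000000;
		msg->ddr_cmds_data[0][1] = 0x40000000;
		msg->ddr_cmds_data[0][2] = 0x40000000;
	}

and then a740_build_bw_table() only has to pick one of the two:

	if ((adreno_gpu->info->quirks & ADRENO_QUIRK_GMU_BW_VOTE) && gmu->nr_gpu_bws)
		a740_generate_bw_table(adreno_gpu, gmu, msg);
	else
		a6xx_build_fallback_bw_table(msg);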

> 
> > 
> > > +		msg->bw_level_num = 1;
> > > -	/* TODO: add a proper dvfs table */
> > > +		msg->ddr_cmds_num = 3;
> > > +		msg->ddr_wait_bitmask = 0x7;
> > > +
> > > +		msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
> > > +		msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
> > > +		msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
> > > +
> > > +		msg->ddr_cmds_data[0][0] = 0x40000000;
> > > +		msg->ddr_cmds_data[0][1] = 0x40000000;
> > > +		msg->ddr_cmds_data[0][2] = 0x40000000;
> > > +
> > > +		/* TODO: add a proper dvfs table */
> > 
> > I think the TODO is no longer applicable.
> > 
> > > +	}
> > >   	msg->cnoc_cmds_num = 1;
> > >   	msg->cnoc_wait_bitmask = 0x1;
> > > @@ -691,7 +717,7 @@ static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu)
> > >   	else if (adreno_is_a730(adreno_gpu))
> > >   		a730_build_bw_table(msg);
> > >   	else if (adreno_is_a740_family(adreno_gpu))
> > > -		a740_build_bw_table(msg);
> > > +		a740_build_bw_table(adreno_gpu, gmu, msg);
> > >   	else
> > >   		a6xx_build_bw_table(msg);
> > > 
> > > -- 
> > > 2.34.1
> > > 
> > 
>

Patch

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
index cb8844ed46b29c4569d05eb7a24f7b27e173190f..9a89ba95843e7805d78f0e5ddbe328677b6431dd 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_hfi.c
@@ -596,22 +596,48 @@  static void a730_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
 	msg->cnoc_cmds_data[1][0] = 0x60000001;
 }
 
-static void a740_build_bw_table(struct a6xx_hfi_msg_bw_table *msg)
+static void a740_generate_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
+				   struct a6xx_hfi_msg_bw_table *msg)
 {
-	msg->bw_level_num = 1;
+	const struct a6xx_info *info = adreno_gpu->info->a6xx;
+	unsigned int i, j;
 
-	msg->ddr_cmds_num = 3;
 	msg->ddr_wait_bitmask = 0x7;
 
-	msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
-	msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
-	msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
+	for (i = 0; i < 3; i++) {
+		if (!info->bcm[i].name)
+			break;
+		msg->ddr_cmds_addrs[i] = cmd_db_read_addr(info->bcm[i].name);
+	}
+	msg->ddr_cmds_num = i;
 
-	msg->ddr_cmds_data[0][0] = 0x40000000;
-	msg->ddr_cmds_data[0][1] = 0x40000000;
-	msg->ddr_cmds_data[0][2] = 0x40000000;
+	for (i = 0; i < gmu->nr_gpu_bws; ++i)
+		for (j = 0; j < msg->ddr_cmds_num; j++)
+			msg->ddr_cmds_data[i][j] = gmu->gpu_bw_votes[i][j];
+	msg->bw_level_num = gmu->nr_gpu_bws;
+}
+
+static void a740_build_bw_table(struct adreno_gpu *adreno_gpu, struct a6xx_gmu *gmu,
+				struct a6xx_hfi_msg_bw_table *msg)
+{
+	if ((adreno_gpu->info->quirks & ADRENO_QUIRK_GMU_BW_VOTE) && gmu->nr_gpu_bws) {
+		a740_generate_bw_table(adreno_gpu, gmu, msg);
+	} else {
+		msg->bw_level_num = 1;
 
-	/* TODO: add a proper dvfs table */
+		msg->ddr_cmds_num = 3;
+		msg->ddr_wait_bitmask = 0x7;
+
+		msg->ddr_cmds_addrs[0] = cmd_db_read_addr("SH0");
+		msg->ddr_cmds_addrs[1] = cmd_db_read_addr("MC0");
+		msg->ddr_cmds_addrs[2] = cmd_db_read_addr("ACV");
+
+		msg->ddr_cmds_data[0][0] = 0x40000000;
+		msg->ddr_cmds_data[0][1] = 0x40000000;
+		msg->ddr_cmds_data[0][2] = 0x40000000;
+
+		/* TODO: add a proper dvfs table */
+	}
 
 	msg->cnoc_cmds_num = 1;
 	msg->cnoc_wait_bitmask = 0x1;
@@ -691,7 +717,7 @@  static int a6xx_hfi_send_bw_table(struct a6xx_gmu *gmu)
 	else if (adreno_is_a730(adreno_gpu))
 		a730_build_bw_table(msg);
 	else if (adreno_is_a740_family(adreno_gpu))
-		a740_build_bw_table(msg);
+		a740_build_bw_table(adreno_gpu, gmu, msg);
 	else
 		a6xx_build_bw_table(msg);