diff mbox series

PM / devfreq: Rework freq_table to be local to devfreq struct

Message ID 20220619220351.29891-1-ansuelsmth@gmail.com (mailing list archive)
State New, archived
Delegated to: Chanwoo Choi
Headers show
Series PM / devfreq: Rework freq_table to be local to devfreq struct | expand

Commit Message

Christian Marangi June 19, 2022, 10:03 p.m. UTC
On a devfreq PROBE_DEFER, the freq_table in the driver profile struct
is never reset and may be left in an undefined state.

This comes from the fact that we store the freq_table in the driver
profile struct, which is commonly defined as static and is not reset on
PROBE_DEFER.
We currently skip the reinit of the freq_table if we find
it's already defined, since a driver may declare its own freq_table.

This logic is flawed in the case where the devfreq core generates a
freq_table, sets it in the profile struct and then hits PROBE_DEFER,
freeing the freq_table.
In this case devfreq will find a non-NULL freq_table that has been
freed, skip the freq_table generation and probe the driver based on the
wrong table.

To fix this and correctly handle PROBE_DEFER, use a local freq_table and
max_state in the devfreq struct and never modify the freq_table present
in the profile struct if the driver provides one.

Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
Cc: stable@vger.kernel.org
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
---
 drivers/devfreq/devfreq.c          | 71 ++++++++++++++----------------
 drivers/devfreq/governor_passive.c | 14 +++---
 include/linux/devfreq.h            |  5 +++
 3 files changed, 46 insertions(+), 44 deletions(-)

Comments

Chanwoo Choi June 29, 2022, 7:41 p.m. UTC | #1
On 22. 6. 20. 07:03, Christian Marangi wrote:
> On a devfreq PROBE_DEFER, the freq_table in the driver profile struct,
> is never reset and may be leaved in an undefined state.
> 
> This comes from the fact that we store the freq_table in the driver
> profile struct that is commonly defined as static and not reset on
> PROBE_DEFER.
> We currently skip the reinit of the freq_table if we found
> it's already defined since a driver may declare his own freq_table.
> 
> This logic is flawed in the case devfreq core generate a freq_table, set
> it in the profile struct and then PROBE_DEFER, freeing the freq_table.
> In this case devfreq will found a NOT NULL freq_table that has been
> freed, skip the freq_table generation and probe the driver based on the
> wrong table.
> 
> To fix this and correctly handle PROBE_DEFER, use a local freq_table and
> max_state in the devfreq struct and never modify the freq_table present
> in the profile struct if it does provide it.
> 
> Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
> Cc: stable@vger.kernel.org
> Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
> ---
>  drivers/devfreq/devfreq.c          | 71 ++++++++++++++----------------
>  drivers/devfreq/governor_passive.c | 14 +++---
>  include/linux/devfreq.h            |  5 +++
>  3 files changed, 46 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 01474daf4548..2e2b3b414d67 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
>  			    unsigned long *min_freq,
>  			    unsigned long *max_freq)
>  {
> -	unsigned long *freq_table = devfreq->profile->freq_table;
> +	unsigned long *freq_table = devfreq->freq_table;
>  	s32 qos_min_freq, qos_max_freq;
>  
>  	lockdep_assert_held(&devfreq->lock);
> @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
>  	 * The devfreq drivers can initialize this in either ascending or
>  	 * descending order and devfreq core supports both.
>  	 */
> -	if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
> +	if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
>  		*min_freq = freq_table[0];
> -		*max_freq = freq_table[devfreq->profile->max_state - 1];
> +		*max_freq = freq_table[devfreq->max_state - 1];
>  	} else {
> -		*min_freq = freq_table[devfreq->profile->max_state - 1];
> +		*min_freq = freq_table[devfreq->max_state - 1];
>  		*max_freq = freq_table[0];
>  	}
>  
> @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
>  {
>  	int lev;
>  
> -	for (lev = 0; lev < devfreq->profile->max_state; lev++)
> -		if (freq == devfreq->profile->freq_table[lev])
> +	for (lev = 0; lev < devfreq->max_state; lev++)
> +		if (freq == devfreq->freq_table[lev])
>  			return lev;
>  
>  	return -EINVAL;
> @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
>  
>  static int set_freq_table(struct devfreq *devfreq)
>  {
> -	struct devfreq_dev_profile *profile = devfreq->profile;
>  	struct dev_pm_opp *opp;
>  	unsigned long freq;
>  	int i, count;
> @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
>  	if (count <= 0)
>  		return -EINVAL;
>  
> -	profile->max_state = count;
> -	profile->freq_table = devm_kcalloc(devfreq->dev.parent,
> -					profile->max_state,
> -					sizeof(*profile->freq_table),
> -					GFP_KERNEL);
> -	if (!profile->freq_table) {
> -		profile->max_state = 0;
> +	devfreq->max_state = count;
> +	devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
> +					   devfreq->max_state,
> +					   sizeof(*devfreq->freq_table),
> +					   GFP_KERNEL);
> +	if (!devfreq->freq_table)
>  		return -ENOMEM;
> -	}
>  
> -	for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
> +	for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
>  		opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
>  		if (IS_ERR(opp)) {
> -			devm_kfree(devfreq->dev.parent, profile->freq_table);
> -			profile->max_state = 0;
> +			devm_kfree(devfreq->dev.parent, devfreq->freq_table);
>  			return PTR_ERR(opp);
>  		}
>  		dev_pm_opp_put(opp);
> -		profile->freq_table[i] = freq;
> +		devfreq->freq_table[i] = freq;
>  	}
>  
>  	return 0;
> @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
>  
>  	if (lev != prev_lev) {
>  		devfreq->stats.trans_table[
> -			(prev_lev * devfreq->profile->max_state) + lev]++;
> +			(prev_lev * devfreq->max_state) + lev]++;
>  		devfreq->stats.total_trans++;
>  	}
>  
> @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
>  		if (err < 0)
>  			goto err_dev;
>  		mutex_lock(&devfreq->lock);
> +	} else {
> +		devfreq->freq_table = devfreq->profile->freq_table;
> +		devfreq->max_state = devfreq->profile->max_state;
>  	}
>  
>  	devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
>  
>  	devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
>  			array3_size(sizeof(unsigned int),
> -				    devfreq->profile->max_state,
> -				    devfreq->profile->max_state),
> +				    devfreq->max_state,
> +				    devfreq->max_state),
>  			GFP_KERNEL);
>  	if (!devfreq->stats.trans_table) {
>  		mutex_unlock(&devfreq->lock);
> @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
>  	}
>  
>  	devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
> -			devfreq->profile->max_state,
> +			devfreq->max_state,
>  			sizeof(*devfreq->stats.time_in_state),
>  			GFP_KERNEL);
>  	if (!devfreq->stats.time_in_state) {
> @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
>  
>  	mutex_lock(&df->lock);
>  
> -	for (i = 0; i < df->profile->max_state; i++)
> +	for (i = 0; i < df->max_state; i++)
>  		count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
> -				"%lu ", df->profile->freq_table[i]);
> +				"%lu ", df->freq_table[i]);
>  
>  	mutex_unlock(&df->lock);
>  	/* Truncate the trailing space */
> @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
>  
>  	if (!df->profile)
>  		return -EINVAL;
> -	max_state = df->profile->max_state;
> +	max_state = df->max_state;
>  
>  	if (max_state == 0)
>  		return sprintf(buf, "Not Supported.\n");
> @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
>  	len += sprintf(buf + len, "           :");
>  	for (i = 0; i < max_state; i++)
>  		len += sprintf(buf + len, "%10lu",
> -				df->profile->freq_table[i]);
> +				df->freq_table[i]);
>  
>  	len += sprintf(buf + len, "   time(ms)\n");
>  
>  	for (i = 0; i < max_state; i++) {
> -		if (df->profile->freq_table[i]
> -					== df->previous_freq) {
> +		if (df->freq_table[i] == df->previous_freq)
>  			len += sprintf(buf + len, "*");
> -		} else {
> +		else
>  			len += sprintf(buf + len, " ");
> -		}
> -		len += sprintf(buf + len, "%10lu:",
> -				df->profile->freq_table[i]);
> +
> +		len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
>  		for (j = 0; j < max_state; j++)
>  			len += sprintf(buf + len, "%10u",
>  				df->stats.trans_table[(i * max_state) + j]);
> @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
>  	if (!df->profile)
>  		return -EINVAL;
>  
> -	if (df->profile->max_state == 0)
> +	if (df->max_state == 0)
>  		return count;
>  
>  	err = kstrtoint(buf, 10, &value);
> @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
>  		return -EINVAL;
>  
>  	mutex_lock(&df->lock);
> -	memset(df->stats.time_in_state, 0, (df->profile->max_state *
> +	memset(df->stats.time_in_state, 0, (df->max_state *
>  					sizeof(*df->stats.time_in_state)));
>  	memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
> -					df->profile->max_state,
> -					df->profile->max_state));
> +					df->max_state,
> +					df->max_state));
>  	df->stats.total_trans = 0;
>  	df->stats.last_update = get_jiffies_64();
>  	mutex_unlock(&df->lock);
> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> index 72c67979ebe1..ce24a262aa16 100644
> --- a/drivers/devfreq/governor_passive.c
> +++ b/drivers/devfreq/governor_passive.c
> @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
>  		goto out;
>  
>  	/* Use interpolation if required opps is not available */
> -	for (i = 0; i < parent_devfreq->profile->max_state; i++)
> -		if (parent_devfreq->profile->freq_table[i] == *freq)
> +	for (i = 0; i < parent_devfreq->max_state; i++)
> +		if (parent_devfreq->freq_table[i] == *freq)
>  			break;
>  
> -	if (i == parent_devfreq->profile->max_state)
> +	if (i == parent_devfreq->max_state)
>  		return -EINVAL;
>  
> -	if (i < devfreq->profile->max_state) {
> -		child_freq = devfreq->profile->freq_table[i];
> +	if (i < devfreq->max_state) {
> +		child_freq = devfreq->freq_table[i];
>  	} else {
> -		count = devfreq->profile->max_state;
> -		child_freq = devfreq->profile->freq_table[count - 1];
> +		count = devfreq->max_state;
> +		child_freq = devfreq->freq_table[count - 1];
>  	}
>  
>  out:
> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> index dc10bee75a72..34aab4dd336c 100644
> --- a/include/linux/devfreq.h
> +++ b/include/linux/devfreq.h
> @@ -148,6 +148,8 @@ struct devfreq_stats {
>   *		reevaluate operable frequencies. Devfreq users may use
>   *		devfreq.nb to the corresponding register notifier call chain.
>   * @work:	delayed work for load monitoring.
> + * @freq_table:		current frequency table used by the devfreq driver.
> + * @max_state:		count of entry present in the frequency table.
>   * @previous_freq:	previously configured frequency value.
>   * @last_status:	devfreq user device info, performance statistics
>   * @data:	Private data of the governor. The devfreq framework does not
> @@ -185,6 +187,9 @@ struct devfreq {
>  	struct notifier_block nb;
>  	struct delayed_work work;
>  
> +	unsigned long *freq_table;
> +	unsigned int max_state;
> +
>  	unsigned long previous_freq;
>  	struct devfreq_dev_status last_status;
>  

Applied it. Thanks.
Marek Szyprowski July 1, 2022, 8:01 a.m. UTC | #2
Hi All,

On 20.06.2022 00:03, Christian Marangi wrote:
> On a devfreq PROBE_DEFER, the freq_table in the driver profile struct,
> is never reset and may be leaved in an undefined state.
>
> This comes from the fact that we store the freq_table in the driver
> profile struct that is commonly defined as static and not reset on
> PROBE_DEFER.
> We currently skip the reinit of the freq_table if we found
> it's already defined since a driver may declare his own freq_table.
>
> This logic is flawed in the case devfreq core generate a freq_table, set
> it in the profile struct and then PROBE_DEFER, freeing the freq_table.
> In this case devfreq will found a NOT NULL freq_table that has been
> freed, skip the freq_table generation and probe the driver based on the
> wrong table.
>
> To fix this and correctly handle PROBE_DEFER, use a local freq_table and
> max_state in the devfreq struct and never modify the freq_table present
> in the profile struct if it does provide it.
>
> Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
> Cc: stable@vger.kernel.org
> Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
> ---

This patch landed in linux next-20220630 as commit b5d281f6c16d ("PM / 
devfreq: Rework freq_table to be local to devfreq struct"). 
Unfortunately it causes the following regression on my Exynos based test 
systems:

8<--- cut here ---
Unable to handle kernel NULL pointer dereference at virtual address 00000000
[00000000] *pgd=00000000
Internal error: Oops: 5 [#1] PREEMPT SMP ARM
Modules linked in:
CPU: 3 PID: 49 Comm: kworker/u8:3 Not tainted 5.19.0-rc4-next-20220630 #5312
Hardware name: Samsung Exynos (Flattened Device Tree)
Workqueue: events_unbound deferred_probe_work_func
PC is at exynos_bus_probe+0x604/0x684
LR is at device_add+0x14c/0x908
pc : [<c090aef4>]    lr : [<c06cf77c>]    psr: 80000053
...
Process kworker/u8:3 (pid: 49, stack limit = 0x(ptrval))
Stack: (0xf0a15d30 to 0xf0a16000)
...
  exynos_bus_probe from platform_probe+0x5c/0xb8
  platform_probe from really_probe+0xe0/0x414
  really_probe from __driver_probe_device+0xa0/0x208
  __driver_probe_device from driver_probe_device+0x30/0xc0
  driver_probe_device from __device_attach_driver+0xa4/0x11c
  __device_attach_driver from bus_for_each_drv+0x7c/0xc0
  bus_for_each_drv from __device_attach+0xac/0x20c
  __device_attach from bus_probe_device+0x88/0x90
  bus_probe_device from deferred_probe_work_func+0x98/0xe0
  deferred_probe_work_func from process_one_work+0x288/0x774
  process_one_work from worker_thread+0x44/0x504
  worker_thread from kthread+0xf4/0x128
  kthread from ret_from_fork+0x14/0x2c
Exception stack(0xf0a15fb0 to 0xf0a15ff8)
...
---[ end trace 0000000000000000 ]---

This issue is caused by bus->devfreq->profile->freq_table being NULL here:

https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/drivers/devfreq/exynos-bus.c?h=next-20220630#n451


>   drivers/devfreq/devfreq.c          | 71 ++++++++++++++----------------
>   drivers/devfreq/governor_passive.c | 14 +++---
>   include/linux/devfreq.h            |  5 +++
>   3 files changed, 46 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 01474daf4548..2e2b3b414d67 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
>   			    unsigned long *min_freq,
>   			    unsigned long *max_freq)
>   {
> -	unsigned long *freq_table = devfreq->profile->freq_table;
> +	unsigned long *freq_table = devfreq->freq_table;
>   	s32 qos_min_freq, qos_max_freq;
>   
>   	lockdep_assert_held(&devfreq->lock);
> @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
>   	 * The devfreq drivers can initialize this in either ascending or
>   	 * descending order and devfreq core supports both.
>   	 */
> -	if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
> +	if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
>   		*min_freq = freq_table[0];
> -		*max_freq = freq_table[devfreq->profile->max_state - 1];
> +		*max_freq = freq_table[devfreq->max_state - 1];
>   	} else {
> -		*min_freq = freq_table[devfreq->profile->max_state - 1];
> +		*min_freq = freq_table[devfreq->max_state - 1];
>   		*max_freq = freq_table[0];
>   	}
>   
> @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
>   {
>   	int lev;
>   
> -	for (lev = 0; lev < devfreq->profile->max_state; lev++)
> -		if (freq == devfreq->profile->freq_table[lev])
> +	for (lev = 0; lev < devfreq->max_state; lev++)
> +		if (freq == devfreq->freq_table[lev])
>   			return lev;
>   
>   	return -EINVAL;
> @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
>   
>   static int set_freq_table(struct devfreq *devfreq)
>   {
> -	struct devfreq_dev_profile *profile = devfreq->profile;
>   	struct dev_pm_opp *opp;
>   	unsigned long freq;
>   	int i, count;
> @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
>   	if (count <= 0)
>   		return -EINVAL;
>   
> -	profile->max_state = count;
> -	profile->freq_table = devm_kcalloc(devfreq->dev.parent,
> -					profile->max_state,
> -					sizeof(*profile->freq_table),
> -					GFP_KERNEL);
> -	if (!profile->freq_table) {
> -		profile->max_state = 0;
> +	devfreq->max_state = count;
> +	devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
> +					   devfreq->max_state,
> +					   sizeof(*devfreq->freq_table),
> +					   GFP_KERNEL);
> +	if (!devfreq->freq_table)
>   		return -ENOMEM;
> -	}
>   
> -	for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
> +	for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
>   		opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
>   		if (IS_ERR(opp)) {
> -			devm_kfree(devfreq->dev.parent, profile->freq_table);
> -			profile->max_state = 0;
> +			devm_kfree(devfreq->dev.parent, devfreq->freq_table);
>   			return PTR_ERR(opp);
>   		}
>   		dev_pm_opp_put(opp);
> -		profile->freq_table[i] = freq;
> +		devfreq->freq_table[i] = freq;
>   	}
>   
>   	return 0;
> @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
>   
>   	if (lev != prev_lev) {
>   		devfreq->stats.trans_table[
> -			(prev_lev * devfreq->profile->max_state) + lev]++;
> +			(prev_lev * devfreq->max_state) + lev]++;
>   		devfreq->stats.total_trans++;
>   	}
>   
> @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
>   		if (err < 0)
>   			goto err_dev;
>   		mutex_lock(&devfreq->lock);
> +	} else {
> +		devfreq->freq_table = devfreq->profile->freq_table;
> +		devfreq->max_state = devfreq->profile->max_state;
>   	}
>   
>   	devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
>   
>   	devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
>   			array3_size(sizeof(unsigned int),
> -				    devfreq->profile->max_state,
> -				    devfreq->profile->max_state),
> +				    devfreq->max_state,
> +				    devfreq->max_state),
>   			GFP_KERNEL);
>   	if (!devfreq->stats.trans_table) {
>   		mutex_unlock(&devfreq->lock);
> @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
>   	}
>   
>   	devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
> -			devfreq->profile->max_state,
> +			devfreq->max_state,
>   			sizeof(*devfreq->stats.time_in_state),
>   			GFP_KERNEL);
>   	if (!devfreq->stats.time_in_state) {
> @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
>   
>   	mutex_lock(&df->lock);
>   
> -	for (i = 0; i < df->profile->max_state; i++)
> +	for (i = 0; i < df->max_state; i++)
>   		count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
> -				"%lu ", df->profile->freq_table[i]);
> +				"%lu ", df->freq_table[i]);
>   
>   	mutex_unlock(&df->lock);
>   	/* Truncate the trailing space */
> @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
>   
>   	if (!df->profile)
>   		return -EINVAL;
> -	max_state = df->profile->max_state;
> +	max_state = df->max_state;
>   
>   	if (max_state == 0)
>   		return sprintf(buf, "Not Supported.\n");
> @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
>   	len += sprintf(buf + len, "           :");
>   	for (i = 0; i < max_state; i++)
>   		len += sprintf(buf + len, "%10lu",
> -				df->profile->freq_table[i]);
> +				df->freq_table[i]);
>   
>   	len += sprintf(buf + len, "   time(ms)\n");
>   
>   	for (i = 0; i < max_state; i++) {
> -		if (df->profile->freq_table[i]
> -					== df->previous_freq) {
> +		if (df->freq_table[i] == df->previous_freq)
>   			len += sprintf(buf + len, "*");
> -		} else {
> +		else
>   			len += sprintf(buf + len, " ");
> -		}
> -		len += sprintf(buf + len, "%10lu:",
> -				df->profile->freq_table[i]);
> +
> +		len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
>   		for (j = 0; j < max_state; j++)
>   			len += sprintf(buf + len, "%10u",
>   				df->stats.trans_table[(i * max_state) + j]);
> @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
>   	if (!df->profile)
>   		return -EINVAL;
>   
> -	if (df->profile->max_state == 0)
> +	if (df->max_state == 0)
>   		return count;
>   
>   	err = kstrtoint(buf, 10, &value);
> @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
>   		return -EINVAL;
>   
>   	mutex_lock(&df->lock);
> -	memset(df->stats.time_in_state, 0, (df->profile->max_state *
> +	memset(df->stats.time_in_state, 0, (df->max_state *
>   					sizeof(*df->stats.time_in_state)));
>   	memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
> -					df->profile->max_state,
> -					df->profile->max_state));
> +					df->max_state,
> +					df->max_state));
>   	df->stats.total_trans = 0;
>   	df->stats.last_update = get_jiffies_64();
>   	mutex_unlock(&df->lock);
> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> index 72c67979ebe1..ce24a262aa16 100644
> --- a/drivers/devfreq/governor_passive.c
> +++ b/drivers/devfreq/governor_passive.c
> @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
>   		goto out;
>   
>   	/* Use interpolation if required opps is not available */
> -	for (i = 0; i < parent_devfreq->profile->max_state; i++)
> -		if (parent_devfreq->profile->freq_table[i] == *freq)
> +	for (i = 0; i < parent_devfreq->max_state; i++)
> +		if (parent_devfreq->freq_table[i] == *freq)
>   			break;
>   
> -	if (i == parent_devfreq->profile->max_state)
> +	if (i == parent_devfreq->max_state)
>   		return -EINVAL;
>   
> -	if (i < devfreq->profile->max_state) {
> -		child_freq = devfreq->profile->freq_table[i];
> +	if (i < devfreq->max_state) {
> +		child_freq = devfreq->freq_table[i];
>   	} else {
> -		count = devfreq->profile->max_state;
> -		child_freq = devfreq->profile->freq_table[count - 1];
> +		count = devfreq->max_state;
> +		child_freq = devfreq->freq_table[count - 1];
>   	}
>   
>   out:
> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> index dc10bee75a72..34aab4dd336c 100644
> --- a/include/linux/devfreq.h
> +++ b/include/linux/devfreq.h
> @@ -148,6 +148,8 @@ struct devfreq_stats {
>    *		reevaluate operable frequencies. Devfreq users may use
>    *		devfreq.nb to the corresponding register notifier call chain.
>    * @work:	delayed work for load monitoring.
> + * @freq_table:		current frequency table used by the devfreq driver.
> + * @max_state:		count of entry present in the frequency table.
>    * @previous_freq:	previously configured frequency value.
>    * @last_status:	devfreq user device info, performance statistics
>    * @data:	Private data of the governor. The devfreq framework does not
> @@ -185,6 +187,9 @@ struct devfreq {
>   	struct notifier_block nb;
>   	struct delayed_work work;
>   
> +	unsigned long *freq_table;
> +	unsigned int max_state;
> +
>   	unsigned long previous_freq;
>   	struct devfreq_dev_status last_status;
>   

Best regards
Christian Marangi July 1, 2022, 11:28 a.m. UTC | #3
On Fri, Jul 01, 2022 at 10:01:52AM +0200, Marek Szyprowski wrote:
> Hi All,
> 
> On 20.06.2022 00:03, Christian Marangi wrote:
> > On a devfreq PROBE_DEFER, the freq_table in the driver profile struct,
> > is never reset and may be leaved in an undefined state.
> >
> > This comes from the fact that we store the freq_table in the driver
> > profile struct that is commonly defined as static and not reset on
> > PROBE_DEFER.
> > We currently skip the reinit of the freq_table if we found
> > it's already defined since a driver may declare his own freq_table.
> >
> > This logic is flawed in the case devfreq core generate a freq_table, set
> > it in the profile struct and then PROBE_DEFER, freeing the freq_table.
> > In this case devfreq will found a NOT NULL freq_table that has been
> > freed, skip the freq_table generation and probe the driver based on the
> > wrong table.
> >
> > To fix this and correctly handle PROBE_DEFER, use a local freq_table and
> > max_state in the devfreq struct and never modify the freq_table present
> > in the profile struct if it does provide it.
> >
> > Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
> > ---
> 
> This patch landed in linux next-20220630 as commit b5d281f6c16d ("PM / 
> devfreq: Rework freq_table to be local to devfreq struct"). 
> Unfortunately it causes the following regression on my Exynos based test 
> systems:
> 
> 8<--- cut here ---
> Unable to handle kernel NULL pointer dereference at virtual address 00000000
> [00000000] *pgd=00000000
> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> Modules linked in:
> CPU: 3 PID: 49 Comm: kworker/u8:3 Not tainted 5.19.0-rc4-next-20220630 #5312
> Hardware name: Samsung Exynos (Flattened Device Tree)
> Workqueue: events_unbound deferred_probe_work_func
> PC is at exynos_bus_probe+0x604/0x684
> LR is at device_add+0x14c/0x908
> pc : [<c090aef4>]    lr : [<c06cf77c>]    psr: 80000053
> ...
> Process kworker/u8:3 (pid: 49, stack limit = 0x(ptrval))
> Stack: (0xf0a15d30 to 0xf0a16000)
> ...
>   exynos_bus_probe from platform_probe+0x5c/0xb8
>   platform_probe from really_probe+0xe0/0x414
>   really_probe from __driver_probe_device+0xa0/0x208
>   __driver_probe_device from driver_probe_device+0x30/0xc0
>   driver_probe_device from __device_attach_driver+0xa4/0x11c
>   __device_attach_driver from bus_for_each_drv+0x7c/0xc0
>   bus_for_each_drv from __device_attach+0xac/0x20c
>   __device_attach from bus_probe_device+0x88/0x90
>   bus_probe_device from deferred_probe_work_func+0x98/0xe0
>   deferred_probe_work_func from process_one_work+0x288/0x774
>   process_one_work from worker_thread+0x44/0x504
>   worker_thread from kthread+0xf4/0x128
>   kthread from ret_from_fork+0x14/0x2c
> Exception stack(0xf0a15fb0 to 0xf0a15ff8)
> ...
> ---[ end trace 0000000000000000 ]---
> 
> This issue is caused by bus->devfreq->profile->freq_table being NULL here:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/drivers/devfreq/exynos-bus.c?h=next-20220630#n451
> 
>

I just checked this and the bug is caused by a simple pr_info...

Can you test the following patch just to make sure?

diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
index b5615e667e31..79725bbb4bb0 100644
--- a/drivers/devfreq/exynos-bus.c
+++ b/drivers/devfreq/exynos-bus.c
@@ -447,9 +447,9 @@ static int exynos_bus_probe(struct platform_device *pdev)
                }
        }

-       max_state = bus->devfreq->profile->max_state;
-       min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
-       max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
+       max_state = bus->devfreq->max_state;
+       min_freq = (bus->devfreq->freq_table[0] / 1000);
+       max_freq = (bus->devfreq->freq_table[max_state - 1] / 1000);
        pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n",
                        dev_name(dev), min_freq, max_freq);
 

> >   drivers/devfreq/devfreq.c          | 71 ++++++++++++++----------------
> >   drivers/devfreq/governor_passive.c | 14 +++---
> >   include/linux/devfreq.h            |  5 +++
> >   3 files changed, 46 insertions(+), 44 deletions(-)
> >
> > diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> > index 01474daf4548..2e2b3b414d67 100644
> > --- a/drivers/devfreq/devfreq.c
> > +++ b/drivers/devfreq/devfreq.c
> > @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> >   			    unsigned long *min_freq,
> >   			    unsigned long *max_freq)
> >   {
> > -	unsigned long *freq_table = devfreq->profile->freq_table;
> > +	unsigned long *freq_table = devfreq->freq_table;
> >   	s32 qos_min_freq, qos_max_freq;
> >   
> >   	lockdep_assert_held(&devfreq->lock);
> > @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> >   	 * The devfreq drivers can initialize this in either ascending or
> >   	 * descending order and devfreq core supports both.
> >   	 */
> > -	if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
> > +	if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
> >   		*min_freq = freq_table[0];
> > -		*max_freq = freq_table[devfreq->profile->max_state - 1];
> > +		*max_freq = freq_table[devfreq->max_state - 1];
> >   	} else {
> > -		*min_freq = freq_table[devfreq->profile->max_state - 1];
> > +		*min_freq = freq_table[devfreq->max_state - 1];
> >   		*max_freq = freq_table[0];
> >   	}
> >   
> > @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
> >   {
> >   	int lev;
> >   
> > -	for (lev = 0; lev < devfreq->profile->max_state; lev++)
> > -		if (freq == devfreq->profile->freq_table[lev])
> > +	for (lev = 0; lev < devfreq->max_state; lev++)
> > +		if (freq == devfreq->freq_table[lev])
> >   			return lev;
> >   
> >   	return -EINVAL;
> > @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
> >   
> >   static int set_freq_table(struct devfreq *devfreq)
> >   {
> > -	struct devfreq_dev_profile *profile = devfreq->profile;
> >   	struct dev_pm_opp *opp;
> >   	unsigned long freq;
> >   	int i, count;
> > @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
> >   	if (count <= 0)
> >   		return -EINVAL;
> >   
> > -	profile->max_state = count;
> > -	profile->freq_table = devm_kcalloc(devfreq->dev.parent,
> > -					profile->max_state,
> > -					sizeof(*profile->freq_table),
> > -					GFP_KERNEL);
> > -	if (!profile->freq_table) {
> > -		profile->max_state = 0;
> > +	devfreq->max_state = count;
> > +	devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
> > +					   devfreq->max_state,
> > +					   sizeof(*devfreq->freq_table),
> > +					   GFP_KERNEL);
> > +	if (!devfreq->freq_table)
> >   		return -ENOMEM;
> > -	}
> >   
> > -	for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
> > +	for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
> >   		opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
> >   		if (IS_ERR(opp)) {
> > -			devm_kfree(devfreq->dev.parent, profile->freq_table);
> > -			profile->max_state = 0;
> > +			devm_kfree(devfreq->dev.parent, devfreq->freq_table);
> >   			return PTR_ERR(opp);
> >   		}
> >   		dev_pm_opp_put(opp);
> > -		profile->freq_table[i] = freq;
> > +		devfreq->freq_table[i] = freq;
> >   	}
> >   
> >   	return 0;
> > @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
> >   
> >   	if (lev != prev_lev) {
> >   		devfreq->stats.trans_table[
> > -			(prev_lev * devfreq->profile->max_state) + lev]++;
> > +			(prev_lev * devfreq->max_state) + lev]++;
> >   		devfreq->stats.total_trans++;
> >   	}
> >   
> > @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
> >   		if (err < 0)
> >   			goto err_dev;
> >   		mutex_lock(&devfreq->lock);
> > +	} else {
> > +		devfreq->freq_table = devfreq->profile->freq_table;
> > +		devfreq->max_state = devfreq->profile->max_state;
> >   	}
> >   
> >   	devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> > @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
> >   
> >   	devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
> >   			array3_size(sizeof(unsigned int),
> > -				    devfreq->profile->max_state,
> > -				    devfreq->profile->max_state),
> > +				    devfreq->max_state,
> > +				    devfreq->max_state),
> >   			GFP_KERNEL);
> >   	if (!devfreq->stats.trans_table) {
> >   		mutex_unlock(&devfreq->lock);
> > @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
> >   	}
> >   
> >   	devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
> > -			devfreq->profile->max_state,
> > +			devfreq->max_state,
> >   			sizeof(*devfreq->stats.time_in_state),
> >   			GFP_KERNEL);
> >   	if (!devfreq->stats.time_in_state) {
> > @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
> >   
> >   	mutex_lock(&df->lock);
> >   
> > -	for (i = 0; i < df->profile->max_state; i++)
> > +	for (i = 0; i < df->max_state; i++)
> >   		count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
> > -				"%lu ", df->profile->freq_table[i]);
> > +				"%lu ", df->freq_table[i]);
> >   
> >   	mutex_unlock(&df->lock);
> >   	/* Truncate the trailing space */
> > @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
> >   
> >   	if (!df->profile)
> >   		return -EINVAL;
> > -	max_state = df->profile->max_state;
> > +	max_state = df->max_state;
> >   
> >   	if (max_state == 0)
> >   		return sprintf(buf, "Not Supported.\n");
> > @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
> >   	len += sprintf(buf + len, "           :");
> >   	for (i = 0; i < max_state; i++)
> >   		len += sprintf(buf + len, "%10lu",
> > -				df->profile->freq_table[i]);
> > +				df->freq_table[i]);
> >   
> >   	len += sprintf(buf + len, "   time(ms)\n");
> >   
> >   	for (i = 0; i < max_state; i++) {
> > -		if (df->profile->freq_table[i]
> > -					== df->previous_freq) {
> > +		if (df->freq_table[i] == df->previous_freq)
> >   			len += sprintf(buf + len, "*");
> > -		} else {
> > +		else
> >   			len += sprintf(buf + len, " ");
> > -		}
> > -		len += sprintf(buf + len, "%10lu:",
> > -				df->profile->freq_table[i]);
> > +
> > +		len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
> >   		for (j = 0; j < max_state; j++)
> >   			len += sprintf(buf + len, "%10u",
> >   				df->stats.trans_table[(i * max_state) + j]);
> > @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
> >   	if (!df->profile)
> >   		return -EINVAL;
> >   
> > -	if (df->profile->max_state == 0)
> > +	if (df->max_state == 0)
> >   		return count;
> >   
> >   	err = kstrtoint(buf, 10, &value);
> > @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
> >   		return -EINVAL;
> >   
> >   	mutex_lock(&df->lock);
> > -	memset(df->stats.time_in_state, 0, (df->profile->max_state *
> > +	memset(df->stats.time_in_state, 0, (df->max_state *
> >   					sizeof(*df->stats.time_in_state)));
> >   	memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
> > -					df->profile->max_state,
> > -					df->profile->max_state));
> > +					df->max_state,
> > +					df->max_state));
> >   	df->stats.total_trans = 0;
> >   	df->stats.last_update = get_jiffies_64();
> >   	mutex_unlock(&df->lock);
> > diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> > index 72c67979ebe1..ce24a262aa16 100644
> > --- a/drivers/devfreq/governor_passive.c
> > +++ b/drivers/devfreq/governor_passive.c
> > @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
> >   		goto out;
> >   
> >   	/* Use interpolation if required opps is not available */
> > -	for (i = 0; i < parent_devfreq->profile->max_state; i++)
> > -		if (parent_devfreq->profile->freq_table[i] == *freq)
> > +	for (i = 0; i < parent_devfreq->max_state; i++)
> > +		if (parent_devfreq->freq_table[i] == *freq)
> >   			break;
> >   
> > -	if (i == parent_devfreq->profile->max_state)
> > +	if (i == parent_devfreq->max_state)
> >   		return -EINVAL;
> >   
> > -	if (i < devfreq->profile->max_state) {
> > -		child_freq = devfreq->profile->freq_table[i];
> > +	if (i < devfreq->max_state) {
> > +		child_freq = devfreq->freq_table[i];
> >   	} else {
> > -		count = devfreq->profile->max_state;
> > -		child_freq = devfreq->profile->freq_table[count - 1];
> > +		count = devfreq->max_state;
> > +		child_freq = devfreq->freq_table[count - 1];
> >   	}
> >   
> >   out:
> > diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> > index dc10bee75a72..34aab4dd336c 100644
> > --- a/include/linux/devfreq.h
> > +++ b/include/linux/devfreq.h
> > @@ -148,6 +148,8 @@ struct devfreq_stats {
> >    *		reevaluate operable frequencies. Devfreq users may use
> >    *		devfreq.nb to the corresponding register notifier call chain.
> >    * @work:	delayed work for load monitoring.
> > + * @freq_table:		current frequency table used by the devfreq driver.
> > + * @max_state:		count of entry present in the frequency table.
> >    * @previous_freq:	previously configured frequency value.
> >    * @last_status:	devfreq user device info, performance statistics
> >    * @data:	Private data of the governor. The devfreq framework does not
> > @@ -185,6 +187,9 @@ struct devfreq {
> >   	struct notifier_block nb;
> >   	struct delayed_work work;
> >   
> > +	unsigned long *freq_table;
> > +	unsigned int max_state;
> > +
> >   	unsigned long previous_freq;
> >   	struct devfreq_dev_status last_status;
> >   
> 
> Best regards
> -- 
> Marek Szyprowski, PhD
> Samsung R&D Institute Poland
>
Christian Marangi July 1, 2022, 11:45 a.m. UTC | #4
On Fri, Jul 01, 2022 at 01:28:50PM +0200, Christian Marangi wrote:
> On Fri, Jul 01, 2022 at 10:01:52AM +0200, Marek Szyprowski wrote:
> > Hi All,
> > 
> > On 20.06.2022 00:03, Christian Marangi wrote:
> > > > On a devfreq PROBE_DEFER, the freq_table in the driver profile struct
> > > > is never reset and may be left in an undefined state.
> > >
> > > > This comes from the fact that we store the freq_table in the driver
> > > > profile struct, which is commonly defined as static and is not reset on
> > > > PROBE_DEFER.
> > > > We currently skip the reinit of the freq_table if we find that
> > > > it's already defined, since a driver may declare its own freq_table.
> > >
> > > > This logic is flawed in the case where the devfreq core generates a
> > > > freq_table, sets it in the profile struct and then hits PROBE_DEFER,
> > > > freeing the freq_table.
> > > > In this case devfreq will find a non-NULL freq_table that has been
> > > > freed, skip the freq_table generation and probe the driver based on the
> > > > wrong table.
> > >
> > > To fix this and correctly handle PROBE_DEFER, use a local freq_table and
> > > max_state in the devfreq struct and never modify the freq_table present
> > > in the profile struct if it does provide it.
> > >
> > > Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
> > > Cc: stable@vger.kernel.org
> > > Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
> > > ---
> > 
> > This patch landed in linux next-20220630 as commit b5d281f6c16d ("PM / 
> > devfreq: Rework freq_table to be local to devfreq struct"). 
> > Unfortunately it causes the following regression on my Exynos based test 
> > systems:
> > 
> > 8<--- cut here ---
> > Unable to handle kernel NULL pointer dereference at virtual address 00000000
> > [00000000] *pgd=00000000
> > Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > Modules linked in:
> > CPU: 3 PID: 49 Comm: kworker/u8:3 Not tainted 5.19.0-rc4-next-20220630 #5312
> > Hardware name: Samsung Exynos (Flattened Device Tree)
> > Workqueue: events_unbound deferred_probe_work_func
> > PC is at exynos_bus_probe+0x604/0x684
> > LR is at device_add+0x14c/0x908
> > pc : [<c090aef4>]    lr : [<c06cf77c>]    psr: 80000053
> > ...
> > Process kworker/u8:3 (pid: 49, stack limit = 0x(ptrval))
> > Stack: (0xf0a15d30 to 0xf0a16000)
> > ...
> >   exynos_bus_probe from platform_probe+0x5c/0xb8
> >   platform_probe from really_probe+0xe0/0x414
> >   really_probe from __driver_probe_device+0xa0/0x208
> >   __driver_probe_device from driver_probe_device+0x30/0xc0
> >   driver_probe_device from __device_attach_driver+0xa4/0x11c
> >   __device_attach_driver from bus_for_each_drv+0x7c/0xc0
> >   bus_for_each_drv from __device_attach+0xac/0x20c
> >   __device_attach from bus_probe_device+0x88/0x90
> >   bus_probe_device from deferred_probe_work_func+0x98/0xe0
> >   deferred_probe_work_func from process_one_work+0x288/0x774
> >   process_one_work from worker_thread+0x44/0x504
> >   worker_thread from kthread+0xf4/0x128
> >   kthread from ret_from_fork+0x14/0x2c
> > Exception stack(0xf0a15fb0 to 0xf0a15ff8)
> > ...
> > ---[ end trace 0000000000000000 ]---
> > 
> > This issue is caused by bus->devfreq->profile->freq_table being NULL here:
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/drivers/devfreq/exynos-bus.c?h=next-20220630#n451
> > 
> >
> 
> I just checked this and the bug is caused by a simple pr_info...
> 
> Can you test the following patch just to make sure?
> 
> diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
> index b5615e667e31..79725bbb4bb0 100644
> --- a/drivers/devfreq/exynos-bus.c
> +++ b/drivers/devfreq/exynos-bus.c
> @@ -447,9 +447,9 @@ static int exynos_bus_probe(struct platform_device *pdev)
>                 }
>         }
> 
> -       max_state = bus->devfreq->profile->max_state;
> -       min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
> -       max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
> +       max_state = bus->devfreq->max_state;
> +       min_freq = (bus->devfreq->freq_table[0] / 1000);
> +       max_freq = (bus->devfreq->freq_table[max_state - 1] / 1000);
>         pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n",
>                         dev_name(dev), min_freq, max_freq);
>  
>

(BTW, the patch is ready; I am just waiting for your test, and then I will send it.)

> > >   drivers/devfreq/devfreq.c          | 71 ++++++++++++++----------------
> > >   drivers/devfreq/governor_passive.c | 14 +++---
> > >   include/linux/devfreq.h            |  5 +++
> > >   3 files changed, 46 insertions(+), 44 deletions(-)
> > >
> > > diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> > > index 01474daf4548..2e2b3b414d67 100644
> > > --- a/drivers/devfreq/devfreq.c
> > > +++ b/drivers/devfreq/devfreq.c
> > > @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> > >   			    unsigned long *min_freq,
> > >   			    unsigned long *max_freq)
> > >   {
> > > -	unsigned long *freq_table = devfreq->profile->freq_table;
> > > +	unsigned long *freq_table = devfreq->freq_table;
> > >   	s32 qos_min_freq, qos_max_freq;
> > >   
> > >   	lockdep_assert_held(&devfreq->lock);
> > > @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> > >   	 * The devfreq drivers can initialize this in either ascending or
> > >   	 * descending order and devfreq core supports both.
> > >   	 */
> > > -	if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
> > > +	if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
> > >   		*min_freq = freq_table[0];
> > > -		*max_freq = freq_table[devfreq->profile->max_state - 1];
> > > +		*max_freq = freq_table[devfreq->max_state - 1];
> > >   	} else {
> > > -		*min_freq = freq_table[devfreq->profile->max_state - 1];
> > > +		*min_freq = freq_table[devfreq->max_state - 1];
> > >   		*max_freq = freq_table[0];
> > >   	}
> > >   
> > > @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
> > >   {
> > >   	int lev;
> > >   
> > > -	for (lev = 0; lev < devfreq->profile->max_state; lev++)
> > > -		if (freq == devfreq->profile->freq_table[lev])
> > > +	for (lev = 0; lev < devfreq->max_state; lev++)
> > > +		if (freq == devfreq->freq_table[lev])
> > >   			return lev;
> > >   
> > >   	return -EINVAL;
> > > @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
> > >   
> > >   static int set_freq_table(struct devfreq *devfreq)
> > >   {
> > > -	struct devfreq_dev_profile *profile = devfreq->profile;
> > >   	struct dev_pm_opp *opp;
> > >   	unsigned long freq;
> > >   	int i, count;
> > > @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
> > >   	if (count <= 0)
> > >   		return -EINVAL;
> > >   
> > > -	profile->max_state = count;
> > > -	profile->freq_table = devm_kcalloc(devfreq->dev.parent,
> > > -					profile->max_state,
> > > -					sizeof(*profile->freq_table),
> > > -					GFP_KERNEL);
> > > -	if (!profile->freq_table) {
> > > -		profile->max_state = 0;
> > > +	devfreq->max_state = count;
> > > +	devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
> > > +					   devfreq->max_state,
> > > +					   sizeof(*devfreq->freq_table),
> > > +					   GFP_KERNEL);
> > > +	if (!devfreq->freq_table)
> > >   		return -ENOMEM;
> > > -	}
> > >   
> > > -	for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
> > > +	for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
> > >   		opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
> > >   		if (IS_ERR(opp)) {
> > > -			devm_kfree(devfreq->dev.parent, profile->freq_table);
> > > -			profile->max_state = 0;
> > > +			devm_kfree(devfreq->dev.parent, devfreq->freq_table);
> > >   			return PTR_ERR(opp);
> > >   		}
> > >   		dev_pm_opp_put(opp);
> > > -		profile->freq_table[i] = freq;
> > > +		devfreq->freq_table[i] = freq;
> > >   	}
> > >   
> > >   	return 0;
> > > @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
> > >   
> > >   	if (lev != prev_lev) {
> > >   		devfreq->stats.trans_table[
> > > -			(prev_lev * devfreq->profile->max_state) + lev]++;
> > > +			(prev_lev * devfreq->max_state) + lev]++;
> > >   		devfreq->stats.total_trans++;
> > >   	}
> > >   
> > > @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
> > >   		if (err < 0)
> > >   			goto err_dev;
> > >   		mutex_lock(&devfreq->lock);
> > > +	} else {
> > > +		devfreq->freq_table = devfreq->profile->freq_table;
> > > +		devfreq->max_state = devfreq->profile->max_state;
> > >   	}
> > >   
> > >   	devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> > > @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
> > >   
> > >   	devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
> > >   			array3_size(sizeof(unsigned int),
> > > -				    devfreq->profile->max_state,
> > > -				    devfreq->profile->max_state),
> > > +				    devfreq->max_state,
> > > +				    devfreq->max_state),
> > >   			GFP_KERNEL);
> > >   	if (!devfreq->stats.trans_table) {
> > >   		mutex_unlock(&devfreq->lock);
> > > @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
> > >   	}
> > >   
> > >   	devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
> > > -			devfreq->profile->max_state,
> > > +			devfreq->max_state,
> > >   			sizeof(*devfreq->stats.time_in_state),
> > >   			GFP_KERNEL);
> > >   	if (!devfreq->stats.time_in_state) {
> > > @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
> > >   
> > >   	mutex_lock(&df->lock);
> > >   
> > > -	for (i = 0; i < df->profile->max_state; i++)
> > > +	for (i = 0; i < df->max_state; i++)
> > >   		count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
> > > -				"%lu ", df->profile->freq_table[i]);
> > > +				"%lu ", df->freq_table[i]);
> > >   
> > >   	mutex_unlock(&df->lock);
> > >   	/* Truncate the trailing space */
> > > @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
> > >   
> > >   	if (!df->profile)
> > >   		return -EINVAL;
> > > -	max_state = df->profile->max_state;
> > > +	max_state = df->max_state;
> > >   
> > >   	if (max_state == 0)
> > >   		return sprintf(buf, "Not Supported.\n");
> > > @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
> > >   	len += sprintf(buf + len, "           :");
> > >   	for (i = 0; i < max_state; i++)
> > >   		len += sprintf(buf + len, "%10lu",
> > > -				df->profile->freq_table[i]);
> > > +				df->freq_table[i]);
> > >   
> > >   	len += sprintf(buf + len, "   time(ms)\n");
> > >   
> > >   	for (i = 0; i < max_state; i++) {
> > > -		if (df->profile->freq_table[i]
> > > -					== df->previous_freq) {
> > > +		if (df->freq_table[i] == df->previous_freq)
> > >   			len += sprintf(buf + len, "*");
> > > -		} else {
> > > +		else
> > >   			len += sprintf(buf + len, " ");
> > > -		}
> > > -		len += sprintf(buf + len, "%10lu:",
> > > -				df->profile->freq_table[i]);
> > > +
> > > +		len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
> > >   		for (j = 0; j < max_state; j++)
> > >   			len += sprintf(buf + len, "%10u",
> > >   				df->stats.trans_table[(i * max_state) + j]);
> > > @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
> > >   	if (!df->profile)
> > >   		return -EINVAL;
> > >   
> > > -	if (df->profile->max_state == 0)
> > > +	if (df->max_state == 0)
> > >   		return count;
> > >   
> > >   	err = kstrtoint(buf, 10, &value);
> > > @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
> > >   		return -EINVAL;
> > >   
> > >   	mutex_lock(&df->lock);
> > > -	memset(df->stats.time_in_state, 0, (df->profile->max_state *
> > > +	memset(df->stats.time_in_state, 0, (df->max_state *
> > >   					sizeof(*df->stats.time_in_state)));
> > >   	memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
> > > -					df->profile->max_state,
> > > -					df->profile->max_state));
> > > +					df->max_state,
> > > +					df->max_state));
> > >   	df->stats.total_trans = 0;
> > >   	df->stats.last_update = get_jiffies_64();
> > >   	mutex_unlock(&df->lock);
> > > diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> > > index 72c67979ebe1..ce24a262aa16 100644
> > > --- a/drivers/devfreq/governor_passive.c
> > > +++ b/drivers/devfreq/governor_passive.c
> > > @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
> > >   		goto out;
> > >   
> > >   	/* Use interpolation if required opps is not available */
> > > -	for (i = 0; i < parent_devfreq->profile->max_state; i++)
> > > -		if (parent_devfreq->profile->freq_table[i] == *freq)
> > > +	for (i = 0; i < parent_devfreq->max_state; i++)
> > > +		if (parent_devfreq->freq_table[i] == *freq)
> > >   			break;
> > >   
> > > -	if (i == parent_devfreq->profile->max_state)
> > > +	if (i == parent_devfreq->max_state)
> > >   		return -EINVAL;
> > >   
> > > -	if (i < devfreq->profile->max_state) {
> > > -		child_freq = devfreq->profile->freq_table[i];
> > > +	if (i < devfreq->max_state) {
> > > +		child_freq = devfreq->freq_table[i];
> > >   	} else {
> > > -		count = devfreq->profile->max_state;
> > > -		child_freq = devfreq->profile->freq_table[count - 1];
> > > +		count = devfreq->max_state;
> > > +		child_freq = devfreq->freq_table[count - 1];
> > >   	}
> > >   
> > >   out:
> > > diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> > > index dc10bee75a72..34aab4dd336c 100644
> > > --- a/include/linux/devfreq.h
> > > +++ b/include/linux/devfreq.h
> > > @@ -148,6 +148,8 @@ struct devfreq_stats {
> > >    *		reevaluate operable frequencies. Devfreq users may use
> > >    *		devfreq.nb to the corresponding register notifier call chain.
> > >    * @work:	delayed work for load monitoring.
> > > + * @freq_table:		current frequency table used by the devfreq driver.
> > > + * @max_state:		count of entry present in the frequency table.
> > >    * @previous_freq:	previously configured frequency value.
> > >    * @last_status:	devfreq user device info, performance statistics
> > >    * @data:	Private data of the governor. The devfreq framework does not
> > > @@ -185,6 +187,9 @@ struct devfreq {
> > >   	struct notifier_block nb;
> > >   	struct delayed_work work;
> > >   
> > > +	unsigned long *freq_table;
> > > +	unsigned int max_state;
> > > +
> > >   	unsigned long previous_freq;
> > >   	struct devfreq_dev_status last_status;
> > >   
> > 
> > Best regards
> > -- 
> > Marek Szyprowski, PhD
> > Samsung R&D Institute Poland
> > 
> 
> -- 
> 	Ansuel
Marek Szyprowski July 1, 2022, 11:54 a.m. UTC | #5
On 01.07.2022 13:28, Christian Marangi wrote:
> On Fri, Jul 01, 2022 at 10:01:52AM +0200, Marek Szyprowski wrote:
>> On 20.06.2022 00:03, Christian Marangi wrote:
>>> On a devfreq PROBE_DEFER, the freq_table in the driver profile struct
>>> is never reset and may be left in an undefined state.
>>>
>>> This comes from the fact that we store the freq_table in the driver
>>> profile struct, which is commonly defined as static and is not reset on
>>> PROBE_DEFER.
>>> We currently skip the reinit of the freq_table if we find that
>>> it's already defined, since a driver may declare its own freq_table.
>>>
>>> This logic is flawed in the case where the devfreq core generates a
>>> freq_table, sets it in the profile struct and then hits PROBE_DEFER,
>>> freeing the freq_table.
>>> In this case devfreq will find a non-NULL freq_table that has been
>>> freed, skip the freq_table generation and probe the driver based on the
>>> wrong table.
>>>
>>> To fix this and correctly handle PROBE_DEFER, use a local freq_table and
>>> max_state in the devfreq struct and never modify the freq_table present
>>> in the profile struct if it does provide it.
>>>
>>> Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
>>> Cc: stable@vger.kernel.org
>>> Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
>>> ---
>> This patch landed in linux next-20220630 as commit b5d281f6c16d ("PM /
>> devfreq: Rework freq_table to be local to devfreq struct").
>> Unfortunately it causes the following regression on my Exynos based test
>> systems:
>>
>> 8<--- cut here ---
>> Unable to handle kernel NULL pointer dereference at virtual address 00000000
>> [00000000] *pgd=00000000
>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
>> Modules linked in:
>> CPU: 3 PID: 49 Comm: kworker/u8:3 Not tainted 5.19.0-rc4-next-20220630 #5312
>> Hardware name: Samsung Exynos (Flattened Device Tree)
>> Workqueue: events_unbound deferred_probe_work_func
>> PC is at exynos_bus_probe+0x604/0x684
>> LR is at device_add+0x14c/0x908
>> pc : [<c090aef4>]    lr : [<c06cf77c>]    psr: 80000053
>> ...
>> Process kworker/u8:3 (pid: 49, stack limit = 0x(ptrval))
>> Stack: (0xf0a15d30 to 0xf0a16000)
>> ...
>>    exynos_bus_probe from platform_probe+0x5c/0xb8
>>    platform_probe from really_probe+0xe0/0x414
>>    really_probe from __driver_probe_device+0xa0/0x208
>>    __driver_probe_device from driver_probe_device+0x30/0xc0
>>    driver_probe_device from __device_attach_driver+0xa4/0x11c
>>    __device_attach_driver from bus_for_each_drv+0x7c/0xc0
>>    bus_for_each_drv from __device_attach+0xac/0x20c
>>    __device_attach from bus_probe_device+0x88/0x90
>>    bus_probe_device from deferred_probe_work_func+0x98/0xe0
>>    deferred_probe_work_func from process_one_work+0x288/0x774
>>    process_one_work from worker_thread+0x44/0x504
>>    worker_thread from kthread+0xf4/0x128
>>    kthread from ret_from_fork+0x14/0x2c
>> Exception stack(0xf0a15fb0 to 0xf0a15ff8)
>> ...
>> ---[ end trace 0000000000000000 ]---
>>
>> This issue is caused by bus->devfreq->profile->freq_table being NULL here:
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/drivers/devfreq/exynos-bus.c?h=next-20220630#n451
>>
>>
> I just checked this and the bug is caused by a simple pr_info...
>
> Can you test the following patch just to make sure?

Yes, this fixes the issue. Thanks! Feel free to add:

Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>

> diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
> index b5615e667e31..79725bbb4bb0 100644
> --- a/drivers/devfreq/exynos-bus.c
> +++ b/drivers/devfreq/exynos-bus.c
> @@ -447,9 +447,9 @@ static int exynos_bus_probe(struct platform_device *pdev)
>                  }
>          }
>
> -       max_state = bus->devfreq->profile->max_state;
> -       min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
> -       max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
> +       max_state = bus->devfreq->max_state;
> +       min_freq = (bus->devfreq->freq_table[0] / 1000);
> +       max_freq = (bus->devfreq->freq_table[max_state - 1] / 1000);
>          pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n",
>                          dev_name(dev), min_freq, max_freq);
>   
>
>>>    drivers/devfreq/devfreq.c          | 71 ++++++++++++++----------------
>>>    drivers/devfreq/governor_passive.c | 14 +++---
>>>    include/linux/devfreq.h            |  5 +++
>>>    3 files changed, 46 insertions(+), 44 deletions(-)
>>>
>>> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
>>> index 01474daf4548..2e2b3b414d67 100644
>>> --- a/drivers/devfreq/devfreq.c
>>> +++ b/drivers/devfreq/devfreq.c
>>> @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
>>>    			    unsigned long *min_freq,
>>>    			    unsigned long *max_freq)
>>>    {
>>> -	unsigned long *freq_table = devfreq->profile->freq_table;
>>> +	unsigned long *freq_table = devfreq->freq_table;
>>>    	s32 qos_min_freq, qos_max_freq;
>>>    
>>>    	lockdep_assert_held(&devfreq->lock);
>>> @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
>>>    	 * The devfreq drivers can initialize this in either ascending or
>>>    	 * descending order and devfreq core supports both.
>>>    	 */
>>> -	if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
>>> +	if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
>>>    		*min_freq = freq_table[0];
>>> -		*max_freq = freq_table[devfreq->profile->max_state - 1];
>>> +		*max_freq = freq_table[devfreq->max_state - 1];
>>>    	} else {
>>> -		*min_freq = freq_table[devfreq->profile->max_state - 1];
>>> +		*min_freq = freq_table[devfreq->max_state - 1];
>>>    		*max_freq = freq_table[0];
>>>    	}
>>>    
>>> @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
>>>    {
>>>    	int lev;
>>>    
>>> -	for (lev = 0; lev < devfreq->profile->max_state; lev++)
>>> -		if (freq == devfreq->profile->freq_table[lev])
>>> +	for (lev = 0; lev < devfreq->max_state; lev++)
>>> +		if (freq == devfreq->freq_table[lev])
>>>    			return lev;
>>>    
>>>    	return -EINVAL;
>>> @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
>>>    
>>>    static int set_freq_table(struct devfreq *devfreq)
>>>    {
>>> -	struct devfreq_dev_profile *profile = devfreq->profile;
>>>    	struct dev_pm_opp *opp;
>>>    	unsigned long freq;
>>>    	int i, count;
>>> @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
>>>    	if (count <= 0)
>>>    		return -EINVAL;
>>>    
>>> -	profile->max_state = count;
>>> -	profile->freq_table = devm_kcalloc(devfreq->dev.parent,
>>> -					profile->max_state,
>>> -					sizeof(*profile->freq_table),
>>> -					GFP_KERNEL);
>>> -	if (!profile->freq_table) {
>>> -		profile->max_state = 0;
>>> +	devfreq->max_state = count;
>>> +	devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
>>> +					   devfreq->max_state,
>>> +					   sizeof(*devfreq->freq_table),
>>> +					   GFP_KERNEL);
>>> +	if (!devfreq->freq_table)
>>>    		return -ENOMEM;
>>> -	}
>>>    
>>> -	for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
>>> +	for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
>>>    		opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
>>>    		if (IS_ERR(opp)) {
>>> -			devm_kfree(devfreq->dev.parent, profile->freq_table);
>>> -			profile->max_state = 0;
>>> +			devm_kfree(devfreq->dev.parent, devfreq->freq_table);
>>>    			return PTR_ERR(opp);
>>>    		}
>>>    		dev_pm_opp_put(opp);
>>> -		profile->freq_table[i] = freq;
>>> +		devfreq->freq_table[i] = freq;
>>>    	}
>>>    
>>>    	return 0;
>>> @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
>>>    
>>>    	if (lev != prev_lev) {
>>>    		devfreq->stats.trans_table[
>>> -			(prev_lev * devfreq->profile->max_state) + lev]++;
>>> +			(prev_lev * devfreq->max_state) + lev]++;
>>>    		devfreq->stats.total_trans++;
>>>    	}
>>>    
>>> @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>>    		if (err < 0)
>>>    			goto err_dev;
>>>    		mutex_lock(&devfreq->lock);
>>> +	} else {
>>> +		devfreq->freq_table = devfreq->profile->freq_table;
>>> +		devfreq->max_state = devfreq->profile->max_state;
>>>    	}
>>>    
>>>    	devfreq->scaling_min_freq = find_available_min_freq(devfreq);
>>> @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>>    
>>>    	devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
>>>    			array3_size(sizeof(unsigned int),
>>> -				    devfreq->profile->max_state,
>>> -				    devfreq->profile->max_state),
>>> +				    devfreq->max_state,
>>> +				    devfreq->max_state),
>>>    			GFP_KERNEL);
>>>    	if (!devfreq->stats.trans_table) {
>>>    		mutex_unlock(&devfreq->lock);
>>> @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>>    	}
>>>    
>>>    	devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
>>> -			devfreq->profile->max_state,
>>> +			devfreq->max_state,
>>>    			sizeof(*devfreq->stats.time_in_state),
>>>    			GFP_KERNEL);
>>>    	if (!devfreq->stats.time_in_state) {
>>> @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
>>>    
>>>    	mutex_lock(&df->lock);
>>>    
>>> -	for (i = 0; i < df->profile->max_state; i++)
>>> +	for (i = 0; i < df->max_state; i++)
>>>    		count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
>>> -				"%lu ", df->profile->freq_table[i]);
>>> +				"%lu ", df->freq_table[i]);
>>>    
>>>    	mutex_unlock(&df->lock);
>>>    	/* Truncate the trailing space */
>>> @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
>>>    
>>>    	if (!df->profile)
>>>    		return -EINVAL;
>>> -	max_state = df->profile->max_state;
>>> +	max_state = df->max_state;
>>>    
>>>    	if (max_state == 0)
>>>    		return sprintf(buf, "Not Supported.\n");
>>> @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
>>>    	len += sprintf(buf + len, "           :");
>>>    	for (i = 0; i < max_state; i++)
>>>    		len += sprintf(buf + len, "%10lu",
>>> -				df->profile->freq_table[i]);
>>> +				df->freq_table[i]);
>>>    
>>>    	len += sprintf(buf + len, "   time(ms)\n");
>>>    
>>>    	for (i = 0; i < max_state; i++) {
>>> -		if (df->profile->freq_table[i]
>>> -					== df->previous_freq) {
>>> +		if (df->freq_table[i] == df->previous_freq)
>>>    			len += sprintf(buf + len, "*");
>>> -		} else {
>>> +		else
>>>    			len += sprintf(buf + len, " ");
>>> -		}
>>> -		len += sprintf(buf + len, "%10lu:",
>>> -				df->profile->freq_table[i]);
>>> +
>>> +		len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
>>>    		for (j = 0; j < max_state; j++)
>>>    			len += sprintf(buf + len, "%10u",
>>>    				df->stats.trans_table[(i * max_state) + j]);
>>> @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
>>>    	if (!df->profile)
>>>    		return -EINVAL;
>>>    
>>> -	if (df->profile->max_state == 0)
>>> +	if (df->max_state == 0)
>>>    		return count;
>>>    
>>>    	err = kstrtoint(buf, 10, &value);
>>> @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
>>>    		return -EINVAL;
>>>    
>>>    	mutex_lock(&df->lock);
>>> -	memset(df->stats.time_in_state, 0, (df->profile->max_state *
>>> +	memset(df->stats.time_in_state, 0, (df->max_state *
>>>    					sizeof(*df->stats.time_in_state)));
>>>    	memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
>>> -					df->profile->max_state,
>>> -					df->profile->max_state));
>>> +					df->max_state,
>>> +					df->max_state));
>>>    	df->stats.total_trans = 0;
>>>    	df->stats.last_update = get_jiffies_64();
>>>    	mutex_unlock(&df->lock);
>>> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
>>> index 72c67979ebe1..ce24a262aa16 100644
>>> --- a/drivers/devfreq/governor_passive.c
>>> +++ b/drivers/devfreq/governor_passive.c
>>> @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
>>>    		goto out;
>>>    
>>>    	/* Use interpolation if required opps is not available */
>>> -	for (i = 0; i < parent_devfreq->profile->max_state; i++)
>>> -		if (parent_devfreq->profile->freq_table[i] == *freq)
>>> +	for (i = 0; i < parent_devfreq->max_state; i++)
>>> +		if (parent_devfreq->freq_table[i] == *freq)
>>>    			break;
>>>    
>>> -	if (i == parent_devfreq->profile->max_state)
>>> +	if (i == parent_devfreq->max_state)
>>>    		return -EINVAL;
>>>    
>>> -	if (i < devfreq->profile->max_state) {
>>> -		child_freq = devfreq->profile->freq_table[i];
>>> +	if (i < devfreq->max_state) {
>>> +		child_freq = devfreq->freq_table[i];
>>>    	} else {
>>> -		count = devfreq->profile->max_state;
>>> -		child_freq = devfreq->profile->freq_table[count - 1];
>>> +		count = devfreq->max_state;
>>> +		child_freq = devfreq->freq_table[count - 1];
>>>    	}
>>>    
>>>    out:
>>> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
>>> index dc10bee75a72..34aab4dd336c 100644
>>> --- a/include/linux/devfreq.h
>>> +++ b/include/linux/devfreq.h
>>> @@ -148,6 +148,8 @@ struct devfreq_stats {
>>>     *		reevaluate operable frequencies. Devfreq users may use
>>>     *		devfreq.nb to the corresponding register notifier call chain.
>>>     * @work:	delayed work for load monitoring.
>>> + * @freq_table:		current frequency table used by the devfreq driver.
>>> + * @max_state:		count of entry present in the frequency table.
>>>     * @previous_freq:	previously configured frequency value.
>>>     * @last_status:	devfreq user device info, performance statistics
>>>     * @data:	Private data of the governor. The devfreq framework does not
>>> @@ -185,6 +187,9 @@ struct devfreq {
>>>    	struct notifier_block nb;
>>>    	struct delayed_work work;
>>>    
>>> +	unsigned long *freq_table;
>>> +	unsigned int max_state;
>>> +
>>>    	unsigned long previous_freq;
>>>    	struct devfreq_dev_status last_status;
>>>    
>> Best regards
>> -- 
>> Marek Szyprowski, PhD
>> Samsung R&D Institute Poland
>>
Best regards
diff mbox series

Patch

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 01474daf4548..2e2b3b414d67 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -123,7 +123,7 @@  void devfreq_get_freq_range(struct devfreq *devfreq,
 			    unsigned long *min_freq,
 			    unsigned long *max_freq)
 {
-	unsigned long *freq_table = devfreq->profile->freq_table;
+	unsigned long *freq_table = devfreq->freq_table;
 	s32 qos_min_freq, qos_max_freq;
 
 	lockdep_assert_held(&devfreq->lock);
@@ -133,11 +133,11 @@  void devfreq_get_freq_range(struct devfreq *devfreq,
 	 * The devfreq drivers can initialize this in either ascending or
 	 * descending order and devfreq core supports both.
 	 */
-	if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
+	if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
 		*min_freq = freq_table[0];
-		*max_freq = freq_table[devfreq->profile->max_state - 1];
+		*max_freq = freq_table[devfreq->max_state - 1];
 	} else {
-		*min_freq = freq_table[devfreq->profile->max_state - 1];
+		*min_freq = freq_table[devfreq->max_state - 1];
 		*max_freq = freq_table[0];
 	}
 
@@ -169,8 +169,8 @@  static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
 {
 	int lev;
 
-	for (lev = 0; lev < devfreq->profile->max_state; lev++)
-		if (freq == devfreq->profile->freq_table[lev])
+	for (lev = 0; lev < devfreq->max_state; lev++)
+		if (freq == devfreq->freq_table[lev])
 			return lev;
 
 	return -EINVAL;
@@ -178,7 +178,6 @@  static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
 
 static int set_freq_table(struct devfreq *devfreq)
 {
-	struct devfreq_dev_profile *profile = devfreq->profile;
 	struct dev_pm_opp *opp;
 	unsigned long freq;
 	int i, count;
@@ -188,25 +187,22 @@  static int set_freq_table(struct devfreq *devfreq)
 	if (count <= 0)
 		return -EINVAL;
 
-	profile->max_state = count;
-	profile->freq_table = devm_kcalloc(devfreq->dev.parent,
-					profile->max_state,
-					sizeof(*profile->freq_table),
-					GFP_KERNEL);
-	if (!profile->freq_table) {
-		profile->max_state = 0;
+	devfreq->max_state = count;
+	devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
+					   devfreq->max_state,
+					   sizeof(*devfreq->freq_table),
+					   GFP_KERNEL);
+	if (!devfreq->freq_table)
 		return -ENOMEM;
-	}
 
-	for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
+	for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
 		opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
 		if (IS_ERR(opp)) {
-			devm_kfree(devfreq->dev.parent, profile->freq_table);
-			profile->max_state = 0;
+			devm_kfree(devfreq->dev.parent, devfreq->freq_table);
 			return PTR_ERR(opp);
 		}
 		dev_pm_opp_put(opp);
-		profile->freq_table[i] = freq;
+		devfreq->freq_table[i] = freq;
 	}
 
 	return 0;
@@ -246,7 +242,7 @@  int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
 
 	if (lev != prev_lev) {
 		devfreq->stats.trans_table[
-			(prev_lev * devfreq->profile->max_state) + lev]++;
+			(prev_lev * devfreq->max_state) + lev]++;
 		devfreq->stats.total_trans++;
 	}
 
@@ -835,6 +831,9 @@  struct devfreq *devfreq_add_device(struct device *dev,
 		if (err < 0)
 			goto err_dev;
 		mutex_lock(&devfreq->lock);
+	} else {
+		devfreq->freq_table = devfreq->profile->freq_table;
+		devfreq->max_state = devfreq->profile->max_state;
 	}
 
 	devfreq->scaling_min_freq = find_available_min_freq(devfreq);
@@ -870,8 +869,8 @@  struct devfreq *devfreq_add_device(struct device *dev,
 
 	devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
 			array3_size(sizeof(unsigned int),
-				    devfreq->profile->max_state,
-				    devfreq->profile->max_state),
+				    devfreq->max_state,
+				    devfreq->max_state),
 			GFP_KERNEL);
 	if (!devfreq->stats.trans_table) {
 		mutex_unlock(&devfreq->lock);
@@ -880,7 +879,7 @@  struct devfreq *devfreq_add_device(struct device *dev,
 	}
 
 	devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
-			devfreq->profile->max_state,
+			devfreq->max_state,
 			sizeof(*devfreq->stats.time_in_state),
 			GFP_KERNEL);
 	if (!devfreq->stats.time_in_state) {
@@ -1665,9 +1664,9 @@  static ssize_t available_frequencies_show(struct device *d,
 
 	mutex_lock(&df->lock);
 
-	for (i = 0; i < df->profile->max_state; i++)
+	for (i = 0; i < df->max_state; i++)
 		count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
-				"%lu ", df->profile->freq_table[i]);
+				"%lu ", df->freq_table[i]);
 
 	mutex_unlock(&df->lock);
 	/* Truncate the trailing space */
@@ -1690,7 +1689,7 @@  static ssize_t trans_stat_show(struct device *dev,
 
 	if (!df->profile)
 		return -EINVAL;
-	max_state = df->profile->max_state;
+	max_state = df->max_state;
 
 	if (max_state == 0)
 		return sprintf(buf, "Not Supported.\n");
@@ -1707,19 +1706,17 @@  static ssize_t trans_stat_show(struct device *dev,
 	len += sprintf(buf + len, "           :");
 	for (i = 0; i < max_state; i++)
 		len += sprintf(buf + len, "%10lu",
-				df->profile->freq_table[i]);
+				df->freq_table[i]);
 
 	len += sprintf(buf + len, "   time(ms)\n");
 
 	for (i = 0; i < max_state; i++) {
-		if (df->profile->freq_table[i]
-					== df->previous_freq) {
+		if (df->freq_table[i] == df->previous_freq)
 			len += sprintf(buf + len, "*");
-		} else {
+		else
 			len += sprintf(buf + len, " ");
-		}
-		len += sprintf(buf + len, "%10lu:",
-				df->profile->freq_table[i]);
+
+		len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
 		for (j = 0; j < max_state; j++)
 			len += sprintf(buf + len, "%10u",
 				df->stats.trans_table[(i * max_state) + j]);
@@ -1743,7 +1740,7 @@  static ssize_t trans_stat_store(struct device *dev,
 	if (!df->profile)
 		return -EINVAL;
 
-	if (df->profile->max_state == 0)
+	if (df->max_state == 0)
 		return count;
 
 	err = kstrtoint(buf, 10, &value);
@@ -1751,11 +1748,11 @@  static ssize_t trans_stat_store(struct device *dev,
 		return -EINVAL;
 
 	mutex_lock(&df->lock);
-	memset(df->stats.time_in_state, 0, (df->profile->max_state *
+	memset(df->stats.time_in_state, 0, (df->max_state *
 					sizeof(*df->stats.time_in_state)));
 	memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
-					df->profile->max_state,
-					df->profile->max_state));
+					df->max_state,
+					df->max_state));
 	df->stats.total_trans = 0;
 	df->stats.last_update = get_jiffies_64();
 	mutex_unlock(&df->lock);
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
index 72c67979ebe1..ce24a262aa16 100644
--- a/drivers/devfreq/governor_passive.c
+++ b/drivers/devfreq/governor_passive.c
@@ -131,18 +131,18 @@  static int get_target_freq_with_devfreq(struct devfreq *devfreq,
 		goto out;
 
 	/* Use interpolation if required opps is not available */
-	for (i = 0; i < parent_devfreq->profile->max_state; i++)
-		if (parent_devfreq->profile->freq_table[i] == *freq)
+	for (i = 0; i < parent_devfreq->max_state; i++)
+		if (parent_devfreq->freq_table[i] == *freq)
 			break;
 
-	if (i == parent_devfreq->profile->max_state)
+	if (i == parent_devfreq->max_state)
 		return -EINVAL;
 
-	if (i < devfreq->profile->max_state) {
-		child_freq = devfreq->profile->freq_table[i];
+	if (i < devfreq->max_state) {
+		child_freq = devfreq->freq_table[i];
 	} else {
-		count = devfreq->profile->max_state;
-		child_freq = devfreq->profile->freq_table[count - 1];
+		count = devfreq->max_state;
+		child_freq = devfreq->freq_table[count - 1];
 	}
 
 out:
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index dc10bee75a72..34aab4dd336c 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -148,6 +148,8 @@  struct devfreq_stats {
  *		reevaluate operable frequencies. Devfreq users may use
  *		devfreq.nb to the corresponding register notifier call chain.
  * @work:	delayed work for load monitoring.
+ * @freq_table:		current frequency table used by the devfreq driver.
+ * @max_state:		count of entries present in the frequency table.
  * @previous_freq:	previously configured frequency value.
  * @last_status:	devfreq user device info, performance statistics
  * @data:	Private data of the governor. The devfreq framework does not
@@ -185,6 +187,9 @@  struct devfreq {
 	struct notifier_block nb;
 	struct delayed_work work;
 
+	unsigned long *freq_table;
+	unsigned int max_state;
+
 	unsigned long previous_freq;
 	struct devfreq_dev_status last_status;