diff mbox series

[1/4] drm/amd: Add detailed GFXOFF stats to debugfs

Message ID 20220722203347.70176-2-andrealmeid@igalia.com (mailing list archive)
State New, archived
Headers show
Series drm/amd: Add more GFXOFF stats for vangogh | expand

Commit Message

André Almeida July 22, 2022, 8:33 p.m. UTC
Add debugfs interface to log GFXOFF statistics:

- Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the
  time of query since system power-up

- Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop.
  Read it to get average GFXOFF residency % multiplied by 100
  during the last logging interval.

Both features are designed to keep the values persistent between
suspends.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   | 168 ++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       |  39 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h       |   6 +
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c           |  45 +++++
 drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h       |   3 +
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     |  34 +++-
 drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  22 +++
 drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
 9 files changed, 321 insertions(+), 1 deletion(-)

Comments

Evan Quan July 25, 2022, 10:27 a.m. UTC | #1
[AMD Official Use Only - General]

Using "uint64_t" instead of "uint32_t" for entry counter may be better.

BR
Evan
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> André Almeida
> Sent: Saturday, July 23, 2022 4:34 AM
> To: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
> <Christian.Koenig@amd.com>; Pan, Xinhui <Xinhui.Pan@amd.com>; David
> Airlie <airlied@linux.ie>; Daniel Vetter <daniel@ffwll.ch>; Zhang, Hawking
> <Hawking.Zhang@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Kuehling,
> Felix <Felix.Kuehling@amd.com>; Xiao, Jack <Jack.Xiao@amd.com>; amd-
> gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org; linux-
> kernel@vger.kernel.org; StDenis, Tom <Tom.StDenis@amd.com>; Siqueira,
> Rodrigo <Rodrigo.Siqueira@amd.com>
> Cc: André Almeida <andrealmeid@igalia.com>; kernel-dev@igalia.com
> Subject: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
> 
> Add debugfs interface to log GFXOFF statistics:
> 
> - Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the
>   time of query since system power-up
> 
> - Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop.
>   Read it to get average GFXOFF residency % multiplied by 100
>   during the last logging interval.
> 
> Both features are designed to be keep the values persistent between
> suspends.
> 
> Signed-off-by: André Almeida <andrealmeid@igalia.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   | 168
> ++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |   2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       |  39 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h       |   6 +
>  drivers/gpu/drm/amd/pm/amdgpu_dpm.c           |  45 +++++
>  drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h       |   3 +
>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     |  34 +++-
>  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  22 +++
>  drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
>  9 files changed, 321 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index e2eec985adb3..edf90a9ba980 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1042,6 +1042,157 @@ static ssize_t amdgpu_debugfs_gpr_read(struct
> file *f, char __user *buf,
>  	return r;
>  }
> 
> +/**
> + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
> + *
> + * @f: open file handle
> + * @buf: User buffer to store read data in
> + * @size: Number of bytes to read
> + * @pos:  Offset to seek to
> + *
> + * Read the last residency value logged. It doesn't auto update, one needs
> to
> + * stop logging before getting the current value.
> + */
> +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char
> __user *buf,
> +						    size_t size, loff_t *pos)
> +{
> +	struct amdgpu_device *adev = file_inode(f)->i_private;
> +	ssize_t result = 0;
> +	int r;
> +
> +	if (size & 0x3 || *pos & 0x3)
> +		return -EINVAL;
> +
> +	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> +	if (r < 0) {
> +		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> +		return r;
> +	}
> +
> +	while (size) {
> +		uint32_t value;
> +
> +		r = amdgpu_get_gfx_off_residency(adev, &value);
> +		if (r)
> +			goto out;
> +
> +		r = put_user(value, (uint32_t *)buf);
> +		if (r)
> +			goto out;
> +
> +		result += 4;
> +		buf += 4;
> +		*pos += 4;
> +		size -= 4;
> +	}
> +
> +	r = result;
> +out:
> +	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> +	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> +
> +	return r;
> +}
> +
> +/**
> + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
> + *
> + * @f: open file handle
> + * @buf: User buffer to write data from
> + * @size: Number of bytes to write
> + * @pos:  Offset to seek to
> + *
> + * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop
> + */
> +static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const
> char __user *buf,
> +						     size_t size, loff_t *pos)
> +{
> +	struct amdgpu_device *adev = file_inode(f)->i_private;
> +	ssize_t result = 0;
> +	int r;
> +
> +	if (size & 0x3 || *pos & 0x3)
> +		return -EINVAL;
> +
> +	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> +	if (r < 0) {
> +		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> +		return r;
> +	}
> +
> +	while (size) {
> +		u32 value;
> +
> +		r = get_user(value, (uint32_t *)buf);
> +		if (r)
> +			goto out;
> +
> +		amdgpu_set_gfx_off_residency(adev, value ? true : false);
> +
> +		result += 4;
> +		buf += 4;
> +		*pos += 4;
> +		size -= 4;
> +	}
> +
> +	r = result;
> +out:
> +	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> +	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> +
> +	return r;
> +}
> +
> +
> +/**
> + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
> + *
> + * @f: open file handle
> + * @buf: User buffer to store read data in
> + * @size: Number of bytes to read
> + * @pos:  Offset to seek to
> + */
> +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char
> __user *buf,
> +						size_t size, loff_t *pos)
> +{
> +	struct amdgpu_device *adev = file_inode(f)->i_private;
> +	ssize_t result = 0;
> +	int r;
> +
> +	if (size & 0x3 || *pos & 0x3)
> +		return -EINVAL;
> +
> +	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> +	if (r < 0) {
> +		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> +		return r;
> +	}
> +
> +	while (size) {
> +		u32 value;
> +
> +		r = amdgpu_get_gfx_off_entrycount(adev, &value);
> +		if (r)
> +			goto out;
> +
> +		r = put_user(value, (uint32_t *)buf);
> +		if (r)
> +			goto out;
> +
> +		result += 4;
> +		buf += 4;
> +		*pos += 4;
> +		size -= 4;
> +	}
> +
> +	r = result;
> +out:
> +	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> +	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> +
> +	return r;
> +}
> +
>  /**
>   * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
>   *
> @@ -1249,6 +1400,19 @@ static const struct file_operations
> amdgpu_debugfs_gfxoff_status_fops = {
>  	.llseek = default_llseek
>  };
> 
> +static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = {
> +	.owner = THIS_MODULE,
> +	.read = amdgpu_debugfs_gfxoff_count_read,
> +	.llseek = default_llseek
> +};
> +
> +static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops
> = {
> +	.owner = THIS_MODULE,
> +	.read = amdgpu_debugfs_gfxoff_residency_read,
> +	.write = amdgpu_debugfs_gfxoff_residency_write,
> +	.llseek = default_llseek
> +};
> +
>  static const struct file_operations *debugfs_regs[] = {
>  	&amdgpu_debugfs_regs_fops,
>  	&amdgpu_debugfs_regs2_fops,
> @@ -1261,6 +1425,8 @@ static const struct file_operations *debugfs_regs[]
> = {
>  	&amdgpu_debugfs_gpr_fops,
>  	&amdgpu_debugfs_gfxoff_fops,
>  	&amdgpu_debugfs_gfxoff_status_fops,
> +	&amdgpu_debugfs_gfxoff_count_fops,
> +	&amdgpu_debugfs_gfxoff_residency_fops,
>  };
> 
>  static const char *debugfs_regs_names[] = {
> @@ -1275,6 +1441,8 @@ static const char *debugfs_regs_names[] = {
>  	"amdgpu_gpr",
>  	"amdgpu_gfxoff",
>  	"amdgpu_gfxoff_status",
> +	"amdgpu_gfxoff_count",
> +	"amdgpu_gfxoff_residency",
>  };
> 
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index b79ee4ffb879..15a95bc2c211 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3576,6 +3576,8 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
>  	INIT_WORK(&adev->xgmi_reset_work,
> amdgpu_device_xgmi_reset_func);
> 
>  	adev->gfx.gfx_off_req_count = 1;
> +	adev->gfx.gfx_off_residency = 0;
> +	adev->gfx.gfx_off_entrycount = 0;
>  	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
> 
>  	atomic_set(&adev->throttling_logging_enabled, 1);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 222d3d7ea076..3675c1b899db 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device
> *adev, bool enable)
>  	mutex_unlock(&adev->gfx.gfx_off_mutex);
>  }
> 
> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
> value)
> +{
> +	int r = 0;
> +
> +	mutex_lock(&adev->gfx.gfx_off_mutex);
> +
> +	r = amdgpu_dpm_set_residency_gfxoff(adev, value);
> +
> +	mutex_unlock(&adev->gfx.gfx_off_mutex);
> +
> +	return r;
> +}
> +
> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
> *value)
> +{
> +	int r = 0;
> +
> +	mutex_lock(&adev->gfx.gfx_off_mutex);
> +
> +	r = amdgpu_dpm_get_residency_gfxoff(adev, value);
> +
> +	mutex_unlock(&adev->gfx.gfx_off_mutex);
> +
> +	return r;
> +}
> +
> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
> *value)
> +{
> +	int r = 0;
> +
> +	mutex_lock(&adev->gfx.gfx_off_mutex);
> +
> +	r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
> +
> +	mutex_unlock(&adev->gfx.gfx_off_mutex);
> +
> +	return r;
> +}
> +
>  int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
> *value)
>  {
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 23a696d38390..f06e979e2565 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -336,6 +336,8 @@ struct amdgpu_gfx {
>  	struct mutex                    gfx_off_mutex;
>  	uint32_t                        gfx_off_req_count; /* default 1, enable gfx off:
> dec 1, disable gfx off: add 1 */
>  	struct delayed_work             gfx_off_delay_work;
> +	uint32_t                        gfx_off_residency;
> +	uint32_t                        gfx_off_entrycount;
> 
>  	/* pipe reservation */
>  	struct mutex			pipe_reserve_mutex;
> @@ -407,6 +409,10 @@ bool amdgpu_gfx_is_me_queue_enabled(struct
> amdgpu_device *adev, int me,
>  void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
>  int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
> *value);
>  int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct
> ras_common_if *ras_block);
> +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
> *value);
> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
> *residency);
> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
> value);
>  int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
>  		void *err_data,
>  		struct amdgpu_iv_entry *entry);
> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> index 956b6ce81c84..df87d0768fd7 100644
> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> @@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct
> amdgpu_device *adev,
>  	return ret;
>  }
> 
> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool
> value)
> +{
> +	struct smu_context *smu = adev->powerplay.pp_handle;
> +	int ret = 0;
> +
> +	if (!is_support_sw_smu(adev))
> +		return -EOPNOTSUPP;
> +
> +	mutex_lock(&adev->pm.mutex);
> +	ret = smu_set_residency_gfxoff(smu, value);
> +	mutex_unlock(&adev->pm.mutex);
> +
> +	return ret;
> +}
> +
> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32
> *value)
> +{
> +	struct smu_context *smu = adev->powerplay.pp_handle;
> +	int ret = 0;
> +
> +	if (!is_support_sw_smu(adev))
> +		return -EOPNOTSUPP;
> +
> +	mutex_lock(&adev->pm.mutex);
> +	ret = smu_get_residency_gfxoff(smu, value);
> +	mutex_unlock(&adev->pm.mutex);
> +
> +	return ret;
> +}
> +
> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev,
> u32 *value)
> +{
> +	struct smu_context *smu = adev->powerplay.pp_handle;
> +	int ret = 0;
> +
> +	if (!is_support_sw_smu(adev))
> +		return -EOPNOTSUPP;
> +
> +	mutex_lock(&adev->pm.mutex);
> +	ret = smu_get_entrycount_gfxoff(smu, value);
> +	mutex_unlock(&adev->pm.mutex);
> +
> +	return ret;
> +}
> +
>  int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
> *value)
>  {
>  	struct smu_context *smu = adev->powerplay.pp_handle;
> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> index 65624d091ed2..83a83e93037c 100644
> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> @@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct
> amdgpu_device *adev,
>  int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev);
>  int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum
> smu_event_type event,
>  		       uint64_t event_arg);
> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32
> *value);
> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool
> value);
> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev,
> u32 *value);
>  int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
> *value);
>  uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct
> amdgpu_device *adev);
>  void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> index fd79b213fab4..cfc3b9d749bf 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> @@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void *handle,
>  	return smu_set_pp_feature_mask(smu, new_mask);
>  }
> 
> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value)
> +{
> +	if (!smu->ppt_funcs->set_gfx_off_residency)
> +		return -EINVAL;
> +
> +	return smu_set_gfx_off_residency(smu, value);
> +}
> +
> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value)
> +{
> +	if (!smu->ppt_funcs->get_gfx_off_residency)
> +		return -EINVAL;
> +
> +	return smu_get_gfx_off_residency(smu, value);
> +}
> +
> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value)
> +{
> +	if (!smu->ppt_funcs->get_gfx_off_entrycount)
> +		return -EINVAL;
> +
> +	return smu_get_gfx_off_entrycount(smu, value);
> +}
> +
>  int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
>  {
>  	if (!smu->ppt_funcs->get_gfx_off_status)
> @@ -1573,7 +1597,7 @@ static int smu_suspend(void *handle)
>  {
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>  	struct smu_context *smu = adev->powerplay.pp_handle;
> -	int ret;
> +	int ret, count;
> 
>  	if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
>  		return 0;
> @@ -1591,6 +1615,14 @@ static int smu_suspend(void *handle)
> 
>  	smu_set_gfx_cgpg(smu, false);
> 
> +	/*
> +	 * pwfw resets entrycount when device is suspended, so we save
> the
> +	 * last value to be used when we resume to keep it consistent
> +	 */
> +	ret = smu_get_entrycount_gfxoff(smu, &count);
> +	if (!ret)
> +		adev->gfx.gfx_off_entrycount = count;
> +
>  	return 0;
>  }
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> index b81c657c7386..9827075b768e 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> @@ -1111,6 +1111,22 @@ struct pptable_funcs {
>  	 */
>  	uint32_t (*get_gfx_off_status)(struct smu_context *smu);
> 
> +	/**
> +	 * @gfx_off_entrycount: total GFXOFF entry count at the time of
> +	 * query since system power-up
> +	 */
> +	u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint32_t
> *entrycount);
> +
> +	/**
> +	 * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging
> +	 */
> +	u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start);
> +
> +	/**
> +	 * @get_gfx_off_residency: Average GFXOFF residency % during the
> logging interval
> +	 */
> +	u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t
> *residency);
> +
>  	/**
>  	 * @register_irq_handler: Register interupt request handlers.
>  	 */
> @@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu);
> 
>  int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
> 
> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value);
> +
> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value);
> +
> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value);
> +
>  int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value);
> 
>  int smu_handle_passthrough_sbr(struct smu_context *smu, bool enable);
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> index 7469bbfce1fb..ceb13c838067 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> @@ -47,6 +47,9 @@
>  #define smu_notify_memory_pool_location(smu)
> 	smu_ppt_funcs(notify_memory_pool_location, 0, smu)
>  #define smu_gfx_off_control(smu, enable)
> 	smu_ppt_funcs(gfx_off_control, 0, smu, enable)
>  #define smu_get_gfx_off_status(smu)
> 		smu_ppt_funcs(get_gfx_off_status, 0, smu)
> +#define smu_get_gfx_off_entrycount(smu, value)
> 			smu_ppt_funcs(get_gfx_off_entrycount, 0, smu,
> value)
> +#define smu_get_gfx_off_residency(smu, value)
> 			smu_ppt_funcs(get_gfx_off_residency, 0, smu,
> value)
> +#define smu_set_gfx_off_residency(smu, value)
> 			smu_ppt_funcs(set_gfx_off_residency, 0, smu,
> value)
>  #define smu_set_last_dcef_min_deep_sleep_clk(smu)
> 	smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu)
>  #define smu_system_features_control(smu, en)
> 	smu_ppt_funcs(system_features_control, 0, smu, en)
>  #define smu_init_max_sustainable_clocks(smu)
> 	smu_ppt_funcs(init_max_sustainable_clocks, 0, smu)
> --
> 2.37.1
André Almeida July 25, 2022, 1:04 p.m. UTC | #2
Às 07:27 de 25/07/22, Quan, Evan escreveu:
> [AMD Official Use Only - General]
> 
> Using "uint64_t" instead of "uint32_t" for entry counter may be better.
> 

Indeed, it's a good idea. I'll send a v2 with that change, thanks.

> BR
> Evan
>> -----Original Message-----
>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>> André Almeida
>> Sent: Saturday, July 23, 2022 4:34 AM
>> To: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
>> <Christian.Koenig@amd.com>; Pan, Xinhui <Xinhui.Pan@amd.com>; David
>> Airlie <airlied@linux.ie>; Daniel Vetter <daniel@ffwll.ch>; Zhang, Hawking
>> <Hawking.Zhang@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Kuehling,
>> Felix <Felix.Kuehling@amd.com>; Xiao, Jack <Jack.Xiao@amd.com>; amd-
>> gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org; linux-
>> kernel@vger.kernel.org; StDenis, Tom <Tom.StDenis@amd.com>; Siqueira,
>> Rodrigo <Rodrigo.Siqueira@amd.com>
>> Cc: André Almeida <andrealmeid@igalia.com>; kernel-dev@igalia.com
>> Subject: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
>>
>> Add debugfs interface to log GFXOFF statistics:
>>
>> - Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the
>>   time of query since system power-up
>>
>> - Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop.
>>   Read it to get average GFXOFF residency % multiplied by 100
>>   during the last logging interval.
>>
>> Both features are designed to be keep the values persistent between
>> suspends.
>>
>> Signed-off-by: André Almeida <andrealmeid@igalia.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   | 168
>> ++++++++++++++++++
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |   2 +
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       |  39 ++++
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h       |   6 +
>>  drivers/gpu/drm/amd/pm/amdgpu_dpm.c           |  45 +++++
>>  drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h       |   3 +
>>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     |  34 +++-
>>  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  22 +++
>>  drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
>>  9 files changed, 321 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> index e2eec985adb3..edf90a9ba980 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> @@ -1042,6 +1042,157 @@ static ssize_t amdgpu_debugfs_gpr_read(struct
>> file *f, char __user *buf,
>>  	return r;
>>  }
>>
>> +/**
>> + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
>> + *
>> + * @f: open file handle
>> + * @buf: User buffer to store read data in
>> + * @size: Number of bytes to read
>> + * @pos:  Offset to seek to
>> + *
>> + * Read the last residency value logged. It doesn't auto update, one needs
>> to
>> + * stop logging before getting the current value.
>> + */
>> +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char
>> __user *buf,
>> +						    size_t size, loff_t *pos)
>> +{
>> +	struct amdgpu_device *adev = file_inode(f)->i_private;
>> +	ssize_t result = 0;
>> +	int r;
>> +
>> +	if (size & 0x3 || *pos & 0x3)
>> +		return -EINVAL;
>> +
>> +	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> +	if (r < 0) {
>> +		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> +		return r;
>> +	}
>> +
>> +	while (size) {
>> +		uint32_t value;
>> +
>> +		r = amdgpu_get_gfx_off_residency(adev, &value);
>> +		if (r)
>> +			goto out;
>> +
>> +		r = put_user(value, (uint32_t *)buf);
>> +		if (r)
>> +			goto out;
>> +
>> +		result += 4;
>> +		buf += 4;
>> +		*pos += 4;
>> +		size -= 4;
>> +	}
>> +
>> +	r = result;
>> +out:
>> +	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>> +	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> +
>> +	return r;
>> +}
>> +
>> +/**
>> + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
>> + *
>> + * @f: open file handle
>> + * @buf: User buffer to write data from
>> + * @size: Number of bytes to write
>> + * @pos:  Offset to seek to
>> + *
>> + * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop
>> + */
>> +static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const
>> char __user *buf,
>> +						     size_t size, loff_t *pos)
>> +{
>> +	struct amdgpu_device *adev = file_inode(f)->i_private;
>> +	ssize_t result = 0;
>> +	int r;
>> +
>> +	if (size & 0x3 || *pos & 0x3)
>> +		return -EINVAL;
>> +
>> +	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> +	if (r < 0) {
>> +		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> +		return r;
>> +	}
>> +
>> +	while (size) {
>> +		u32 value;
>> +
>> +		r = get_user(value, (uint32_t *)buf);
>> +		if (r)
>> +			goto out;
>> +
>> +		amdgpu_set_gfx_off_residency(adev, value ? true : false);
>> +
>> +		result += 4;
>> +		buf += 4;
>> +		*pos += 4;
>> +		size -= 4;
>> +	}
>> +
>> +	r = result;
>> +out:
>> +	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>> +	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> +
>> +	return r;
>> +}
>> +
>> +
>> +/**
>> + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
>> + *
>> + * @f: open file handle
>> + * @buf: User buffer to store read data in
>> + * @size: Number of bytes to read
>> + * @pos:  Offset to seek to
>> + */
>> +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char
>> __user *buf,
>> +						size_t size, loff_t *pos)
>> +{
>> +	struct amdgpu_device *adev = file_inode(f)->i_private;
>> +	ssize_t result = 0;
>> +	int r;
>> +
>> +	if (size & 0x3 || *pos & 0x3)
>> +		return -EINVAL;
>> +
>> +	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> +	if (r < 0) {
>> +		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> +		return r;
>> +	}
>> +
>> +	while (size) {
>> +		u32 value;
>> +
>> +		r = amdgpu_get_gfx_off_entrycount(adev, &value);
>> +		if (r)
>> +			goto out;
>> +
>> +		r = put_user(value, (uint32_t *)buf);
>> +		if (r)
>> +			goto out;
>> +
>> +		result += 4;
>> +		buf += 4;
>> +		*pos += 4;
>> +		size -= 4;
>> +	}
>> +
>> +	r = result;
>> +out:
>> +	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>> +	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> +
>> +	return r;
>> +}
>> +
>>  /**
>>   * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
>>   *
>> @@ -1249,6 +1400,19 @@ static const struct file_operations
>> amdgpu_debugfs_gfxoff_status_fops = {
>>  	.llseek = default_llseek
>>  };
>>
>> +static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = {
>> +	.owner = THIS_MODULE,
>> +	.read = amdgpu_debugfs_gfxoff_count_read,
>> +	.llseek = default_llseek
>> +};
>> +
>> +static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops
>> = {
>> +	.owner = THIS_MODULE,
>> +	.read = amdgpu_debugfs_gfxoff_residency_read,
>> +	.write = amdgpu_debugfs_gfxoff_residency_write,
>> +	.llseek = default_llseek
>> +};
>> +
>>  static const struct file_operations *debugfs_regs[] = {
>>  	&amdgpu_debugfs_regs_fops,
>>  	&amdgpu_debugfs_regs2_fops,
>> @@ -1261,6 +1425,8 @@ static const struct file_operations *debugfs_regs[]
>> = {
>>  	&amdgpu_debugfs_gpr_fops,
>>  	&amdgpu_debugfs_gfxoff_fops,
>>  	&amdgpu_debugfs_gfxoff_status_fops,
>> +	&amdgpu_debugfs_gfxoff_count_fops,
>> +	&amdgpu_debugfs_gfxoff_residency_fops,
>>  };
>>
>>  static const char *debugfs_regs_names[] = {
>> @@ -1275,6 +1441,8 @@ static const char *debugfs_regs_names[] = {
>>  	"amdgpu_gpr",
>>  	"amdgpu_gfxoff",
>>  	"amdgpu_gfxoff_status",
>> +	"amdgpu_gfxoff_count",
>> +	"amdgpu_gfxoff_residency",
>>  };
>>
>>  /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index b79ee4ffb879..15a95bc2c211 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -3576,6 +3576,8 @@ int amdgpu_device_init(struct amdgpu_device
>> *adev,
>>  	INIT_WORK(&adev->xgmi_reset_work,
>> amdgpu_device_xgmi_reset_func);
>>
>>  	adev->gfx.gfx_off_req_count = 1;
>> +	adev->gfx.gfx_off_residency = 0;
>> +	adev->gfx.gfx_off_entrycount = 0;
>>  	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>
>>  	atomic_set(&adev->throttling_logging_enabled, 1);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index 222d3d7ea076..3675c1b899db 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device
>> *adev, bool enable)
>>  	mutex_unlock(&adev->gfx.gfx_off_mutex);
>>  }
>>
>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
>> value)
>> +{
>> +	int r = 0;
>> +
>> +	mutex_lock(&adev->gfx.gfx_off_mutex);
>> +
>> +	r = amdgpu_dpm_set_residency_gfxoff(adev, value);
>> +
>> +	mutex_unlock(&adev->gfx.gfx_off_mutex);
>> +
>> +	return r;
>> +}
>> +
>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
>> *value)
>> +{
>> +	int r = 0;
>> +
>> +	mutex_lock(&adev->gfx.gfx_off_mutex);
>> +
>> +	r = amdgpu_dpm_get_residency_gfxoff(adev, value);
>> +
>> +	mutex_unlock(&adev->gfx.gfx_off_mutex);
>> +
>> +	return r;
>> +}
>> +
>> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
>> *value)
>> +{
>> +	int r = 0;
>> +
>> +	mutex_lock(&adev->gfx.gfx_off_mutex);
>> +
>> +	r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
>> +
>> +	mutex_unlock(&adev->gfx.gfx_off_mutex);
>> +
>> +	return r;
>> +}
>> +
>>  int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
>> *value)
>>  {
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> index 23a696d38390..f06e979e2565 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> @@ -336,6 +336,8 @@ struct amdgpu_gfx {
>>  	struct mutex                    gfx_off_mutex;
>>  	uint32_t                        gfx_off_req_count; /* default 1, enable gfx off:
>> dec 1, disable gfx off: add 1 */
>>  	struct delayed_work             gfx_off_delay_work;
>> +	uint32_t                        gfx_off_residency;
>> +	uint32_t                        gfx_off_entrycount;
>>
>>  	/* pipe reservation */
>>  	struct mutex			pipe_reserve_mutex;
>> @@ -407,6 +409,10 @@ bool amdgpu_gfx_is_me_queue_enabled(struct
>> amdgpu_device *adev, int me,
>>  void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
>>  int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
>> *value);
>>  int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct
>> ras_common_if *ras_block);
>> +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
>> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
>> *value);
>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
>> *residency);
>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
>> value);
>>  int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
>>  		void *err_data,
>>  		struct amdgpu_iv_entry *entry);
>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>> index 956b6ce81c84..df87d0768fd7 100644
>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>> @@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct
>> amdgpu_device *adev,
>>  	return ret;
>>  }
>>
>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool
>> value)
>> +{
>> +	struct smu_context *smu = adev->powerplay.pp_handle;
>> +	int ret = 0;
>> +
>> +	if (!is_support_sw_smu(adev))
>> +		return -EOPNOTSUPP;
>> +
>> +	mutex_lock(&adev->pm.mutex);
>> +	ret = smu_set_residency_gfxoff(smu, value);
>> +	mutex_unlock(&adev->pm.mutex);
>> +
>> +	return ret;
>> +}
>> +
>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32
>> *value)
>> +{
>> +	struct smu_context *smu = adev->powerplay.pp_handle;
>> +	int ret = 0;
>> +
>> +	if (!is_support_sw_smu(adev))
>> +		return -EOPNOTSUPP;
>> +
>> +	mutex_lock(&adev->pm.mutex);
>> +	ret = smu_get_residency_gfxoff(smu, value);
>> +	mutex_unlock(&adev->pm.mutex);
>> +
>> +	return ret;
>> +}
>> +
>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev,
>> u32 *value)
>> +{
>> +	struct smu_context *smu = adev->powerplay.pp_handle;
>> +	int ret = 0;
>> +
>> +	if (!is_support_sw_smu(adev))
>> +		return -EOPNOTSUPP;
>> +
>> +	mutex_lock(&adev->pm.mutex);
>> +	ret = smu_get_entrycount_gfxoff(smu, value);
>> +	mutex_unlock(&adev->pm.mutex);
>> +
>> +	return ret;
>> +}
>> +
>>  int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
>> *value)
>>  {
>>  	struct smu_context *smu = adev->powerplay.pp_handle;
>> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>> index 65624d091ed2..83a83e93037c 100644
>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>> @@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct
>> amdgpu_device *adev,
>>  int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev);
>>  int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum
>> smu_event_type event,
>>  		       uint64_t event_arg);
>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32
>> *value);
>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool
>> value);
>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev,
>> u32 *value);
>>  int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
>> *value);
>>  uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct
>> amdgpu_device *adev);
>>  void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>> index fd79b213fab4..cfc3b9d749bf 100644
>> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>> @@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void *handle,
>>  	return smu_set_pp_feature_mask(smu, new_mask);
>>  }
>>
>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value)
>> +{
>> +	if (!smu->ppt_funcs->set_gfx_off_residency)
>> +		return -EINVAL;
>> +
>> +	return smu_set_gfx_off_residency(smu, value);
>> +}
>> +
>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value)
>> +{
>> +	if (!smu->ppt_funcs->get_gfx_off_residency)
>> +		return -EINVAL;
>> +
>> +	return smu_get_gfx_off_residency(smu, value);
>> +}
>> +
>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value)
>> +{
>> +	if (!smu->ppt_funcs->get_gfx_off_entrycount)
>> +		return -EINVAL;
>> +
>> +	return smu_get_gfx_off_entrycount(smu, value);
>> +}
>> +
>>  int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
>>  {
>>  	if (!smu->ppt_funcs->get_gfx_off_status)
>> @@ -1573,7 +1597,7 @@ static int smu_suspend(void *handle)
>>  {
>>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>  	struct smu_context *smu = adev->powerplay.pp_handle;
>> -	int ret;
>> +	int ret, count;
>>
>>  	if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
>>  		return 0;
>> @@ -1591,6 +1615,14 @@ static int smu_suspend(void *handle)
>>
>>  	smu_set_gfx_cgpg(smu, false);
>>
>> +	/*
>> +	 * pwfw resets entrycount when device is suspended, so we save
>> the
>> +	 * last value to be used when we resume to keep it consistent
>> +	 */
>> +	ret = smu_get_entrycount_gfxoff(smu, &count);
>> +	if (!ret)
>> +		adev->gfx.gfx_off_entrycount = count;
>> +
>>  	return 0;
>>  }
>>
>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>> index b81c657c7386..9827075b768e 100644
>> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>> @@ -1111,6 +1111,22 @@ struct pptable_funcs {
>>  	 */
>>  	uint32_t (*get_gfx_off_status)(struct smu_context *smu);
>>
>> +	/**
>> +	 * @gfx_off_entrycount: total GFXOFF entry count at the time of
>> +	 * query since system power-up
>> +	 */
>> +	u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint32_t
>> *entrycount);
>> +
>> +	/**
>> +	 * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging
>> +	 */
>> +	u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start);
>> +
>> +	/**
>> +	 * @get_gfx_off_residency: Average GFXOFF residency % during the
>> logging interval
>> +	 */
>> +	u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t
>> *residency);
>> +
>>  	/**
>>  	 * @register_irq_handler: Register interupt request handlers.
>>  	 */
>> @@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu);
>>
>>  int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
>>
>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value);
>> +
>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value);
>> +
>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value);
>> +
>>  int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value);
>>
>>  int smu_handle_passthrough_sbr(struct smu_context *smu, bool enable);
>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>> b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>> index 7469bbfce1fb..ceb13c838067 100644
>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>> @@ -47,6 +47,9 @@
>>  #define smu_notify_memory_pool_location(smu)
>> 	smu_ppt_funcs(notify_memory_pool_location, 0, smu)
>>  #define smu_gfx_off_control(smu, enable)
>> 	smu_ppt_funcs(gfx_off_control, 0, smu, enable)
>>  #define smu_get_gfx_off_status(smu)
>> 		smu_ppt_funcs(get_gfx_off_status, 0, smu)
>> +#define smu_get_gfx_off_entrycount(smu, value)
>> 			smu_ppt_funcs(get_gfx_off_entrycount, 0, smu,
>> value)
>> +#define smu_get_gfx_off_residency(smu, value)
>> 			smu_ppt_funcs(get_gfx_off_residency, 0, smu,
>> value)
>> +#define smu_set_gfx_off_residency(smu, value)
>> 			smu_ppt_funcs(set_gfx_off_residency, 0, smu,
>> value)
>>  #define smu_set_last_dcef_min_deep_sleep_clk(smu)
>> 	smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu)
>>  #define smu_system_features_control(smu, en)
>> 	smu_ppt_funcs(system_features_control, 0, smu, en)
>>  #define smu_init_max_sustainable_clocks(smu)
>> 	smu_ppt_funcs(init_max_sustainable_clocks, 0, smu)
>> --
>> 2.37.1
André Almeida July 25, 2022, 4:15 p.m. UTC | #3
Às 10:04 de 25/07/22, André Almeida escreveu:
> Às 07:27 de 25/07/22, Quan, Evan escreveu:
>> [AMD Official Use Only - General]
>>
>> Using "uint64_t" instead of "uint32_t" for entry counter may be better.
>>
> 
> Indeed, it's a good idea. I'll send a v2 with that change, thanks.
> 

However, SMU messaging reads a 32-bit register to get the entrycount from
the pwfw, so we would keep the risk of overflow anyway, right?

>> BR
>> Evan
>>> -----Original Message-----
>>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
>>> André Almeida
>>> Sent: Saturday, July 23, 2022 4:34 AM
>>> To: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
>>> <Christian.Koenig@amd.com>; Pan, Xinhui <Xinhui.Pan@amd.com>; David
>>> Airlie <airlied@linux.ie>; Daniel Vetter <daniel@ffwll.ch>; Zhang, Hawking
>>> <Hawking.Zhang@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Kuehling,
>>> Felix <Felix.Kuehling@amd.com>; Xiao, Jack <Jack.Xiao@amd.com>; amd-
>>> gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org; linux-
>>> kernel@vger.kernel.org; StDenis, Tom <Tom.StDenis@amd.com>; Siqueira,
>>> Rodrigo <Rodrigo.Siqueira@amd.com>
>>> Cc: André Almeida <andrealmeid@igalia.com>; kernel-dev@igalia.com
>>> Subject: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
>>>
>>> Add debugfs interface to log GFXOFF statistics:
>>>
>>> - Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the
>>>   time of query since system power-up
>>>
>>> - Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop.
>>>   Read it to get average GFXOFF residency % multiplied by 100
>>>   during the last logging interval.
>>>
>>> Both features are designed to be keep the values persistent between
>>> suspends.
>>>
>>> Signed-off-by: André Almeida <andrealmeid@igalia.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   | 168
>>> ++++++++++++++++++
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |   2 +
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       |  39 ++++
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h       |   6 +
>>>  drivers/gpu/drm/amd/pm/amdgpu_dpm.c           |  45 +++++
>>>  drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h       |   3 +
>>>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     |  34 +++-
>>>  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  22 +++
>>>  drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
>>>  9 files changed, 321 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> index e2eec985adb3..edf90a9ba980 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>>> @@ -1042,6 +1042,157 @@ static ssize_t amdgpu_debugfs_gpr_read(struct
>>> file *f, char __user *buf,
>>>  	return r;
>>>  }
>>>
>>> +/**
>>> + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
>>> + *
>>> + * @f: open file handle
>>> + * @buf: User buffer to store read data in
>>> + * @size: Number of bytes to read
>>> + * @pos:  Offset to seek to
>>> + *
>>> + * Read the last residency value logged. It doesn't auto update, one needs
>>> to
>>> + * stop logging before getting the current value.
>>> + */
>>> +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char
>>> __user *buf,
>>> +						    size_t size, loff_t *pos)
>>> +{
>>> +	struct amdgpu_device *adev = file_inode(f)->i_private;
>>> +	ssize_t result = 0;
>>> +	int r;
>>> +
>>> +	if (size & 0x3 || *pos & 0x3)
>>> +		return -EINVAL;
>>> +
>>> +	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>> +	if (r < 0) {
>>> +		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> +		return r;
>>> +	}
>>> +
>>> +	while (size) {
>>> +		uint32_t value;
>>> +
>>> +		r = amdgpu_get_gfx_off_residency(adev, &value);
>>> +		if (r)
>>> +			goto out;
>>> +
>>> +		r = put_user(value, (uint32_t *)buf);
>>> +		if (r)
>>> +			goto out;
>>> +
>>> +		result += 4;
>>> +		buf += 4;
>>> +		*pos += 4;
>>> +		size -= 4;
>>> +	}
>>> +
>>> +	r = result;
>>> +out:
>>> +	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>>> +	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> +
>>> +	return r;
>>> +}
>>> +
>>> +/**
>>> + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
>>> + *
>>> + * @f: open file handle
>>> + * @buf: User buffer to write data from
>>> + * @size: Number of bytes to write
>>> + * @pos:  Offset to seek to
>>> + *
>>> + * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop
>>> + */
>>> +static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const
>>> char __user *buf,
>>> +						     size_t size, loff_t *pos)
>>> +{
>>> +	struct amdgpu_device *adev = file_inode(f)->i_private;
>>> +	ssize_t result = 0;
>>> +	int r;
>>> +
>>> +	if (size & 0x3 || *pos & 0x3)
>>> +		return -EINVAL;
>>> +
>>> +	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>> +	if (r < 0) {
>>> +		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> +		return r;
>>> +	}
>>> +
>>> +	while (size) {
>>> +		u32 value;
>>> +
>>> +		r = get_user(value, (uint32_t *)buf);
>>> +		if (r)
>>> +			goto out;
>>> +
>>> +		amdgpu_set_gfx_off_residency(adev, value ? true : false);
>>> +
>>> +		result += 4;
>>> +		buf += 4;
>>> +		*pos += 4;
>>> +		size -= 4;
>>> +	}
>>> +
>>> +	r = result;
>>> +out:
>>> +	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>>> +	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> +
>>> +	return r;
>>> +}
>>> +
>>> +
>>> +/**
>>> + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
>>> + *
>>> + * @f: open file handle
>>> + * @buf: User buffer to store read data in
>>> + * @size: Number of bytes to read
>>> + * @pos:  Offset to seek to
>>> + */
>>> +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char
>>> __user *buf,
>>> +						size_t size, loff_t *pos)
>>> +{
>>> +	struct amdgpu_device *adev = file_inode(f)->i_private;
>>> +	ssize_t result = 0;
>>> +	int r;
>>> +
>>> +	if (size & 0x3 || *pos & 0x3)
>>> +		return -EINVAL;
>>> +
>>> +	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>>> +	if (r < 0) {
>>> +		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> +		return r;
>>> +	}
>>> +
>>> +	while (size) {
>>> +		u32 value;
>>> +
>>> +		r = amdgpu_get_gfx_off_entrycount(adev, &value);
>>> +		if (r)
>>> +			goto out;
>>> +
>>> +		r = put_user(value, (uint32_t *)buf);
>>> +		if (r)
>>> +			goto out;
>>> +
>>> +		result += 4;
>>> +		buf += 4;
>>> +		*pos += 4;
>>> +		size -= 4;
>>> +	}
>>> +
>>> +	r = result;
>>> +out:
>>> +	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>>> +	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>> +
>>> +	return r;
>>> +}
>>> +
>>>  /**
>>>   * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
>>>   *
>>> @@ -1249,6 +1400,19 @@ static const struct file_operations
>>> amdgpu_debugfs_gfxoff_status_fops = {
>>>  	.llseek = default_llseek
>>>  };
>>>
>>> +static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = {
>>> +	.owner = THIS_MODULE,
>>> +	.read = amdgpu_debugfs_gfxoff_count_read,
>>> +	.llseek = default_llseek
>>> +};
>>> +
>>> +static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops
>>> = {
>>> +	.owner = THIS_MODULE,
>>> +	.read = amdgpu_debugfs_gfxoff_residency_read,
>>> +	.write = amdgpu_debugfs_gfxoff_residency_write,
>>> +	.llseek = default_llseek
>>> +};
>>> +
>>>  static const struct file_operations *debugfs_regs[] = {
>>>  	&amdgpu_debugfs_regs_fops,
>>>  	&amdgpu_debugfs_regs2_fops,
>>> @@ -1261,6 +1425,8 @@ static const struct file_operations *debugfs_regs[]
>>> = {
>>>  	&amdgpu_debugfs_gpr_fops,
>>>  	&amdgpu_debugfs_gfxoff_fops,
>>>  	&amdgpu_debugfs_gfxoff_status_fops,
>>> +	&amdgpu_debugfs_gfxoff_count_fops,
>>> +	&amdgpu_debugfs_gfxoff_residency_fops,
>>>  };
>>>
>>>  static const char *debugfs_regs_names[] = {
>>> @@ -1275,6 +1441,8 @@ static const char *debugfs_regs_names[] = {
>>>  	"amdgpu_gpr",
>>>  	"amdgpu_gfxoff",
>>>  	"amdgpu_gfxoff_status",
>>> +	"amdgpu_gfxoff_count",
>>> +	"amdgpu_gfxoff_residency",
>>>  };
>>>
>>>  /**
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index b79ee4ffb879..15a95bc2c211 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -3576,6 +3576,8 @@ int amdgpu_device_init(struct amdgpu_device
>>> *adev,
>>>  	INIT_WORK(&adev->xgmi_reset_work,
>>> amdgpu_device_xgmi_reset_func);
>>>
>>>  	adev->gfx.gfx_off_req_count = 1;
>>> +	adev->gfx.gfx_off_residency = 0;
>>> +	adev->gfx.gfx_off_entrycount = 0;
>>>  	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>>
>>>  	atomic_set(&adev->throttling_logging_enabled, 1);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> index 222d3d7ea076..3675c1b899db 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>>> @@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device
>>> *adev, bool enable)
>>>  	mutex_unlock(&adev->gfx.gfx_off_mutex);
>>>  }
>>>
>>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
>>> value)
>>> +{
>>> +	int r = 0;
>>> +
>>> +	mutex_lock(&adev->gfx.gfx_off_mutex);
>>> +
>>> +	r = amdgpu_dpm_set_residency_gfxoff(adev, value);
>>> +
>>> +	mutex_unlock(&adev->gfx.gfx_off_mutex);
>>> +
>>> +	return r;
>>> +}
>>> +
>>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
>>> *value)
>>> +{
>>> +	int r = 0;
>>> +
>>> +	mutex_lock(&adev->gfx.gfx_off_mutex);
>>> +
>>> +	r = amdgpu_dpm_get_residency_gfxoff(adev, value);
>>> +
>>> +	mutex_unlock(&adev->gfx.gfx_off_mutex);
>>> +
>>> +	return r;
>>> +}
>>> +
>>> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
>>> *value)
>>> +{
>>> +	int r = 0;
>>> +
>>> +	mutex_lock(&adev->gfx.gfx_off_mutex);
>>> +
>>> +	r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
>>> +
>>> +	mutex_unlock(&adev->gfx.gfx_off_mutex);
>>> +
>>> +	return r;
>>> +}
>>> +
>>>  int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
>>> *value)
>>>  {
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> index 23a696d38390..f06e979e2565 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>>> @@ -336,6 +336,8 @@ struct amdgpu_gfx {
>>>  	struct mutex                    gfx_off_mutex;
>>>  	uint32_t                        gfx_off_req_count; /* default 1, enable gfx off:
>>> dec 1, disable gfx off: add 1 */
>>>  	struct delayed_work             gfx_off_delay_work;
>>> +	uint32_t                        gfx_off_residency;
>>> +	uint32_t                        gfx_off_entrycount;
>>>
>>>  	/* pipe reservation */
>>>  	struct mutex			pipe_reserve_mutex;
>>> @@ -407,6 +409,10 @@ bool amdgpu_gfx_is_me_queue_enabled(struct
>>> amdgpu_device *adev, int me,
>>>  void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
>>>  int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
>>> *value);
>>>  int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct
>>> ras_common_if *ras_block);
>>> +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
>>> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
>>> *value);
>>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
>>> *residency);
>>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
>>> value);
>>>  int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
>>>  		void *err_data,
>>>  		struct amdgpu_iv_entry *entry);
>>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> index 956b6ce81c84..df87d0768fd7 100644
>>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
>>> @@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct
>>> amdgpu_device *adev,
>>>  	return ret;
>>>  }
>>>
>>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool
>>> value)
>>> +{
>>> +	struct smu_context *smu = adev->powerplay.pp_handle;
>>> +	int ret = 0;
>>> +
>>> +	if (!is_support_sw_smu(adev))
>>> +		return -EOPNOTSUPP;
>>> +
>>> +	mutex_lock(&adev->pm.mutex);
>>> +	ret = smu_set_residency_gfxoff(smu, value);
>>> +	mutex_unlock(&adev->pm.mutex);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32
>>> *value)
>>> +{
>>> +	struct smu_context *smu = adev->powerplay.pp_handle;
>>> +	int ret = 0;
>>> +
>>> +	if (!is_support_sw_smu(adev))
>>> +		return -EOPNOTSUPP;
>>> +
>>> +	mutex_lock(&adev->pm.mutex);
>>> +	ret = smu_get_residency_gfxoff(smu, value);
>>> +	mutex_unlock(&adev->pm.mutex);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev,
>>> u32 *value)
>>> +{
>>> +	struct smu_context *smu = adev->powerplay.pp_handle;
>>> +	int ret = 0;
>>> +
>>> +	if (!is_support_sw_smu(adev))
>>> +		return -EOPNOTSUPP;
>>> +
>>> +	mutex_lock(&adev->pm.mutex);
>>> +	ret = smu_get_entrycount_gfxoff(smu, value);
>>> +	mutex_unlock(&adev->pm.mutex);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>>  int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
>>> *value)
>>>  {
>>>  	struct smu_context *smu = adev->powerplay.pp_handle;
>>> diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>> index 65624d091ed2..83a83e93037c 100644
>>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
>>> @@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct
>>> amdgpu_device *adev,
>>>  int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev);
>>>  int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum
>>> smu_event_type event,
>>>  		       uint64_t event_arg);
>>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32
>>> *value);
>>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool
>>> value);
>>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev,
>>> u32 *value);
>>>  int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t
>>> *value);
>>>  uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct
>>> amdgpu_device *adev);
>>>  void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> index fd79b213fab4..cfc3b9d749bf 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
>>> @@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void *handle,
>>>  	return smu_set_pp_feature_mask(smu, new_mask);
>>>  }
>>>
>>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value)
>>> +{
>>> +	if (!smu->ppt_funcs->set_gfx_off_residency)
>>> +		return -EINVAL;
>>> +
>>> +	return smu_set_gfx_off_residency(smu, value);
>>> +}
>>> +
>>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value)
>>> +{
>>> +	if (!smu->ppt_funcs->get_gfx_off_residency)
>>> +		return -EINVAL;
>>> +
>>> +	return smu_get_gfx_off_residency(smu, value);
>>> +}
>>> +
>>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value)
>>> +{
>>> +	if (!smu->ppt_funcs->get_gfx_off_entrycount)
>>> +		return -EINVAL;
>>> +
>>> +	return smu_get_gfx_off_entrycount(smu, value);
>>> +}
>>> +
>>>  int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
>>>  {
>>>  	if (!smu->ppt_funcs->get_gfx_off_status)
>>> @@ -1573,7 +1597,7 @@ static int smu_suspend(void *handle)
>>>  {
>>>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>>  	struct smu_context *smu = adev->powerplay.pp_handle;
>>> -	int ret;
>>> +	int ret, count;
>>>
>>>  	if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
>>>  		return 0;
>>> @@ -1591,6 +1615,14 @@ static int smu_suspend(void *handle)
>>>
>>>  	smu_set_gfx_cgpg(smu, false);
>>>
>>> +	/*
>>> +	 * pwfw resets entrycount when device is suspended, so we save
>>> the
>>> +	 * last value to be used when we resume to keep it consistent
>>> +	 */
>>> +	ret = smu_get_entrycount_gfxoff(smu, &count);
>>> +	if (!ret)
>>> +		adev->gfx.gfx_off_entrycount = count;
>>> +
>>>  	return 0;
>>>  }
>>>
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> index b81c657c7386..9827075b768e 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
>>> @@ -1111,6 +1111,22 @@ struct pptable_funcs {
>>>  	 */
>>>  	uint32_t (*get_gfx_off_status)(struct smu_context *smu);
>>>
>>> +	/**
>>> +	 * @gfx_off_entrycount: total GFXOFF entry count at the time of
>>> +	 * query since system power-up
>>> +	 */
>>> +	u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint32_t
>>> *entrycount);
>>> +
>>> +	/**
>>> +	 * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging
>>> +	 */
>>> +	u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start);
>>> +
>>> +	/**
>>> +	 * @get_gfx_off_residency: Average GFXOFF residency % during the
>>> logging interval
>>> +	 */
>>> +	u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t
>>> *residency);
>>> +
>>>  	/**
>>>  	 * @register_irq_handler: Register interupt request handlers.
>>>  	 */
>>> @@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu);
>>>
>>>  int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
>>>
>>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value);
>>> +
>>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value);
>>> +
>>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value);
>>> +
>>>  int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value);
>>>
>>>  int smu_handle_passthrough_sbr(struct smu_context *smu, bool enable);
>>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>>> b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>>> index 7469bbfce1fb..ceb13c838067 100644
>>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
>>> @@ -47,6 +47,9 @@
>>>  #define smu_notify_memory_pool_location(smu)
>>> 	smu_ppt_funcs(notify_memory_pool_location, 0, smu)
>>>  #define smu_gfx_off_control(smu, enable)
>>> 	smu_ppt_funcs(gfx_off_control, 0, smu, enable)
>>>  #define smu_get_gfx_off_status(smu)
>>> 		smu_ppt_funcs(get_gfx_off_status, 0, smu)
>>> +#define smu_get_gfx_off_entrycount(smu, value)
>>> 			smu_ppt_funcs(get_gfx_off_entrycount, 0, smu,
>>> value)
>>> +#define smu_get_gfx_off_residency(smu, value)
>>> 			smu_ppt_funcs(get_gfx_off_residency, 0, smu,
>>> value)
>>> +#define smu_set_gfx_off_residency(smu, value)
>>> 			smu_ppt_funcs(set_gfx_off_residency, 0, smu,
>>> value)
>>>  #define smu_set_last_dcef_min_deep_sleep_clk(smu)
>>> 	smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu)
>>>  #define smu_system_features_control(smu, en)
>>> 	smu_ppt_funcs(system_features_control, 0, smu, en)
>>>  #define smu_init_max_sustainable_clocks(smu)
>>> 	smu_ppt_funcs(init_max_sustainable_clocks, 0, smu)
>>> --
>>> 2.37.1
Evan Quan July 26, 2022, 3:03 a.m. UTC | #4
[AMD Official Use Only - General]



> -----Original Message-----
> From: André Almeida <andrealmeid@igalia.com>
> Sent: Tuesday, July 26, 2022 12:15 AM
> To: Quan, Evan <Evan.Quan@amd.com>; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Koenig, Christian
> <Christian.Koenig@amd.com>; Pan, Xinhui <Xinhui.Pan@amd.com>; David
> Airlie <airlied@linux.ie>; Daniel Vetter <daniel@ffwll.ch>; Zhang, Hawking
> <Hawking.Zhang@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Kuehling,
> Felix <Felix.Kuehling@amd.com>; Xiao, Jack <Jack.Xiao@amd.com>; amd-
> gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org; linux-
> kernel@vger.kernel.org; StDenis, Tom <Tom.StDenis@amd.com>; Siqueira,
> Rodrigo <Rodrigo.Siqueira@amd.com>
> Cc: kernel-dev@igalia.com
> Subject: Re: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
> 
> Às 10:04 de 25/07/22, André Almeida escreveu:
> > Às 07:27 de 25/07/22, Quan, Evan escreveu:
> >> [AMD Official Use Only - General]
> >>
> >> Using "uint64_t" instead of "uint32_t" for entry counter may be better.
> >>
> >
> > Indeed, it's a good idea. I'll send a v2 with that change, thanks.
> >
> 
> However, SMU messaging reads a 32bit register to get the entrycount from
> the pwfw, so would keep with with the risk of overflow anyway right?
[Quan, Evan] Yes, that makes sense. Better to document that (the risk of overflow).
Anyway, the series seems fine to me.
Series is acked-by: Evan Quan <evan.quan@amd.com>
> 
> >> BR
> >> Evan
> >>> -----Original Message-----
> >>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> >>> André Almeida
> >>> Sent: Saturday, July 23, 2022 4:34 AM
> >>> To: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig,
> >>> Christian <Christian.Koenig@amd.com>; Pan, Xinhui
> >>> <Xinhui.Pan@amd.com>; David Airlie <airlied@linux.ie>; Daniel Vetter
> >>> <daniel@ffwll.ch>; Zhang, Hawking <Hawking.Zhang@amd.com>; Zhou1,
> >>> Tao <Tao.Zhou1@amd.com>; Kuehling, Felix
> <Felix.Kuehling@amd.com>;
> >>> Xiao, Jack <Jack.Xiao@amd.com>; amd- gfx@lists.freedesktop.org;
> >>> dri-devel@lists.freedesktop.org; linux- kernel@vger.kernel.org;
> >>> StDenis, Tom <Tom.StDenis@amd.com>; Siqueira, Rodrigo
> >>> <Rodrigo.Siqueira@amd.com>
> >>> Cc: André Almeida <andrealmeid@igalia.com>; kernel-dev@igalia.com
> >>> Subject: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
> >>>
> >>> Add debugfs interface to log GFXOFF statistics:
> >>>
> >>> - Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the
> >>>   time of query since system power-up
> >>>
> >>> - Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop.
> >>>   Read it to get average GFXOFF residency % multiplied by 100
> >>>   during the last logging interval.
> >>>
> >>> Both features are designed to be keep the values persistent between
> >>> suspends.
> >>>
> >>> Signed-off-by: André Almeida <andrealmeid@igalia.com>
> >>> ---
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   | 168
> >>> ++++++++++++++++++
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |   2 +
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       |  39 ++++
> >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h       |   6 +
> >>>  drivers/gpu/drm/amd/pm/amdgpu_dpm.c           |  45 +++++
> >>>  drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h       |   3 +
> >>>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     |  34 +++-
> >>>  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  22 +++
> >>>  drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
> >>>  9 files changed, 321 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >>> index e2eec985adb3..edf90a9ba980 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >>> @@ -1042,6 +1042,157 @@ static ssize_t
> >>> amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
> >>>  	return r;
> >>>  }
> >>>
> >>> +/**
> >>> + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
> >>> + *
> >>> + * @f: open file handle
> >>> + * @buf: User buffer to store read data in
> >>> + * @size: Number of bytes to read
> >>> + * @pos:  Offset to seek to
> >>> + *
> >>> + * Read the last residency value logged. It doesn't auto update,
> >>> +one needs
> >>> to
> >>> + * stop logging before getting the current value.
> >>> + */
> >>> +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f,
> >>> +char
> >>> __user *buf,
> >>> +						    size_t size, loff_t *pos) {
> >>> +	struct amdgpu_device *adev = file_inode(f)->i_private;
> >>> +	ssize_t result = 0;
> >>> +	int r;
> >>> +
> >>> +	if (size & 0x3 || *pos & 0x3)
> >>> +		return -EINVAL;
> >>> +
> >>> +	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >>> +	if (r < 0) {
> >>> +		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >>> +		return r;
> >>> +	}
> >>> +
> >>> +	while (size) {
> >>> +		uint32_t value;
> >>> +
> >>> +		r = amdgpu_get_gfx_off_residency(adev, &value);
> >>> +		if (r)
> >>> +			goto out;
> >>> +
> >>> +		r = put_user(value, (uint32_t *)buf);
> >>> +		if (r)
> >>> +			goto out;
> >>> +
> >>> +		result += 4;
> >>> +		buf += 4;
> >>> +		*pos += 4;
> >>> +		size -= 4;
> >>> +	}
> >>> +
> >>> +	r = result;
> >>> +out:
> >>> +	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> >>> +	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >>> +
> >>> +	return r;
> >>> +}
> >>> +
> >>> +/**
> >>> + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
> >>> + *
> >>> + * @f: open file handle
> >>> + * @buf: User buffer to write data from
> >>> + * @size: Number of bytes to write
> >>> + * @pos:  Offset to seek to
> >>> + *
> >>> + * Write a 32-bit non-zero to start logging; write a 32-bit zero to
> >>> +stop  */ static ssize_t
> >>> +amdgpu_debugfs_gfxoff_residency_write(struct file *f, const
> >>> char __user *buf,
> >>> +						     size_t size, loff_t *pos) {
> >>> +	struct amdgpu_device *adev = file_inode(f)->i_private;
> >>> +	ssize_t result = 0;
> >>> +	int r;
> >>> +
> >>> +	if (size & 0x3 || *pos & 0x3)
> >>> +		return -EINVAL;
> >>> +
> >>> +	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >>> +	if (r < 0) {
> >>> +		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >>> +		return r;
> >>> +	}
> >>> +
> >>> +	while (size) {
> >>> +		u32 value;
> >>> +
> >>> +		r = get_user(value, (uint32_t *)buf);
> >>> +		if (r)
> >>> +			goto out;
> >>> +
> >>> +		amdgpu_set_gfx_off_residency(adev, value ? true : false);
> >>> +
> >>> +		result += 4;
> >>> +		buf += 4;
> >>> +		*pos += 4;
> >>> +		size -= 4;
> >>> +	}
> >>> +
> >>> +	r = result;
> >>> +out:
> >>> +	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> >>> +	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >>> +
> >>> +	return r;
> >>> +}
> >>> +
> >>> +
> >>> +/**
> >>> + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
> >>> + *
> >>> + * @f: open file handle
> >>> + * @buf: User buffer to store read data in
> >>> + * @size: Number of bytes to read
> >>> + * @pos:  Offset to seek to
> >>> + */
> >>> +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f,
> >>> +char
> >>> __user *buf,
> >>> +						size_t size, loff_t *pos)
> >>> +{
> >>> +	struct amdgpu_device *adev = file_inode(f)->i_private;
> >>> +	ssize_t result = 0;
> >>> +	int r;
> >>> +
> >>> +	if (size & 0x3 || *pos & 0x3)
> >>> +		return -EINVAL;
> >>> +
> >>> +	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >>> +	if (r < 0) {
> >>> +		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >>> +		return r;
> >>> +	}
> >>> +
> >>> +	while (size) {
> >>> +		u32 value;
> >>> +
> >>> +		r = amdgpu_get_gfx_off_entrycount(adev, &value);
> >>> +		if (r)
> >>> +			goto out;
> >>> +
> >>> +		r = put_user(value, (uint32_t *)buf);
> >>> +		if (r)
> >>> +			goto out;
> >>> +
> >>> +		result += 4;
> >>> +		buf += 4;
> >>> +		*pos += 4;
> >>> +		size -= 4;
> >>> +	}
> >>> +
> >>> +	r = result;
> >>> +out:
> >>> +	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> >>> +	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >>> +
> >>> +	return r;
> >>> +}
> >>> +
> >>>  /**
> >>>   * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
> >>>   *
> >>> @@ -1249,6 +1400,19 @@ static const struct file_operations
> >>> amdgpu_debugfs_gfxoff_status_fops = {
> >>>  	.llseek = default_llseek
> >>>  };
> >>>
> >>> +static const struct file_operations
> amdgpu_debugfs_gfxoff_count_fops = {
> >>> +	.owner = THIS_MODULE,
> >>> +	.read = amdgpu_debugfs_gfxoff_count_read,
> >>> +	.llseek = default_llseek
> >>> +};
> >>> +
> >>> +static const struct file_operations
> >>> +amdgpu_debugfs_gfxoff_residency_fops
> >>> = {
> >>> +	.owner = THIS_MODULE,
> >>> +	.read = amdgpu_debugfs_gfxoff_residency_read,
> >>> +	.write = amdgpu_debugfs_gfxoff_residency_write,
> >>> +	.llseek = default_llseek
> >>> +};
> >>> +
> >>>  static const struct file_operations *debugfs_regs[] = {
> >>>  	&amdgpu_debugfs_regs_fops,
> >>>  	&amdgpu_debugfs_regs2_fops,
> >>> @@ -1261,6 +1425,8 @@ static const struct file_operations
> >>> *debugfs_regs[] = {
> >>>  	&amdgpu_debugfs_gpr_fops,
> >>>  	&amdgpu_debugfs_gfxoff_fops,
> >>>  	&amdgpu_debugfs_gfxoff_status_fops,
> >>> +	&amdgpu_debugfs_gfxoff_count_fops,
> >>> +	&amdgpu_debugfs_gfxoff_residency_fops,
> >>>  };
> >>>
> >>>  static const char *debugfs_regs_names[] = { @@ -1275,6 +1441,8 @@
> >>> static const char *debugfs_regs_names[] = {
> >>>  	"amdgpu_gpr",
> >>>  	"amdgpu_gfxoff",
> >>>  	"amdgpu_gfxoff_status",
> >>> +	"amdgpu_gfxoff_count",
> >>> +	"amdgpu_gfxoff_residency",
> >>>  };
> >>>
> >>>  /**
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> index b79ee4ffb879..15a95bc2c211 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> @@ -3576,6 +3576,8 @@ int amdgpu_device_init(struct amdgpu_device
> >>> *adev,
> >>>  	INIT_WORK(&adev->xgmi_reset_work,
> >>> amdgpu_device_xgmi_reset_func);
> >>>
> >>>  	adev->gfx.gfx_off_req_count = 1;
> >>> +	adev->gfx.gfx_off_residency = 0;
> >>> +	adev->gfx.gfx_off_entrycount = 0;
> >>>  	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
> >>>
> >>>  	atomic_set(&adev->throttling_logging_enabled, 1); diff --git
> >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>> index 222d3d7ea076..3675c1b899db 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> >>> @@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct
> amdgpu_device
> >>> *adev, bool enable)
> >>>  	mutex_unlock(&adev->gfx.gfx_off_mutex);
> >>>  }
> >>>
> >>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
> >>> value)
> >>> +{
> >>> +	int r = 0;
> >>> +
> >>> +	mutex_lock(&adev->gfx.gfx_off_mutex);
> >>> +
> >>> +	r = amdgpu_dpm_set_residency_gfxoff(adev, value);
> >>> +
> >>> +	mutex_unlock(&adev->gfx.gfx_off_mutex);
> >>> +
> >>> +	return r;
> >>> +}
> >>> +
> >>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
> >>> *value)
> >>> +{
> >>> +	int r = 0;
> >>> +
> >>> +	mutex_lock(&adev->gfx.gfx_off_mutex);
> >>> +
> >>> +	r = amdgpu_dpm_get_residency_gfxoff(adev, value);
> >>> +
> >>> +	mutex_unlock(&adev->gfx.gfx_off_mutex);
> >>> +
> >>> +	return r;
> >>> +}
> >>> +
> >>> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev,
> u32
> >>> *value)
> >>> +{
> >>> +	int r = 0;
> >>> +
> >>> +	mutex_lock(&adev->gfx.gfx_off_mutex);
> >>> +
> >>> +	r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
> >>> +
> >>> +	mutex_unlock(&adev->gfx.gfx_off_mutex);
> >>> +
> >>> +	return r;
> >>> +}
> >>> +
> >>>  int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
> >>> *value)
> >>>  {
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>> index 23a696d38390..f06e979e2565 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> >>> @@ -336,6 +336,8 @@ struct amdgpu_gfx {
> >>>  	struct mutex                    gfx_off_mutex;
> >>>  	uint32_t                        gfx_off_req_count; /* default 1, enable gfx off:
> >>> dec 1, disable gfx off: add 1 */
> >>>  	struct delayed_work             gfx_off_delay_work;
> >>> +	uint32_t                        gfx_off_residency;
> >>> +	uint32_t                        gfx_off_entrycount;
> >>>
> >>>  	/* pipe reservation */
> >>>  	struct mutex			pipe_reserve_mutex;
> >>> @@ -407,6 +409,10 @@ bool
> amdgpu_gfx_is_me_queue_enabled(struct
> >>> amdgpu_device *adev, int me,
> >>>  void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
> >>> int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
> >>> *value);  int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev,
> >>> struct ras_common_if *ras_block);
> >>> +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); int
> >>> +amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
> >>> *value);
> >>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
> >>> *residency);
> >>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
> >>> value);
> >>>  int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
> >>>  		void *err_data,
> >>>  		struct amdgpu_iv_entry *entry);
> >>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>> index 956b6ce81c84..df87d0768fd7 100644
> >>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> >>> @@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct
> >>> amdgpu_device *adev,
> >>>  	return ret;
> >>>  }
> >>>
> >>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev,
> >>> +bool
> >>> value)
> >>> +{
> >>> +	struct smu_context *smu = adev->powerplay.pp_handle;
> >>> +	int ret = 0;
> >>> +
> >>> +	if (!is_support_sw_smu(adev))
> >>> +		return -EOPNOTSUPP;
> >>> +
> >>> +	mutex_lock(&adev->pm.mutex);
> >>> +	ret = smu_set_residency_gfxoff(smu, value);
> >>> +	mutex_unlock(&adev->pm.mutex);
> >>> +
> >>> +	return ret;
> >>> +}
> >>> +
> >>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev,
> u32
> >>> *value)
> >>> +{
> >>> +	struct smu_context *smu = adev->powerplay.pp_handle;
> >>> +	int ret = 0;
> >>> +
> >>> +	if (!is_support_sw_smu(adev))
> >>> +		return -EOPNOTSUPP;
> >>> +
> >>> +	mutex_lock(&adev->pm.mutex);
> >>> +	ret = smu_get_residency_gfxoff(smu, value);
> >>> +	mutex_unlock(&adev->pm.mutex);
> >>> +
> >>> +	return ret;
> >>> +}
> >>> +
> >>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device
> *adev,
> >>> u32 *value)
> >>> +{
> >>> +	struct smu_context *smu = adev->powerplay.pp_handle;
> >>> +	int ret = 0;
> >>> +
> >>> +	if (!is_support_sw_smu(adev))
> >>> +		return -EOPNOTSUPP;
> >>> +
> >>> +	mutex_lock(&adev->pm.mutex);
> >>> +	ret = smu_get_entrycount_gfxoff(smu, value);
> >>> +	mutex_unlock(&adev->pm.mutex);
> >>> +
> >>> +	return ret;
> >>> +}
> >>> +
> >>>  int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev,
> >>> uint32_t
> >>> *value)
> >>>  {
> >>>  	struct smu_context *smu = adev->powerplay.pp_handle; diff --git
> >>> a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>> index 65624d091ed2..83a83e93037c 100644
> >>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> >>> @@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct
> >>> amdgpu_device *adev,
> >>>  int amdgpu_dpm_write_watermarks_table(struct amdgpu_device
> *adev);
> >>> int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum
> >>> smu_event_type event,
> >>>  		       uint64_t event_arg);
> >>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev,
> u32
> >>> *value);
> >>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev,
> >>> +bool
> >>> value);
> >>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device
> *adev,
> >>> u32 *value);
> >>>  int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev,
> >>> uint32_t *value);  uint64_t
> >>> amdgpu_dpm_get_thermal_throttling_counter(struct
> >>> amdgpu_device *adev);
> >>>  void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
> diff
> >>> --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> >>> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> >>> index fd79b213fab4..cfc3b9d749bf 100644
> >>> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> >>> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> >>> @@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void
> *handle,
> >>>  	return smu_set_pp_feature_mask(smu, new_mask);  }
> >>>
> >>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value) {
> >>> +	if (!smu->ppt_funcs->set_gfx_off_residency)
> >>> +		return -EINVAL;
> >>> +
> >>> +	return smu_set_gfx_off_residency(smu, value); }
> >>> +
> >>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value) {
> >>> +	if (!smu->ppt_funcs->get_gfx_off_residency)
> >>> +		return -EINVAL;
> >>> +
> >>> +	return smu_get_gfx_off_residency(smu, value); }
> >>> +
> >>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value)
> >>> +{
> >>> +	if (!smu->ppt_funcs->get_gfx_off_entrycount)
> >>> +		return -EINVAL;
> >>> +
> >>> +	return smu_get_gfx_off_entrycount(smu, value); }
> >>> +
> >>>  int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
> >>> {
> >>>  	if (!smu->ppt_funcs->get_gfx_off_status)
> >>> @@ -1573,7 +1597,7 @@ static int smu_suspend(void *handle)  {
> >>>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> >>>  	struct smu_context *smu = adev->powerplay.pp_handle;
> >>> -	int ret;
> >>> +	int ret, count;
> >>>
> >>>  	if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
> >>>  		return 0;
> >>> @@ -1591,6 +1615,14 @@ static int smu_suspend(void *handle)
> >>>
> >>>  	smu_set_gfx_cgpg(smu, false);
> >>>
> >>> +	/*
> >>> +	 * pwfw resets entrycount when device is suspended, so we save
> >>> the
> >>> +	 * last value to be used when we resume to keep it consistent
> >>> +	 */
> >>> +	ret = smu_get_entrycount_gfxoff(smu, &count);
> >>> +	if (!ret)
> >>> +		adev->gfx.gfx_off_entrycount = count;
> >>> +
> >>>  	return 0;
> >>>  }
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> >>> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> >>> index b81c657c7386..9827075b768e 100644
> >>> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> >>> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> >>> @@ -1111,6 +1111,22 @@ struct pptable_funcs {
> >>>  	 */
> >>>  	uint32_t (*get_gfx_off_status)(struct smu_context *smu);
> >>>
> >>> +	/**
> >>> +	 * @gfx_off_entrycount: total GFXOFF entry count at the time of
> >>> +	 * query since system power-up
> >>> +	 */
> >>> +	u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint32_t
> >>> *entrycount);
> >>> +
> >>> +	/**
> >>> +	 * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging
> >>> +	 */
> >>> +	u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start);
> >>> +
> >>> +	/**
> >>> +	 * @get_gfx_off_residency: Average GFXOFF residency % during the
> >>> logging interval
> >>> +	 */
> >>> +	u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t
> >>> *residency);
> >>> +
> >>>  	/**
> >>>  	 * @register_irq_handler: Register interupt request handlers.
> >>>  	 */
> >>> @@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu);
> >>>
> >>>  int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
> >>>
> >>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value);
> >>> +
> >>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value);
> >>> +
> >>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value);
> >>> +
> >>>  int smu_get_status_gfxoff(struct smu_context *smu, uint32_t
> >>> *value);
> >>>
> >>>  int smu_handle_passthrough_sbr(struct smu_context *smu, bool
> >>> enable); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> >>> b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> >>> index 7469bbfce1fb..ceb13c838067 100644
> >>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> >>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> >>> @@ -47,6 +47,9 @@
> >>>  #define smu_notify_memory_pool_location(smu)
> >>> 	smu_ppt_funcs(notify_memory_pool_location, 0, smu)  #define
> >>> smu_gfx_off_control(smu, enable)
> >>> 	smu_ppt_funcs(gfx_off_control, 0, smu, enable)  #define
> >>> smu_get_gfx_off_status(smu)
> >>> 		smu_ppt_funcs(get_gfx_off_status, 0, smu)
> >>> +#define smu_get_gfx_off_entrycount(smu, value)
> >>> 			smu_ppt_funcs(get_gfx_off_entrycount, 0, smu,
> >>> value)
> >>> +#define smu_get_gfx_off_residency(smu, value)
> >>> 			smu_ppt_funcs(get_gfx_off_residency, 0, smu,
> >>> value)
> >>> +#define smu_set_gfx_off_residency(smu, value)
> >>> 			smu_ppt_funcs(set_gfx_off_residency, 0, smu,
> >>> value)
> >>>  #define smu_set_last_dcef_min_deep_sleep_clk(smu)
> >>> 	smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu)
> #define
> >>> smu_system_features_control(smu, en)
> >>> 	smu_ppt_funcs(system_features_control, 0, smu, en)  #define
> >>> smu_init_max_sustainable_clocks(smu)
> >>> 	smu_ppt_funcs(init_max_sustainable_clocks, 0, smu)
> >>> --
> >>> 2.37.1
Alex Deucher July 26, 2022, 1:29 p.m. UTC | #5
On Mon, Jul 25, 2022 at 11:03 PM Quan, Evan <Evan.Quan@amd.com> wrote:
>
> [AMD Official Use Only - General]
>
>
>
> > -----Original Message-----
> > From: André Almeida <andrealmeid@igalia.com>
> > Sent: Tuesday, July 26, 2022 12:15 AM
> > To: Quan, Evan <Evan.Quan@amd.com>; Deucher, Alexander
> > <Alexander.Deucher@amd.com>; Koenig, Christian
> > <Christian.Koenig@amd.com>; Pan, Xinhui <Xinhui.Pan@amd.com>; David
> > Airlie <airlied@linux.ie>; Daniel Vetter <daniel@ffwll.ch>; Zhang, Hawking
> > <Hawking.Zhang@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Kuehling,
> > Felix <Felix.Kuehling@amd.com>; Xiao, Jack <Jack.Xiao@amd.com>; amd-
> > gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org; linux-
> > kernel@vger.kernel.org; StDenis, Tom <Tom.StDenis@amd.com>; Siqueira,
> > Rodrigo <Rodrigo.Siqueira@amd.com>
> > Cc: kernel-dev@igalia.com
> > Subject: Re: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
> >
> > Às 10:04 de 25/07/22, André Almeida escreveu:
> > > Às 07:27 de 25/07/22, Quan, Evan escreveu:
> > >> [AMD Official Use Only - General]
> > >>
> > >> Using "uint64_t" instead of "uint32_t" for entry counter may be better.
> > >>
> > >
> > > Indeed, it's a good idea. I'll send a v2 with that change, thanks.
> > >
> >
> > However, SMU messaging reads a 32-bit register to get the entrycount from
> > the pwfw, so we would be left with the risk of overflow anyway, right?
> [Quan, Evan] Yes, that makes sense. Better to document that (the risk of overflow).

Still might be better to use a 64 bit number for the external
interface to be more future proof.  Then we can either document the
behavior for specific chips or handle the wrap around in the driver.

Alex

> Anyway, the series seems fine to me.
> Series is acked-by: Evan Quan <evan.quan@amd.com>
> >
> > >> BR
> > >> Evan
> > >>> -----Original Message-----
> > >>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of
> > >>> André Almeida
> > >>> Sent: Saturday, July 23, 2022 4:34 AM
> > >>> To: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig,
> > >>> Christian <Christian.Koenig@amd.com>; Pan, Xinhui
> > >>> <Xinhui.Pan@amd.com>; David Airlie <airlied@linux.ie>; Daniel Vetter
> > >>> <daniel@ffwll.ch>; Zhang, Hawking <Hawking.Zhang@amd.com>; Zhou1,
> > >>> Tao <Tao.Zhou1@amd.com>; Kuehling, Felix
> > <Felix.Kuehling@amd.com>;
> > >>> Xiao, Jack <Jack.Xiao@amd.com>; amd- gfx@lists.freedesktop.org;
> > >>> dri-devel@lists.freedesktop.org; linux- kernel@vger.kernel.org;
> > >>> StDenis, Tom <Tom.StDenis@amd.com>; Siqueira, Rodrigo
> > >>> <Rodrigo.Siqueira@amd.com>
> > >>> Cc: André Almeida <andrealmeid@igalia.com>; kernel-dev@igalia.com
> > >>> Subject: [PATCH 1/4] drm/amd: Add detailed GFXOFF stats to debugfs
> > >>>
> > >>> Add debugfs interface to log GFXOFF statistics:
> > >>>
> > >>> - Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the
> > >>>   time of query since system power-up
> > >>>
> > >>> - Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop.
> > >>>   Read it to get average GFXOFF residency % multiplied by 100
> > >>>   during the last logging interval.
> > >>>
> > >>> Both features are designed to keep the values persistent between
> > >>> suspends.
> > >>>
> > >>> Signed-off-by: André Almeida <andrealmeid@igalia.com>
> > >>> ---
> > >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c   | 168
> > >>> ++++++++++++++++++
> > >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |   2 +
> > >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c       |  39 ++++
> > >>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h       |   6 +
> > >>>  drivers/gpu/drm/amd/pm/amdgpu_dpm.c           |  45 +++++
> > >>>  drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h       |   3 +
> > >>>  drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c     |  34 +++-
> > >>>  drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h |  22 +++
> > >>>  drivers/gpu/drm/amd/pm/swsmu/smu_internal.h   |   3 +
> > >>>  9 files changed, 321 insertions(+), 1 deletion(-)
> > >>>
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > >>> index e2eec985adb3..edf90a9ba980 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> > >>> @@ -1042,6 +1042,157 @@ static ssize_t
> > >>> amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
> > >>>   return r;
> > >>>  }
> > >>>
> > >>> +/**
> > >>> + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
> > >>> + *
> > >>> + * @f: open file handle
> > >>> + * @buf: User buffer to store read data in
> > >>> + * @size: Number of bytes to read
> > >>> + * @pos:  Offset to seek to
> > >>> + *
> > >>> + * Read the last residency value logged. It doesn't auto update,
> > >>> +one needs
> > >>> to
> > >>> + * stop logging before getting the current value.
> > >>> + */
> > >>> +static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f,
> > >>> +char
> > >>> __user *buf,
> > >>> +                                             size_t size, loff_t *pos) {
> > >>> + struct amdgpu_device *adev = file_inode(f)->i_private;
> > >>> + ssize_t result = 0;
> > >>> + int r;
> > >>> +
> > >>> + if (size & 0x3 || *pos & 0x3)
> > >>> +         return -EINVAL;
> > >>> +
> > >>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> > >>> + if (r < 0) {
> > >>> +         pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> > >>> +         return r;
> > >>> + }
> > >>> +
> > >>> + while (size) {
> > >>> +         uint32_t value;
> > >>> +
> > >>> +         r = amdgpu_get_gfx_off_residency(adev, &value);
> > >>> +         if (r)
> > >>> +                 goto out;
> > >>> +
> > >>> +         r = put_user(value, (uint32_t *)buf);
> > >>> +         if (r)
> > >>> +                 goto out;
> > >>> +
> > >>> +         result += 4;
> > >>> +         buf += 4;
> > >>> +         *pos += 4;
> > >>> +         size -= 4;
> > >>> + }
> > >>> +
> > >>> + r = result;
> > >>> +out:
> > >>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> > >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> > >>> +
> > >>> + return r;
> > >>> +}
> > >>> +
> > >>> +/**
> > >>> + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
> > >>> + *
> > >>> + * @f: open file handle
> > >>> + * @buf: User buffer to write data from
> > >>> + * @size: Number of bytes to write
> > >>> + * @pos:  Offset to seek to
> > >>> + *
> > >>> + * Write a 32-bit non-zero to start logging; write a 32-bit zero to
> > >>> +stop  */ static ssize_t
> > >>> +amdgpu_debugfs_gfxoff_residency_write(struct file *f, const
> > >>> char __user *buf,
> > >>> +                                              size_t size, loff_t *pos) {
> > >>> + struct amdgpu_device *adev = file_inode(f)->i_private;
> > >>> + ssize_t result = 0;
> > >>> + int r;
> > >>> +
> > >>> + if (size & 0x3 || *pos & 0x3)
> > >>> +         return -EINVAL;
> > >>> +
> > >>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> > >>> + if (r < 0) {
> > >>> +         pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> > >>> +         return r;
> > >>> + }
> > >>> +
> > >>> + while (size) {
> > >>> +         u32 value;
> > >>> +
> > >>> +         r = get_user(value, (uint32_t *)buf);
> > >>> +         if (r)
> > >>> +                 goto out;
> > >>> +
> > >>> +         amdgpu_set_gfx_off_residency(adev, value ? true : false);
> > >>> +
> > >>> +         result += 4;
> > >>> +         buf += 4;
> > >>> +         *pos += 4;
> > >>> +         size -= 4;
> > >>> + }
> > >>> +
> > >>> + r = result;
> > >>> +out:
> > >>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> > >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> > >>> +
> > >>> + return r;
> > >>> +}
> > >>> +
> > >>> +
> > >>> +/**
> > >>> + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
> > >>> + *
> > >>> + * @f: open file handle
> > >>> + * @buf: User buffer to store read data in
> > >>> + * @size: Number of bytes to read
> > >>> + * @pos:  Offset to seek to
> > >>> + */
> > >>> +static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f,
> > >>> +char
> > >>> __user *buf,
> > >>> +                                         size_t size, loff_t *pos)
> > >>> +{
> > >>> + struct amdgpu_device *adev = file_inode(f)->i_private;
> > >>> + ssize_t result = 0;
> > >>> + int r;
> > >>> +
> > >>> + if (size & 0x3 || *pos & 0x3)
> > >>> +         return -EINVAL;
> > >>> +
> > >>> + r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> > >>> + if (r < 0) {
> > >>> +         pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> > >>> +         return r;
> > >>> + }
> > >>> +
> > >>> + while (size) {
> > >>> +         u32 value;
> > >>> +
> > >>> +         r = amdgpu_get_gfx_off_entrycount(adev, &value);
> > >>> +         if (r)
> > >>> +                 goto out;
> > >>> +
> > >>> +         r = put_user(value, (uint32_t *)buf);
> > >>> +         if (r)
> > >>> +                 goto out;
> > >>> +
> > >>> +         result += 4;
> > >>> +         buf += 4;
> > >>> +         *pos += 4;
> > >>> +         size -= 4;
> > >>> + }
> > >>> +
> > >>> + r = result;
> > >>> +out:
> > >>> + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> > >>> + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> > >>> +
> > >>> + return r;
> > >>> +}
> > >>> +
> > >>>  /**
> > >>>   * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
> > >>>   *
> > >>> @@ -1249,6 +1400,19 @@ static const struct file_operations
> > >>> amdgpu_debugfs_gfxoff_status_fops = {
> > >>>   .llseek = default_llseek
> > >>>  };
> > >>>
> > >>> +static const struct file_operations
> > amdgpu_debugfs_gfxoff_count_fops = {
> > >>> + .owner = THIS_MODULE,
> > >>> + .read = amdgpu_debugfs_gfxoff_count_read,
> > >>> + .llseek = default_llseek
> > >>> +};
> > >>> +
> > >>> +static const struct file_operations
> > >>> +amdgpu_debugfs_gfxoff_residency_fops
> > >>> = {
> > >>> + .owner = THIS_MODULE,
> > >>> + .read = amdgpu_debugfs_gfxoff_residency_read,
> > >>> + .write = amdgpu_debugfs_gfxoff_residency_write,
> > >>> + .llseek = default_llseek
> > >>> +};
> > >>> +
> > >>>  static const struct file_operations *debugfs_regs[] = {
> > >>>   &amdgpu_debugfs_regs_fops,
> > >>>   &amdgpu_debugfs_regs2_fops,
> > >>> @@ -1261,6 +1425,8 @@ static const struct file_operations
> > >>> *debugfs_regs[] = {
> > >>>   &amdgpu_debugfs_gpr_fops,
> > >>>   &amdgpu_debugfs_gfxoff_fops,
> > >>>   &amdgpu_debugfs_gfxoff_status_fops,
> > >>> + &amdgpu_debugfs_gfxoff_count_fops,
> > >>> + &amdgpu_debugfs_gfxoff_residency_fops,
> > >>>  };
> > >>>
> > >>>  static const char *debugfs_regs_names[] = { @@ -1275,6 +1441,8 @@
> > >>> static const char *debugfs_regs_names[] = {
> > >>>   "amdgpu_gpr",
> > >>>   "amdgpu_gfxoff",
> > >>>   "amdgpu_gfxoff_status",
> > >>> + "amdgpu_gfxoff_count",
> > >>> + "amdgpu_gfxoff_residency",
> > >>>  };
> > >>>
> > >>>  /**
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > >>> index b79ee4ffb879..15a95bc2c211 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > >>> @@ -3576,6 +3576,8 @@ int amdgpu_device_init(struct amdgpu_device
> > >>> *adev,
> > >>>   INIT_WORK(&adev->xgmi_reset_work,
> > >>> amdgpu_device_xgmi_reset_func);
> > >>>
> > >>>   adev->gfx.gfx_off_req_count = 1;
> > >>> + adev->gfx.gfx_off_residency = 0;
> > >>> + adev->gfx.gfx_off_entrycount = 0;
> > >>>   adev->pm.ac_power = power_supply_is_system_supplied() > 0;
> > >>>
> > >>>   atomic_set(&adev->throttling_logging_enabled, 1); diff --git
> > >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > >>> index 222d3d7ea076..3675c1b899db 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > >>> @@ -610,6 +610,45 @@ void amdgpu_gfx_off_ctrl(struct
> > amdgpu_device
> > >>> *adev, bool enable)
> > >>>   mutex_unlock(&adev->gfx.gfx_off_mutex);
> > >>>  }
> > >>>
> > >>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
> > >>> value)
> > >>> +{
> > >>> + int r = 0;
> > >>> +
> > >>> + mutex_lock(&adev->gfx.gfx_off_mutex);
> > >>> +
> > >>> + r = amdgpu_dpm_set_residency_gfxoff(adev, value);
> > >>> +
> > >>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
> > >>> +
> > >>> + return r;
> > >>> +}
> > >>> +
> > >>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
> > >>> *value)
> > >>> +{
> > >>> + int r = 0;
> > >>> +
> > >>> + mutex_lock(&adev->gfx.gfx_off_mutex);
> > >>> +
> > >>> + r = amdgpu_dpm_get_residency_gfxoff(adev, value);
> > >>> +
> > >>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
> > >>> +
> > >>> + return r;
> > >>> +}
> > >>> +
> > >>> +int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev,
> > u32
> > >>> *value)
> > >>> +{
> > >>> + int r = 0;
> > >>> +
> > >>> + mutex_lock(&adev->gfx.gfx_off_mutex);
> > >>> +
> > >>> + r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
> > >>> +
> > >>> + mutex_unlock(&adev->gfx.gfx_off_mutex);
> > >>> +
> > >>> + return r;
> > >>> +}
> > >>> +
> > >>>  int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
> > >>> *value)
> > >>>  {
> > >>>
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > >>> index 23a696d38390..f06e979e2565 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > >>> @@ -336,6 +336,8 @@ struct amdgpu_gfx {
> > >>>   struct mutex                    gfx_off_mutex;
> > >>>   uint32_t                        gfx_off_req_count; /* default 1, enable gfx off:
> > >>> dec 1, disable gfx off: add 1 */
> > >>>   struct delayed_work             gfx_off_delay_work;
> > >>> + uint32_t                        gfx_off_residency;
> > >>> + uint32_t                        gfx_off_entrycount;
> > >>>
> > >>>   /* pipe reservation */
> > >>>   struct mutex                    pipe_reserve_mutex;
> > >>> @@ -407,6 +409,10 @@ bool
> > amdgpu_gfx_is_me_queue_enabled(struct
> > >>> amdgpu_device *adev, int me,
> > >>>  void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
> > >>> int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t
> > >>> *value);  int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev,
> > >>> struct ras_common_if *ras_block);
> > >>> +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); int
> > >>> +amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32
> > >>> *value);
> > >>> +int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32
> > >>> *residency);
> > >>> +int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool
> > >>> value);
> > >>>  int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
> > >>>           void *err_data,
> > >>>           struct amdgpu_iv_entry *entry);
> > >>> diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > >>> b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > >>> index 956b6ce81c84..df87d0768fd7 100644
> > >>> --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > >>> +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
> > >>> @@ -668,6 +668,51 @@ int amdgpu_dpm_wait_for_event(struct
> > >>> amdgpu_device *adev,
> > >>>   return ret;
> > >>>  }
> > >>>
> > >>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev,
> > >>> +bool
> > >>> value)
> > >>> +{
> > >>> + struct smu_context *smu = adev->powerplay.pp_handle;
> > >>> + int ret = 0;
> > >>> +
> > >>> + if (!is_support_sw_smu(adev))
> > >>> +         return -EOPNOTSUPP;
> > >>> +
> > >>> + mutex_lock(&adev->pm.mutex);
> > >>> + ret = smu_set_residency_gfxoff(smu, value);
> > >>> + mutex_unlock(&adev->pm.mutex);
> > >>> +
> > >>> + return ret;
> > >>> +}
> > >>> +
> > >>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev,
> > u32
> > >>> *value)
> > >>> +{
> > >>> + struct smu_context *smu = adev->powerplay.pp_handle;
> > >>> + int ret = 0;
> > >>> +
> > >>> + if (!is_support_sw_smu(adev))
> > >>> +         return -EOPNOTSUPP;
> > >>> +
> > >>> + mutex_lock(&adev->pm.mutex);
> > >>> + ret = smu_get_residency_gfxoff(smu, value);
> > >>> + mutex_unlock(&adev->pm.mutex);
> > >>> +
> > >>> + return ret;
> > >>> +}
> > >>> +
> > >>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device
> > *adev,
> > >>> u32 *value)
> > >>> +{
> > >>> + struct smu_context *smu = adev->powerplay.pp_handle;
> > >>> + int ret = 0;
> > >>> +
> > >>> + if (!is_support_sw_smu(adev))
> > >>> +         return -EOPNOTSUPP;
> > >>> +
> > >>> + mutex_lock(&adev->pm.mutex);
> > >>> + ret = smu_get_entrycount_gfxoff(smu, value);
> > >>> + mutex_unlock(&adev->pm.mutex);
> > >>> +
> > >>> + return ret;
> > >>> +}
> > >>> +
> > >>>  int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev,
> > >>> uint32_t
> > >>> *value)
> > >>>  {
> > >>>   struct smu_context *smu = adev->powerplay.pp_handle; diff --git
> > >>> a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> > >>> b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> > >>> index 65624d091ed2..83a83e93037c 100644
> > >>> --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> > >>> +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
> > >>> @@ -435,6 +435,9 @@ int amdgpu_dpm_set_soft_freq_range(struct
> > >>> amdgpu_device *adev,
> > >>>  int amdgpu_dpm_write_watermarks_table(struct amdgpu_device
> > *adev);
> > >>> int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum
> > >>> smu_event_type event,
> > >>>                  uint64_t event_arg);
> > >>> +int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev,
> > u32
> > >>> *value);
> > >>> +int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev,
> > >>> +bool
> > >>> value);
> > >>> +int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device
> > *adev,
> > >>> u32 *value);
> > >>>  int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev,
> > >>> uint32_t *value);  uint64_t
> > >>> amdgpu_dpm_get_thermal_throttling_counter(struct
> > >>> amdgpu_device *adev);
> > >>>  void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
> > diff
> > >>> --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > >>> b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > >>> index fd79b213fab4..cfc3b9d749bf 100644
> > >>> --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > >>> +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
> > >>> @@ -90,6 +90,30 @@ static int smu_sys_set_pp_feature_mask(void
> > *handle,
> > >>>   return smu_set_pp_feature_mask(smu, new_mask);  }
> > >>>
> > >>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value) {
> > >>> + if (!smu->ppt_funcs->set_gfx_off_residency)
> > >>> +         return -EINVAL;
> > >>> +
> > >>> + return smu_set_gfx_off_residency(smu, value); }
> > >>> +
> > >>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value) {
> > >>> + if (!smu->ppt_funcs->get_gfx_off_residency)
> > >>> +         return -EINVAL;
> > >>> +
> > >>> + return smu_get_gfx_off_residency(smu, value); }
> > >>> +
> > >>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value)
> > >>> +{
> > >>> + if (!smu->ppt_funcs->get_gfx_off_entrycount)
> > >>> +         return -EINVAL;
> > >>> +
> > >>> + return smu_get_gfx_off_entrycount(smu, value); }
> > >>> +
> > >>>  int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
> > >>> {
> > >>>   if (!smu->ppt_funcs->get_gfx_off_status)
> > >>> @@ -1573,7 +1597,7 @@ static int smu_suspend(void *handle)  {
> > >>>   struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> > >>>   struct smu_context *smu = adev->powerplay.pp_handle;
> > >>> - int ret;
> > >>> + int ret, count;
> > >>>
> > >>>   if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
> > >>>           return 0;
> > >>> @@ -1591,6 +1615,14 @@ static int smu_suspend(void *handle)
> > >>>
> > >>>   smu_set_gfx_cgpg(smu, false);
> > >>>
> > >>> + /*
> > >>> +  * pwfw resets entrycount when device is suspended, so we save
> > >>> the
> > >>> +  * last value to be used when we resume to keep it consistent
> > >>> +  */
> > >>> + ret = smu_get_entrycount_gfxoff(smu, &count);
> > >>> + if (!ret)
> > >>> +         adev->gfx.gfx_off_entrycount = count;
> > >>> +
> > >>>   return 0;
> > >>>  }
> > >>>
> > >>> diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > >>> b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > >>> index b81c657c7386..9827075b768e 100644
> > >>> --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > >>> +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
> > >>> @@ -1111,6 +1111,22 @@ struct pptable_funcs {
> > >>>    */
> > >>>   uint32_t (*get_gfx_off_status)(struct smu_context *smu);
> > >>>
> > >>> + /**
> > >>> +  * @gfx_off_entrycount: total GFXOFF entry count at the time of
> > >>> +  * query since system power-up
> > >>> +  */
> > >>> + u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint32_t
> > >>> *entrycount);
> > >>> +
> > >>> + /**
> > >>> +  * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging
> > >>> +  */
> > >>> + u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start);
> > >>> +
> > >>> + /**
> > >>> +  * @get_gfx_off_residency: Average GFXOFF residency % during the
> > >>> logging interval
> > >>> +  */
> > >>> + u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t
> > >>> *residency);
> > >>> +
> > >>>   /**
> > >>>    * @register_irq_handler: Register interupt request handlers.
> > >>>    */
> > >>> @@ -1454,6 +1470,12 @@ int smu_set_ac_dc(struct smu_context *smu);
> > >>>
> > >>>  int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
> > >>>
> > >>> +int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value);
> > >>> +
> > >>> +int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value);
> > >>> +
> > >>> +int smu_set_residency_gfxoff(struct smu_context *smu, bool value);
> > >>> +
> > >>>  int smu_get_status_gfxoff(struct smu_context *smu, uint32_t
> > >>> *value);
> > >>>
> > >>>  int smu_handle_passthrough_sbr(struct smu_context *smu, bool
> > >>> enable); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> > >>> b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> > >>> index 7469bbfce1fb..ceb13c838067 100644
> > >>> --- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> > >>> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
> > >>> @@ -47,6 +47,9 @@
> > >>>  #define smu_notify_memory_pool_location(smu)
> > >>>   smu_ppt_funcs(notify_memory_pool_location, 0, smu)  #define
> > >>> smu_gfx_off_control(smu, enable)
> > >>>   smu_ppt_funcs(gfx_off_control, 0, smu, enable)  #define
> > >>> smu_get_gfx_off_status(smu)
> > >>>           smu_ppt_funcs(get_gfx_off_status, 0, smu)
> > >>> +#define smu_get_gfx_off_entrycount(smu, value)
> > >>>                   smu_ppt_funcs(get_gfx_off_entrycount, 0, smu,
> > >>> value)
> > >>> +#define smu_get_gfx_off_residency(smu, value)
> > >>>                   smu_ppt_funcs(get_gfx_off_residency, 0, smu,
> > >>> value)
> > >>> +#define smu_set_gfx_off_residency(smu, value)
> > >>>                   smu_ppt_funcs(set_gfx_off_residency, 0, smu,
> > >>> value)
> > >>>  #define smu_set_last_dcef_min_deep_sleep_clk(smu)
> > >>>   smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu)
> > #define
> > >>> smu_system_features_control(smu, en)
> > >>>   smu_ppt_funcs(system_features_control, 0, smu, en)  #define
> > >>> smu_init_max_sustainable_clocks(smu)
> > >>>   smu_ppt_funcs(init_max_sustainable_clocks, 0, smu)
> > >>> --
> > >>> 2.37.1
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index e2eec985adb3..edf90a9ba980 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1042,6 +1042,157 @@  static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
 	return r;
 }
 
+/**
+ * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * Read the last residency value logged. It doesn't auto-update; one needs to
+ * stop logging before getting the current value.
+ */
+static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user *buf,
+						    size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+	if (r < 0) {
+		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+		return r;
+	}
+
+	while (size) {
+		uint32_t value;
+
+		r = amdgpu_get_gfx_off_residency(adev, &value);
+		if (r)
+			goto out;
+
+		r = put_user(value, (uint32_t *)buf);
+		if (r)
+			goto out;
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	r = result;
+out:
+	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+	return r;
+}
+
+/**
+ * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos:  Offset to seek to
+ *
+ * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop
+ */
+static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char __user *buf,
+						     size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+	if (r < 0) {
+		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+		return r;
+	}
+
+	while (size) {
+		u32 value;
+
+		r = get_user(value, (uint32_t *)buf);
+		if (r)
+			goto out;
+
+		amdgpu_set_gfx_off_residency(adev, value ? true : false);
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	r = result;
+out:
+	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+	return r;
+}
+
+
+/**
+ * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ */
+static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf,
+						size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+	if (r < 0) {
+		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+		return r;
+	}
+
+	while (size) {
+		u32 value;
+
+		r = amdgpu_get_gfx_off_entrycount(adev, &value);
+		if (r)
+			goto out;
+
+		r = put_user(value, (uint32_t *)buf);
+		if (r)
+			goto out;
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	r = result;
+out:
+	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+	return r;
+}
+
 /**
  * amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF
  *
@@ -1249,6 +1400,19 @@  static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
 	.llseek = default_llseek
 };
 
+static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_gfxoff_count_read,
+	.llseek = default_llseek
+};
+
+static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_gfxoff_residency_read,
+	.write = amdgpu_debugfs_gfxoff_residency_write,
+	.llseek = default_llseek
+};
+
 static const struct file_operations *debugfs_regs[] = {
 	&amdgpu_debugfs_regs_fops,
 	&amdgpu_debugfs_regs2_fops,
@@ -1261,6 +1425,8 @@  static const struct file_operations *debugfs_regs[] = {
 	&amdgpu_debugfs_gpr_fops,
 	&amdgpu_debugfs_gfxoff_fops,
 	&amdgpu_debugfs_gfxoff_status_fops,
+	&amdgpu_debugfs_gfxoff_count_fops,
+	&amdgpu_debugfs_gfxoff_residency_fops,
 };
 
 static const char *debugfs_regs_names[] = {
@@ -1275,6 +1441,8 @@  static const char *debugfs_regs_names[] = {
 	"amdgpu_gpr",
 	"amdgpu_gfxoff",
 	"amdgpu_gfxoff_status",
+	"amdgpu_gfxoff_count",
+	"amdgpu_gfxoff_residency",
 };
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index b79ee4ffb879..15a95bc2c211 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3576,6 +3576,8 @@  int amdgpu_device_init(struct amdgpu_device *adev,
 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
 
 	adev->gfx.gfx_off_req_count = 1;
+	adev->gfx.gfx_off_residency = 0;
+	adev->gfx.gfx_off_entrycount = 0;
 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
 
 	atomic_set(&adev->throttling_logging_enabled, 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 222d3d7ea076..3675c1b899db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -610,6 +610,45 @@  void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 	mutex_unlock(&adev->gfx.gfx_off_mutex);
 }
 
+int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
+{
+	int r = 0;
+
+	mutex_lock(&adev->gfx.gfx_off_mutex);
+
+	r = amdgpu_dpm_set_residency_gfxoff(adev, value);
+
+	mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+	return r;
+}
+
+int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
+{
+	int r = 0;
+
+	mutex_lock(&adev->gfx.gfx_off_mutex);
+
+	r = amdgpu_dpm_get_residency_gfxoff(adev, value);
+
+	mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+	return r;
+}
+
+int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32 *value)
+{
+	int r = 0;
+
+	mutex_lock(&adev->gfx.gfx_off_mutex);
+
+	r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
+
+	mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+	return r;
+}
+
 int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
 {
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 23a696d38390..f06e979e2565 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -336,6 +336,8 @@  struct amdgpu_gfx {
 	struct mutex                    gfx_off_mutex;
 	uint32_t                        gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */
 	struct delayed_work             gfx_off_delay_work;
+	uint32_t                        gfx_off_residency;
+	uint32_t                        gfx_off_entrycount;
 
 	/* pipe reservation */
 	struct mutex			pipe_reserve_mutex;
@@ -407,6 +409,10 @@  bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
 int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value);
 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+void amdgpu_gfx_ras_fini(struct amdgpu_device *adev);
+int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u32 *value);
+int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *residency);
+int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value);
 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
 		void *err_data,
 		struct amdgpu_iv_entry *entry);
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 956b6ce81c84..df87d0768fd7 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -668,6 +668,51 @@  int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev,
 	return ret;
 }
 
+int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool value)
+{
+	struct smu_context *smu = adev->powerplay.pp_handle;
+	int ret = 0;
+
+	if (!is_support_sw_smu(adev))
+		return -EOPNOTSUPP;
+
+	mutex_lock(&adev->pm.mutex);
+	ret = smu_set_residency_gfxoff(smu, value);
+	mutex_unlock(&adev->pm.mutex);
+
+	return ret;
+}
+
+int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32 *value)
+{
+	struct smu_context *smu = adev->powerplay.pp_handle;
+	int ret = 0;
+
+	if (!is_support_sw_smu(adev))
+		return -EOPNOTSUPP;
+
+	mutex_lock(&adev->pm.mutex);
+	ret = smu_get_residency_gfxoff(smu, value);
+	mutex_unlock(&adev->pm.mutex);
+
+	return ret;
+}
+
+int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev, u32 *value)
+{
+	struct smu_context *smu = adev->powerplay.pp_handle;
+	int ret = 0;
+
+	if (!is_support_sw_smu(adev))
+		return -EOPNOTSUPP;
+
+	mutex_lock(&adev->pm.mutex);
+	ret = smu_get_entrycount_gfxoff(smu, value);
+	mutex_unlock(&adev->pm.mutex);
+
+	return ret;
+}
+
 int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value)
 {
 	struct smu_context *smu = adev->powerplay.pp_handle;
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index 65624d091ed2..83a83e93037c 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -435,6 +435,9 @@  int amdgpu_dpm_set_soft_freq_range(struct amdgpu_device *adev,
 int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev);
 int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum smu_event_type event,
 		       uint64_t event_arg);
+int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32 *value);
+int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool value);
+int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev, u32 *value);
 int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value);
 uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct amdgpu_device *adev);
 void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index fd79b213fab4..cfc3b9d749bf 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -90,6 +90,30 @@  static int smu_sys_set_pp_feature_mask(void *handle,
 	return smu_set_pp_feature_mask(smu, new_mask);
 }
 
+int smu_set_residency_gfxoff(struct smu_context *smu, bool value)
+{
+	if (!smu->ppt_funcs->set_gfx_off_residency)
+		return -EINVAL;
+
+	return smu_set_gfx_off_residency(smu, value);
+}
+
+int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value)
+{
+	if (!smu->ppt_funcs->get_gfx_off_residency)
+		return -EINVAL;
+
+	return smu_get_gfx_off_residency(smu, value);
+}
+
+int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value)
+{
+	if (!smu->ppt_funcs->get_gfx_off_entrycount)
+		return -EINVAL;
+
+	return smu_get_gfx_off_entrycount(smu, value);
+}
+
 int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value)
 {
 	if (!smu->ppt_funcs->get_gfx_off_status)
@@ -1573,7 +1597,7 @@  static int smu_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct smu_context *smu = adev->powerplay.pp_handle;
-	int ret;
+	int ret, count;
 
 	if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev))
 		return 0;
@@ -1591,6 +1615,14 @@  static int smu_suspend(void *handle)
 
 	smu_set_gfx_cgpg(smu, false);
 
+	/*
+	 * pmfw resets entrycount when device is suspended, so we save the
+	 * last value to be used when we resume to keep it consistent
+	 */
+	ret = smu_get_entrycount_gfxoff(smu, &count);
+	if (!ret)
+		adev->gfx.gfx_off_entrycount = count;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index b81c657c7386..9827075b768e 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -1111,6 +1111,22 @@  struct pptable_funcs {
 	 */
 	uint32_t (*get_gfx_off_status)(struct smu_context *smu);
 
+	/**
+	 * @get_gfx_off_entrycount: total GFXOFF entry count at the time of
+	 * query since system power-up
+	 */
+	u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint32_t *entrycount);
+
+	/**
+	 * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging
+	 */
+	u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start);
+
+	/**
+	 * @get_gfx_off_residency: Average GFXOFF residency % during the logging interval
+	 */
+	u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t *residency);
+
 	/**
 	 * @register_irq_handler: Register interupt request handlers.
 	 */
@@ -1454,6 +1470,12 @@  int smu_set_ac_dc(struct smu_context *smu);
 
 int smu_allow_xgmi_power_down(struct smu_context *smu, bool en);
 
+int smu_get_entrycount_gfxoff(struct smu_context *smu, u32 *value);
+
+int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value);
+
+int smu_set_residency_gfxoff(struct smu_context *smu, bool value);
+
 int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value);
 
 int smu_handle_passthrough_sbr(struct smu_context *smu, bool enable);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
index 7469bbfce1fb..ceb13c838067 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
@@ -47,6 +47,9 @@ 
 #define smu_notify_memory_pool_location(smu)				smu_ppt_funcs(notify_memory_pool_location, 0, smu)
 #define smu_gfx_off_control(smu, enable)				smu_ppt_funcs(gfx_off_control, 0, smu, enable)
 #define smu_get_gfx_off_status(smu)						smu_ppt_funcs(get_gfx_off_status, 0, smu)
+#define smu_get_gfx_off_entrycount(smu, value)						smu_ppt_funcs(get_gfx_off_entrycount, 0, smu, value)
+#define smu_get_gfx_off_residency(smu, value)						smu_ppt_funcs(get_gfx_off_residency, 0, smu, value)
+#define smu_set_gfx_off_residency(smu, value)						smu_ppt_funcs(set_gfx_off_residency, 0, smu, value)
 #define smu_set_last_dcef_min_deep_sleep_clk(smu)			smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu)
 #define smu_system_features_control(smu, en)				smu_ppt_funcs(system_features_control, 0, smu, en)
 #define smu_init_max_sustainable_clocks(smu)				smu_ppt_funcs(init_max_sustainable_clocks, 0, smu)