diff mbox series

drm/amd/debugfs: Expose GFXOFF state to userspace

Message ID 20220713151504.7521-1-andrealmeid@igalia.com (mailing list archive)
State New, archived
Headers show
Series drm/amd/debugfs: Expose GFXOFF state to userspace | expand

Commit Message

André Almeida July 13, 2022, 3:15 p.m. UTC
GFXOFF has two different "state" values: one to define if the GPU is
allowed/disallowed to enter GFXOFF, usually called state; and another
one to define if currently GFXOFF is being used, usually called status.
Even when GFXOFF is allowed, GPU firmware can decide to not used it
accordingly to the GPU load.

Userspace can allow/disallow GPUs to enter into GFXOFF via debugfs. The
kernel maintains a counter of requests for GFXOFF (gfx_off_req_count)
that should be decreased to allow GFXOFF and increased to disallow.

The issue with this interface is that userspace can't be sure if GFXOFF
is currently allowed. Even by checking amdgpu_gfxoff file, one might get
an ambiguous 2, that means that GPU is currently out of GFXOFF, but that
can be either because it's currently disallowed or because it's allowed
but given the current GPU load it's enabled. Then, userspace needs to
rely on the fact that GFXOFF is enabled by default on boot and to track
this information.

To make userspace life easier and GFXOFF more reliable, return the
current state of GFXOFF to userspace when reading amdgpu_gfxoff with the
same semantics of writing: 0 means not allowed, not 0 means allowed.

Expose the current status of GFXOFF through a new file,
amdgpu_gfxoff_status.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 49 ++++++++++++++++++++-
 1 file changed, 47 insertions(+), 2 deletions(-)

Comments

Alex Deucher July 14, 2022, 4:42 p.m. UTC | #1
On Wed, Jul 13, 2022 at 11:15 AM André Almeida <andrealmeid@igalia.com> wrote:
>
> GFXOFF has two different "state" values: one to define if the GPU is
> allowed/disallowed to enter GFXOFF, usually called state; and another
> one to define if currently GFXOFF is being used, usually called status.
> Even when GFXOFF is allowed, GPU firmware can decide to not used it
> accordingly to the GPU load.
>
> Userspace can allow/disallow GPUs to enter into GFXOFF via debugfs. The
> kernel maintains a counter of requests for GFXOFF (gfx_off_req_count)
> that should be decreased to allow GFXOFF and increased to disallow.
>
> The issue with this interface is that userspace can't be sure if GFXOFF
> is currently allowed. Even by checking amdgpu_gfxoff file, one might get
> an ambiguous 2, that means that GPU is currently out of GFXOFF, but that
> can be either because it's currently disallowed or because it's allowed
> but given the current GPU load it's enabled. Then, userspace needs to
> rely on the fact that GFXOFF is enabled by default on boot and to track
> this information.
>
> To make userspace life easier and GFXOFF more reliable, return the
> current state of GFXOFF to userspace when reading amdgpu_gfxoff with the
> same semantics of writing: 0 means not allowed, not 0 means allowed.
>

This looks good. Can you document this in the amdgpu kerneldoc?

Alex

> Expose the current status of GFXOFF through a new file,
> amdgpu_gfxoff_status.
>
> Signed-off-by: André Almeida <andrealmeid@igalia.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 49 ++++++++++++++++++++-
>  1 file changed, 47 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index f3b3c688e4e7..e2eec985adb3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -1117,13 +1117,50 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
>         }
>
>         while (size) {
> -               uint32_t value;
> +               u32 value = adev->gfx.gfx_off_state;
> +
> +               r = put_user(value, (u32 *)buf);
> +               if (r)
> +                       goto out;
> +
> +               result += 4;
> +               buf += 4;
> +               *pos += 4;
> +               size -= 4;
> +       }
> +
> +       r = result;
> +out:
> +       pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> +       pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> +
> +       return r;
> +}
> +
> +static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
> +                                                size_t size, loff_t *pos)
> +{
> +       struct amdgpu_device *adev = file_inode(f)->i_private;
> +       ssize_t result = 0;
> +       int r;
> +
> +       if (size & 0x3 || *pos & 0x3)
> +               return -EINVAL;
> +
> +       r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> +       if (r < 0) {
> +               pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> +               return r;
> +       }
> +
> +       while (size) {
> +               u32 value;
>
>                 r = amdgpu_get_gfx_off_status(adev, &value);
>                 if (r)
>                         goto out;
>
> -               r = put_user(value, (uint32_t *)buf);
> +               r = put_user(value, (u32 *)buf);
>                 if (r)
>                         goto out;
>
> @@ -1206,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
>         .llseek = default_llseek
>  };
>
> +static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
> +       .owner = THIS_MODULE,
> +       .read = amdgpu_debugfs_gfxoff_status_read,
> +       .llseek = default_llseek
> +};
> +
>  static const struct file_operations *debugfs_regs[] = {
>         &amdgpu_debugfs_regs_fops,
>         &amdgpu_debugfs_regs2_fops,
> @@ -1217,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = {
>         &amdgpu_debugfs_wave_fops,
>         &amdgpu_debugfs_gpr_fops,
>         &amdgpu_debugfs_gfxoff_fops,
> +       &amdgpu_debugfs_gfxoff_status_fops,
>  };
>
>  static const char *debugfs_regs_names[] = {
> @@ -1230,6 +1274,7 @@ static const char *debugfs_regs_names[] = {
>         "amdgpu_wave",
>         "amdgpu_gpr",
>         "amdgpu_gfxoff",
> +       "amdgpu_gfxoff_status",
>  };
>
>  /**
> --
> 2.37.0
>
André Almeida July 14, 2022, 4:45 p.m. UTC | #2
Às 13:42 de 14/07/22, Alex Deucher escreveu:
> On Wed, Jul 13, 2022 at 11:15 AM André Almeida <andrealmeid@igalia.com> wrote:
>>
>> GFXOFF has two different "state" values: one to define if the GPU is
>> allowed/disallowed to enter GFXOFF, usually called state; and another
>> one to define if currently GFXOFF is being used, usually called status.
>> Even when GFXOFF is allowed, GPU firmware can decide to not used it
>> accordingly to the GPU load.
>>
>> Userspace can allow/disallow GPUs to enter into GFXOFF via debugfs. The
>> kernel maintains a counter of requests for GFXOFF (gfx_off_req_count)
>> that should be decreased to allow GFXOFF and increased to disallow.
>>
>> The issue with this interface is that userspace can't be sure if GFXOFF
>> is currently allowed. Even by checking amdgpu_gfxoff file, one might get
>> an ambiguous 2, that means that GPU is currently out of GFXOFF, but that
>> can be either because it's currently disallowed or because it's allowed
>> but given the current GPU load it's enabled. Then, userspace needs to
>> rely on the fact that GFXOFF is enabled by default on boot and to track
>> this information.
>>
>> To make userspace life easier and GFXOFF more reliable, return the
>> current state of GFXOFF to userspace when reading amdgpu_gfxoff with the
>> same semantics of writing: 0 means not allowed, not 0 means allowed.
>>
> 
> This looks good. Can you document this in the amdgpu kerneldoc?
> 

Sure, let me send a v2 with that

> Alex
> 
>> Expose the current status of GFXOFF through a new file,
>> amdgpu_gfxoff_status.
>>
>> Signed-off-by: André Almeida <andrealmeid@igalia.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 49 ++++++++++++++++++++-
>>  1 file changed, 47 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> index f3b3c688e4e7..e2eec985adb3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
>> @@ -1117,13 +1117,50 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
>>         }
>>
>>         while (size) {
>> -               uint32_t value;
>> +               u32 value = adev->gfx.gfx_off_state;
>> +
>> +               r = put_user(value, (u32 *)buf);
>> +               if (r)
>> +                       goto out;
>> +
>> +               result += 4;
>> +               buf += 4;
>> +               *pos += 4;
>> +               size -= 4;
>> +       }
>> +
>> +       r = result;
>> +out:
>> +       pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>> +       pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> +
>> +       return r;
>> +}
>> +
>> +static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
>> +                                                size_t size, loff_t *pos)
>> +{
>> +       struct amdgpu_device *adev = file_inode(f)->i_private;
>> +       ssize_t result = 0;
>> +       int r;
>> +
>> +       if (size & 0x3 || *pos & 0x3)
>> +               return -EINVAL;
>> +
>> +       r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
>> +       if (r < 0) {
>> +               pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> +               return r;
>> +       }
>> +
>> +       while (size) {
>> +               u32 value;
>>
>>                 r = amdgpu_get_gfx_off_status(adev, &value);
>>                 if (r)
>>                         goto out;
>>
>> -               r = put_user(value, (uint32_t *)buf);
>> +               r = put_user(value, (u32 *)buf);
>>                 if (r)
>>                         goto out;
>>
>> @@ -1206,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
>>         .llseek = default_llseek
>>  };
>>
>> +static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
>> +       .owner = THIS_MODULE,
>> +       .read = amdgpu_debugfs_gfxoff_status_read,
>> +       .llseek = default_llseek
>> +};
>> +
>>  static const struct file_operations *debugfs_regs[] = {
>>         &amdgpu_debugfs_regs_fops,
>>         &amdgpu_debugfs_regs2_fops,
>> @@ -1217,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = {
>>         &amdgpu_debugfs_wave_fops,
>>         &amdgpu_debugfs_gpr_fops,
>>         &amdgpu_debugfs_gfxoff_fops,
>> +       &amdgpu_debugfs_gfxoff_status_fops,
>>  };
>>
>>  static const char *debugfs_regs_names[] = {
>> @@ -1230,6 +1274,7 @@ static const char *debugfs_regs_names[] = {
>>         "amdgpu_wave",
>>         "amdgpu_gpr",
>>         "amdgpu_gfxoff",
>> +       "amdgpu_gfxoff_status",
>>  };
>>
>>  /**
>> --
>> 2.37.0
>>
Alex Deucher July 14, 2022, 4:47 p.m. UTC | #3
On Thu, Jul 14, 2022 at 12:45 PM André Almeida <andrealmeid@igalia.com> wrote:
>
>
>
> Às 13:42 de 14/07/22, Alex Deucher escreveu:
> > On Wed, Jul 13, 2022 at 11:15 AM André Almeida <andrealmeid@igalia.com> wrote:
> >>
> >> GFXOFF has two different "state" values: one to define if the GPU is
> >> allowed/disallowed to enter GFXOFF, usually called state; and another
> >> one to define if currently GFXOFF is being used, usually called status.
> >> Even when GFXOFF is allowed, GPU firmware can decide to not used it
> >> accordingly to the GPU load.
> >>
> >> Userspace can allow/disallow GPUs to enter into GFXOFF via debugfs. The
> >> kernel maintains a counter of requests for GFXOFF (gfx_off_req_count)
> >> that should be decreased to allow GFXOFF and increased to disallow.
> >>
> >> The issue with this interface is that userspace can't be sure if GFXOFF
> >> is currently allowed. Even by checking amdgpu_gfxoff file, one might get
> >> an ambiguous 2, that means that GPU is currently out of GFXOFF, but that
> >> can be either because it's currently disallowed or because it's allowed
> >> but given the current GPU load it's enabled. Then, userspace needs to
> >> rely on the fact that GFXOFF is enabled by default on boot and to track
> >> this information.
> >>
> >> To make userspace life easier and GFXOFF more reliable, return the
> >> current state of GFXOFF to userspace when reading amdgpu_gfxoff with the
> >> same semantics of writing: 0 means not allowed, not 0 means allowed.
> >>
> >
> > This looks good. Can you document this in the amdgpu kerneldoc?
> >
>
> Sure, let me send a v2 with that

While you are at it, if we are missing docs for the other gfxoff file
can you add that as well?

Thanks!

Alex

>
> > Alex
> >
> >> Expose the current status of GFXOFF through a new file,
> >> amdgpu_gfxoff_status.
> >>
> >> Signed-off-by: André Almeida <andrealmeid@igalia.com>
> >> ---
> >>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 49 ++++++++++++++++++++-
> >>  1 file changed, 47 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >> index f3b3c688e4e7..e2eec985adb3 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> >> @@ -1117,13 +1117,50 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
> >>         }
> >>
> >>         while (size) {
> >> -               uint32_t value;
> >> +               u32 value = adev->gfx.gfx_off_state;
> >> +
> >> +               r = put_user(value, (u32 *)buf);
> >> +               if (r)
> >> +                       goto out;
> >> +
> >> +               result += 4;
> >> +               buf += 4;
> >> +               *pos += 4;
> >> +               size -= 4;
> >> +       }
> >> +
> >> +       r = result;
> >> +out:
> >> +       pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> >> +       pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >> +
> >> +       return r;
> >> +}
> >> +
> >> +static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
> >> +                                                size_t size, loff_t *pos)
> >> +{
> >> +       struct amdgpu_device *adev = file_inode(f)->i_private;
> >> +       ssize_t result = 0;
> >> +       int r;
> >> +
> >> +       if (size & 0x3 || *pos & 0x3)
> >> +               return -EINVAL;
> >> +
> >> +       r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
> >> +       if (r < 0) {
> >> +               pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> >> +               return r;
> >> +       }
> >> +
> >> +       while (size) {
> >> +               u32 value;
> >>
> >>                 r = amdgpu_get_gfx_off_status(adev, &value);
> >>                 if (r)
> >>                         goto out;
> >>
> >> -               r = put_user(value, (uint32_t *)buf);
> >> +               r = put_user(value, (u32 *)buf);
> >>                 if (r)
> >>                         goto out;
> >>
> >> @@ -1206,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
> >>         .llseek = default_llseek
> >>  };
> >>
> >> +static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
> >> +       .owner = THIS_MODULE,
> >> +       .read = amdgpu_debugfs_gfxoff_status_read,
> >> +       .llseek = default_llseek
> >> +};
> >> +
> >>  static const struct file_operations *debugfs_regs[] = {
> >>         &amdgpu_debugfs_regs_fops,
> >>         &amdgpu_debugfs_regs2_fops,
> >> @@ -1217,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = {
> >>         &amdgpu_debugfs_wave_fops,
> >>         &amdgpu_debugfs_gpr_fops,
> >>         &amdgpu_debugfs_gfxoff_fops,
> >> +       &amdgpu_debugfs_gfxoff_status_fops,
> >>  };
> >>
> >>  static const char *debugfs_regs_names[] = {
> >> @@ -1230,6 +1274,7 @@ static const char *debugfs_regs_names[] = {
> >>         "amdgpu_wave",
> >>         "amdgpu_gpr",
> >>         "amdgpu_gfxoff",
> >> +       "amdgpu_gfxoff_status",
> >>  };
> >>
> >>  /**
> >> --
> >> 2.37.0
> >>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index f3b3c688e4e7..e2eec985adb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1117,13 +1117,50 @@  static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
 	}
 
 	while (size) {
-		uint32_t value;
+		u32 value = adev->gfx.gfx_off_state;
+
+		r = put_user(value, (u32 *)buf);
+		if (r)
+			goto out;
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	r = result;
+out:
+	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
+	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+
+	return r;
+}
+
+static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
+						 size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	ssize_t result = 0;
+	int r;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
+	if (r < 0) {
+		pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
+		return r;
+	}
+
+	while (size) {
+		u32 value;
 
 		r = amdgpu_get_gfx_off_status(adev, &value);
 		if (r)
 			goto out;
 
-		r = put_user(value, (uint32_t *)buf);
+		r = put_user(value, (u32 *)buf);
 		if (r)
 			goto out;
 
@@ -1206,6 +1243,12 @@  static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
 	.llseek = default_llseek
 };
 
+static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_gfxoff_status_read,
+	.llseek = default_llseek
+};
+
 static const struct file_operations *debugfs_regs[] = {
 	&amdgpu_debugfs_regs_fops,
 	&amdgpu_debugfs_regs2_fops,
@@ -1217,6 +1260,7 @@  static const struct file_operations *debugfs_regs[] = {
 	&amdgpu_debugfs_wave_fops,
 	&amdgpu_debugfs_gpr_fops,
 	&amdgpu_debugfs_gfxoff_fops,
+	&amdgpu_debugfs_gfxoff_status_fops,
 };
 
 static const char *debugfs_regs_names[] = {
@@ -1230,6 +1274,7 @@  static const char *debugfs_regs_names[] = {
 	"amdgpu_wave",
 	"amdgpu_gpr",
 	"amdgpu_gfxoff",
+	"amdgpu_gfxoff_status",
 };
 
 /**