diff mbox series

drm/amdkfd: Set handle to invalid for non GTT/VRAM BOs

Message ID 20220309174133.14454-1-david.yatsin@amd.com (mailing list archive)
State New, archived
Headers show
Series drm/amdkfd: Set handle to invalid for non GTT/VRAM BOs | expand

Commit Message

David Yat Sin March 9, 2022, 5:41 p.m. UTC
Set the dmabuf handle (dmabuf_fd) to KFD_INVALID_FD for non-GTT/VRAM BOs,
which cannot be accessed using SDMA during checkpoint/restore.

Signed-off-by: David Yat Sin <david.yatsin@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 8 ++++++--
 include/uapi/linux/kfd_ioctl.h           | 2 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

Comments

Felix Kuehling March 9, 2022, 9:10 p.m. UTC | #1
On 2022-03-09 12:41, David Yat Sin wrote:
> Set dmabuf handle to invalid for BOs that cannot be accessed using SDMA
> during checkpoint/restore.
>
> Signed-off-by: David Yat Sin <david.yatsin@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 8 ++++++--
>   include/uapi/linux/kfd_ioctl.h           | 2 ++
>   2 files changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index e1e2362841f8..1ffa976ad318 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1767,7 +1767,9 @@ static int criu_checkpoint_bos(struct kfd_process *p,
>   						&bo_bucket->dmabuf_fd);
>   				if (ret)
>   					goto exit;
> -			}
> +			} else
> +				bo_bucket->dmabuf_fd = KFD_INVALID_FD;

Minor nit-pick: It would be better to use {} around the else-branch for 
consistency with the if-branch. Same below.

Ideally, this should have been part of the patch that bumped the KFD 
version to 1.8. Alex, is there a way to squash this when you send this 
in a pull-request for drm-next? Maybe if we create the commit with "git 
commit --fixup" you can let auto-squash handle it.

Other than that, the patch looks good to me.

Regards,
   Felix


> +
>   			if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
>   				bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
>   					KFD_MMAP_GPU_ID(pdd->dev->id);
> @@ -2219,7 +2221,9 @@ static int criu_restore_bo(struct kfd_process *p,
>   					    &bo_bucket->dmabuf_fd);
>   		if (ret)
>   			return ret;
> -	}
> +	} else
> +		bo_bucket->dmabuf_fd = KFD_INVALID_FD;
> +
>   	return 0;
>   }
>   
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index eb9ff85f8556..42975e940758 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -196,6 +196,8 @@ struct kfd_ioctl_dbg_wave_control_args {
>   	__u32 buf_size_in_bytes;	/*including gpu_id and buf_size */
>   };
>   
> +#define KFD_INVALID_FD     0xffffffff
> +
>   /* Matching HSA_EVENTTYPE */
>   #define KFD_IOC_EVENT_SIGNAL			0
>   #define KFD_IOC_EVENT_NODECHANGE		1
Alex Deucher March 9, 2022, 9:21 p.m. UTC | #2
On Wed, Mar 9, 2022 at 4:10 PM Felix Kuehling <felix.kuehling@amd.com> wrote:
>
> On 2022-03-09 12:41, David Yat Sin wrote:
> > Set dmabuf handle to invalid for BOs that cannot be accessed using SDMA
> > during checkpoint/restore.
> >
> > Signed-off-by: David Yat Sin <david.yatsin@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 8 ++++++--
> >   include/uapi/linux/kfd_ioctl.h           | 2 ++
> >   2 files changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > index e1e2362841f8..1ffa976ad318 100644
> > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > @@ -1767,7 +1767,9 @@ static int criu_checkpoint_bos(struct kfd_process *p,
> >                                               &bo_bucket->dmabuf_fd);
> >                               if (ret)
> >                                       goto exit;
> > -                     }
> > +                     } else
> > +                             bo_bucket->dmabuf_fd = KFD_INVALID_FD;
>
> Minor nit-pick: It would be better to use {} around the else-branch for
> consistency with the if-branch. Same below.
>
> Ideally, this should have been part of the patch that bumped the KFD
> version to 1.8. Alex, is there a way to squash this when you send this
> in a pull-request for drm-next? Maybe if we create the commit with "git
> commit --fixup" you can let auto-squash handle it.
>

When did that patch land?  If I haven't included it in a PR yet, I can
squash this in.

Alex


> Other than that, the patch looks good to me.
>
> Regards,
>    Felix
>
>
> > +
> >                       if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
> >                               bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
> >                                       KFD_MMAP_GPU_ID(pdd->dev->id);
> > @@ -2219,7 +2221,9 @@ static int criu_restore_bo(struct kfd_process *p,
> >                                           &bo_bucket->dmabuf_fd);
> >               if (ret)
> >                       return ret;
> > -     }
> > +     } else
> > +             bo_bucket->dmabuf_fd = KFD_INVALID_FD;
> > +
> >       return 0;
> >   }
> >
> > diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> > index eb9ff85f8556..42975e940758 100644
> > --- a/include/uapi/linux/kfd_ioctl.h
> > +++ b/include/uapi/linux/kfd_ioctl.h
> > @@ -196,6 +196,8 @@ struct kfd_ioctl_dbg_wave_control_args {
> >       __u32 buf_size_in_bytes;        /*including gpu_id and buf_size */
> >   };
> >
> > +#define KFD_INVALID_FD     0xffffffff
> > +
> >   /* Matching HSA_EVENTTYPE */
> >   #define KFD_IOC_EVENT_SIGNAL                        0
> >   #define KFD_IOC_EVENT_NODECHANGE            1
Alex Deucher March 9, 2022, 9:23 p.m. UTC | #3
On Wed, Mar 9, 2022 at 4:21 PM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Wed, Mar 9, 2022 at 4:10 PM Felix Kuehling <felix.kuehling@amd.com> wrote:
> >
> > On 2022-03-09 12:41, David Yat Sin wrote:
> > > Set dmabuf handle to invalid for BOs that cannot be accessed using SDMA
> > > during checkpoint/restore.
> > >
> > > Signed-off-by: David Yat Sin <david.yatsin@amd.com>
> > > ---
> > >   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 8 ++++++--
> > >   include/uapi/linux/kfd_ioctl.h           | 2 ++
> > >   2 files changed, 8 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > > index e1e2362841f8..1ffa976ad318 100644
> > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> > > @@ -1767,7 +1767,9 @@ static int criu_checkpoint_bos(struct kfd_process *p,
> > >                                               &bo_bucket->dmabuf_fd);
> > >                               if (ret)
> > >                                       goto exit;
> > > -                     }
> > > +                     } else
> > > +                             bo_bucket->dmabuf_fd = KFD_INVALID_FD;
> >
> > Minor nit-pick: It would be better to use {} around the else-branch for
> > consistency with the if-branch. Same below.
> >
> > Ideally, this should have been part of the patch that bumped the KFD
> > version to 1.8. Alex, is there a way to squash this when you send this
> > in a pull-request for drm-next? Maybe if we create the commit with "git
> > commit --fixup" you can let auto-squash handle it.
> >
>
> When did that patch land?  If I haven't included it in a PR yet, I can
> squash this in.

Ah, I see it.  It was from yesterday, so no problem.

Alex

>
> Alex
>
>
> > Other than that, the patch looks good to me.
> >
> > Regards,
> >    Felix
> >
> >
> > > +
> > >                       if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
> > >                               bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
> > >                                       KFD_MMAP_GPU_ID(pdd->dev->id);
> > > @@ -2219,7 +2221,9 @@ static int criu_restore_bo(struct kfd_process *p,
> > >                                           &bo_bucket->dmabuf_fd);
> > >               if (ret)
> > >                       return ret;
> > > -     }
> > > +     } else
> > > +             bo_bucket->dmabuf_fd = KFD_INVALID_FD;
> > > +
> > >       return 0;
> > >   }
> > >
> > > diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> > > index eb9ff85f8556..42975e940758 100644
> > > --- a/include/uapi/linux/kfd_ioctl.h
> > > +++ b/include/uapi/linux/kfd_ioctl.h
> > > @@ -196,6 +196,8 @@ struct kfd_ioctl_dbg_wave_control_args {
> > >       __u32 buf_size_in_bytes;        /*including gpu_id and buf_size */
> > >   };
> > >
> > > +#define KFD_INVALID_FD     0xffffffff
> > > +
> > >   /* Matching HSA_EVENTTYPE */
> > >   #define KFD_IOC_EVENT_SIGNAL                        0
> > >   #define KFD_IOC_EVENT_NODECHANGE            1
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index e1e2362841f8..1ffa976ad318 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1767,7 +1767,9 @@  static int criu_checkpoint_bos(struct kfd_process *p,
 						&bo_bucket->dmabuf_fd);
 				if (ret)
 					goto exit;
-			}
+			} else
+				bo_bucket->dmabuf_fd = KFD_INVALID_FD;
+
 			if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
 				bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
 					KFD_MMAP_GPU_ID(pdd->dev->id);
@@ -2219,7 +2221,9 @@  static int criu_restore_bo(struct kfd_process *p,
 					    &bo_bucket->dmabuf_fd);
 		if (ret)
 			return ret;
-	}
+	} else
+		bo_bucket->dmabuf_fd = KFD_INVALID_FD;
+
 	return 0;
 }
 
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index eb9ff85f8556..42975e940758 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -196,6 +196,8 @@  struct kfd_ioctl_dbg_wave_control_args {
 	__u32 buf_size_in_bytes;	/*including gpu_id and buf_size */
 };
 
+#define KFD_INVALID_FD     0xffffffff
+
 /* Matching HSA_EVENTTYPE */
 #define KFD_IOC_EVENT_SIGNAL			0
 #define KFD_IOC_EVENT_NODECHANGE		1