Message ID | 20230815195100.294458-4-andrealmeid@igalia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/amdgpu: Rework coredump memory allocation | expand |
Am 15.08.23 um 21:50 schrieb André Almeida: > Giving that we use codedump just for device resets, move it's functions > and structs to a more semantic file, the amdgpu_reset.{c, h}. > > Signed-off-by: André Almeida <andrealmeid@igalia.com> Acked-by: Christian König <christian.koenig@amd.com> for this patch here. I'm going to push patch #1 from this series into our internal branch since that is actually a bug fix. From my side the rest looks good as well, but going to leave this here on the list until Shashank had time to take a look as well. Thanks, Christian. > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 78 ---------------------- > drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 76 +++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 11 +++ > 4 files changed, 87 insertions(+), 87 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 0d560b713948..314b06cddc39 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1100,15 +1100,6 @@ struct amdgpu_device { > uint32_t aid_mask; > }; > > -#ifdef CONFIG_DEV_COREDUMP > -struct amdgpu_coredump_info { > - struct amdgpu_device *adev; > - struct amdgpu_task_info reset_task_info; > - struct timespec64 reset_time; > - bool reset_vram_lost; > -}; > -#endif > - > static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) > { > return container_of(ddev, struct amdgpu_device, ddev); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index b5b879bcc5c9..9706f608723a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -32,8 +32,6 @@ > #include <linux/slab.h> > #include <linux/iommu.h> > #include <linux/pci.h> > -#include <linux/devcoredump.h> > -#include <generated/utsrelease.h> > #include <linux/pci-p2pdma.h> > #include 
<linux/apple-gmux.h> > > @@ -4799,82 +4797,6 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) > return 0; > } > > -#ifndef CONFIG_DEV_COREDUMP > -static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, > - struct amdgpu_reset_context *reset_context) > -{ > -} > -#else > -static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, > - size_t count, void *data, size_t datalen) > -{ > - struct drm_printer p; > - struct amdgpu_coredump_info *coredump = data; > - struct drm_print_iterator iter; > - int i; > - > - iter.data = buffer; > - iter.offset = 0; > - iter.start = offset; > - iter.remain = count; > - > - p = drm_coredump_printer(&iter); > - > - drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); > - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); > - drm_printf(&p, "module: " KBUILD_MODNAME "\n"); > - drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec); > - if (coredump->reset_task_info.pid) > - drm_printf(&p, "process_name: %s PID: %d\n", > - coredump->reset_task_info.process_name, > - coredump->reset_task_info.pid); > - > - if (coredump->reset_vram_lost) > - drm_printf(&p, "VRAM is lost due to GPU reset!\n"); > - if (coredump->adev->num_regs) { > - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); > - > - for (i = 0; i < coredump->adev->num_regs; i++) > - drm_printf(&p, "0x%08x: 0x%08x\n", > - coredump->adev->reset_dump_reg_list[i], > - coredump->adev->reset_dump_reg_value[i]); > - } > - > - return count - iter.remain; > -} > - > -static void amdgpu_devcoredump_free(void *data) > -{ > - kfree(data); > -} > - > -static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, > - struct amdgpu_reset_context *reset_context) > -{ > - struct amdgpu_coredump_info *coredump; > - struct drm_device *dev = adev_to_drm(adev); > - > - coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); > - > - if (!coredump) { > - DRM_ERROR("%s: failed to allocate memory for coredump\n", 
__func__); > - return; > - } > - > - coredump->reset_vram_lost = vram_lost; > - > - if (reset_context->job && reset_context->job->vm) > - coredump->reset_task_info = reset_context->job->vm->task_info; > - > - coredump->adev = adev; > - > - ktime_get_ts64(&coredump->reset_time); > - > - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, > - amdgpu_devcoredump_read, amdgpu_devcoredump_free); > -} > -#endif > - > int amdgpu_do_asic_reset(struct list_head *device_list_handle, > struct amdgpu_reset_context *reset_context) > { > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c > index 5fed06ffcc6b..46c8d6ce349c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c > @@ -21,6 +21,9 @@ > * > */ > > +#include <linux/devcoredump.h> > +#include <generated/utsrelease.h> > + > #include "amdgpu_reset.h" > #include "aldebaran.h" > #include "sienna_cichlid.h" > @@ -167,5 +170,78 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain) > up_write(&reset_domain->sem); > } > > +#ifndef CONFIG_DEV_COREDUMP > +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, > + struct amdgpu_reset_context *reset_context) > +{ > +} > +#else > +static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, > + size_t count, void *data, size_t datalen) > +{ > + struct drm_printer p; > + struct amdgpu_coredump_info *coredump = data; > + struct drm_print_iterator iter; > + int i; > + > + iter.data = buffer; > + iter.offset = 0; > + iter.start = offset; > + iter.remain = count; > + > + p = drm_coredump_printer(&iter); > + > + drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); > + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); > + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); > + drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec); > + if (coredump->reset_task_info.pid) > + drm_printf(&p, "process_name: 
%s PID: %d\n", > + coredump->reset_task_info.process_name, > + coredump->reset_task_info.pid); > + > + if (coredump->reset_vram_lost) > + drm_printf(&p, "VRAM is lost due to GPU reset!\n"); > + if (coredump->adev->num_regs) { > + drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); > + > + for (i = 0; i < coredump->adev->num_regs; i++) > + drm_printf(&p, "0x%08x: 0x%08x\n", > + coredump->adev->reset_dump_reg_list[i], > + coredump->adev->reset_dump_reg_value[i]); > + } > + > + return count - iter.remain; > +} > > +static void amdgpu_devcoredump_free(void *data) > +{ > + kfree(data); > +} > > +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, > + struct amdgpu_reset_context *reset_context) > +{ > + struct amdgpu_coredump_info *coredump; > + struct drm_device *dev = adev_to_drm(adev); > + > + coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); > + > + if (!coredump) { > + DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); > + return; > + } > + > + coredump->reset_vram_lost = vram_lost; > + > + if (reset_context->job && reset_context->job->vm) > + coredump->reset_task_info = reset_context->job->vm->task_info; > + > + coredump->adev = adev; > + > + ktime_get_ts64(&coredump->reset_time); > + > + dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, > + amdgpu_devcoredump_read, amdgpu_devcoredump_free); > +} > +#endif > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h > index f4a501ff87d9..362954521721 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h > @@ -87,6 +87,15 @@ struct amdgpu_reset_domain { > atomic_t reset_res; > }; > > +#ifdef CONFIG_DEV_COREDUMP > +struct amdgpu_coredump_info { > + struct amdgpu_device *adev; > + struct amdgpu_task_info reset_task_info; > + struct timespec64 reset_time; > + bool reset_vram_lost; > +}; > +#endif > + > > int amdgpu_reset_init(struct amdgpu_device *adev); > int 
amdgpu_reset_fini(struct amdgpu_device *adev); > @@ -126,4 +135,6 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); > > void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); > > +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, > + struct amdgpu_reset_context *reset_context); > #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0d560b713948..314b06cddc39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1100,15 +1100,6 @@ struct amdgpu_device { uint32_t aid_mask; }; -#ifdef CONFIG_DEV_COREDUMP -struct amdgpu_coredump_info { - struct amdgpu_device *adev; - struct amdgpu_task_info reset_task_info; - struct timespec64 reset_time; - bool reset_vram_lost; -}; -#endif - static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) { return container_of(ddev, struct amdgpu_device, ddev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b5b879bcc5c9..9706f608723a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -32,8 +32,6 @@ #include <linux/slab.h> #include <linux/iommu.h> #include <linux/pci.h> -#include <linux/devcoredump.h> -#include <generated/utsrelease.h> #include <linux/pci-p2pdma.h> #include <linux/apple-gmux.h> @@ -4799,82 +4797,6 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) return 0; } -#ifndef CONFIG_DEV_COREDUMP -static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ -} -#else -static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, - size_t count, void *data, size_t datalen) -{ - struct drm_printer p; - struct amdgpu_coredump_info *coredump = data; - struct drm_print_iterator iter; - int i; - - iter.data = buffer; - iter.offset = 0; - iter.start = offset; - iter.remain = count; - - p = drm_coredump_printer(&iter); - - drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); - drm_printf(&p, "module: " KBUILD_MODNAME "\n"); - drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec); - if (coredump->reset_task_info.pid) - drm_printf(&p, 
"process_name: %s PID: %d\n", - coredump->reset_task_info.process_name, - coredump->reset_task_info.pid); - - if (coredump->reset_vram_lost) - drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i = 0; i < coredump->adev->num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_dump_reg_list[i], - coredump->adev->reset_dump_reg_value[i]); - } - - return count - iter.remain; -} - -static void amdgpu_devcoredump_free(void *data) -{ - kfree(data); -} - -static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ - struct amdgpu_coredump_info *coredump; - struct drm_device *dev = adev_to_drm(adev); - - coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); - - if (!coredump) { - DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); - return; - } - - coredump->reset_vram_lost = vram_lost; - - if (reset_context->job && reset_context->job->vm) - coredump->reset_task_info = reset_context->job->vm->task_info; - - coredump->adev = adev; - - ktime_get_ts64(&coredump->reset_time); - - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, - amdgpu_devcoredump_read, amdgpu_devcoredump_free); -} -#endif - int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 5fed06ffcc6b..46c8d6ce349c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -21,6 +21,9 @@ * */ +#include <linux/devcoredump.h> +#include <generated/utsrelease.h> + #include "amdgpu_reset.h" #include "aldebaran.h" #include "sienna_cichlid.h" @@ -167,5 +170,78 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain) up_write(&reset_domain->sem); } +#ifndef CONFIG_DEV_COREDUMP +void 
amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ +} +#else +static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, + size_t count, void *data, size_t datalen) +{ + struct drm_printer p; + struct amdgpu_coredump_info *coredump = data; + struct drm_print_iterator iter; + int i; + + iter.data = buffer; + iter.offset = 0; + iter.start = offset; + iter.remain = count; + + p = drm_coredump_printer(&iter); + + drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); + drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec); + if (coredump->reset_task_info.pid) + drm_printf(&p, "process_name: %s PID: %d\n", + coredump->reset_task_info.process_name, + coredump->reset_task_info.pid); + + if (coredump->reset_vram_lost) + drm_printf(&p, "VRAM is lost due to GPU reset!\n"); + if (coredump->adev->num_regs) { + drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); + + for (i = 0; i < coredump->adev->num_regs; i++) + drm_printf(&p, "0x%08x: 0x%08x\n", + coredump->adev->reset_dump_reg_list[i], + coredump->adev->reset_dump_reg_value[i]); + } + + return count - iter.remain; +} +static void amdgpu_devcoredump_free(void *data) +{ + kfree(data); +} +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_coredump_info *coredump; + struct drm_device *dev = adev_to_drm(adev); + + coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); + + if (!coredump) { + DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); + return; + } + + coredump->reset_vram_lost = vram_lost; + + if (reset_context->job && reset_context->job->vm) + coredump->reset_task_info = reset_context->job->vm->task_info; + + coredump->adev = adev; + + ktime_get_ts64(&coredump->reset_time); + + dev_coredumpm(dev->dev, THIS_MODULE, 
coredump, 0, GFP_NOWAIT, + amdgpu_devcoredump_read, amdgpu_devcoredump_free); +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index f4a501ff87d9..362954521721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -87,6 +87,15 @@ struct amdgpu_reset_domain { atomic_t reset_res; }; +#ifdef CONFIG_DEV_COREDUMP +struct amdgpu_coredump_info { + struct amdgpu_device *adev; + struct amdgpu_task_info reset_task_info; + struct timespec64 reset_time; + bool reset_vram_lost; +}; +#endif + int amdgpu_reset_init(struct amdgpu_device *adev); int amdgpu_reset_fini(struct amdgpu_device *adev); @@ -126,4 +135,6 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context); #endif
Given that we use coredump just for device resets, move its functions and structs to a more semantic file, the amdgpu_reset.{c, h}. Signed-off-by: André Almeida <andrealmeid@igalia.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 78 ---------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 76 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 11 +++ 4 files changed, 87 insertions(+), 87 deletions(-)