Message ID | 20230327184339.125016-12-jonathan.kim@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/34] drm/amdkfd: add debug and runtime enable interface | expand |
[AMD Official Use Only - General] 3 tiny grammar/spelling things inline (not critical) Kent > -----Original Message----- > From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of > Jonathan Kim > Sent: Monday, March 27, 2023 2:43 PM > To: amd-gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org > Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Kim, Jonathan > <Jonathan.Kim@amd.com> > Subject: [PATCH 12/34] drm/amdgpu: add configurable grace period for unmap > queues > > The HWS schedule allows a grace period for wave completion prior to > preemption for better performance by avoiding CWSR on waves that can > potentially complete quickly. The debugger, on the other hand, will > want to inspect wave status immediately after it actively triggers > preemption (a suspend function to be provided). > > To minimize latency between preemption and debugger wave inspection, allow > immediate preemption by setting the grace period to 0. > > Note that setting the preemption grace period to 0 will result in an > infinite grace period being set due to a CP FW bug so set it to 1 for now. 
> > v2: clarify purpose in the description of this patch > > Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> > Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> > --- > .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 2 + > .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 + > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 43 ++++++++++++ > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h | 6 ++ > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 2 + > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 43 ++++++++++++ > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 9 ++- > .../drm/amd/amdkfd/kfd_device_queue_manager.c | 62 +++++++++++++----- > .../drm/amd/amdkfd/kfd_device_queue_manager.h | 2 + > .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 32 +++++++++ > .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 39 +++++++++++ > .../gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h | 65 +++++++++++++++++++ > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 ++ > 13 files changed, 291 insertions(+), 21 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > index a6f98141c29c..b811a0985050 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > @@ -82,5 +82,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { > .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, > .enable_debug_trap = kgd_aldebaran_enable_debug_trap, > .disable_debug_trap = kgd_aldebaran_disable_debug_trap, > + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, > + .build_grace_period_packet_info = > kgd_gfx_v9_build_grace_period_packet_info, > .program_trap_handler_settings = > kgd_gfx_v9_program_trap_handler_settings, > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c > index d2918e5c0dea..a62bd0068515 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c > +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c > @@ -410,6 +410,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { > > kgd_gfx_v9_set_vm_context_page_table_base, > .enable_debug_trap = kgd_arcturus_enable_debug_trap, > .disable_debug_trap = kgd_arcturus_disable_debug_trap, > + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, > + .build_grace_period_packet_info = > kgd_gfx_v9_build_grace_period_packet_info, > .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, > .program_trap_handler_settings = > kgd_gfx_v9_program_trap_handler_settings > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > index 969015281510..605387e55d33 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > @@ -802,6 +802,47 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct > amdgpu_device *adev, > return 0; > } > > +/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 > values > + * The values read are: > + * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads. > + * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics > Offloads. > + * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads. > + * gws_wait_time -- Wait Count for Global Wave Syncs. > + * que_sleep_wait_time -- Wait Count for Dequeue Retry. > + * sch_wave_wait_time -- Wait Count for Scheduling Wave Message. > + * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. > + * deq_retry_wait_time -- Wait Count for Global Wave Syncs. 
> + */ > +void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, > + uint32_t *wait_times) > + > +{ > + *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, > mmCP_IQ_WAIT_TIME2)); > +} > + > +void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device > *adev, > + uint32_t wait_times, > + uint32_t grace_period, > + uint32_t *reg_offset, > + uint32_t *reg_data) > +{ > + *reg_data = wait_times; > + > + /* > + * The CP cannont handle a 0 grace period input and will result in cannont -> cannot > + * an infinite grace period being set so set to 1 to prevent this. Maybe throw in a comma after "being set" for clarity > + */ > + if (grace_period == 0) > + grace_period = 1; > + > + *reg_data = REG_SET_FIELD(*reg_data, > + CP_IQ_WAIT_TIME2, > + SCH_WAVE, > + grace_period); > + > + *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); > +} > + > static void program_trap_handler_settings(struct amdgpu_device *adev, > uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) > { > @@ -846,5 +887,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { > .set_vm_context_page_table_base = > set_vm_context_page_table_base, > .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, > .disable_debug_trap = kgd_gfx_v10_disable_debug_trap, > + .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, > + .build_grace_period_packet_info = > kgd_gfx_v10_build_grace_period_packet_info, > .program_trap_handler_settings = program_trap_handler_settings, > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h > index 370d6c312981..0abc1e805180 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h > @@ -26,3 +26,9 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct > amdgpu_device *adev, > uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev, > bool keep_trap_enabled, > uint32_t vmid); > +void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device 
*adev, uint32_t > *wait_times); > +void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device > *adev, > + uint32_t wait_times, > + uint32_t grace_period, > + uint32_t *reg_offset, > + uint32_t *reg_data); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c > index 73e3b9ae1fb0..c57f2a6b6e23 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c > @@ -670,6 +670,8 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { > .get_atc_vmid_pasid_mapping_info = > get_atc_vmid_pasid_mapping_info_v10_3, > .set_vm_context_page_table_base = > set_vm_context_page_table_base_v10_3, > .program_trap_handler_settings = > program_trap_handler_settings_v10_3, > + .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, > + .build_grace_period_packet_info = > kgd_gfx_v10_build_grace_period_packet_info, > .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, > .disable_debug_trap = kgd_gfx_v10_disable_debug_trap > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > index e0bd61e16847..f231903bfec3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > @@ -736,6 +736,24 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct > amdgpu_device *adev, > return 0; > } > > +/* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 > values > + * The values read are: > + * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads. > + * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics > Offloads. > + * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads. > + * gws_wait_time -- Wait Count for Global Wave Syncs. > + * que_sleep_wait_time -- Wait Count for Dequeue Retry. > + * sch_wave_wait_time -- Wait Count for Scheduling Wave Message. 
> + * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. > + * deq_retry_wait_time -- Wait Count for Global Wave Syncs. > + */ > +void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, > + uint32_t *wait_times) > + > +{ > + *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, > mmCP_IQ_WAIT_TIME2)); > +} > + > void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device > *adev, > uint32_t vmid, uint64_t page_table_base) > { > @@ -920,6 +938,29 @@ void kgd_gfx_v9_get_cu_occupancy(struct > amdgpu_device *adev, int pasid, > adev->gfx.cu_info.max_waves_per_simd; > } > > +void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device > *adev, > + uint32_t wait_times, > + uint32_t grace_period, > + uint32_t *reg_offset, > + uint32_t *reg_data) > +{ > + *reg_data = wait_times; > + > + /* > + * The CP cannont handle a 0 grace period input and will result in cannont again Kent > + * an infinite grace period being set so set to 1 to prevent this. > + */ > + if (grace_period == 0) > + grace_period = 1; > + > + *reg_data = REG_SET_FIELD(*reg_data, > + CP_IQ_WAIT_TIME2, > + SCH_WAVE, > + grace_period); > + > + *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); > +} > + > void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, > uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) > { > @@ -963,6 +1004,8 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { > .set_vm_context_page_table_base = > kgd_gfx_v9_set_vm_context_page_table_base, > .enable_debug_trap = kgd_gfx_v9_enable_debug_trap, > .disable_debug_trap = kgd_gfx_v9_disable_debug_trap, > + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, > + .build_grace_period_packet_info = > kgd_gfx_v9_build_grace_period_packet_info, > .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, > .program_trap_handler_settings = > kgd_gfx_v9_program_trap_handler_settings, > }; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h > index 
d39256162616..c0866497cb5c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h > @@ -20,8 +20,6 @@ > * OTHER DEALINGS IN THE SOFTWARE. > */ > > - > - > void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, > uint32_t vmid, > uint32_t sh_mem_config, > uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, > @@ -51,7 +49,6 @@ int kgd_gfx_v9_wave_control_execute(struct > amdgpu_device *adev, > uint32_t sq_cmd); > bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device > *adev, > uint8_t vmid, uint16_t *p_pasid); > - > void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device > *adev, > uint32_t vmid, uint64_t page_table_base); > void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, > @@ -67,3 +64,9 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct > amdgpu_device *adev, > uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev, > bool keep_trap_enabled, > uint32_t vmid); > +void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t > *wait_times); > +void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device > *adev, > + uint32_t wait_times, > + uint32_t grace_period, > + uint32_t *reg_offset, > + uint32_t *reg_data); > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > index bf3fe7db76f3..807cad60d21e 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > @@ -46,10 +46,13 @@ static int set_pasid_vmid_mapping(struct > device_queue_manager *dqm, > > static int execute_queues_cpsch(struct device_queue_manager *dqm, > enum kfd_unmap_queues_filter filter, > - uint32_t filter_param); > + uint32_t filter_param, > + uint32_t grace_period); > static int unmap_queues_cpsch(struct device_queue_manager *dqm, > enum kfd_unmap_queues_filter filter, > - uint32_t filter_param, 
bool reset); > + uint32_t filter_param, > + uint32_t grace_period, > + bool reset); > > static int map_queues_cpsch(struct device_queue_manager *dqm); > > @@ -839,7 +842,7 @@ static int update_queue(struct device_queue_manager > *dqm, struct queue *q, > if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { > if (!dqm->dev->shared_resources.enable_mes) > retval = unmap_queues_cpsch(dqm, > - > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); > + > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > USE_DEFAULT_GRACE_PERIOD, false); > else if (prev_active) > retval = remove_queue_mes(dqm, q, &pdd->qpd); > > @@ -1015,7 +1018,8 @@ static int evict_process_queues_cpsch(struct > device_queue_manager *dqm, > retval = execute_queues_cpsch(dqm, > qpd->is_debug ? > > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : > - > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); > + > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > + USE_DEFAULT_GRACE_PERIOD); > > out: > dqm_unlock(dqm); > @@ -1155,8 +1159,7 @@ static int restore_process_queues_cpsch(struct > device_queue_manager *dqm, > } > if (!dqm->dev->shared_resources.enable_mes) > retval = execute_queues_cpsch(dqm, > - > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); > - > + > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > USE_DEFAULT_GRACE_PERIOD); > eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; > atomic64_add(eviction_duration, &pdd->evict_duration_counter); > vm_not_acquired: > @@ -1492,6 +1495,9 @@ static int initialize_cpsch(struct > device_queue_manager *dqm) > > init_sdma_bitmaps(dqm); > > + if (dqm->dev->kfd2kgd->get_iq_wait_times) > + dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, > + &dqm->wait_times); > return 0; > } > > @@ -1531,7 +1537,7 @@ static int start_cpsch(struct device_queue_manager > *dqm) > dqm->is_resetting = false; > dqm->sched_running = true; > if (!dqm->dev->shared_resources.enable_mes) > - execute_queues_cpsch(dqm, > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); > + execute_queues_cpsch(dqm, > 
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > USE_DEFAULT_GRACE_PERIOD); > dqm_unlock(dqm); > > return 0; > @@ -1556,7 +1562,7 @@ static int stop_cpsch(struct device_queue_manager > *dqm) > > if (!dqm->is_hws_hang) { > if (!dqm->dev->shared_resources.enable_mes) > - unmap_queues_cpsch(dqm, > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); > + unmap_queues_cpsch(dqm, > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, > USE_DEFAULT_GRACE_PERIOD, false); > else > remove_all_queues_mes(dqm); > } > @@ -1598,7 +1604,8 @@ static int create_kernel_queue_cpsch(struct > device_queue_manager *dqm, > list_add(&kq->list, &qpd->priv_queue_list); > increment_queue_count(dqm, qpd, kq->queue); > qpd->is_debug = true; > - execute_queues_cpsch(dqm, > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); > + execute_queues_cpsch(dqm, > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > + USE_DEFAULT_GRACE_PERIOD); > dqm_unlock(dqm); > > return 0; > @@ -1612,7 +1619,8 @@ static void destroy_kernel_queue_cpsch(struct > device_queue_manager *dqm, > list_del(&kq->list); > decrement_queue_count(dqm, qpd, kq->queue); > qpd->is_debug = false; > - execute_queues_cpsch(dqm, > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); > + execute_queues_cpsch(dqm, > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, > + USE_DEFAULT_GRACE_PERIOD); > /* > * Unconditionally decrement this counter, regardless of the queue's > * type. 
> @@ -1689,7 +1697,7 @@ static int create_queue_cpsch(struct > device_queue_manager *dqm, struct queue *q, > > if (!dqm->dev->shared_resources.enable_mes) > retval = execute_queues_cpsch(dqm, > - > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); > + > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > USE_DEFAULT_GRACE_PERIOD); > else > retval = add_queue_mes(dqm, q, qpd); > if (retval) > @@ -1778,7 +1786,9 @@ static int map_queues_cpsch(struct > device_queue_manager *dqm) > /* dqm->lock mutex has to be locked before calling this function */ > static int unmap_queues_cpsch(struct device_queue_manager *dqm, > enum kfd_unmap_queues_filter filter, > - uint32_t filter_param, bool reset) > + uint32_t filter_param, > + uint32_t grace_period, > + bool reset) > { > int retval = 0; > struct mqd_manager *mqd_mgr; > @@ -1790,6 +1800,12 @@ static int unmap_queues_cpsch(struct > device_queue_manager *dqm, > if (!dqm->active_runlist) > return retval; > > + if (grace_period != USE_DEFAULT_GRACE_PERIOD) { > + retval = pm_update_grace_period(&dqm->packet_mgr, > grace_period); > + if (retval) > + return retval; > + } > + > retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, > filter_param, reset); > if (retval) > return retval; > @@ -1822,6 +1838,13 @@ static int unmap_queues_cpsch(struct > device_queue_manager *dqm, > return -ETIME; > } > > + /* We need to reset the grace period value for this device */ > + if (grace_period != USE_DEFAULT_GRACE_PERIOD) { > + if (pm_update_grace_period(&dqm->packet_mgr, > + USE_DEFAULT_GRACE_PERIOD)) > + pr_err("Failed to reset grace period\n"); > + } > + > pm_release_ib(&dqm->packet_mgr); > dqm->active_runlist = false; > > @@ -1837,7 +1860,7 @@ static int reset_queues_cpsch(struct > device_queue_manager *dqm, > dqm_lock(dqm); > > retval = unmap_queues_cpsch(dqm, > KFD_UNMAP_QUEUES_FILTER_BY_PASID, > - pasid, true); > + pasid, USE_DEFAULT_GRACE_PERIOD, true); > > dqm_unlock(dqm); > return retval; > @@ -1846,13 +1869,14 @@ static int 
reset_queues_cpsch(struct > device_queue_manager *dqm, > /* dqm->lock mutex has to be locked before calling this function */ > static int execute_queues_cpsch(struct device_queue_manager *dqm, > enum kfd_unmap_queues_filter filter, > - uint32_t filter_param) > + uint32_t filter_param, > + uint32_t grace_period) > { > int retval; > > if (dqm->is_hws_hang) > return -EIO; > - retval = unmap_queues_cpsch(dqm, filter, filter_param, false); > + retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, > false); > if (retval) > return retval; > > @@ -1910,7 +1934,8 @@ static int destroy_queue_cpsch(struct > device_queue_manager *dqm, > if (!dqm->dev->shared_resources.enable_mes) { > decrement_queue_count(dqm, qpd, q); > retval = execute_queues_cpsch(dqm, > - > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); > + > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > + > USE_DEFAULT_GRACE_PERIOD); > if (retval == -ETIME) > qpd->reset_wavefronts = true; > } else { > @@ -2195,7 +2220,7 @@ static int process_termination_cpsch(struct > device_queue_manager *dqm, > } > > if (!dqm->dev->shared_resources.enable_mes) > - retval = execute_queues_cpsch(dqm, filter, 0); > + retval = execute_queues_cpsch(dqm, filter, 0, > USE_DEFAULT_GRACE_PERIOD); > > if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { > pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm- > >dev); > @@ -2540,7 +2565,8 @@ int dqm_debugfs_hang_hws(struct > device_queue_manager *dqm) > return r; > } > dqm->active_runlist = true; > - r = execute_queues_cpsch(dqm, > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); > + r = execute_queues_cpsch(dqm, > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, > + 0, USE_DEFAULT_GRACE_PERIOD); > dqm_unlock(dqm); > > return r; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > index a537b9ef3e16..fb48b124161f 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > @@ -37,6 +37,7 @@ > > #define KFD_MES_PROCESS_QUANTUM 100000 > #define KFD_MES_GANG_QUANTUM 10000 > +#define USE_DEFAULT_GRACE_PERIOD 0xffffffff > > struct device_process_node { > struct qcm_process_device *qpd; > @@ -256,6 +257,7 @@ struct device_queue_manager { > struct work_struct hw_exception_work; > struct kfd_mem_obj hiq_sdma_mqd; > bool sched_running; > + uint32_t wait_times; > }; > > void device_queue_manager_init_cik( > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > index ed02b6d8bf63..c57f9a46dfcc 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > @@ -369,6 +369,38 @@ int pm_send_query_status(struct packet_manager > *pm, uint64_t fence_address, > return retval; > } > > +int pm_update_grace_period(struct packet_manager *pm, uint32_t > grace_period) > +{ > + int retval = 0; > + uint32_t *buffer, size; > + > + size = pm->pmf->set_grace_period_size; > + > + mutex_lock(&pm->lock); > + > + if (size) { > + kq_acquire_packet_buffer(pm->priv_queue, > + size / sizeof(uint32_t), > + (unsigned int **)&buffer); > + > + if (!buffer) { > + pr_err("Failed to allocate buffer on kernel queue\n"); > + retval = -ENOMEM; > + goto out; > + } > + > + retval = pm->pmf->set_grace_period(pm, buffer, grace_period); > + if (!retval) > + kq_submit_packet(pm->priv_queue); > + else > + kq_rollback_packet(pm->priv_queue); > + } > + > +out: > + mutex_unlock(&pm->lock); > + return retval; > +} > + > int pm_send_unmap_queue(struct packet_manager *pm, > enum kfd_unmap_queues_filter filter, > uint32_t filter_param, bool reset) > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > index 18250845a989..f0cdc8695b8c 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > @@ -251,6 +251,41 @@ static int pm_map_queues_v9(struct packet_manager > *pm, uint32_t *buffer, > return 0; > } > > +static int pm_set_grace_period_v9(struct packet_manager *pm, > + uint32_t *buffer, > + uint32_t grace_period) > +{ > + struct pm4_mec_write_data_mmio *packet; > + uint32_t reg_offset = 0; > + uint32_t reg_data = 0; > + > + pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( > + pm->dqm->dev->adev, > + pm->dqm->wait_times, > + grace_period, > + ®_offset, > + ®_data); > + > + if (grace_period == USE_DEFAULT_GRACE_PERIOD) > + reg_data = pm->dqm->wait_times; > + > + packet = (struct pm4_mec_write_data_mmio *)buffer; > + memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio)); > + > + packet->header.u32All = pm_build_pm4_header(IT_WRITE_DATA, > + sizeof(struct > pm4_mec_write_data_mmio)); > + > + packet->bitfields2.dst_sel = > dst_sel___write_data__mem_mapped_register; > + packet->bitfields2.addr_incr = > + addr_incr___write_data__do_not_increment_address; > + > + packet->bitfields3.dst_mmreg_addr = reg_offset; > + > + packet->data = reg_data; > + > + return 0; > +} > + > static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, > enum kfd_unmap_queues_filter filter, > uint32_t filter_param, bool reset) > @@ -333,6 +368,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = > { > .set_resources = pm_set_resources_v9, > .map_queues = pm_map_queues_v9, > .unmap_queues = pm_unmap_queues_v9, > + .set_grace_period = pm_set_grace_period_v9, > .query_status = pm_query_status_v9, > .release_mem = NULL, > .map_process_size = sizeof(struct pm4_mes_map_process), > @@ -340,6 +376,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = > { > .set_resources_size = sizeof(struct pm4_mes_set_resources), > .map_queues_size = sizeof(struct pm4_mes_map_queues), > .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), > + .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), > 
.query_status_size = sizeof(struct pm4_mes_query_status), > .release_mem_size = 0, > }; > @@ -350,6 +387,7 @@ const struct packet_manager_funcs > kfd_aldebaran_pm_funcs = { > .set_resources = pm_set_resources_v9, > .map_queues = pm_map_queues_v9, > .unmap_queues = pm_unmap_queues_v9, > + .set_grace_period = pm_set_grace_period_v9, > .query_status = pm_query_status_v9, > .release_mem = NULL, > .map_process_size = sizeof(struct > pm4_mes_map_process_aldebaran), > @@ -357,6 +395,7 @@ const struct packet_manager_funcs > kfd_aldebaran_pm_funcs = { > .set_resources_size = sizeof(struct pm4_mes_set_resources), > .map_queues_size = sizeof(struct pm4_mes_map_queues), > .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), > + .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), > .query_status_size = sizeof(struct pm4_mes_query_status), > .release_mem_size = 0, > }; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h > b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h > index a666710ed403..795001c947e1 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h > @@ -583,6 +583,71 @@ struct pm4_mec_release_mem { > > #endif > > +#ifndef PM4_MEC_WRITE_DATA_DEFINED > +#define PM4_MEC_WRITE_DATA_DEFINED > + > +enum WRITE_DATA_dst_sel_enum { > + dst_sel___write_data__mem_mapped_register = 0, > + dst_sel___write_data__tc_l2 = 2, > + dst_sel___write_data__gds = 3, > + dst_sel___write_data__memory = 5, > + dst_sel___write_data__memory_mapped_adc_persistent_state = 6, > +}; > + > +enum WRITE_DATA_addr_incr_enum { > + addr_incr___write_data__increment_address = 0, > + addr_incr___write_data__do_not_increment_address = 1 > +}; > + > +enum WRITE_DATA_wr_confirm_enum { > + wr_confirm___write_data__do_not_wait_for_write_confirmation = 0, > + wr_confirm___write_data__wait_for_write_confirmation = 1 > +}; > + > +enum WRITE_DATA_cache_policy_enum { > + cache_policy___write_data__lru = 0, > + 
cache_policy___write_data__stream = 1 > +}; > + > + > +struct pm4_mec_write_data_mmio { > + union { > + union PM4_MES_TYPE_3_HEADER header; /*header */ > + unsigned int ordinal1; > + }; > + > + union { > + struct { > + unsigned int reserved1:8; > + unsigned int dst_sel:4; > + unsigned int reserved2:4; > + unsigned int addr_incr:1; > + unsigned int reserved3:2; > + unsigned int resume_vf:1; > + unsigned int wr_confirm:1; > + unsigned int reserved4:4; > + unsigned int cache_policy:2; > + unsigned int reserved5:5; > + } bitfields2; > + unsigned int ordinal2; > + }; > + > + union { > + struct { > + unsigned int dst_mmreg_addr:18; > + unsigned int reserved6:14; > + } bitfields3; > + unsigned int ordinal3; > + }; > + > + uint32_t reserved7; > + > + uint32_t data; > + > +}; > + > +#endif > + > enum { > CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014 > }; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index a34b000f1d25..c5f99a471211 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -1298,6 +1298,8 @@ struct packet_manager_funcs { > int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer, > enum kfd_unmap_queues_filter mode, > uint32_t filter_param, bool reset); > + int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer, > + uint32_t grace_period); > int (*query_status)(struct packet_manager *pm, uint32_t *buffer, > uint64_t fence_address, uint64_t fence_value); > int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); > @@ -1308,6 +1310,7 @@ struct packet_manager_funcs { > int set_resources_size; > int map_queues_size; > int unmap_queues_size; > + int set_grace_period_size; > int query_status_size; > int release_mem_size; > }; > @@ -1330,6 +1333,8 @@ int pm_send_unmap_queue(struct packet_manager > *pm, > > void pm_release_ib(struct packet_manager *pm); > > +int pm_update_grace_period(struct packet_manager *pm, uint32_t > grace_period); > + > /* Following 
PM funcs can be shared among VI and AI */ > unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); > > -- > 2.25.1
[Public] Thanks for the catch, Kent. I'll fix up the typos with a follow-on. Jon > -----Original Message----- > From: Russell, Kent <Kent.Russell@amd.com> > Sent: Tuesday, March 28, 2023 11:19 AM > To: Kim, Jonathan <Jonathan.Kim@amd.com>; amd-gfx@lists.freedesktop.org; > dri-devel@lists.freedesktop.org > Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Kim, Jonathan > <Jonathan.Kim@amd.com> > Subject: RE: [PATCH 12/34] drm/amdgpu: add configurable grace period for > unmap queues > > [AMD Official Use Only - General] > > 3 tiny grammar/spelling things inline (not critical) > > Kent > > > -----Original Message----- > > From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of > > Jonathan Kim > > Sent: Monday, March 27, 2023 2:43 PM > > To: amd-gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org > > Cc: Kuehling, Felix <Felix.Kuehling@amd.com>; Kim, Jonathan > > <Jonathan.Kim@amd.com> > > Subject: [PATCH 12/34] drm/amdgpu: add configurable grace period for > unmap > > queues > > > > The HWS schedule allows a grace period for wave completion prior to > > preemption for better performance by avoiding CWSR on waves that can > > potentially complete quickly. The debugger, on the other hand, will > > want to inspect wave status immediately after it actively triggers > > preemption (a suspend function to be provided). > > > > To minimize latency between preemption and debugger wave inspection, > allow > > immediate preemption by setting the grace period to 0. > > > > Note that setting the preemption grace period to 0 will result in an > > infinite grace period being set due to a CP FW bug so set it to 1 for now. 
> > > > v2: clarify purpose in the description of this patch > > > > Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> > > Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> > > --- > > .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 2 + > > .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 + > > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 43 ++++++++++++ > > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h | 6 ++ > > .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 2 + > > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 43 ++++++++++++ > > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 9 ++- > > .../drm/amd/amdkfd/kfd_device_queue_manager.c | 62 +++++++++++++----- > > .../drm/amd/amdkfd/kfd_device_queue_manager.h | 2 + > > .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 32 +++++++++ > > .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 39 +++++++++++ > > .../gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h | 65 > +++++++++++++++++++ > > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 ++ > > 13 files changed, 291 insertions(+), 21 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > > index a6f98141c29c..b811a0985050 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > > @@ -82,5 +82,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { > > .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, > > .enable_debug_trap = kgd_aldebaran_enable_debug_trap, > > .disable_debug_trap = kgd_aldebaran_disable_debug_trap, > > + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, > > + .build_grace_period_packet_info = > > kgd_gfx_v9_build_grace_period_packet_info, > > .program_trap_handler_settings = > > kgd_gfx_v9_program_trap_handler_settings, > > }; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c > > index d2918e5c0dea..a62bd0068515 100644 > > --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c > > @@ -410,6 +410,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { > > > > kgd_gfx_v9_set_vm_context_page_table_base, > > .enable_debug_trap = kgd_arcturus_enable_debug_trap, > > .disable_debug_trap = kgd_arcturus_disable_debug_trap, > > + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, > > + .build_grace_period_packet_info = > > kgd_gfx_v9_build_grace_period_packet_info, > > .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, > > .program_trap_handler_settings = > > kgd_gfx_v9_program_trap_handler_settings > > }; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > > index 969015281510..605387e55d33 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c > > @@ -802,6 +802,47 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct > > amdgpu_device *adev, > > return 0; > > } > > > > +/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 > > values > > + * The values read are: > > + * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads. > > + * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics > > Offloads. > > + * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads. > > + * gws_wait_time -- Wait Count for Global Wave Syncs. > > + * que_sleep_wait_time -- Wait Count for Dequeue Retry. > > + * sch_wave_wait_time -- Wait Count for Scheduling Wave Message. > > + * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. > > + * deq_retry_wait_time -- Wait Count for Global Wave Syncs. 
> > + */ > > +void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, > > + uint32_t *wait_times) > > + > > +{ > > + *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, > > mmCP_IQ_WAIT_TIME2)); > > +} > > + > > +void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device > > *adev, > > + uint32_t wait_times, > > + uint32_t grace_period, > > + uint32_t *reg_offset, > > + uint32_t *reg_data) > > +{ > > + *reg_data = wait_times; > > + > > + /* > > + * The CP cannont handle a 0 grace period input and will result in > > > cannont -> cannot > > > + * an infinite grace period being set so set to 1 to prevent this. > > > Maybe throw in a comma after "being set" for clarity > > > + */ > > + if (grace_period == 0) > > + grace_period = 1; > > + > > + *reg_data = REG_SET_FIELD(*reg_data, > > + CP_IQ_WAIT_TIME2, > > + SCH_WAVE, > > + grace_period); > > + > > + *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); > > +} > > + > > static void program_trap_handler_settings(struct amdgpu_device *adev, > > uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) > > { > > @@ -846,5 +887,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { > > .set_vm_context_page_table_base = > > set_vm_context_page_table_base, > > .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, > > .disable_debug_trap = kgd_gfx_v10_disable_debug_trap, > > + .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, > > + .build_grace_period_packet_info = > > kgd_gfx_v10_build_grace_period_packet_info, > > .program_trap_handler_settings = program_trap_handler_settings, > > }; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h > > index 370d6c312981..0abc1e805180 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h > > @@ -26,3 +26,9 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct > > amdgpu_device *adev, > > uint32_t kgd_gfx_v10_disable_debug_trap(struct 
amdgpu_device *adev, > > bool keep_trap_enabled, > > uint32_t vmid); > > +void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t > > *wait_times); > > +void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device > > *adev, > > + uint32_t wait_times, > > + uint32_t grace_period, > > + uint32_t *reg_offset, > > + uint32_t *reg_data); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c > > index 73e3b9ae1fb0..c57f2a6b6e23 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c > > @@ -670,6 +670,8 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { > > .get_atc_vmid_pasid_mapping_info = > > get_atc_vmid_pasid_mapping_info_v10_3, > > .set_vm_context_page_table_base = > > set_vm_context_page_table_base_v10_3, > > .program_trap_handler_settings = > > program_trap_handler_settings_v10_3, > > + .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, > > + .build_grace_period_packet_info = > > kgd_gfx_v10_build_grace_period_packet_info, > > .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, > > .disable_debug_trap = kgd_gfx_v10_disable_debug_trap > > }; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > > index e0bd61e16847..f231903bfec3 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c > > @@ -736,6 +736,24 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct > > amdgpu_device *adev, > > return 0; > > } > > > > +/* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 > > values > > + * The values read are: > > + * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads. > > + * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics > > Offloads. > > + * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads. 
> > + * gws_wait_time -- Wait Count for Global Wave Syncs. > > + * que_sleep_wait_time -- Wait Count for Dequeue Retry. > > + * sch_wave_wait_time -- Wait Count for Scheduling Wave Message. > > + * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. > > + * deq_retry_wait_time -- Wait Count for Global Wave Syncs. > > + */ > > +void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, > > + uint32_t *wait_times) > > + > > +{ > > + *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, > > mmCP_IQ_WAIT_TIME2)); > > +} > > + > > void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device > > *adev, > > uint32_t vmid, uint64_t page_table_base) > > { > > @@ -920,6 +938,29 @@ void kgd_gfx_v9_get_cu_occupancy(struct > > amdgpu_device *adev, int pasid, > > adev->gfx.cu_info.max_waves_per_simd; > > } > > > > +void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device > > *adev, > > + uint32_t wait_times, > > + uint32_t grace_period, > > + uint32_t *reg_offset, > > + uint32_t *reg_data) > > +{ > > + *reg_data = wait_times; > > + > > + /* > > + * The CP cannont handle a 0 grace period input and will result in > cannont again > > Kent > > + * an infinite grace period being set so set to 1 to prevent this. 
> > + */ > > + if (grace_period == 0) > > + grace_period = 1; > > + > > + *reg_data = REG_SET_FIELD(*reg_data, > > + CP_IQ_WAIT_TIME2, > > + SCH_WAVE, > > + grace_period); > > + > > + *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); > > +} > > + > > void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device > *adev, > > uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) > > { > > @@ -963,6 +1004,8 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { > > .set_vm_context_page_table_base = > > kgd_gfx_v9_set_vm_context_page_table_base, > > .enable_debug_trap = kgd_gfx_v9_enable_debug_trap, > > .disable_debug_trap = kgd_gfx_v9_disable_debug_trap, > > + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, > > + .build_grace_period_packet_info = > > kgd_gfx_v9_build_grace_period_packet_info, > > .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, > > .program_trap_handler_settings = > > kgd_gfx_v9_program_trap_handler_settings, > > }; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h > > index d39256162616..c0866497cb5c 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h > > @@ -20,8 +20,6 @@ > > * OTHER DEALINGS IN THE SOFTWARE. 
> > */ > > > > - > > - > > void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, > > uint32_t vmid, > > uint32_t sh_mem_config, > > uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, > > @@ -51,7 +49,6 @@ int kgd_gfx_v9_wave_control_execute(struct > > amdgpu_device *adev, > > uint32_t sq_cmd); > > bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device > > *adev, > > uint8_t vmid, uint16_t *p_pasid); > > - > > void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device > > *adev, > > uint32_t vmid, uint64_t page_table_base); > > void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, > > @@ -67,3 +64,9 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct > > amdgpu_device *adev, > > uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev, > > bool keep_trap_enabled, > > uint32_t vmid); > > +void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t > > *wait_times); > > +void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device > > *adev, > > + uint32_t wait_times, > > + uint32_t grace_period, > > + uint32_t *reg_offset, > > + uint32_t *reg_data); > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > > index bf3fe7db76f3..807cad60d21e 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > > @@ -46,10 +46,13 @@ static int set_pasid_vmid_mapping(struct > > device_queue_manager *dqm, > > > > static int execute_queues_cpsch(struct device_queue_manager *dqm, > > enum kfd_unmap_queues_filter filter, > > - uint32_t filter_param); > > + uint32_t filter_param, > > + uint32_t grace_period); > > static int unmap_queues_cpsch(struct device_queue_manager *dqm, > > enum kfd_unmap_queues_filter filter, > > - uint32_t filter_param, bool reset); > > + uint32_t filter_param, > > + uint32_t grace_period, > > + bool reset); > > > > static int 
map_queues_cpsch(struct device_queue_manager *dqm); > > > > @@ -839,7 +842,7 @@ static int update_queue(struct device_queue_manager > > *dqm, struct queue *q, > > if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { > > if (!dqm->dev->shared_resources.enable_mes) > > retval = unmap_queues_cpsch(dqm, > > - > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); > > + > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > > USE_DEFAULT_GRACE_PERIOD, false); > > else if (prev_active) > > retval = remove_queue_mes(dqm, q, &pdd->qpd); > > > > @@ -1015,7 +1018,8 @@ static int evict_process_queues_cpsch(struct > > device_queue_manager *dqm, > > retval = execute_queues_cpsch(dqm, > > qpd->is_debug ? > > > > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : > > - > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); > > + > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > > + USE_DEFAULT_GRACE_PERIOD); > > > > out: > > dqm_unlock(dqm); > > @@ -1155,8 +1159,7 @@ static int restore_process_queues_cpsch(struct > > device_queue_manager *dqm, > > } > > if (!dqm->dev->shared_resources.enable_mes) > > retval = execute_queues_cpsch(dqm, > > - > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); > > - > > + > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > > USE_DEFAULT_GRACE_PERIOD); > > eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; > > atomic64_add(eviction_duration, &pdd->evict_duration_counter); > > vm_not_acquired: > > @@ -1492,6 +1495,9 @@ static int initialize_cpsch(struct > > device_queue_manager *dqm) > > > > init_sdma_bitmaps(dqm); > > > > + if (dqm->dev->kfd2kgd->get_iq_wait_times) > > + dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, > > + &dqm->wait_times); > > return 0; > > } > > > > @@ -1531,7 +1537,7 @@ static int start_cpsch(struct device_queue_manager > > *dqm) > > dqm->is_resetting = false; > > dqm->sched_running = true; > > if (!dqm->dev->shared_resources.enable_mes) > > - execute_queues_cpsch(dqm, > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); > > + 
execute_queues_cpsch(dqm, > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > > USE_DEFAULT_GRACE_PERIOD); > > dqm_unlock(dqm); > > > > return 0; > > @@ -1556,7 +1562,7 @@ static int stop_cpsch(struct device_queue_manager > > *dqm) > > > > if (!dqm->is_hws_hang) { > > if (!dqm->dev->shared_resources.enable_mes) > > - unmap_queues_cpsch(dqm, > > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); > > + unmap_queues_cpsch(dqm, > > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, > > USE_DEFAULT_GRACE_PERIOD, false); > > else > > remove_all_queues_mes(dqm); > > } > > @@ -1598,7 +1604,8 @@ static int create_kernel_queue_cpsch(struct > > device_queue_manager *dqm, > > list_add(&kq->list, &qpd->priv_queue_list); > > increment_queue_count(dqm, qpd, kq->queue); > > qpd->is_debug = true; > > - execute_queues_cpsch(dqm, > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); > > + execute_queues_cpsch(dqm, > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > > + USE_DEFAULT_GRACE_PERIOD); > > dqm_unlock(dqm); > > > > return 0; > > @@ -1612,7 +1619,8 @@ static void destroy_kernel_queue_cpsch(struct > > device_queue_manager *dqm, > > list_del(&kq->list); > > decrement_queue_count(dqm, qpd, kq->queue); > > qpd->is_debug = false; > > - execute_queues_cpsch(dqm, > > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); > > + execute_queues_cpsch(dqm, > > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, > > + USE_DEFAULT_GRACE_PERIOD); > > /* > > * Unconditionally decrement this counter, regardless of the queue's > > * type. 
> > @@ -1689,7 +1697,7 @@ static int create_queue_cpsch(struct > > device_queue_manager *dqm, struct queue *q, > > > > if (!dqm->dev->shared_resources.enable_mes) > > retval = execute_queues_cpsch(dqm, > > - > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); > > + > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > > USE_DEFAULT_GRACE_PERIOD); > > else > > retval = add_queue_mes(dqm, q, qpd); > > if (retval) > > @@ -1778,7 +1786,9 @@ static int map_queues_cpsch(struct > > device_queue_manager *dqm) > > /* dqm->lock mutex has to be locked before calling this function */ > > static int unmap_queues_cpsch(struct device_queue_manager *dqm, > > enum kfd_unmap_queues_filter filter, > > - uint32_t filter_param, bool reset) > > + uint32_t filter_param, > > + uint32_t grace_period, > > + bool reset) > > { > > int retval = 0; > > struct mqd_manager *mqd_mgr; > > @@ -1790,6 +1800,12 @@ static int unmap_queues_cpsch(struct > > device_queue_manager *dqm, > > if (!dqm->active_runlist) > > return retval; > > > > + if (grace_period != USE_DEFAULT_GRACE_PERIOD) { > > + retval = pm_update_grace_period(&dqm->packet_mgr, > > grace_period); > > + if (retval) > > + return retval; > > + } > > + > > retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, > > filter_param, reset); > > if (retval) > > return retval; > > @@ -1822,6 +1838,13 @@ static int unmap_queues_cpsch(struct > > device_queue_manager *dqm, > > return -ETIME; > > } > > > > + /* We need to reset the grace period value for this device */ > > + if (grace_period != USE_DEFAULT_GRACE_PERIOD) { > > + if (pm_update_grace_period(&dqm->packet_mgr, > > + USE_DEFAULT_GRACE_PERIOD)) > > + pr_err("Failed to reset grace period\n"); > > + } > > + > > pm_release_ib(&dqm->packet_mgr); > > dqm->active_runlist = false; > > > > @@ -1837,7 +1860,7 @@ static int reset_queues_cpsch(struct > > device_queue_manager *dqm, > > dqm_lock(dqm); > > > > retval = unmap_queues_cpsch(dqm, > > KFD_UNMAP_QUEUES_FILTER_BY_PASID, > > - pasid, true); > > + pasid, 
USE_DEFAULT_GRACE_PERIOD, true); > > > > dqm_unlock(dqm); > > return retval; > > @@ -1846,13 +1869,14 @@ static int reset_queues_cpsch(struct > > device_queue_manager *dqm, > > /* dqm->lock mutex has to be locked before calling this function */ > > static int execute_queues_cpsch(struct device_queue_manager *dqm, > > enum kfd_unmap_queues_filter filter, > > - uint32_t filter_param) > > + uint32_t filter_param, > > + uint32_t grace_period) > > { > > int retval; > > > > if (dqm->is_hws_hang) > > return -EIO; > > - retval = unmap_queues_cpsch(dqm, filter, filter_param, false); > > + retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, > > false); > > if (retval) > > return retval; > > > > @@ -1910,7 +1934,8 @@ static int destroy_queue_cpsch(struct > > device_queue_manager *dqm, > > if (!dqm->dev->shared_resources.enable_mes) { > > decrement_queue_count(dqm, qpd, q); > > retval = execute_queues_cpsch(dqm, > > - > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); > > + > > KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, > > + > > USE_DEFAULT_GRACE_PERIOD); > > if (retval == -ETIME) > > qpd->reset_wavefronts = true; > > } else { > > @@ -2195,7 +2220,7 @@ static int process_termination_cpsch(struct > > device_queue_manager *dqm, > > } > > > > if (!dqm->dev->shared_resources.enable_mes) > > - retval = execute_queues_cpsch(dqm, filter, 0); > > + retval = execute_queues_cpsch(dqm, filter, 0, > > USE_DEFAULT_GRACE_PERIOD); > > > > if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { > > pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm- > > >dev); > > @@ -2540,7 +2565,8 @@ int dqm_debugfs_hang_hws(struct > > device_queue_manager *dqm) > > return r; > > } > > dqm->active_runlist = true; > > - r = execute_queues_cpsch(dqm, > > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); > > + r = execute_queues_cpsch(dqm, > > KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, > > + 0, USE_DEFAULT_GRACE_PERIOD); > > dqm_unlock(dqm); > > > > return r; > > diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > > b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > > index a537b9ef3e16..fb48b124161f 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h > > @@ -37,6 +37,7 @@ > > > > #define KFD_MES_PROCESS_QUANTUM 100000 > > #define KFD_MES_GANG_QUANTUM 10000 > > +#define USE_DEFAULT_GRACE_PERIOD 0xffffffff > > > > struct device_process_node { > > struct qcm_process_device *qpd; > > @@ -256,6 +257,7 @@ struct device_queue_manager { > > struct work_struct hw_exception_work; > > struct kfd_mem_obj hiq_sdma_mqd; > > bool sched_running; > > + uint32_t wait_times; > > }; > > > > void device_queue_manager_init_cik( > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > > b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > > index ed02b6d8bf63..c57f9a46dfcc 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c > > @@ -369,6 +369,38 @@ int pm_send_query_status(struct packet_manager > > *pm, uint64_t fence_address, > > return retval; > > } > > > > +int pm_update_grace_period(struct packet_manager *pm, uint32_t > > grace_period) > > +{ > > + int retval = 0; > > + uint32_t *buffer, size; > > + > > + size = pm->pmf->set_grace_period_size; > > + > > + mutex_lock(&pm->lock); > > + > > + if (size) { > > + kq_acquire_packet_buffer(pm->priv_queue, > > + size / sizeof(uint32_t), > > + (unsigned int **)&buffer); > > + > > + if (!buffer) { > > + pr_err("Failed to allocate buffer on kernel queue\n"); > > + retval = -ENOMEM; > > + goto out; > > + } > > + > > + retval = pm->pmf->set_grace_period(pm, buffer, grace_period); > > + if (!retval) > > + kq_submit_packet(pm->priv_queue); > > + else > > + kq_rollback_packet(pm->priv_queue); > > + } > > + > > +out: > > + mutex_unlock(&pm->lock); > > + return retval; > > +} > > + > > int pm_send_unmap_queue(struct packet_manager *pm, > 
> enum kfd_unmap_queues_filter filter, > > uint32_t filter_param, bool reset) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > > b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > > index 18250845a989..f0cdc8695b8c 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c > > @@ -251,6 +251,41 @@ static int pm_map_queues_v9(struct > packet_manager > > *pm, uint32_t *buffer, > > return 0; > > } > > > > +static int pm_set_grace_period_v9(struct packet_manager *pm, > > + uint32_t *buffer, > > + uint32_t grace_period) > > +{ > > + struct pm4_mec_write_data_mmio *packet; > > + uint32_t reg_offset = 0; > > + uint32_t reg_data = 0; > > + > > + pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( > > + pm->dqm->dev->adev, > > + pm->dqm->wait_times, > > + grace_period, > > + ®_offset, > > + ®_data); > > + > > + if (grace_period == USE_DEFAULT_GRACE_PERIOD) > > + reg_data = pm->dqm->wait_times; > > + > > + packet = (struct pm4_mec_write_data_mmio *)buffer; > > + memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio)); > > + > > + packet->header.u32All = pm_build_pm4_header(IT_WRITE_DATA, > > + sizeof(struct > > pm4_mec_write_data_mmio)); > > + > > + packet->bitfields2.dst_sel = > > dst_sel___write_data__mem_mapped_register; > > + packet->bitfields2.addr_incr = > > + addr_incr___write_data__do_not_increment_address; > > + > > + packet->bitfields3.dst_mmreg_addr = reg_offset; > > + > > + packet->data = reg_data; > > + > > + return 0; > > +} > > + > > static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t > *buffer, > > enum kfd_unmap_queues_filter filter, > > uint32_t filter_param, bool reset) > > @@ -333,6 +368,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs > = > > { > > .set_resources = pm_set_resources_v9, > > .map_queues = pm_map_queues_v9, > > .unmap_queues = pm_unmap_queues_v9, > > + .set_grace_period = pm_set_grace_period_v9, > > .query_status = 
pm_query_status_v9, > > .release_mem = NULL, > > .map_process_size = sizeof(struct pm4_mes_map_process), > > @@ -340,6 +376,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs > = > > { > > .set_resources_size = sizeof(struct pm4_mes_set_resources), > > .map_queues_size = sizeof(struct pm4_mes_map_queues), > > .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), > > + .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), > > .query_status_size = sizeof(struct pm4_mes_query_status), > > .release_mem_size = 0, > > }; > > @@ -350,6 +387,7 @@ const struct packet_manager_funcs > > kfd_aldebaran_pm_funcs = { > > .set_resources = pm_set_resources_v9, > > .map_queues = pm_map_queues_v9, > > .unmap_queues = pm_unmap_queues_v9, > > + .set_grace_period = pm_set_grace_period_v9, > > .query_status = pm_query_status_v9, > > .release_mem = NULL, > > .map_process_size = sizeof(struct > > pm4_mes_map_process_aldebaran), > > @@ -357,6 +395,7 @@ const struct packet_manager_funcs > > kfd_aldebaran_pm_funcs = { > > .set_resources_size = sizeof(struct pm4_mes_set_resources), > > .map_queues_size = sizeof(struct pm4_mes_map_queues), > > .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), > > + .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), > > .query_status_size = sizeof(struct pm4_mes_query_status), > > .release_mem_size = 0, > > }; > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h > > b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h > > index a666710ed403..795001c947e1 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h > > @@ -583,6 +583,71 @@ struct pm4_mec_release_mem { > > > > #endif > > > > +#ifndef PM4_MEC_WRITE_DATA_DEFINED > > +#define PM4_MEC_WRITE_DATA_DEFINED > > + > > +enum WRITE_DATA_dst_sel_enum { > > + dst_sel___write_data__mem_mapped_register = 0, > > + dst_sel___write_data__tc_l2 = 2, > > + dst_sel___write_data__gds = 3, > > + 
dst_sel___write_data__memory = 5, > > + dst_sel___write_data__memory_mapped_adc_persistent_state = 6, > > +}; > > + > > +enum WRITE_DATA_addr_incr_enum { > > + addr_incr___write_data__increment_address = 0, > > + addr_incr___write_data__do_not_increment_address = 1 > > +}; > > + > > +enum WRITE_DATA_wr_confirm_enum { > > + wr_confirm___write_data__do_not_wait_for_write_confirmation = 0, > > + wr_confirm___write_data__wait_for_write_confirmation = 1 > > +}; > > + > > +enum WRITE_DATA_cache_policy_enum { > > + cache_policy___write_data__lru = 0, > > + cache_policy___write_data__stream = 1 > > +}; > > + > > + > > +struct pm4_mec_write_data_mmio { > > + union { > > + union PM4_MES_TYPE_3_HEADER header; /*header */ > > + unsigned int ordinal1; > > + }; > > + > > + union { > > + struct { > > + unsigned int reserved1:8; > > + unsigned int dst_sel:4; > > + unsigned int reserved2:4; > > + unsigned int addr_incr:1; > > + unsigned int reserved3:2; > > + unsigned int resume_vf:1; > > + unsigned int wr_confirm:1; > > + unsigned int reserved4:4; > > + unsigned int cache_policy:2; > > + unsigned int reserved5:5; > > + } bitfields2; > > + unsigned int ordinal2; > > + }; > > + > > + union { > > + struct { > > + unsigned int dst_mmreg_addr:18; > > + unsigned int reserved6:14; > > + } bitfields3; > > + unsigned int ordinal3; > > + }; > > + > > + uint32_t reserved7; > > + > > + uint32_t data; > > + > > +}; > > + > > +#endif > > + > > enum { > > CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014 > > }; > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > > b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > > index a34b000f1d25..c5f99a471211 100644 > > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > > @@ -1298,6 +1298,8 @@ struct packet_manager_funcs { > > int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer, > > enum kfd_unmap_queues_filter mode, > > uint32_t filter_param, bool reset); > > + int (*set_grace_period)(struct packet_manager *pm, 
uint32_t *buffer, > > + uint32_t grace_period); > > int (*query_status)(struct packet_manager *pm, uint32_t *buffer, > > uint64_t fence_address, uint64_t fence_value); > > int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); > > @@ -1308,6 +1310,7 @@ struct packet_manager_funcs { > > int set_resources_size; > > int map_queues_size; > > int unmap_queues_size; > > + int set_grace_period_size; > > int query_status_size; > > int release_mem_size; > > }; > > @@ -1330,6 +1333,8 @@ int pm_send_unmap_queue(struct packet_manager > > *pm, > > > > void pm_release_ib(struct packet_manager *pm); > > > > +int pm_update_grace_period(struct packet_manager *pm, uint32_t > > grace_period); > > + > > /* Following PM funcs can be shared among VI and AI */ > > unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); > > > > -- > > 2.25.1
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index a6f98141c29c..b811a0985050 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -82,5 +82,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .enable_debug_trap = kgd_aldebaran_enable_debug_trap, .disable_debug_trap = kgd_aldebaran_disable_debug_trap, + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, + .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index d2918e5c0dea..a62bd0068515 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -410,6 +410,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { kgd_gfx_v9_set_vm_context_page_table_base, .enable_debug_trap = kgd_arcturus_enable_debug_trap, .disable_debug_trap = kgd_arcturus_disable_debug_trap, + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, + .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 969015281510..605387e55d33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -802,6 +802,47 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev, return 0; } +/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values + * The values read are: + * ib_offload_wait_time -- Wait Count for Indirect Buffer 
Offloads. + * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads. + * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads. + * gws_wait_time -- Wait Count for Global Wave Syncs. + * que_sleep_wait_time -- Wait Count for Dequeue Retry. + * sch_wave_wait_time -- Wait Count for Scheduling Wave Message. + * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. + * deq_retry_wait_time -- Wait Count for Global Wave Syncs. + */ +void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, + uint32_t *wait_times) + +{ + *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); +} + +void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, + uint32_t wait_times, + uint32_t grace_period, + uint32_t *reg_offset, + uint32_t *reg_data) +{ + *reg_data = wait_times; + + /* + * The CP cannot handle a 0 grace period input and will result in + * an infinite grace period being set, so set to 1 to prevent this. + */ + if (grace_period == 0) + grace_period = 1; + + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + SCH_WAVE, + grace_period); + + *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); +} + static void program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) { @@ -846,5 +887,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .set_vm_context_page_table_base = set_vm_context_page_table_base, .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, .disable_debug_trap = kgd_gfx_v10_disable_debug_trap, + .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, + .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, .program_trap_handler_settings = program_trap_handler_settings, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h index 370d6c312981..0abc1e805180 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h @@ -26,3 +26,9 @@ uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev, uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev, bool keep_trap_enabled, uint32_t vmid); +void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times); +void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, + uint32_t wait_times, + uint32_t grace_period, + uint32_t *reg_offset, + uint32_t *reg_data); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 73e3b9ae1fb0..c57f2a6b6e23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -670,6 +670,8 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3, .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, .program_trap_handler_settings = program_trap_handler_settings_v10_3, + .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, + .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, .disable_debug_trap = kgd_gfx_v10_disable_debug_trap }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index e0bd61e16847..f231903bfec3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -736,6 +736,24 @@ uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev, return 0; } +/* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values + * The values read are: + * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads. + * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads. + * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads. 
+ * gws_wait_time -- Wait Count for Global Wave Syncs. + * que_sleep_wait_time -- Wait Count for Dequeue Retry. + * sch_wave_wait_time -- Wait Count for Scheduling Wave Message. + * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. + * deq_retry_wait_time -- Wait Count for Global Wave Syncs. + */ +void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, + uint32_t *wait_times) + +{ + *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); +} + void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { @@ -920,6 +938,29 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, adev->gfx.cu_info.max_waves_per_simd; } +void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, + uint32_t wait_times, + uint32_t grace_period, + uint32_t *reg_offset, + uint32_t *reg_data) +{ + *reg_data = wait_times; + + /* + * The CP cannot handle a 0 grace period input and will result in + * an infinite grace period being set, so set to 1 to prevent this. 
+ */ + if (grace_period == 0) + grace_period = 1; + + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + SCH_WAVE, + grace_period); + + *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); +} + void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) { @@ -963,6 +1004,8 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, .enable_debug_trap = kgd_gfx_v9_enable_debug_trap, .disable_debug_trap = kgd_gfx_v9_disable_debug_trap, + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, + .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index d39256162616..c0866497cb5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -20,8 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ - - void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, @@ -51,7 +49,6 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t sq_cmd); bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); - void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, @@ -67,3 +64,9 @@ uint32_t kgd_gfx_v9_enable_debug_trap(struct amdgpu_device *adev, uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev, bool keep_trap_enabled, uint32_t vmid); +void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times); +void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, + uint32_t wait_times, + uint32_t grace_period, + uint32_t *reg_offset, + uint32_t *reg_data); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index bf3fe7db76f3..807cad60d21e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -46,10 +46,13 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, static int execute_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, - uint32_t filter_param); + uint32_t filter_param, + uint32_t grace_period); static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, - uint32_t filter_param, bool reset); + uint32_t filter_param, + uint32_t grace_period, + bool reset); static int map_queues_cpsch(struct device_queue_manager *dqm); @@ -839,7 +842,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q, if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { if 
(!dqm->dev->shared_resources.enable_mes) retval = unmap_queues_cpsch(dqm, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false); else if (prev_active) retval = remove_queue_mes(dqm, q, &pdd->qpd); @@ -1015,7 +1018,8 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, retval = execute_queues_cpsch(dqm, qpd->is_debug ? KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD); out: dqm_unlock(dqm); @@ -1155,8 +1159,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, } if (!dqm->dev->shared_resources.enable_mes) retval = execute_queues_cpsch(dqm, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); - + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; atomic64_add(eviction_duration, &pdd->evict_duration_counter); vm_not_acquired: @@ -1492,6 +1495,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm) init_sdma_bitmaps(dqm); + if (dqm->dev->kfd2kgd->get_iq_wait_times) + dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, + &dqm->wait_times); return 0; } @@ -1531,7 +1537,7 @@ static int start_cpsch(struct device_queue_manager *dqm) dqm->is_resetting = false; dqm->sched_running = true; if (!dqm->dev->shared_resources.enable_mes) - execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); dqm_unlock(dqm); return 0; @@ -1556,7 +1562,7 @@ static int stop_cpsch(struct device_queue_manager *dqm) if (!dqm->is_hws_hang) { if (!dqm->dev->shared_resources.enable_mes) - unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); + unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false); else 
remove_all_queues_mes(dqm); } @@ -1598,7 +1604,8 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, list_add(&kq->list, &qpd->priv_queue_list); increment_queue_count(dqm, qpd, kq->queue); qpd->is_debug = true; - execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD); dqm_unlock(dqm); return 0; @@ -1612,7 +1619,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, list_del(&kq->list); decrement_queue_count(dqm, qpd, kq->queue); qpd->is_debug = false; - execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD); /* * Unconditionally decrement this counter, regardless of the queue's * type. @@ -1689,7 +1697,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (!dqm->dev->shared_resources.enable_mes) retval = execute_queues_cpsch(dqm, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); else retval = add_queue_mes(dqm, q, qpd); if (retval) @@ -1778,7 +1786,9 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) /* dqm->lock mutex has to be locked before calling this function */ static int unmap_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, - uint32_t filter_param, bool reset) + uint32_t filter_param, + uint32_t grace_period, + bool reset) { int retval = 0; struct mqd_manager *mqd_mgr; @@ -1790,6 +1800,12 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, if (!dqm->active_runlist) return retval; + if (grace_period != USE_DEFAULT_GRACE_PERIOD) { + retval = pm_update_grace_period(&dqm->packet_mgr, grace_period); + if (retval) + return retval; + } + retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); if (retval) return retval; 
@@ -1822,6 +1838,13 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, return -ETIME; } + /* We need to reset the grace period value for this device */ + if (grace_period != USE_DEFAULT_GRACE_PERIOD) { + if (pm_update_grace_period(&dqm->packet_mgr, + USE_DEFAULT_GRACE_PERIOD)) + pr_err("Failed to reset grace period\n"); + } + pm_release_ib(&dqm->packet_mgr); dqm->active_runlist = false; @@ -1837,7 +1860,7 @@ static int reset_queues_cpsch(struct device_queue_manager *dqm, dqm_lock(dqm); retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, - pasid, true); + pasid, USE_DEFAULT_GRACE_PERIOD, true); dqm_unlock(dqm); return retval; @@ -1846,13 +1869,14 @@ static int reset_queues_cpsch(struct device_queue_manager *dqm, /* dqm->lock mutex has to be locked before calling this function */ static int execute_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, - uint32_t filter_param) + uint32_t filter_param, + uint32_t grace_period) { int retval; if (dqm->is_hws_hang) return -EIO; - retval = unmap_queues_cpsch(dqm, filter, filter_param, false); + retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false); if (retval) return retval; @@ -1910,7 +1934,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, if (!dqm->dev->shared_resources.enable_mes) { decrement_queue_count(dqm, qpd, q); retval = execute_queues_cpsch(dqm, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, + USE_DEFAULT_GRACE_PERIOD); if (retval == -ETIME) qpd->reset_wavefronts = true; } else { @@ -2195,7 +2220,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, } if (!dqm->dev->shared_resources.enable_mes) - retval = execute_queues_cpsch(dqm, filter, 0); + retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD); if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); @@ 
-2540,7 +2565,8 @@ int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) return r; } dqm->active_runlist = true; - r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, + 0, USE_DEFAULT_GRACE_PERIOD); dqm_unlock(dqm); return r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index a537b9ef3e16..fb48b124161f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -37,6 +37,7 @@ #define KFD_MES_PROCESS_QUANTUM 100000 #define KFD_MES_GANG_QUANTUM 10000 +#define USE_DEFAULT_GRACE_PERIOD 0xffffffff struct device_process_node { struct qcm_process_device *qpd; @@ -256,6 +257,7 @@ struct device_queue_manager { struct work_struct hw_exception_work; struct kfd_mem_obj hiq_sdma_mqd; bool sched_running; + uint32_t wait_times; }; void device_queue_manager_init_cik( diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index ed02b6d8bf63..c57f9a46dfcc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -369,6 +369,38 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, return retval; } +int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period) +{ + int retval = 0; + uint32_t *buffer, size; + + size = pm->pmf->set_grace_period_size; + + mutex_lock(&pm->lock); + + if (size) { + kq_acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), + (unsigned int **)&buffer); + + if (!buffer) { + pr_err("Failed to allocate buffer on kernel queue\n"); + retval = -ENOMEM; + goto out; + } + + retval = pm->pmf->set_grace_period(pm, buffer, grace_period); + if (!retval) + kq_submit_packet(pm->priv_queue); + else + kq_rollback_packet(pm->priv_queue); + } + +out: + mutex_unlock(&pm->lock); + 
return retval; +} + int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_unmap_queues_filter filter, uint32_t filter_param, bool reset) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 18250845a989..f0cdc8695b8c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -251,6 +251,41 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, return 0; } +static int pm_set_grace_period_v9(struct packet_manager *pm, + uint32_t *buffer, + uint32_t grace_period) +{ + struct pm4_mec_write_data_mmio *packet; + uint32_t reg_offset = 0; + uint32_t reg_data = 0; + + pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( + pm->dqm->dev->adev, + pm->dqm->wait_times, + grace_period, + ®_offset, + ®_data); + + if (grace_period == USE_DEFAULT_GRACE_PERIOD) + reg_data = pm->dqm->wait_times; + + packet = (struct pm4_mec_write_data_mmio *)buffer; + memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio)); + + packet->header.u32All = pm_build_pm4_header(IT_WRITE_DATA, + sizeof(struct pm4_mec_write_data_mmio)); + + packet->bitfields2.dst_sel = dst_sel___write_data__mem_mapped_register; + packet->bitfields2.addr_incr = + addr_incr___write_data__do_not_increment_address; + + packet->bitfields3.dst_mmreg_addr = reg_offset; + + packet->data = reg_data; + + return 0; +} + static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, enum kfd_unmap_queues_filter filter, uint32_t filter_param, bool reset) @@ -333,6 +368,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { .set_resources = pm_set_resources_v9, .map_queues = pm_map_queues_v9, .unmap_queues = pm_unmap_queues_v9, + .set_grace_period = pm_set_grace_period_v9, .query_status = pm_query_status_v9, .release_mem = NULL, .map_process_size = sizeof(struct pm4_mes_map_process), @@ -340,6 +376,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { 
.set_resources_size = sizeof(struct pm4_mes_set_resources), .map_queues_size = sizeof(struct pm4_mes_map_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = 0, }; @@ -350,6 +387,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { .set_resources = pm_set_resources_v9, .map_queues = pm_map_queues_v9, .unmap_queues = pm_unmap_queues_v9, + .set_grace_period = pm_set_grace_period_v9, .query_status = pm_query_status_v9, .release_mem = NULL, .map_process_size = sizeof(struct pm4_mes_map_process_aldebaran), @@ -357,6 +395,7 @@ const struct packet_manager_funcs kfd_aldebaran_pm_funcs = { .set_resources_size = sizeof(struct pm4_mes_set_resources), .map_queues_size = sizeof(struct pm4_mes_map_queues), .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio), .query_status_size = sizeof(struct pm4_mes_query_status), .release_mem_size = 0, }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h index a666710ed403..795001c947e1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -583,6 +583,71 @@ struct pm4_mec_release_mem { #endif +#ifndef PM4_MEC_WRITE_DATA_DEFINED +#define PM4_MEC_WRITE_DATA_DEFINED + +enum WRITE_DATA_dst_sel_enum { + dst_sel___write_data__mem_mapped_register = 0, + dst_sel___write_data__tc_l2 = 2, + dst_sel___write_data__gds = 3, + dst_sel___write_data__memory = 5, + dst_sel___write_data__memory_mapped_adc_persistent_state = 6, +}; + +enum WRITE_DATA_addr_incr_enum { + addr_incr___write_data__increment_address = 0, + addr_incr___write_data__do_not_increment_address = 1 +}; + +enum WRITE_DATA_wr_confirm_enum { + wr_confirm___write_data__do_not_wait_for_write_confirmation = 0, + 
wr_confirm___write_data__wait_for_write_confirmation = 1 +}; + +enum WRITE_DATA_cache_policy_enum { + cache_policy___write_data__lru = 0, + cache_policy___write_data__stream = 1 +}; + + +struct pm4_mec_write_data_mmio { + union { + union PM4_MES_TYPE_3_HEADER header; /*header */ + unsigned int ordinal1; + }; + + union { + struct { + unsigned int reserved1:8; + unsigned int dst_sel:4; + unsigned int reserved2:4; + unsigned int addr_incr:1; + unsigned int reserved3:2; + unsigned int resume_vf:1; + unsigned int wr_confirm:1; + unsigned int reserved4:4; + unsigned int cache_policy:2; + unsigned int reserved5:5; + } bitfields2; + unsigned int ordinal2; + }; + + union { + struct { + unsigned int dst_mmreg_addr:18; + unsigned int reserved6:14; + } bitfields3; + unsigned int ordinal3; + }; + + uint32_t reserved7; + + uint32_t data; + +}; + +#endif + enum { CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014 }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index a34b000f1d25..c5f99a471211 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1298,6 +1298,8 @@ struct packet_manager_funcs { int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer, enum kfd_unmap_queues_filter mode, uint32_t filter_param, bool reset); + int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer, + uint32_t grace_period); int (*query_status)(struct packet_manager *pm, uint32_t *buffer, uint64_t fence_address, uint64_t fence_value); int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); @@ -1308,6 +1310,7 @@ struct packet_manager_funcs { int set_resources_size; int map_queues_size; int unmap_queues_size; + int set_grace_period_size; int query_status_size; int release_mem_size; }; @@ -1330,6 +1333,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, void pm_release_ib(struct packet_manager *pm); +int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period); + /* Following PM funcs 
can be shared among VI and AI */ unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);