@@ -1105,6 +1105,7 @@ struct amdgpu_device {
/* Debug */
bool debug_vm;
bool debug_largebar;
+ bool debug_disable_soft_recovery;
};
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
@@ -124,6 +124,7 @@
enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_VM = BIT(0),
AMDGPU_DEBUG_LARGEBAR = BIT(1),
+ AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2), /* 0x4: skip soft recovery, always full reset */
};
unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -935,6 +936,7 @@ MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics
* - 0x2: Enable simulating large-bar capability on non-large bar system. This
* limits the VRAM size reported to ROCm applications to the visible
* size, usually 256MB.
+ * - 0x4: Disable GPU soft recovery, always do a full reset.
*/
MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
@@ -2054,6 +2056,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
adev->debug_largebar = true;
}
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY) {
+ pr_info("debug: soft reset for GPU recovery disabled\n");
+ adev->debug_disable_soft_recovery = true;
+ }
}
static int amdgpu_pci_probe(struct pci_dev *pdev,
@@ -434,8 +434,12 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence)
{
unsigned long flags;
+ ktime_t deadline;
- ktime_t deadline = ktime_add_us(ktime_get(), 10000);
+ if (unlikely(ring->adev->debug_disable_soft_recovery))
+ return false;
+
+ deadline = ktime_add_us(ktime_get(), 10000);
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
return false;