@@ -1101,6 +1101,10 @@ struct amdgpu_device {
bool dc_enabled;
/* Mask of active clusters */
uint32_t aid_mask;
+
+ /* Debug */
+ bool debug_vm;
+ bool debug_largebar;
};
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
@@ -1191,7 +1191,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
}
- if (amdgpu_vm_debug) {
+ if (adev->debug_vm) {
/* Invalidate all BOs to test for userspace bugs */
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
@@ -118,6 +118,14 @@
#define KMS_DRIVER_MINOR 54
#define KMS_DRIVER_PATCHLEVEL 0
+/*
+ * amdgpu.debug module options. Are all disabled by default
+ */
+enum AMDGPU_DEBUG_MASK {
+ AMDGPU_DEBUG_VM = BIT(0),
+ AMDGPU_DEBUG_LARGEBAR = BIT(1),
+};
+
unsigned int amdgpu_vram_limit = UINT_MAX;
int amdgpu_vis_vram_limit;
int amdgpu_gart_size = -1; /* auto */
@@ -140,7 +148,6 @@ int amdgpu_vm_size = -1;
int amdgpu_vm_fragment_size = -1;
int amdgpu_vm_block_size = -1;
int amdgpu_vm_fault_stop;
-int amdgpu_vm_debug;
int amdgpu_vm_update_mode = -1;
int amdgpu_exp_hw_support;
int amdgpu_dc = -1;
@@ -194,6 +201,7 @@ int amdgpu_use_xgmi_p2p = 1;
int amdgpu_vcnfw_log;
int amdgpu_sg_display = -1; /* auto */
int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
+uint amdgpu_debug_mask;
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
@@ -405,13 +413,6 @@ module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = print first, 2 = always)");
module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
-/**
- * DOC: vm_debug (int)
- * Debug VM handling (0 = disabled, 1 = enabled). The default is 0 (Disabled).
- */
-MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
-module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
-
/**
* DOC: vm_update_mode (int)
* Override VM update mode. VM updated by using CPU (0 = never, 1 = Graphics only, 2 = Compute only, 3 = Both). The default
@@ -743,18 +744,6 @@ module_param(send_sigterm, int, 0444);
MODULE_PARM_DESC(send_sigterm,
"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
-/**
- * DOC: debug_largebar (int)
- * Set debug_largebar as 1 to enable simulating large-bar capability on non-large bar
- * system. This limits the VRAM size reported to ROCm applications to the visible
- * size, usually 256MB.
- * Default value is 0, diabled.
- */
-int debug_largebar;
-module_param(debug_largebar, int, 0444);
-MODULE_PARM_DESC(debug_largebar,
- "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
-
/**
* DOC: halt_if_hws_hang (int)
* Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
@@ -938,6 +927,18 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
module_param(enforce_isolation, bool, 0444);
MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
+/**
+ * DOC: debug_mask (uint)
+ * Debug options for amdgpu, work as a binary mask with the following options:
+ *
+ * - 0x1: Debug VM handling
+ * - 0x2: Enable simulating large-bar capability on non-large bar system. This
+ * limits the VRAM size reported to ROCm applications to the visible
+ * size, usually 256MB.
+ */
+MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
+module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
+
/* These devices are not supported by amdgpu.
* They are supported by the mach64, r128, radeon drivers
*/
@@ -2042,6 +2043,19 @@ static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
}
}
+static void amdgpu_init_debug_options(struct amdgpu_device *adev)
+{
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_VM) {
+ pr_info("debug: VM handling debug enabled\n");
+ adev->debug_vm = true;
+ }
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_LARGEBAR) {
+ pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
+ adev->debug_largebar = true;
+ }
+}
+
static int amdgpu_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
@@ -2220,6 +2234,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
amdgpu_get_secondary_funcs(adev);
}
+ amdgpu_init_debug_options(adev);
+
return 0;
err_pci:
@@ -794,7 +794,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
default:
break;
}
- if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
+ if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm)
amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
args->operation);
@@ -1407,7 +1407,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
spin_unlock(&vm->status_lock);
/* Try to reserve the BO to avoid clearing its ptes */
- if (!amdgpu_vm_debug && dma_resv_trylock(resv))
+ if (!adev->debug_vm && dma_resv_trylock(resv))
clear = false;
/* Somebody else is using the BO right now */
else
@@ -1021,7 +1021,7 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
bool kfd_dev_is_large_bar(struct kfd_node *dev)
{
- if (debug_largebar) {
+ if (dev->kfd->adev->debug_largebar) {
pr_debug("Simulate large-bar allocation on non large-bar machine\n");
return true;
}
@@ -2115,7 +2115,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
sub_type_hdr->length);
- if (debug_largebar)
+ if (kdev->adev->debug_largebar)
local_mem_info.local_mem_size_private = 0;
if (local_mem_info.local_mem_size_private == 0)