diff mbox

[v2,6/9] drm/amdkfd: Add bad opcode exception handling

Message ID 1431250112-28828-7-git-send-email-oded.gabbay@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Oded Gabbay May 10, 2015, 9:28 a.m. UTC
From: Alexey Skidanov <Alexey.Skidanov@amd.com>

Signed-off-by: Alexey Skidanov <Alexey.Skidanov@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c |  5 ++++-
 drivers/gpu/drm/amd/amdkfd/cik_int.h             |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_events.c          | 21 +++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_events.h          |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h            |  2 ++
 5 files changed, 29 insertions(+), 1 deletion(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 629510a..211fc48 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -36,7 +36,8 @@  static bool cik_event_interrupt_isr(struct kfd_dev *dev,
 	/* Do not process in ISR, just request it to be forwarded to WQ. */
 	return (pasid != 0) &&
 		(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
-		ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG);
+		ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
+		ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
 }
 
 static void cik_event_interrupt_wq(struct kfd_dev *dev,
@@ -55,6 +56,8 @@  static void cik_event_interrupt_wq(struct kfd_dev *dev,
 		kfd_signal_event_interrupt(pasid, 0, 0);
 	else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG)
 		kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8);
+	else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
+		kfd_signal_hw_exception_event(pasid);
 }
 
 const struct kfd_event_interrupt_class event_interrupt_class_cik = {
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h
index bbef9e2..79a16d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_int.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h
@@ -34,6 +34,7 @@  struct cik_ih_ring_entry {
 
 #define CIK_INTSRC_DEQUEUE_COMPLETE	0xC6
 #define CIK_INTSRC_CP_END_OF_PIPE	0xB5
+#define CIK_INTSRC_CP_BAD_OPCODE	0xB7
 #define CIK_INTSRC_SQ_INTERRUPT_MSG	0xEF
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 3a64de1..f0a3f4e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -925,3 +925,24 @@  void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
 	mutex_unlock(&p->event_mutex);
 	mutex_unlock(&p->mutex);
 }
+
+void kfd_signal_hw_exception_event(unsigned int pasid)
+{
+	/*
+	 * Signal the HW-exception events of the process that owns @pasid.
+	 * We run from a workqueue, not process context, so the process may be
+	 * exiting; the lookup returns it with p->mutex held (dropped below).
+	 */
+	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+
+	if (!p)
+		return; /* Presumably process exited. */
+
+	mutex_lock(&p->event_mutex);
+
+	/* Lookup events by type and signal them */
+	lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
+
+	mutex_unlock(&p->event_mutex);
+	mutex_unlock(&p->mutex);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index 691cf85..28f6838 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -74,6 +74,7 @@  struct kfd_event {
 
 /* Matching HSA_EVENTTYPE */
 #define KFD_EVENT_TYPE_SIGNAL 0
+#define KFD_EVENT_TYPE_HW_EXCEPTION 3
 #define KFD_EVENT_TYPE_DEBUG 5
 #define KFD_EVENT_TYPE_MEMORY 8
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 3594503..9383494 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -682,6 +682,7 @@  phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
 
 /* Events */
 extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
+extern const struct kfd_device_global_init_class device_global_init_class_cik;
 
 enum kfd_event_wait_result {
 	KFD_WAIT_COMPLETE,
@@ -701,6 +702,7 @@  void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 void kfd_signal_iommu_event(struct kfd_dev *dev,
 		unsigned int pasid, unsigned long address,
 		bool is_write_requested, bool is_execute_requested);
+void kfd_signal_hw_exception_event(unsigned int pasid);
 int kfd_set_event(struct kfd_process *p, uint32_t event_id);
 int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
 int kfd_event_create(struct file *devkfd, struct kfd_process *p,