@@ -6,6 +6,6 @@ radeon_kfd-y := kfd_module.o kfd_device.o kfd_chardev.o \
kfd_pasid.o kfd_topology.o kfd_process.o \
kfd_doorbell.o kfd_sched_cik_static.o kfd_registers.o \
kfd_vidmem.o kfd_interrupt.o kfd_aperture.o \
- kfd_queue.o kfd_hw_pointer_store.o
+ kfd_queue.o kfd_hw_pointer_store.o kfd_mqd_manager.o
obj-$(CONFIG_HSA_RADEON) += radeon_kfd.o
new file mode 100644
@@ -0,0 +1,251 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#ifndef CIK_MQDS_H_
+#define CIK_MQDS_H_
+
+#pragma pack(push, 4)
+
+/*
+ * Values for the CP_HPD_* registers, one 32-bit word per register.
+ * Packed on 4-byte boundaries by the surrounding #pragma pack(push, 4).
+ * NOTE(review): nothing in this change reads or writes this struct;
+ * presumably it is consumed by pipe setup code elsewhere - confirm.
+ */
+struct cik_hpd_registers {
+ u32 cp_hpd_roq_offsets;
+ u32 cp_hpd_eop_base_addr;
+ u32 cp_hpd_eop_base_addr_hi;
+ u32 cp_hpd_eop_vmid;
+ u32 cp_hpd_eop_control;
+};
+
+/*
+ * In-memory image of the CP_MQD_* / CP_HQD_* queue registers, one 32-bit
+ * word per register.  Embedded in struct cik_mqd as "queue_state"; the MQD
+ * manager fills it in (init_mqd/update_mqd) and load_mqd() copies most of
+ * these words into the like-named hardware registers.
+ * Field order is part of the layout - do not reorder.
+ */
+struct cik_hqd_registers {
+ u32 cp_mqd_base_addr;
+ u32 cp_mqd_base_addr_hi;
+ u32 cp_hqd_active;
+ u32 cp_hqd_vmid;
+ u32 cp_hqd_persistent_state;
+ u32 cp_hqd_pipe_priority;
+ u32 cp_hqd_queue_priority;
+ u32 cp_hqd_quantum;
+ u32 cp_hqd_pq_base;
+ u32 cp_hqd_pq_base_hi;
+ u32 cp_hqd_pq_rptr;
+ u32 cp_hqd_pq_rptr_report_addr;
+ u32 cp_hqd_pq_rptr_report_addr_hi;
+ u32 cp_hqd_pq_wptr_poll_addr;
+ u32 cp_hqd_pq_wptr_poll_addr_hi;
+ u32 cp_hqd_pq_doorbell_control;
+ u32 cp_hqd_pq_wptr;
+ u32 cp_hqd_pq_control;
+ u32 cp_hqd_ib_base_addr;
+ u32 cp_hqd_ib_base_addr_hi;
+ u32 cp_hqd_ib_rptr;
+ u32 cp_hqd_ib_control;
+ u32 cp_hqd_iq_timer;
+ u32 cp_hqd_iq_rptr;
+ u32 cp_hqd_dequeue_request;
+ u32 cp_hqd_dma_offload;
+ u32 cp_hqd_sema_cmd;
+ u32 cp_hqd_msg_type;
+ u32 cp_hqd_atomic0_preop_lo;
+ u32 cp_hqd_atomic0_preop_hi;
+ u32 cp_hqd_atomic1_preop_lo;
+ u32 cp_hqd_atomic1_preop_hi;
+ u32 cp_hqd_hq_scheduler0;
+ u32 cp_hqd_hq_scheduler1;
+ u32 cp_mqd_control;
+};
+
+/*
+ * Memory queue descriptor (MQD) layout used by the MQD manager: this is the
+ * struct that init_mqd(), update_mqd() and load_mqd() operate on.
+ *
+ * The words before "queue_state" line up one-for-one with the compute_*
+ * and vgt_csinvoc_count_* words of struct cik_compute_mqd; "queue_state"
+ * then carries the CP_MQD_* / CP_HQD_* register image.
+ * Field order is part of the layout - do not reorder.
+ */
+struct cik_mqd {
+ u32 header;
+ u32 dispatch_initiator;
+ u32 dimensions[3];
+ u32 start_idx[3];
+ u32 num_threads[3];
+ u32 pipeline_stat_enable;
+ u32 perf_counter_enable;
+ u32 pgm[2];
+ u32 tba[2];
+ u32 tma[2];
+ u32 pgm_rsrc[2];
+ u32 vmid;
+ u32 resource_limits;
+ u32 static_thread_mgmt01[2];
+ u32 tmp_ring_size;
+ u32 static_thread_mgmt23[2];
+ u32 restart[3];
+ u32 thread_trace_enable;
+ u32 reserved1;
+ u32 user_data[16];
+ u32 vgtcs_invoke_count[2];
+ /* Register image written to the hardware queue by load_mqd(). */
+ struct cik_hqd_registers queue_state;
+ u32 dequeue_cntr;
+ u32 interrupt_queue[64];
+};
+
+/*
+ * MQD layout for queues scheduled by the CP, with every word named
+ * individually.  Layout taken from Gfx72_cp_program_spec.pdf.
+ * Field order is part of the layout - do not reorder.
+ * NOTE(review): not referenced by the code in this change.
+ */
+struct cik_compute_mqd {
+ u32 header;
+ u32 compute_dispatch_initiator;
+ u32 compute_dim_x;
+ u32 compute_dim_y;
+ u32 compute_dim_z;
+ u32 compute_start_x;
+ u32 compute_start_y;
+ u32 compute_start_z;
+ u32 compute_num_thread_x;
+ u32 compute_num_thread_y;
+ u32 compute_num_thread_z;
+ u32 compute_pipelinestat_enable;
+ u32 compute_perfcount_enable;
+ u32 compute_pgm_lo;
+ u32 compute_pgm_hi;
+ u32 compute_tba_lo;
+ u32 compute_tba_hi;
+ u32 compute_tma_lo;
+ u32 compute_tma_hi;
+ u32 compute_pgm_rsrc1;
+ u32 compute_pgm_rsrc2;
+ u32 compute_vmid;
+ u32 compute_resource_limits;
+ u32 compute_static_thread_mgmt_se0;
+ u32 compute_static_thread_mgmt_se1;
+ u32 compute_tmpring_size;
+ u32 compute_static_thread_mgmt_se2;
+ u32 compute_static_thread_mgmt_se3;
+ u32 compute_restart_x;
+ u32 compute_restart_y;
+ u32 compute_restart_z;
+ u32 compute_thread_trace_enable;
+ u32 compute_misc_reserved;
+ u32 compute_user_data[16];
+ u32 vgt_csinvoc_count_lo;
+ u32 vgt_csinvoc_count_hi;
+ /*
+  * NOTE(review): the sibling layouts call this word cp_mqd_base_addr;
+  * the "51" suffix looks like a stray - confirm before renaming.
+  */
+ u32 cp_mqd_base_addr51;
+ u32 cp_mqd_base_addr_hi;
+ u32 cp_hqd_active;
+ u32 cp_hqd_vmid;
+ u32 cp_hqd_persistent_state;
+ u32 cp_hqd_pipe_priority;
+ u32 cp_hqd_queue_priority;
+ u32 cp_hqd_quantum;
+ u32 cp_hqd_pq_base;
+ u32 cp_hqd_pq_base_hi;
+ u32 cp_hqd_pq_rptr;
+ u32 cp_hqd_pq_rptr_report_addr;
+ u32 cp_hqd_pq_rptr_report_addr_hi;
+ u32 cp_hqd_pq_wptr_poll_addr;
+ u32 cp_hqd_pq_wptr_poll_addr_hi;
+ u32 cp_hqd_pq_doorbell_control;
+ u32 cp_hqd_pq_wptr;
+ u32 cp_hqd_pq_control;
+ u32 cp_hqd_ib_base_addr;
+ u32 cp_hqd_ib_base_addr_hi;
+ u32 cp_hqd_ib_rptr;
+ u32 cp_hqd_ib_control;
+ u32 cp_hqd_iq_timer;
+ u32 cp_hqd_iq_rptr;
+ u32 cp_hqd_dequeue_request;
+ u32 cp_hqd_dma_offload;
+ u32 cp_hqd_sema_cmd;
+ u32 cp_hqd_msg_type;
+ u32 cp_hqd_atomic0_preop_lo;
+ u32 cp_hqd_atomic0_preop_hi;
+ u32 cp_hqd_atomic1_preop_lo;
+ u32 cp_hqd_atomic1_preop_hi;
+ u32 cp_hqd_hq_scheduler0;
+ u32 cp_hqd_hq_scheduler1;
+ u32 cp_mqd_control;
+ u32 reserved1[10];
+ u32 cp_mqd_query_time_lo;
+ u32 cp_mqd_query_time_hi;
+ u32 reserved2[4];
+ u32 cp_mqd_connect_start_time_lo;
+ u32 cp_mqd_connect_start_time_hi;
+ u32 cp_mqd_connect_end_time_lo;
+ u32 cp_mqd_connect_end_time_hi;
+ u32 cp_mqd_connect_end_wf_count;
+ u32 cp_mqd_connect_end_pq_rptr;
+ u32 cp_mqd_connect_end_pq_wptr;
+ u32 cp_mqd_connect_end_ib_rptr;
+ u32 reserved3[18];
+};
+
+/*
+ * MQD layout shared by all interface queues (*IQ).
+ * Layout taken from Gfx73_CPC_Eng_Init_Prog.pdf.
+ * The first 128 words are reserved; the CP_MQD_* / CP_HQD_* words follow.
+ * Field order is part of the layout - do not reorder.
+ * NOTE(review): not referenced by the code in this change.
+ */
+struct cik_interface_mqd {
+ u32 reserved1[128];
+ u32 cp_mqd_base_addr;
+ u32 cp_mqd_base_addr_hi;
+ u32 cp_hqd_active;
+ u32 cp_hqd_vmid;
+ u32 cp_hqd_persistent_state;
+ u32 cp_hqd_pipe_priority;
+ u32 cp_hqd_queue_priority;
+ u32 cp_hqd_quantum;
+ u32 cp_hqd_pq_base;
+ u32 cp_hqd_pq_base_hi;
+ u32 cp_hqd_pq_rptr;
+ u32 cp_hqd_pq_rptr_report_addr;
+ u32 cp_hqd_pq_rptr_report_addr_hi;
+ u32 cp_hqd_pq_wptr_poll_addr;
+ u32 cp_hqd_pq_wptr_poll_addr_hi;
+ u32 cp_hqd_pq_doorbell_control;
+ u32 cp_hqd_pq_wptr;
+ u32 cp_hqd_pq_control;
+ u32 cp_hqd_ib_base_addr;
+ u32 cp_hqd_ib_base_addr_hi;
+ u32 cp_hqd_ib_rptr;
+ u32 cp_hqd_ib_control;
+ u32 cp_hqd_iq_timer;
+ u32 cp_hqd_iq_rptr;
+ u32 cp_hqd_dequeue_request;
+ u32 cp_hqd_dma_offload;
+ u32 cp_hqd_sema_cmd;
+ u32 cp_hqd_msg_type;
+ u32 cp_hqd_atomic0_preop_lo;
+ u32 cp_hqd_atomic0_preop_hi;
+ u32 cp_hqd_atomic1_preop_lo;
+ u32 cp_hqd_atomic1_preop_hi;
+ u32 cp_hqd_hq_status0;
+ u32 cp_hqd_hq_control0;
+ u32 cp_mqd_control;
+ u32 reserved2[3];
+ u32 cp_hqd_hq_status1;
+ u32 cp_hqd_hq_control1;
+ u32 reserved3[16];
+ u32 cp_hqd_hq_status2;
+ u32 cp_hqd_hq_control2;
+ u32 cp_hqd_hq_status3;
+ u32 cp_hqd_hq_control3;
+ u32 reserved4[2];
+ u32 cp_mqd_query_time_lo;
+ u32 cp_mqd_query_time_hi;
+ u32 reserved5[48];
+ u32 cp_mqd_skip_process[16];
+};
+
+#pragma pack(pop)
+
+
+#endif /* CIK_MQDS_H_ */
@@ -168,6 +168,7 @@
#define CP_HQD_DEQUEUE_REQUEST 0xC974
#define DEQUEUE_REQUEST_DRAIN 1
+#define DEQUEUE_REQUEST_RESET 2
#define DEQUEUE_INT (1U << 8)
#define CP_HQD_SEMA_CMD 0xC97Cu
new file mode 100644
@@ -0,0 +1,453 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#include <linux/jiffies.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "cik_mqds.h"
+#include "cik_regs.h"
+
+/*
+ * lower_32() - return the low 32 bits of a 64-bit value.
+ *
+ * Deliberately a plain external definition: a non-static "inline" definition
+ * only produces a linkable symbol under GNU89 inline semantics, and this
+ * helper is called from other translation units through the prototype in
+ * kfd_priv.h.
+ */
+uint32_t lower_32(uint64_t x)
+{
+	return (uint32_t)x;
+}
+
+/*
+ * upper_32() - return the high 32 bits of a 64-bit value.
+ *
+ * Deliberately a plain external definition: a non-static "inline" definition
+ * only produces a linkable symbol under GNU89 inline semantics, and this
+ * helper is called from other translation units through the prototype in
+ * kfd_priv.h.
+ */
+uint32_t upper_32(uint64_t x)
+{
+	return (uint32_t)(x >> 32);
+}
+
+/*
+ * busy_wait() - spin on the CPU for roughly @ms milliseconds.
+ * @ms: delay in milliseconds.
+ *
+ * The delay is converted to an absolute jiffies deadline first; comparing
+ * the jiffies counter against the raw millisecond count would either return
+ * at once or spin for an unrelated amount of time.  Resolution is one jiffy.
+ * Does not sleep; the CPU is only relaxed between polls.
+ */
+void busy_wait(unsigned long ms)
+{
+	const unsigned long deadline = jiffies + msecs_to_jiffies(ms);
+
+	while (time_before(jiffies, deadline))
+		cpu_relax();
+}
+
+/* View an opaque MQD pointer as the CIK MQD layout used by this manager. */
+static inline struct cik_mqd *get_mqd(void *mqd)
+{
+	struct cik_mqd *cik = mqd;
+
+	return cik;
+}
+
+/*
+ * init_mqd() - allocate and initialise an MQD for a compute/CP queue.
+ * @mm:          MQD manager; supplies the device and the update_mqd hook.
+ * @mqd:         out: kernel pointer to the new MQD (written on success only).
+ * @mqd_mem_obj: out: memory object backing the MQD; pass it to uninit_mqd().
+ * @gart_addr:   out, optional: GPU address of the MQD (written on success only).
+ * @q:           queue properties folded into the MQD by update_mqd().
+ *
+ * The buffer is allocated, kernel-mapped and GPU-mapped by
+ * radeon_kfd_vidmem_alloc_map(), zeroed, given its static defaults and then
+ * completed by mm->update_mqd().
+ *
+ * Return: 0 on success, -ENOMEM if the buffer could not be allocated/mapped,
+ * or the error from update_mqd(), in which case the buffer is released again.
+ */
+static int init_mqd(struct mqd_manager *mm, void **mqd, kfd_mem_obj *mqd_mem_obj,
+		uint64_t *gart_addr, struct queue_properties *q)
+{
+	uint64_t addr;
+	struct cik_mqd *m;
+	int retval;
+
+	/* mqd_mem_obj is dereferenced by the allocator, so check it as well. */
+	BUG_ON(!mm || !q || !mqd || !mqd_mem_obj);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	retval = radeon_kfd_vidmem_alloc_map(
+			mm->dev,
+			mqd_mem_obj,
+			(void **)&m,
+			&addr,
+			ALIGN(sizeof(struct cik_mqd), 256));
+
+	if (retval != 0)
+		return -ENOMEM;
+
+	memset(m, 0, sizeof(struct cik_mqd));
+
+	m->header = 0xC0310800;
+	m->pipeline_stat_enable = 1;
+	m->static_thread_mgmt01[0] = 0xFFFFFFFF;
+	m->static_thread_mgmt01[1] = 0xFFFFFFFF;
+	m->static_thread_mgmt23[0] = 0xFFFFFFFF;
+	m->static_thread_mgmt23[1] = 0xFFFFFFFF;
+
+	m->queue_state.cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE;
+
+	/* The MQD records its own GPU address. */
+	m->queue_state.cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;
+	m->queue_state.cp_mqd_base_addr = lower_32(addr);
+	m->queue_state.cp_mqd_base_addr_hi = upper_32(addr);
+
+	/* Although WinKFD writes this, I suspect it should not be necessary. */
+	m->queue_state.cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE;
+
+	m->queue_state.cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | QUANTUM_DURATION(10);
+
+	m->queue_state.cp_hqd_pipe_priority = 1;
+	m->queue_state.cp_hqd_queue_priority = 15;
+
+	retval = mm->update_mqd(mm, m, q);
+	if (retval != 0) {
+		/* Do not leak the freshly mapped buffer on failure. */
+		radeon_kfd_vidmem_free_unmap(mm->dev, *mqd_mem_obj);
+		return retval;
+	}
+
+	*mqd = m;
+	if (gart_addr != NULL)
+		*gart_addr = addr;
+
+	return 0;
+}
+
+/*
+ * uninit_mqd() - release the memory backing an MQD.
+ * @mm:          MQD manager; supplies the device.
+ * @mqd:         MQD pointer; only checked for NULL here.
+ * @mqd_mem_obj: memory object returned by init_mqd(); unmapped and freed
+ *               through radeon_kfd_vidmem_free_unmap().
+ */
+static void uninit_mqd(struct mqd_manager *mm, void *mqd, kfd_mem_obj mqd_mem_obj)
+{
+ BUG_ON(!mm || !mqd);
+ radeon_kfd_vidmem_free_unmap(mm->dev, mqd_mem_obj);
+}
+
+/*
+ * load_mqd() - program the queue registers from an MQD's register image.
+ * @mm:  MQD manager; supplies the device.
+ * @mqd: MQD whose queue_state is copied to the CP_MQD_* / CP_HQD_* registers.
+ *
+ * The write order is kept exactly as is; CP_HQD_ACTIVE is written last,
+ * after every other register has been programmed.
+ * NOTE(review): presumably the caller has selected the target queue with
+ * acquire_hqd() beforehand - confirm against callers.
+ *
+ * Return: always 0.
+ */
+static int load_mqd(struct mqd_manager *mm, void *mqd)
+{
+	struct kfd_dev *dev;
+	struct cik_hqd_registers *qs;
+
+	BUG_ON(!mm || !mqd);
+
+	dev = mm->dev;
+	qs = &get_mqd(mqd)->queue_state;
+
+	/* MQD location and control. */
+	WRITE_REG(dev, CP_MQD_BASE_ADDR, qs->cp_mqd_base_addr);
+	WRITE_REG(dev, CP_MQD_BASE_ADDR_HI, qs->cp_mqd_base_addr_hi);
+	WRITE_REG(dev, CP_MQD_CONTROL, qs->cp_mqd_control);
+
+	/* Primary queue ring. */
+	WRITE_REG(dev, CP_HQD_PQ_BASE, qs->cp_hqd_pq_base);
+	WRITE_REG(dev, CP_HQD_PQ_BASE_HI, qs->cp_hqd_pq_base_hi);
+	WRITE_REG(dev, CP_HQD_PQ_CONTROL, qs->cp_hqd_pq_control);
+
+	/* Indirect buffer state. */
+	WRITE_REG(dev, CP_HQD_IB_CONTROL, qs->cp_hqd_ib_control);
+	WRITE_REG(dev, CP_HQD_IB_BASE_ADDR, qs->cp_hqd_ib_base_addr);
+	WRITE_REG(dev, CP_HQD_IB_BASE_ADDR_HI, qs->cp_hqd_ib_base_addr_hi);
+
+	WRITE_REG(dev, CP_HQD_IB_RPTR, qs->cp_hqd_ib_rptr);
+
+	WRITE_REG(dev, CP_HQD_PERSISTENT_STATE, qs->cp_hqd_persistent_state);
+	WRITE_REG(dev, CP_HQD_SEMA_CMD, qs->cp_hqd_sema_cmd);
+	WRITE_REG(dev, CP_HQD_MSG_TYPE, qs->cp_hqd_msg_type);
+
+	WRITE_REG(dev, CP_HQD_ATOMIC0_PREOP_LO, qs->cp_hqd_atomic0_preop_lo);
+	WRITE_REG(dev, CP_HQD_ATOMIC0_PREOP_HI, qs->cp_hqd_atomic0_preop_hi);
+	WRITE_REG(dev, CP_HQD_ATOMIC1_PREOP_LO, qs->cp_hqd_atomic1_preop_lo);
+	WRITE_REG(dev, CP_HQD_ATOMIC1_PREOP_HI, qs->cp_hqd_atomic1_preop_hi);
+
+	/* Read/write pointer reporting. */
+	WRITE_REG(dev, CP_HQD_PQ_RPTR_REPORT_ADDR, qs->cp_hqd_pq_rptr_report_addr);
+	WRITE_REG(dev, CP_HQD_PQ_RPTR_REPORT_ADDR_HI, qs->cp_hqd_pq_rptr_report_addr_hi);
+	WRITE_REG(dev, CP_HQD_PQ_RPTR, qs->cp_hqd_pq_rptr);
+
+	WRITE_REG(dev, CP_HQD_PQ_WPTR_POLL_ADDR, qs->cp_hqd_pq_wptr_poll_addr);
+	WRITE_REG(dev, CP_HQD_PQ_WPTR_POLL_ADDR_HI, qs->cp_hqd_pq_wptr_poll_addr_hi);
+
+	WRITE_REG(dev, CP_HQD_PQ_DOORBELL_CONTROL, qs->cp_hqd_pq_doorbell_control);
+
+	WRITE_REG(dev, CP_HQD_VMID, qs->cp_hqd_vmid);
+
+	/* Scheduling parameters. */
+	WRITE_REG(dev, CP_HQD_QUANTUM, qs->cp_hqd_quantum);
+
+	WRITE_REG(dev, CP_HQD_PIPE_PRIORITY, qs->cp_hqd_pipe_priority);
+	WRITE_REG(dev, CP_HQD_QUEUE_PRIORITY, qs->cp_hqd_queue_priority);
+
+	WRITE_REG(dev, CP_HQD_HQ_SCHEDULER0, qs->cp_hqd_hq_scheduler0);
+	WRITE_REG(dev, CP_HQD_HQ_SCHEDULER1, qs->cp_hqd_hq_scheduler1);
+
+	/* Written last, once the rest of the queue state is in place. */
+	WRITE_REG(dev, CP_HQD_ACTIVE, qs->cp_hqd_active);
+
+	return 0;
+}
+
+/*
+ * update_mqd() - fold the queue properties into a compute/CP MQD.
+ * @mm:  MQD manager (only checked for NULL here).
+ * @mqd: MQD to update.
+ * @q:   queue properties; q->is_active is rewritten to match the MQD.
+ *
+ * Only the in-memory MQD is changed; no hardware register is touched.
+ * The queue is marked active only when it has a non-zero size, a non-zero
+ * ring address and a non-zero queue percentage.
+ *
+ * NOTE(review): for q->queue_size == 0 the size term below is ffs(0) - 2,
+ * i.e. -2, which sets nearly every bit of cp_hqd_pq_control.  The queue is
+ * marked inactive in that case, but confirm the value is never loaded.
+ *
+ * Return: always 0.
+ */
+static int update_mqd(struct mqd_manager *mm, void *mqd, struct queue_properties *q)
+{
+	struct cik_hqd_registers *qs;
+	uint64_t ring_base, rptr_addr;
+	bool active;
+
+	BUG_ON(!mm || !q || !mqd);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	qs = &get_mqd(mqd)->queue_state;
+
+	/* The ring base is programmed in units of 256 bytes. */
+	ring_base = (uint64_t)q->queue_address >> 8;
+	rptr_addr = (uint64_t)q->read_ptr;
+
+	qs->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
+	/*
+	 * Encoded queue size is log2(size in dwords) - 1; ffs() is one-based,
+	 * hence the total adjustment of 2.
+	 */
+	qs->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 2;
+	qs->cp_hqd_pq_base = lower_32(ring_base);
+	qs->cp_hqd_pq_base_hi = upper_32(ring_base);
+	qs->cp_hqd_pq_rptr_report_addr = lower_32(rptr_addr);
+	qs->cp_hqd_pq_rptr_report_addr_hi = upper_32(rptr_addr);
+	qs->cp_hqd_pq_doorbell_control = DOORBELL_EN | DOORBELL_OFFSET(q->doorbell_off);
+
+	qs->cp_hqd_vmid = q->vmid;
+
+	active = q->queue_size > 0 &&
+		 q->queue_address != 0 &&
+		 q->queue_percent > 0;
+
+	qs->cp_hqd_active = active ? 1 : 0;
+	q->is_active = active;
+
+	return 0;
+}
+
+/*
+ * destroy_mqd() - preempt the currently selected hardware queue.
+ * @mm:      MQD manager; supplies the device.
+ * @mqd:     MQD of the queue (only checked for NULL here).
+ * @type:    KFD_PREEMPT_TYPE_WAVEFRONT_RESET issues a reset dequeue request,
+ *           anything else a drain request.
+ * @timeout: maximum wait in milliseconds; 0 means wait until the queue
+ *           goes inactive, however long that takes.
+ *
+ * Disables the doorbell, issues the dequeue request and then polls
+ * CP_HQD_ACTIVE until it reads back as zero.  The wait is bounded by an
+ * absolute jiffies deadline, so the timeout really is in milliseconds
+ * (rounded to jiffies) and cannot underflow the unsigned counter.
+ *
+ * Return: 0 once the queue is inactive, -EBUSY if @timeout expired first.
+ */
+static int destroy_mqd(struct mqd_manager *mm, void *mqd, enum kfd_preempt_type type, unsigned int timeout)
+{
+	const bool bounded = (timeout > 0);
+	unsigned long deadline;
+
+	BUG_ON(!mm || !mqd);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	/* Stop new work from being submitted through the doorbell. */
+	WRITE_REG(mm->dev, CP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+	if (type == KFD_PREEMPT_TYPE_WAVEFRONT_RESET)
+		WRITE_REG(mm->dev, CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQUEST_RESET);
+	else
+		WRITE_REG(mm->dev, CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQUEST_DRAIN);
+
+	deadline = jiffies + msecs_to_jiffies(timeout);
+
+	while (READ_REG(mm->dev, CP_HQD_ACTIVE) != 0) {
+		if (bounded && time_after(jiffies, deadline)) {
+			pr_err("kfd: cp queue preemption time out (%ums)\n", timeout);
+			return -EBUSY;
+		}
+		cpu_relax();
+	}
+
+	return 0;
+}
+
+/*
+ * Compose an SRBM_GFX_CNTL value from its ME, pipe, queue and VMID fields.
+ */
+static inline uint32_t make_srbm_gfx_cntl_mpqv(unsigned int me,
+						unsigned int pipe,
+						unsigned int queue,
+						unsigned int vmid)
+{
+	uint32_t cntl = QUEUEID(queue);
+
+	cntl |= VMID(vmid);
+	cntl |= MEID(me);
+	cntl |= PIPEID(pipe);
+
+	return cntl;
+}
+
+/*
+ * Return the index of the first compute pipe, as recorded in the device's
+ * shared_resources.  acquire_hqd() adds it to its pipe argument.
+ */
+static inline uint32_t get_first_pipe_offset(struct mqd_manager *mm)
+{
+ BUG_ON(!mm);
+ return mm->dev->shared_resources.first_compute_pipe;
+}
+
+/*
+ * acquire_hqd() - select a hardware queue for indexed register access.
+ * @mm:    MQD manager; supplies the device.
+ * @pipe:  pipe index, relative to the first compute pipe.
+ * @queue: queue index within the pipe.
+ * @vmid:  VMID to select.
+ *
+ * Takes the SRBM index lock and programs SRBM_GFX_CNTL.  The lock is NOT
+ * dropped here; release_hqd() resets SRBM_GFX_CNTL and unlocks.
+ */
+static void acquire_hqd(struct mqd_manager *mm, unsigned int pipe, unsigned int queue, unsigned int vmid)
+{
+	unsigned int abs_pipe, mec, pipe_in_mec;
+
+	BUG_ON(!mm);
+
+	radeon_kfd_lock_srbm_index(mm->dev);
+
+	/*
+	 * Split the absolute pipe number into a 1-based MEC number and a
+	 * pipe within it (the code assumes four pipes per MEC).
+	 */
+	abs_pipe = pipe + get_first_pipe_offset(mm);
+	pipe_in_mec = abs_pipe % 4;
+	mec = abs_pipe / 4 + 1;
+
+	pr_debug("kfd: acquire mec: %d pipe: %d queue: %d vmid: %d\n",
+			mec, pipe_in_mec, queue, vmid);
+
+	WRITE_REG(mm->dev, SRBM_GFX_CNTL,
+			make_srbm_gfx_cntl_mpqv(mec, pipe_in_mec, queue, vmid));
+}
+
+/*
+ * release_hqd() - counterpart of acquire_hqd().
+ * Resets SRBM_GFX_CNTL to 0 and then drops the SRBM index lock that
+ * acquire_hqd() took; the register is reset while the lock is still held.
+ */
+static void release_hqd(struct mqd_manager *mm)
+{
+ BUG_ON(!mm);
+ /* Be nice to KGD, reset indexed CP registers to the GFX pipe. */
+ WRITE_REG(mm->dev, SRBM_GFX_CNTL, 0);
+ radeon_kfd_unlock_srbm_index(mm->dev);
+}
+
+/*
+ * is_occupied() - check whether the selected hardware queue runs queue @q.
+ * @mm:  MQD manager; supplies the device.
+ * @mqd: MQD of the queue (only checked for NULL here).
+ * @q:   queue properties; only queue_address is used.
+ *
+ * The queue counts as occupied by @q when CP_HQD_ACTIVE is non-zero and
+ * the programmed ring base (CP_HQD_PQ_BASE / _HI) equals q->queue_address
+ * shifted right by 8.
+ * NOTE(review): presumably the caller has selected the queue with
+ * acquire_hqd() beforehand - confirm against callers.
+ *
+ * Return: true if the hardware queue is active with @q's ring, else false.
+ */
+bool is_occupied(struct mqd_manager *mm, void *mqd, struct queue_properties *q)
+{
+	uint64_t ring_base;
+
+	BUG_ON(!mm || !mqd || !q);
+
+	if (!READ_REG(mm->dev, CP_HQD_ACTIVE))
+		return false;
+
+	ring_base = (uint64_t)q->queue_address >> 8;
+
+	return lower_32(ring_base) == READ_REG(mm->dev, CP_HQD_PQ_BASE) &&
+	       upper_32(ring_base) == READ_REG(mm->dev, CP_HQD_PQ_BASE_HI);
+}
+
+/*
+ * Manager-level setup hook, called once by mqd_manager_init().
+ * Currently does no work: it checks @mm and reports success (0).
+ */
+static int initialize(struct mqd_manager *mm)
+{
+ BUG_ON(!mm);
+ return 0;
+}
+
+/*
+ * Manager-level teardown hook, the counterpart of initialize().
+ * Currently does no work beyond checking @mm.
+ */
+static void uninitialize(struct mqd_manager *mm)
+{
+ BUG_ON(!mm);
+}
+
+/*
+ * HIQ MQD Implementation
+ */
+
+/*
+ * init_mqd_hiq() - allocate and initialise an MQD for the HIQ.
+ * @mm:          MQD manager; supplies the device and the update_mqd hook.
+ * @mqd:         out: kernel pointer to the new MQD (written on success only).
+ * @mqd_mem_obj: out: memory object backing the MQD; pass it to uninit_mqd().
+ * @gart_addr:   out, optional: GPU address of the MQD (written on success only).
+ * @q:           queue properties folded into the MQD by update_mqd().
+ *
+ * Differs from init_mqd() in that the allocation is rounded up to a whole
+ * page and IB_ATC_EN is not set in cp_hqd_ib_control.
+ *
+ * Return: 0 on success, -ENOMEM if the buffer could not be allocated/mapped,
+ * or the error from update_mqd(), in which case the buffer is released again.
+ */
+static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, kfd_mem_obj *mqd_mem_obj,
+		uint64_t *gart_addr, struct queue_properties *q)
+{
+	uint64_t addr;
+	struct cik_mqd *m;
+	int retval;
+
+	BUG_ON(!mm || !q || !mqd || !mqd_mem_obj);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	retval = radeon_kfd_vidmem_alloc_map(
+			mm->dev,
+			mqd_mem_obj,
+			(void **)&m,
+			&addr,
+			ALIGN(sizeof(struct cik_mqd), PAGE_SIZE));
+
+	if (retval != 0)
+		return -ENOMEM;
+
+	memset(m, 0, sizeof(struct cik_mqd));
+
+	m->header = 0xC0310800;
+	m->pipeline_stat_enable = 1;
+	m->static_thread_mgmt01[0] = 0xFFFFFFFF;
+	m->static_thread_mgmt01[1] = 0xFFFFFFFF;
+	m->static_thread_mgmt23[0] = 0xFFFFFFFF;
+	m->static_thread_mgmt23[1] = 0xFFFFFFFF;
+
+	m->queue_state.cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE;
+
+	/* The MQD records its own GPU address. */
+	m->queue_state.cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN;
+	m->queue_state.cp_mqd_base_addr = lower_32(addr);
+	m->queue_state.cp_mqd_base_addr_hi = upper_32(addr);
+
+	m->queue_state.cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE;
+
+	m->queue_state.cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | QUANTUM_DURATION(10);
+
+	m->queue_state.cp_hqd_pipe_priority = 1;
+	m->queue_state.cp_hqd_queue_priority = 15;
+
+	retval = mm->update_mqd(mm, m, q);
+	if (retval != 0) {
+		/* Do not leak the freshly mapped buffer on failure. */
+		radeon_kfd_vidmem_free_unmap(mm->dev, *mqd_mem_obj);
+		return retval;
+	}
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+
+	return 0;
+}
+
+/*
+ * update_mqd_hiq() - fold the queue properties into the HIQ MQD.
+ * @mm:  MQD manager (only checked for NULL here).
+ * @mqd: MQD to update.
+ * @q:   queue properties; q->is_active is rewritten to match the MQD.
+ *
+ * Same as update_mqd() except for the control flags: the HIQ sets
+ * PRIV_STATE | KMD_QUEUE where update_mqd() sets PQ_ATC_EN.
+ * Only the in-memory MQD is changed; no hardware register is touched.
+ *
+ * NOTE(review): for q->queue_size == 0 the size term below is ffs(0) - 2,
+ * i.e. -2, which sets nearly every bit of cp_hqd_pq_control.  The queue is
+ * marked inactive in that case, but confirm the value is never loaded.
+ *
+ * Return: always 0.
+ */
+static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, struct queue_properties *q)
+{
+	struct cik_hqd_registers *qs;
+	uint64_t ring_base, rptr_addr;
+	bool active;
+
+	BUG_ON(!mm || !q || !mqd);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	qs = &get_mqd(mqd)->queue_state;
+
+	/* The ring base is programmed in units of 256 bytes. */
+	ring_base = (uint64_t)q->queue_address >> 8;
+	rptr_addr = (uint64_t)q->read_ptr;
+
+	qs->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | DEFAULT_MIN_AVAIL_SIZE | PRIV_STATE | KMD_QUEUE;
+	/*
+	 * Encoded queue size is log2(size in dwords) - 1; ffs() is one-based,
+	 * hence the total adjustment of 2.
+	 */
+	qs->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 2;
+	qs->cp_hqd_pq_base = lower_32(ring_base);
+	qs->cp_hqd_pq_base_hi = upper_32(ring_base);
+	qs->cp_hqd_pq_rptr_report_addr = lower_32(rptr_addr);
+	qs->cp_hqd_pq_rptr_report_addr_hi = upper_32(rptr_addr);
+	qs->cp_hqd_pq_doorbell_control = DOORBELL_EN | DOORBELL_OFFSET(q->doorbell_off);
+
+	qs->cp_hqd_vmid = q->vmid;
+
+	active = q->queue_size > 0 &&
+		 q->queue_address != 0 &&
+		 q->queue_percent > 0;
+
+	qs->cp_hqd_active = active ? 1 : 0;
+	q->is_active = active;
+
+	return 0;
+}
+
+/*
+ * mqd_manager_init() - create an MQD manager for one MQD type.
+ * @type: kind of MQD to manage; must be below KFD_MQD_TYPE_MAX.
+ * @dev:  device the manager operates on.
+ *
+ * KFD_MQD_TYPE_CIK_CP and KFD_MQD_TYPE_CIK_COMPUTE share one implementation;
+ * KFD_MQD_TYPE_CIK_HIQ has its own init/update hooks.  Any other type
+ * (currently KFD_MQD_TYPE_CIK_SDMA) is not implemented and yields NULL.
+ *
+ * NOTE(review): mqd_mutex is left zero-filled by kzalloc() and is never
+ * passed to mutex_init() here; initialise it before anything locks it.
+ *
+ * Return: a manager allocated with kzalloc() (the caller owns it), or NULL
+ * on allocation failure, unsupported type or failed initialize() hook.
+ * Nothing is leaked on any failure path.
+ */
+struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev)
+{
+	struct mqd_manager *mqd;
+
+	BUG_ON(!dev);
+	BUG_ON(type >= KFD_MQD_TYPE_MAX);
+
+	pr_debug("kfd: In func %s\n", __func__);
+
+	mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
+	if (!mqd)
+		return NULL;
+
+	mqd->dev = dev;
+
+	/* Only the init/update hooks differ between the supported types. */
+	switch (type) {
+	case KFD_MQD_TYPE_CIK_CP:
+	case KFD_MQD_TYPE_CIK_COMPUTE:
+		mqd->init_mqd = init_mqd;
+		mqd->update_mqd = update_mqd;
+		break;
+	case KFD_MQD_TYPE_CIK_HIQ:
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->update_mqd = update_mqd_hiq;
+		break;
+	default:
+		/* Unsupported type: free the manager instead of leaking it. */
+		goto fail;
+	}
+
+	mqd->uninit_mqd = uninit_mqd;
+	mqd->load_mqd = load_mqd;
+	mqd->destroy_mqd = destroy_mqd;
+	mqd->acquire_hqd = acquire_hqd;
+	mqd->release_hqd = release_hqd;
+	mqd->is_occupied = is_occupied;
+	mqd->initialize = initialize;
+	mqd->uninitialize = uninitialize;
+
+	if (mqd->initialize(mqd) != 0) {
+		pr_err("kfd: mqd manager initialization failed\n");
+		goto fail;
+	}
+
+	return mqd;
+
+fail:
+	kfree(mqd);
+	return NULL;
+}
+
+/* SDMA queues should be implemented here once the CP supports them. */
new file mode 100644
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Ben Goz
+ */
+
+#ifndef MQD_MANAGER_H_
+#define MQD_MANAGER_H_
+
+#include "kfd_priv.h"
+
+/*
+ * Operations table plus state for managing one kind of memory queue
+ * descriptor (MQD).  Instances are created by mqd_manager_init(), which
+ * fills in every hook for the requested MQD type.
+ */
+struct mqd_manager {
+ /* Allocate, map and initialise an MQD; returns 0 or a negative errno. */
+ int (*init_mqd)(struct mqd_manager *mm, void **mqd, kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q);
+ /* Program the queue registers from the MQD's register image. */
+ int (*load_mqd)(struct mqd_manager *mm, void *mqd);
+ /* Fold the queue properties into the in-memory MQD. */
+ int (*update_mqd)(struct mqd_manager *mm, void *mqd, struct queue_properties *q);
+ /* Preempt the selected queue and wait for it to go inactive. */
+ int (*destroy_mqd)(struct mqd_manager *mm, void *mqd, enum kfd_preempt_type type, unsigned int timeout);
+ /* Unmap and free the memory behind an MQD. */
+ void (*uninit_mqd)(struct mqd_manager *mm, void *mqd, kfd_mem_obj mqd_mem_obj);
+ /* Take the SRBM index lock and select pipe/queue/vmid. */
+ void (*acquire_hqd)(struct mqd_manager *mm, unsigned int pipe, unsigned int queue, unsigned int vmid);
+ /* Reset the selection and drop the lock taken by acquire_hqd. */
+ void (*release_hqd)(struct mqd_manager *mm);
+ /* True if the selected hardware queue is active with q's ring. */
+ bool (*is_occupied)(struct mqd_manager *mm, void *mqd, struct queue_properties *q);
+ /* Manager-level setup/teardown hooks. */
+ int (*initialize)(struct mqd_manager *mm);
+ void (*uninitialize)(struct mqd_manager *mm);
+
+ /* NOTE(review): not initialised or taken anywhere in this change. */
+ struct mutex mqd_mutex;
+ /* Device the hooks operate on; set by mqd_manager_init(). */
+ struct kfd_dev *dev;
+};
+
+
+#endif /* MQD_MANAGER_H_ */
@@ -141,6 +141,9 @@ int radeon_kfd_vidmem_gpumap(struct kfd_dev *kfd, kfd_mem_obj mem_obj, uint64_t
void radeon_kfd_vidmem_ungpumap(struct kfd_dev *kfd, kfd_mem_obj mem_obj);
int radeon_kfd_vidmem_kmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj, void **ptr);
void radeon_kfd_vidmem_unkmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj);
+int radeon_kfd_vidmem_alloc_map(struct kfd_dev *kfd, kfd_mem_obj *mem_obj, void **ptr,
+ uint64_t *vmid0_address, size_t size);
+void radeon_kfd_vidmem_free_unmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj);
/* Character device interface */
int radeon_kfd_chardev_init(void);
@@ -161,6 +164,17 @@ struct kfd_queue {
struct kfd_scheduler_queue scheduler_queue;
};
+/*
+ * Scope of a preemption request.
+ * NOTE(review): not used by the code in this change; presumably selects
+ * which queues a preemption applies to - confirm.
+ * NOTE(review): two enumerators are spelled "PRERMPT" rather than
+ * "PREEMPT"; renaming them would change the interface, so they are left
+ * as they are.
+ */
+enum kfd_preempt_type_filter {
+ KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE,
+ KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES,
+ KFD_PRERMPT_TYPE_FILTER_BY_PASID
+};
+
+/*
+ * How a queue is preempted by the destroy_mqd hook:
+ * KFD_PREEMPT_TYPE_WAVEFRONT issues DEQUEUE_REQUEST_DRAIN,
+ * KFD_PREEMPT_TYPE_WAVEFRONT_RESET issues DEQUEUE_REQUEST_RESET.
+ */
+enum kfd_preempt_type {
+ KFD_PREEMPT_TYPE_WAVEFRONT,
+ KFD_PREEMPT_TYPE_WAVEFRONT_RESET
+};
+
enum kfd_queue_type {
KFD_QUEUE_TYPE_COMPUTE,
KFD_QUEUE_TYPE_SDMA,
@@ -204,6 +218,14 @@ struct queue {
struct kfd_dev *device;
};
+/*
+ * Kind of MQD a manager created by mqd_manager_init() handles.
+ * COMPUTE and CP share one implementation, HIQ has its own init/update
+ * hooks, and SDMA is not implemented yet (mqd_manager_init() returns NULL).
+ */
+enum KFD_MQD_TYPE {
+ KFD_MQD_TYPE_CIK_COMPUTE = 0, /* for no cp scheduling */
+ KFD_MQD_TYPE_CIK_HIQ, /* for hiq */
+ KFD_MQD_TYPE_CIK_CP, /* for cp queues and diq */
+ KFD_MQD_TYPE_CIK_SDMA, /* for sdma queues */
+ KFD_MQD_TYPE_MAX /* number of types; not a valid type itself */
+};
+
/* Data that is per-process-per device. */
struct kfd_process_device {
/* List of all per-device data for a process. Starts from kfd_process.per_device_data. */
@@ -325,10 +347,14 @@ int kgd2kfd_resume(struct kfd_dev *dev);
int kfd_init_apertures(struct kfd_process *process);
/* Queue Context Management */
+inline uint32_t lower_32(uint64_t x);
+inline uint32_t upper_32(uint64_t x);
+inline void busy_wait(unsigned long ms);
int init_queue(struct queue **q, struct queue_properties properties);
void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);
+struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev);
#endif
@@ -182,16 +182,6 @@ struct cik_static_queue {
uint32_t queue_size_encoded; /* CP_HQD_PQ_CONTROL.QUEUE_SIZE takes the queue size as log2(size) - 3. */
};
-static uint32_t lower_32(uint64_t x)
-{
- return (uint32_t)x;
-}
-
-static uint32_t upper_32(uint64_t x)
-{
- return (uint32_t)(x >> 32);
-}
-
/* SRBM_GFX_CNTL provides the MEC/pipe/queue and vmid for many registers that are
* In particular, CP_HQD_* and CP_MQD_* are instanced for each queue. CP_HPD_* are instanced for each pipe.
* SH_MEM_* are instanced per-VMID.
@@ -59,3 +59,39 @@ void radeon_kfd_vidmem_unkmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj)
{
kfd2kgd->unkmap_mem(kfd->kgd, (struct kgd_mem *)mem_obj);
}
+
+/*
+ * radeon_kfd_vidmem_alloc_map() - allocate a buffer and map it for CPU and GPU.
+ * @kfd:           device to allocate from.
+ * @mem_obj:       out: handle of the new memory object.
+ * @ptr:           out: kernel mapping of the buffer.
+ * @vmid0_address: out: GPU address of the buffer.
+ * @size:          size of the buffer in bytes.
+ *
+ * Allocates page-aligned memory from the KFD_MEMPOOL_SYSTEM_WRITECOMBINE
+ * pool, maps it into the kernel and then into the GPU.  Each failure undoes
+ * the steps that already succeeded.
+ *
+ * Return: 0 on success, otherwise the error of the step that failed.
+ */
+int radeon_kfd_vidmem_alloc_map(struct kfd_dev *kfd, kfd_mem_obj *mem_obj,
+				void **ptr, uint64_t *vmid0_address,
+				size_t size)
+{
+	int err;
+
+	err = radeon_kfd_vidmem_alloc(kfd, size, PAGE_SIZE, KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
+					mem_obj);
+	if (err != 0)
+		return err;
+
+	err = radeon_kfd_vidmem_kmap(kfd, *mem_obj, ptr);
+	if (err != 0) {
+		radeon_kfd_vidmem_free(kfd, *mem_obj);
+		return err;
+	}
+
+	err = radeon_kfd_vidmem_gpumap(kfd, *mem_obj, vmid0_address);
+	if (err != 0) {
+		radeon_kfd_vidmem_unkmap(kfd, *mem_obj);
+		radeon_kfd_vidmem_free(kfd, *mem_obj);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * radeon_kfd_vidmem_free_unmap() - undo radeon_kfd_vidmem_alloc_map().
+ * Tears down in the reverse order of setup: GPU mapping first, then the
+ * kernel mapping, then the allocation itself.
+ */
+void radeon_kfd_vidmem_free_unmap(struct kfd_dev *kfd, kfd_mem_obj mem_obj)
+{
+ radeon_kfd_vidmem_ungpumap(kfd, mem_obj);
+ radeon_kfd_vidmem_unkmap(kfd, mem_obj);
+ radeon_kfd_vidmem_free(kfd, mem_obj);
+}