diff mbox series

[V2,3/4] drivers/fpga/amd: Add remote queue

Message ID 20241210183734.30803-4-yidong.zhang@amd.com (mailing list archive)
State New
Headers show
Series Add versal-pci driver | expand

Commit Message

Yidong Zhang Dec. 10, 2024, 6:37 p.m. UTC
The remote queue is a hardware queue based ring buffer service between mgmt
PF and PCIe hardware firmware for programming FPGA Program Logic, loading
versal firmware and checking card healthy status.

Co-developed-by: Nishad Saraf <nishads@amd.com>
Signed-off-by: Nishad Saraf <nishads@amd.com>
Co-developed-by: Prapul Krishnamurthy <prapulk@amd.com>
Signed-off-by: Prapul Krishnamurthy <prapulk@amd.com>
Signed-off-by: Yidong Zhang <yidong.zhang@amd.com>
---
 drivers/fpga/amd/Makefile                |   3 +-
 drivers/fpga/amd/versal-pci-rm-queue.c   | 326 +++++++++++++++++++++++
 drivers/fpga/amd/versal-pci-rm-queue.h   |  21 ++
 drivers/fpga/amd/versal-pci-rm-service.h | 209 +++++++++++++++
 4 files changed, 558 insertions(+), 1 deletion(-)
 create mode 100644 drivers/fpga/amd/versal-pci-rm-queue.c
 create mode 100644 drivers/fpga/amd/versal-pci-rm-queue.h
 create mode 100644 drivers/fpga/amd/versal-pci-rm-service.h
diff mbox series

Patch

diff --git a/drivers/fpga/amd/Makefile b/drivers/fpga/amd/Makefile
index 7a604785e5f9..b2ffdbf23400 100644
--- a/drivers/fpga/amd/Makefile
+++ b/drivers/fpga/amd/Makefile
@@ -3,4 +3,5 @@ 
 obj-$(CONFIG_AMD_VERSAL_PCI)			+= versal-pci.o
 
 versal-pci-$(CONFIG_AMD_VERSAL_PCI)		:= versal-pci-main.o \
-						   versal-pci-comm-chan.o
+						   versal-pci-comm-chan.o \
+						   versal-pci-rm-queue.o
diff --git a/drivers/fpga/amd/versal-pci-rm-queue.c b/drivers/fpga/amd/versal-pci-rm-queue.c
new file mode 100644
index 000000000000..92f2e1165052
--- /dev/null
+++ b/drivers/fpga/amd/versal-pci-rm-queue.c
@@ -0,0 +1,326 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Versal PCIe device
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#include <linux/pci.h>
+
+#include "versal-pci.h"
+#include "versal-pci-rm-queue.h"
+#include "versal-pci-rm-service.h"
+
+static inline struct rm_device *to_rdev_msg_monitor(struct work_struct *w)
+{
+	return container_of(w, struct rm_device, msg_monitor);
+}
+
+static inline struct rm_device *to_rdev_msg_timer(struct timer_list *t)
+{
+	return container_of(t, struct rm_device, msg_timer);
+}
+
+static inline u32 rm_io_read(struct rm_device *rdev, u32 offset)
+{
+	/* TODO */
+	return 0;
+}
+
+static inline int rm_io_write(struct rm_device *rdev, u32 offset, u32 value)
+{
+	/* TODO */
+	return 0;
+}
+
+static inline u32 rm_queue_read(struct rm_device *rdev, u32 offset)
+{
+	/* TODO */
+	return 0;
+}
+
+static inline void rm_queue_write(struct rm_device *rdev, u32 offset, u32 value)
+{
+	/* TODO */
+}
+
+static inline void rm_queue_bulk_read(struct rm_device *rdev, u32 offset,
+				      u32 *value, u32 size)
+{
+	/* TODO */
+}
+
+static inline void rm_queue_bulk_write(struct rm_device *rdev, u32 offset,
+				       u32 *value, u32 size)
+{
+	/* TODO */
+}
+
+static inline u32 rm_queue_get_cidx(struct rm_device *rdev, enum rm_queue_type type)
+{
+	u32 off;
+
+	if (type == RM_QUEUE_SQ)
+		off = offsetof(struct rm_queue_header, sq_cidx);
+	else
+		off = offsetof(struct rm_queue_header, cq_cidx);
+
+	return rm_queue_read(rdev, off);
+}
+
+static inline void rm_queue_set_cidx(struct rm_device *rdev, enum rm_queue_type type,
+				     u32 value)
+{
+	u32 off;
+
+	if (type == RM_QUEUE_SQ)
+		off = offsetof(struct rm_queue_header, sq_cidx);
+	else
+		off = offsetof(struct rm_queue_header, cq_cidx);
+
+	rm_queue_write(rdev, off, value);
+}
+
+static inline u32 rm_queue_get_pidx(struct rm_device *rdev, enum rm_queue_type type)
+{
+	if (type == RM_QUEUE_SQ)
+		return rm_io_read(rdev, RM_IO_SQ_PIDX_OFF);
+	else
+		return rm_io_read(rdev, RM_IO_CQ_PIDX_OFF);
+}
+
+static inline int rm_queue_set_pidx(struct rm_device *rdev,
+				    enum rm_queue_type type, u32 value)
+{
+	if (type == RM_QUEUE_SQ)
+		return rm_io_write(rdev, RM_IO_SQ_PIDX_OFF, value);
+	else
+		return rm_io_write(rdev, RM_IO_CQ_PIDX_OFF, value);
+}
+
+static inline u32 rm_queue_get_sq_slot_offset(struct rm_device *rdev)
+{
+	u32 index;
+
+	if ((rdev->sq.pidx - rdev->sq.cidx) >= rdev->queue_size)
+		return RM_INVALID_SLOT;
+
+	index = rdev->sq.pidx & (rdev->queue_size - 1);
+	return rdev->sq.offset + RM_CMD_SQ_SLOT_SIZE * index;
+}
+
+static inline u32 rm_queue_get_cq_slot_offset(struct rm_device *rdev)
+{
+	u32 index;
+
+	index = rdev->cq.cidx & (rdev->queue_size - 1);
+	return rdev->cq.offset + RM_CMD_CQ_SLOT_SIZE * index;
+}
+
+static int rm_queue_submit_cmd(struct rm_cmd *cmd)
+{
+	struct versal_pci_device *vdev = cmd->rdev->vdev;
+	struct rm_device *rdev = cmd->rdev;
+	u32 offset;
+	int ret;
+
+	mutex_lock(&rdev->queue);
+
+	offset = rm_queue_get_sq_slot_offset(rdev);
+	if (!offset) {
+		vdev_err(vdev, "No SQ slot available");
+		ret = -ENOSPC;
+		goto exit;
+	}
+
+	rm_queue_bulk_write(rdev, offset, (u32 *)&cmd->sq_msg,
+			    sizeof(cmd->sq_msg));
+
+	ret = rm_queue_set_pidx(rdev, RM_QUEUE_SQ, ++rdev->sq.pidx);
+	if (ret) {
+		vdev_err(vdev, "Failed to update PIDX, ret %d", ret);
+		goto exit;
+	}
+
+	list_add_tail(&cmd->list, &rdev->submitted_cmds);
+exit:
+	mutex_unlock(&rdev->queue);
+	return ret;
+}
+
+void rm_queue_withdraw_cmd(struct rm_cmd *cmd)
+{
+	mutex_lock(&cmd->rdev->queue);
+	list_del(&cmd->list);
+	mutex_unlock(&cmd->rdev->queue);
+}
+
+static int rm_queue_wait_cmd_timeout(struct rm_cmd *cmd, unsigned long timeout)
+{
+	struct versal_pci_device *vdev = cmd->rdev->vdev;
+	int ret;
+
+	if (wait_for_completion_timeout(&cmd->executed, timeout)) {
+		ret = cmd->cq_msg.data.rcode;
+		if (!ret)
+			return 0;
+
+		vdev_err(vdev, "CMD returned with a failure: %d", ret);
+		return ret;
+	}
+
+	/*
+	 * each cmds will be cleaned up by complete before it times out.
+	 * if we reach here, the cmd should be cleared and hot reset should
+	 * be issued.
+	 */
+	vdev_err(vdev, "cmd is timedout after, please reset the card");
+	rm_queue_withdraw_cmd(cmd);
+	return -ETIME;
+}
+
+int rm_queue_send_cmd(struct rm_cmd *cmd, unsigned long timeout)
+{
+	int ret;
+
+	ret = rm_queue_submit_cmd(cmd);
+	if (ret)
+		return ret;
+
+	return rm_queue_wait_cmd_timeout(cmd, timeout);
+}
+
+static int rm_process_msg(struct rm_device *rdev)
+{
+	struct versal_pci_device *vdev = rdev->vdev;
+	struct rm_cmd *cmd, *next;
+	struct rm_cmd_cq_hdr header;
+	u32 offset;
+
+	offset = rm_queue_get_cq_slot_offset(rdev);
+	if (!offset) {
+		vdev_err(vdev, "Invalid CQ offset");
+		return -EINVAL;
+	}
+
+	rm_queue_bulk_read(rdev, offset, (u32 *)&header, sizeof(header));
+
+	list_for_each_entry_safe(cmd, next, &rdev->submitted_cmds, list) {
+		u32 value = 0;
+
+		if (cmd->sq_msg.hdr.id != header.id)
+			continue;
+
+		rm_queue_bulk_read(rdev, offset + sizeof(cmd->cq_msg.hdr),
+				   (u32 *)&cmd->cq_msg.data,
+				   sizeof(cmd->cq_msg.data));
+
+		rm_queue_write(rdev, offset, value);
+
+		list_del(&cmd->list);
+		complete(&cmd->executed);
+		return 0;
+	}
+
+	vdev_err(vdev, "Unknown cmd ID %d found in CQ", header.id);
+	return -EFAULT;
+}
+
+static void rm_check_msg(struct work_struct *w)
+{
+	struct rm_device *rdev = to_rdev_msg_monitor(w);
+	int ret;
+
+	mutex_lock(&rdev->queue);
+
+	rdev->sq.cidx = rm_queue_get_cidx(rdev, RM_QUEUE_SQ);
+	rdev->cq.pidx = rm_queue_get_pidx(rdev, RM_QUEUE_CQ);
+
+	while (rdev->cq.cidx < rdev->cq.pidx) {
+		ret = rm_process_msg(rdev);
+		if (ret)
+			break;
+
+		rdev->cq.cidx++;
+
+		rm_queue_set_cidx(rdev, RM_QUEUE_CQ, rdev->cq.cidx);
+	}
+
+	mutex_unlock(&rdev->queue);
+}
+
+static void rm_sched_work(struct timer_list *t)
+{
+	struct rm_device *rdev = to_rdev_msg_timer(t);
+
+	/* Schedule a work in the general workqueue */
+	schedule_work(&rdev->msg_monitor);
+	/* Periodic timer */
+	mod_timer(&rdev->msg_timer, jiffies + RM_COMPLETION_TIMER);
+}
+
+void rm_queue_fini(struct rm_device *rdev)
+{
+	del_timer_sync(&rdev->msg_timer);
+	cancel_work_sync(&rdev->msg_monitor);
+	mutex_destroy(&rdev->queue);
+}
+
+int rm_queue_init(struct rm_device *rdev)
+{
+	struct versal_pci_device *vdev = rdev->vdev;
+	struct rm_queue_header header = {0};
+	int ret;
+
+	INIT_LIST_HEAD(&rdev->submitted_cmds);
+	mutex_init(&rdev->queue);
+
+	rm_queue_bulk_read(rdev, RM_HDR_OFF, (u32 *)&header, sizeof(header));
+
+	if (header.magic != RM_QUEUE_HDR_MAGIC_NUM) {
+		vdev_err(vdev, "Invalid RM queue header");
+		ret = -ENODEV;
+		goto error;
+	}
+
+	if (!header.version) {
+		vdev_err(vdev, "Invalid RM queue header");
+		ret = -ENODEV;
+		goto error;
+	}
+
+	sema_init(&rdev->sq.data_lock, 1);
+	sema_init(&rdev->cq.data_lock, 1);
+	rdev->queue_size = header.size;
+	rdev->sq.offset = header.sq_off;
+	rdev->cq.offset = header.cq_off;
+	rdev->sq.type = RM_QUEUE_SQ;
+	rdev->cq.type = RM_QUEUE_CQ;
+	rdev->sq.data_size = rdev->queue_buffer_size - RM_CMD_CQ_BUFFER_SIZE;
+	rdev->cq.data_size = RM_CMD_CQ_BUFFER_SIZE;
+	rdev->sq.data_offset = rdev->queue_buffer_start +
+			       RM_CMD_CQ_BUFFER_OFFSET + RM_CMD_CQ_BUFFER_SIZE;
+	rdev->cq.data_offset = rdev->queue_buffer_start +
+			       RM_CMD_CQ_BUFFER_OFFSET;
+	rdev->sq.cidx = header.sq_cidx;
+	rdev->cq.cidx = header.cq_cidx;
+
+	rdev->sq.pidx = rm_queue_get_pidx(rdev, RM_QUEUE_SQ);
+	rdev->cq.pidx = rm_queue_get_pidx(rdev, RM_QUEUE_CQ);
+
+	if (rdev->cq.cidx != rdev->cq.pidx) {
+		vdev_warn(vdev, "Clearing stale completions");
+		rdev->cq.cidx = rdev->cq.pidx;
+		rm_queue_set_cidx(rdev, RM_QUEUE_CQ, rdev->cq.cidx);
+	}
+
+	/* Create and schedule timer to do recurring work */
+	INIT_WORK(&rdev->msg_monitor, &rm_check_msg);
+	timer_setup(&rdev->msg_timer, &rm_sched_work, 0);
+	mod_timer(&rdev->msg_timer, jiffies + RM_COMPLETION_TIMER);
+
+	return 0;
+error:
+	mutex_destroy(&rdev->queue);
+	return ret;
+}
diff --git a/drivers/fpga/amd/versal-pci-rm-queue.h b/drivers/fpga/amd/versal-pci-rm-queue.h
new file mode 100644
index 000000000000..3bc7102c6a58
--- /dev/null
+++ b/drivers/fpga/amd/versal-pci-rm-queue.h
@@ -0,0 +1,21 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Driver for Versal PCIe device
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#ifndef __RM_QUEUE_H
+#define __RM_QUEUE_H
+
+struct rm_device;
+
+/* rm queue hardware setup */
+int rm_queue_init(struct rm_device *rdev);
+void rm_queue_fini(struct rm_device *rdev);
+
+/* rm queue common API */
+int rm_queue_send_cmd(struct rm_cmd *cmd, unsigned long timeout);
+void rm_queue_withdraw_cmd(struct rm_cmd *cmd);
+
+#endif	/* __RM_QUEUE_H */
diff --git a/drivers/fpga/amd/versal-pci-rm-service.h b/drivers/fpga/amd/versal-pci-rm-service.h
new file mode 100644
index 000000000000..85a78257770a
--- /dev/null
+++ b/drivers/fpga/amd/versal-pci-rm-service.h
@@ -0,0 +1,209 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Driver for Versal PCIe device
+ *
+ * Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
+ */
+
+#ifndef __RM_SERVICE_H
+#define __RM_SERVICE_H
+
+#define RM_HDR_OFF			0x0
+#define RM_HDR_MAGIC_NUM		0x564D5230
+#define RM_QUEUE_HDR_MAGIC_NUM		0x5847513F
+#define RM_PCI_IO_BAR_OFF		0x2010000
+#define RM_PCI_IO_SIZE			SZ_4K
+#define RM_PCI_SHMEM_BAR_OFF		0x8000000
+#define RM_PCI_SHMEM_SIZE		SZ_128M
+#define RM_PCI_SHMEM_HDR_SIZE		0x28
+
+#define RM_QUEUE_HDR_MAGIC_NUM_OFF	0x0
+#define RM_IO_SQ_PIDX_OFF		0x0
+#define RM_IO_CQ_PIDX_OFF		0x100
+
+#define RM_CMD_ID_MIN			1
+#define RM_CMD_ID_MAX			(BIT(17) - 1)
+#define RM_CMD_SQ_HDR_OPS_MSK		GENMASK(15, 0)
+#define RM_CMD_SQ_HDR_SIZE_MSK		GENMASK(14, 0)
+#define RM_CMD_SQ_SLOT_SIZE		SZ_512
+#define RM_CMD_CQ_SLOT_SIZE		SZ_16
+#define RM_CMD_CQ_BUFFER_SIZE		(1024 * 1024)
+#define RM_CMD_CQ_BUFFER_OFFSET		0x0
+#define RM_CMD_LOG_PAGE_TYPE_MASK	GENMASK(15, 0)
+#define RM_CMD_VMR_CONTROL_MSK		GENMASK(10, 8)
+#define RM_CMD_VMR_CONTROL_PS_MASK	BIT(9)
+
+#define RM_CMD_WAIT_CONFIG_TIMEOUT	msecs_to_jiffies(10 * 1000)
+#define RM_CMD_WAIT_DOWNLOAD_TIMEOUT	msecs_to_jiffies(300 * 1000)
+
+#define RM_COMPLETION_TIMER		(HZ / 10)
+#define RM_HEALTH_CHECK_TIMER		(HZ)
+
+#define RM_INVALID_SLOT			0
+
+enum rm_queue_opcode {
+	RM_QUEUE_OP_LOAD_XCLBIN		= 0x0,
+	RM_QUEUE_OP_GET_LOG_PAGE	= 0x8,
+	RM_QUEUE_OP_LOAD_FW		= 0xA,
+	RM_QUEUE_OP_LOAD_APU_FW		= 0xD,
+	RM_QUEUE_OP_VMR_CONTROL		= 0xE,
+	RM_QUEUE_OP_IDENTIFY		= 0x202,
+};
+
+struct rm_cmd_sq_hdr {
+	__u16 opcode;
+	__u16 msg_size;
+	__u16 id;
+	__u16 reserved;
+} __packed;
+
+struct rm_cmd_cq_hdr {
+	__u16 id;
+	__u16 reserved;
+} __packed;
+
+struct rm_cmd_sq_bin {
+	__u64			address;
+	__u32			size;
+	__u32			reserved1;
+	__u32			reserved2;
+	__u32			reserved3;
+	__u64			reserved4;
+} __packed;
+
+struct rm_cmd_sq_log_page {
+	__u64			address;
+	__u32			size;
+	__u32			reserved1;
+	__u32			type;
+	__u32			reserved2;
+} __packed;
+
+struct rm_cmd_sq_ctrl {
+	__u32			status;
+} __packed;
+
+struct rm_cmd_sq_data {
+	union {
+		struct rm_cmd_sq_log_page	page;
+		struct rm_cmd_sq_bin		bin;
+		struct rm_cmd_sq_ctrl		ctrl;
+	};
+} __packed;
+
+struct rm_cmd_cq_identify {
+	__u16			major;
+	__u16			minor;
+	__u32			reserved;
+} __packed;
+
+struct rm_cmd_cq_log_page {
+	__u32			len;
+	__u32			reserved;
+} __packed;
+
+struct rm_cmd_cq_control {
+	__u16			status;
+	__u16			reserved1;
+	__u32			reserved2;
+} __packed;
+
+struct rm_cmd_cq_data {
+	union {
+		struct rm_cmd_cq_identify	identify;
+		struct rm_cmd_cq_log_page	page;
+		struct rm_cmd_cq_control	ctrl;
+		__u32				reserved[2];
+	};
+	__u32			rcode;
+} __packed;
+
+struct rm_cmd_sq_msg {
+	struct rm_cmd_sq_hdr	hdr;
+	struct rm_cmd_sq_data	data;
+} __packed;
+
+struct rm_cmd_cq_msg {
+	struct rm_cmd_cq_hdr	hdr;
+	struct rm_cmd_cq_data	data;
+} __packed;
+
+struct rm_cmd {
+	struct rm_device	*rdev;
+	struct list_head	list;
+	struct completion	executed;
+	struct rm_cmd_sq_msg	sq_msg;
+	struct rm_cmd_cq_msg	cq_msg;
+	enum rm_queue_opcode	opcode;
+	__u8			*buffer;
+	ssize_t			size;
+};
+
+enum rm_queue_type {
+	RM_QUEUE_SQ,
+	RM_QUEUE_CQ
+};
+
+enum rm_cmd_log_page_type {
+	RM_CMD_LOG_PAGE_AXI_TRIP_STATUS	= 0x0,
+	RM_CMD_LOG_PAGE_FW_ID		= 0xA,
+};
+
+struct rm_queue {
+	enum rm_queue_type	type;
+	__u32			pidx;
+	__u32			cidx;
+	__u32			offset;
+	__u32			data_offset;
+	__u32			data_size;
+	struct semaphore	data_lock;
+};
+
+struct rm_queue_header {
+	__u32			magic;
+	__u32			version;
+	__u32			size;
+	__u32			sq_off;
+	__u32			sq_slot_size;
+	__u32			cq_off;
+	__u32			sq_cidx;
+	__u32			cq_cidx;
+};
+
+struct rm_header {
+	__u32			magic;
+	__u32			queue_base;
+	__u32			queue_size;
+	__u32			status_off;
+	__u32			status_len;
+	__u32			log_index;
+	__u32			log_off;
+	__u32			log_size;
+	__u32			data_start;
+	__u32			data_end;
+};
+
+struct rm_device {
+	struct versal_pci_device	*vdev;
+
+	struct rm_header	rm_metadata;
+	__u32			queue_buffer_start;
+	__u32			queue_buffer_size;
+	__u32			queue_base;
+
+	/* Lock to queue access */
+	struct mutex		queue;
+	struct rm_queue		sq;
+	struct rm_queue		cq;
+	__u32			queue_size;
+
+	struct timer_list	msg_timer;
+	struct work_struct	msg_monitor;
+	struct timer_list	health_timer;
+	struct work_struct	health_monitor;
+	struct list_head	submitted_cmds;
+
+	__u32			firewall_tripped;
+};
+
+#endif	/* __RM_SERVICE_H */