@@ -971,6 +971,12 @@ L: linux-edac@vger.kernel.org
S: Supported
F: drivers/ras/amd/atl/*
+AMD AE4DMA DRIVER
+M: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+L: dmaengine@vger.kernel.org
+S: Supported
+F: drivers/dma/amd/ae4dma/
+
AMD AXI W1 DRIVER
M: Kris Chaplin <kris.chaplin@amd.com>
R: Thomas Delev <thomas.delev@amd.com>
@@ -1,5 +1,20 @@
# SPDX-License-Identifier: GPL-2.0-only
#
+
+config AMD_AE4DMA
+ tristate "AMD AE4DMA Engine"
+ depends on (X86_64 || COMPILE_TEST) && PCI
+ depends on AMD_PTDMA
+ select DMA_ENGINE
+ select DMA_VIRTUAL_CHANNELS
+ help
+ Enable support for the AMD AE4DMA controller. This controller
+ provides DMA capabilities to perform high bandwidth memory to
+ memory and IO copy operations. It performs DMA transfer through
+ queue-based descriptor management. This DMA controller is intended
+ to be used with AMD Non-Transparent Bridge devices and not for
+ general purpose peripheral DMA.
+
config AMD_PTDMA
tristate "AMD PassThru DMA Engine"
depends on X86_64 && PCI
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_AMD_AE4DMA) += ae4dma/
obj-$(CONFIG_AMD_PTDMA) += ptdma/
obj-$(CONFIG_AMD_QDMA) += qdma/
new file mode 100644
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# AMD AE4DMA driver
+#
+
+obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
+
+ae4dma-objs := ae4dma-dev.o
+
+ae4dma-$(CONFIG_PCI) += ae4dma-pci.o
new file mode 100644
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD AE4DMA driver
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+
+#include "ae4dma.h"
+
+static unsigned int max_hw_q = 1;
+module_param(max_hw_q, uint, 0444);
+MODULE_PARM_DESC(max_hw_q, "max hw queues supported by engine (any non-zero value, default: 1)");
+
+static void ae4_pending_work(struct work_struct *work)
+{
+ struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct ae4_cmd_queue, p_work.work);
+ struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
+ struct pt_cmd *cmd;
+ u32 cridx;
+
+ for (;;) {
+ wait_event_interruptible(ae4cmd_q->q_w,
+ ((atomic64_read(&ae4cmd_q->done_cnt)) <
+ atomic64_read(&ae4cmd_q->intr_cnt)));
+
+ atomic64_inc(&ae4cmd_q->done_cnt);
+
+ mutex_lock(&ae4cmd_q->cmd_lock);
+ cridx = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
+ while ((ae4cmd_q->dridx != cridx) && !list_empty(&ae4cmd_q->cmd)) {
+ cmd = list_first_entry(&ae4cmd_q->cmd, struct pt_cmd, entry);
+ list_del(&cmd->entry);
+
+ ae4_check_status_error(ae4cmd_q, ae4cmd_q->dridx);
+ cmd->pt_cmd_callback(cmd->data, cmd->ret);
+
+ ae4cmd_q->q_cmd_count--;
+ ae4cmd_q->dridx = (ae4cmd_q->dridx + 1) % CMD_Q_LEN;
+
+ complete_all(&ae4cmd_q->cmp);
+ }
+ mutex_unlock(&ae4cmd_q->cmd_lock);
+ }
+}
+
+static irqreturn_t ae4_core_irq_handler(int irq, void *data)
+{
+ struct ae4_cmd_queue *ae4cmd_q = data;
+ struct pt_cmd_queue *cmd_q;
+ struct pt_device *pt;
+ u32 status;
+
+ cmd_q = &ae4cmd_q->cmd_q;
+ pt = cmd_q->pt;
+
+ pt->total_interrupts++;
+ atomic64_inc(&ae4cmd_q->intr_cnt);
+
+ status = readl(cmd_q->reg_control + AE4_INTR_STS_OFF);
+ if (status & BIT(0)) {
+ status &= GENMASK(31, 1);
+ writel(status, cmd_q->reg_control + AE4_INTR_STS_OFF);
+ }
+
+ wake_up(&ae4cmd_q->q_w);
+
+ return IRQ_HANDLED;
+}
+
+void ae4_destroy_work(struct ae4_device *ae4)
+{
+ struct ae4_cmd_queue *ae4cmd_q;
+ int i;
+
+ for (i = 0; i < ae4->cmd_q_count; i++) {
+ ae4cmd_q = &ae4->ae4cmd_q[i];
+
+ if (!ae4cmd_q->pws)
+ break;
+
+ cancel_delayed_work_sync(&ae4cmd_q->p_work);
+ destroy_workqueue(ae4cmd_q->pws);
+ }
+}
+
+int ae4_core_init(struct ae4_device *ae4)
+{
+ struct pt_device *pt = &ae4->pt;
+ struct ae4_cmd_queue *ae4cmd_q;
+ struct device *dev = pt->dev;
+ struct pt_cmd_queue *cmd_q;
+ int i, ret = 0;
+
+ writel(max_hw_q, pt->io_regs);
+
+ for (i = 0; i < max_hw_q; i++) {
+ ae4cmd_q = &ae4->ae4cmd_q[i];
+ ae4cmd_q->id = ae4->cmd_q_count;
+ ae4->cmd_q_count++;
+
+ cmd_q = &ae4cmd_q->cmd_q;
+ cmd_q->pt = pt;
+
+ cmd_q->reg_control = pt->io_regs + ((i + 1) * AE4_Q_SZ);
+
+ ret = devm_request_irq(dev, ae4->ae4_irq[i], ae4_core_irq_handler, 0,
+ dev_name(pt->dev), ae4cmd_q);
+ if (ret)
+ return ret;
+
+ cmd_q->qsize = Q_SIZE(sizeof(struct ae4dma_desc));
+
+ cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize, &cmd_q->qbase_dma,
+ GFP_KERNEL);
+ if (!cmd_q->qbase)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < ae4->cmd_q_count; i++) {
+ ae4cmd_q = &ae4->ae4cmd_q[i];
+
+ cmd_q = &ae4cmd_q->cmd_q;
+
+ cmd_q->reg_control = pt->io_regs + ((i + 1) * AE4_Q_SZ);
+
+ /* Update the device registers with queue information. */
+ writel(CMD_Q_LEN, cmd_q->reg_control + AE4_MAX_IDX_OFF);
+
+ cmd_q->qdma_tail = cmd_q->qbase_dma;
+ writel(lower_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + AE4_Q_BASE_L_OFF);
+ writel(upper_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + AE4_Q_BASE_H_OFF);
+
+ INIT_LIST_HEAD(&ae4cmd_q->cmd);
+ init_waitqueue_head(&ae4cmd_q->q_w);
+
+ ae4cmd_q->pws = alloc_ordered_workqueue("ae4dma_%d", WQ_MEM_RECLAIM, ae4cmd_q->id);
+ if (!ae4cmd_q->pws) {
+ ae4_destroy_work(ae4);
+ return -ENOMEM;
+ }
+ INIT_DELAYED_WORK(&ae4cmd_q->p_work, ae4_pending_work);
+ queue_delayed_work(ae4cmd_q->pws, &ae4cmd_q->p_work, usecs_to_jiffies(100));
+
+ init_completion(&ae4cmd_q->cmp);
+ }
+
+ return ret;
+}
new file mode 100644
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD AE4DMA driver
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+
+#include "ae4dma.h"
+
+static int ae4_get_irqs(struct ae4_device *ae4)
+{
+ struct ae4_msix *ae4_msix = ae4->ae4_msix;
+ struct pt_device *pt = &ae4->pt;
+ struct device *dev = pt->dev;
+ struct pci_dev *pdev;
+ int i, v, ret;
+
+ pdev = to_pci_dev(dev);
+
+ for (v = 0; v < ARRAY_SIZE(ae4_msix->msix_entry); v++)
+ ae4_msix->msix_entry[v].entry = v;
+
+ ret = pci_alloc_irq_vectors(pdev, v, v, PCI_IRQ_MSIX);
+ if (ret != v) {
+ if (ret > 0)
+ pci_free_irq_vectors(pdev);
+
+ dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+ if (ret < 0) {
+ dev_err(dev, "could not enable MSI (%d)\n", ret);
+ return ret;
+ }
+
+ ret = pci_irq_vector(pdev, 0);
+ if (ret < 0) {
+ pci_free_irq_vectors(pdev);
+ return ret;
+ }
+
+ for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
+ ae4->ae4_irq[i] = ret;
+
+ } else {
+ ae4_msix->msix_count = ret;
+ for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
+ ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector;
+ }
+
+ return ret;
+}
+
+static void ae4_free_irqs(struct ae4_device *ae4)
+{
+ struct ae4_msix *ae4_msix = ae4->ae4_msix;
+ struct pt_device *pt = &ae4->pt;
+ struct device *dev = pt->dev;
+ struct pci_dev *pdev;
+
+ pdev = to_pci_dev(dev);
+
+ if (ae4_msix && (ae4_msix->msix_count || ae4->ae4_irq[MAX_AE4_HW_QUEUES - 1]))
+ pci_free_irq_vectors(pdev);
+}
+
+static void ae4_deinit(struct ae4_device *ae4)
+{
+ ae4_free_irqs(ae4);
+}
+
+static int ae4_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct device *dev = &pdev->dev;
+ struct ae4_device *ae4;
+ struct pt_device *pt;
+ int bar_mask;
+ int ret = 0;
+
+ ae4 = devm_kzalloc(dev, sizeof(*ae4), GFP_KERNEL);
+ if (!ae4)
+ return -ENOMEM;
+
+ ae4->ae4_msix = devm_kzalloc(dev, sizeof(struct ae4_msix), GFP_KERNEL);
+ if (!ae4->ae4_msix)
+ return -ENOMEM;
+
+ ret = pcim_enable_device(pdev);
+ if (ret)
+ goto ae4_error;
+
+ bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
+ ret = pcim_iomap_regions(pdev, bar_mask, "ae4dma");
+ if (ret)
+ goto ae4_error;
+
+ pt = &ae4->pt;
+ pt->dev = dev;
+
+ pt->io_regs = pcim_iomap_table(pdev)[0];
+ if (!pt->io_regs) {
+ ret = -ENOMEM;
+ goto ae4_error;
+ }
+
+ ret = ae4_get_irqs(ae4);
+ if (ret < 0)
+ goto ae4_error;
+
+ pci_set_master(pdev);
+
+ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+
+ dev_set_drvdata(dev, ae4);
+
+ ret = ae4_core_init(ae4);
+ if (ret)
+ goto ae4_error;
+
+ return 0;
+
+ae4_error:
+ ae4_deinit(ae4);
+
+ return ret;
+}
+
+static void ae4_pci_remove(struct pci_dev *pdev)
+{
+ struct ae4_device *ae4 = dev_get_drvdata(&pdev->dev);
+
+ ae4_destroy_work(ae4);
+ ae4_deinit(ae4);
+}
+
+static const struct pci_device_id ae4_pci_table[] = {
+ { PCI_VDEVICE(AMD, 0x14C8), },
+ { PCI_VDEVICE(AMD, 0x14DC), },
+ { PCI_VDEVICE(AMD, 0x149B), },
+ /* Last entry must be zero */
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, ae4_pci_table);
+
+static struct pci_driver ae4_pci_driver = {
+ .name = "ae4dma",
+ .id_table = ae4_pci_table,
+ .probe = ae4_pci_probe,
+ .remove = ae4_pci_remove,
+};
+
+module_pci_driver(ae4_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AMD AE4DMA driver");
new file mode 100644
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD AE4DMA driver
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
+ */
+#ifndef __AE4DMA_H__
+#define __AE4DMA_H__
+
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+
+#include "../ptdma/ptdma.h"
+#include "../../virt-dma.h"
+
+#define MAX_AE4_HW_QUEUES 16
+
+#define AE4_DESC_COMPLETED 0x03
+
+#define AE4_MAX_IDX_OFF 0x08
+#define AE4_RD_IDX_OFF 0x0c
+#define AE4_WR_IDX_OFF 0x10
+#define AE4_INTR_STS_OFF 0x14
+#define AE4_Q_BASE_L_OFF 0x18
+#define AE4_Q_BASE_H_OFF 0x1c
+#define AE4_Q_SZ 0x20
+
+struct ae4_msix {
+ int msix_count;
+ struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
+};
+
+struct ae4_cmd_queue {
+ struct ae4_device *ae4;
+ struct pt_cmd_queue cmd_q;
+ struct list_head cmd;
+ /* protect command operations */
+ struct mutex cmd_lock;
+ struct delayed_work p_work;
+ struct workqueue_struct *pws;
+ struct completion cmp;
+ wait_queue_head_t q_w;
+ atomic64_t intr_cnt;
+ atomic64_t done_cnt;
+ u64 q_cmd_count;
+ u32 dridx;
+ u32 id;
+};
+
+union dwou {
+ u32 dw0;
+ struct dword0 {
+ u8 byte0;
+ u8 byte1;
+ u16 timestamp;
+ } dws;
+};
+
+struct dword1 {
+ u8 status;
+ u8 err_code;
+ u16 desc_id;
+};
+
+struct ae4dma_desc {
+ union dwou dwouv;
+ struct dword1 dw1;
+ u32 length;
+ u32 rsvd;
+ u32 src_hi;
+ u32 src_lo;
+ u32 dst_hi;
+ u32 dst_lo;
+};
+
+struct ae4_device {
+ struct pt_device pt;
+ struct ae4_msix *ae4_msix;
+ struct ae4_cmd_queue ae4cmd_q[MAX_AE4_HW_QUEUES];
+ unsigned int ae4_irq[MAX_AE4_HW_QUEUES];
+ unsigned int cmd_q_count;
+};
+
+int ae4_core_init(struct ae4_device *ae4);
+void ae4_destroy_work(struct ae4_device *ae4);
+void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx);
+#endif
@@ -10,8 +10,57 @@
*/
#include "ptdma.h"
+#include "../ae4dma/ae4dma.h"
#include "../../dmaengine.h"
+static char *ae4_error_codes[] = {
+ "",
+ "ERR 01: INVALID HEADER DW0",
+ "ERR 02: INVALID STATUS",
+ "ERR 03: INVALID LENGTH - 4 BYTE ALIGNMENT",
+ "ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT",
+ "ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT",
+ "ERR 06: INVALID ALIGNMENT",
+ "ERR 07: INVALID DESCRIPTOR",
+};
+
+static void ae4_log_error(struct pt_device *d, int e)
+{
+ /* ERR 01 - 07 represents Invalid AE4 errors */
+ if (e <= 7)
+ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", ae4_error_codes[e], e);
+ /* ERR 08 - 15 represents Invalid Descriptor errors */
+ else if (e > 7 && e <= 15)
+ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
+ /* ERR 16 - 31 represents Firmware errors */
+ else if (e > 15 && e <= 31)
+ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "FIRMWARE ERROR", e);
+ /* ERR 32 - 63 represents Fatal errors */
+ else if (e > 31 && e <= 63)
+ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "FATAL ERROR", e);
+ /* ERR 64 - 255 represents PTE errors */
+ else if (e > 63 && e <= 255)
+ dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e);
+ else
+ dev_info(d->dev, "Unknown AE4DMA error");
+}
+
+void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx)
+{
+ struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
+ struct ae4dma_desc desc;
+ u8 status;
+
+ memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct ae4dma_desc));
+ status = desc.dw1.status;
+ if (status && status != AE4_DESC_COMPLETED) {
+ cmd_q->cmd_error = desc.dw1.err_code;
+ if (cmd_q->cmd_error)
+ ae4_log_error(cmd_q->pt, cmd_q->cmd_error);
+ }
+}
+EXPORT_SYMBOL_GPL(ae4_check_status_error);
+
static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan)
{
return container_of(dma_chan, struct pt_dma_chan, vc.chan);