@@ -121,6 +121,9 @@ config XEN_PCIDEV_FRONTEND
config PCI_ATS
bool
+config PCI_DOE
+ bool
+
config PCI_ECAM
bool
@@ -31,6 +31,7 @@ obj-$(CONFIG_PCI_ECAM) += ecam.o
obj-$(CONFIG_PCI_P2PDMA) += p2pdma.o
obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
obj-$(CONFIG_VGA_ARB) += vgaarb.o
+obj-$(CONFIG_PCI_DOE) += doe.o
# Endpoint library must be initialized before its users
obj-$(CONFIG_PCI_ENDPOINT) += endpoint/
new file mode 100644
@@ -0,0 +1,633 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data Object Exchange
+ * PCIe r6.0, sec 6.30 DOE
+ *
+ * Copyright (C) 2021 Huawei
+ * Jonathan Cameron <Jonathan.Cameron@huawei.com>
+ *
+ * Copyright (C) 2022 Intel Corporation
+ * Ira Weiny <ira.weiny@intel.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/pci-doe.h>
+#include <linux/workqueue.h>
+
+#define PCI_DOE_PROTOCOL_DISCOVERY 0
+
+#define PCI_DOE_BUSY_MAX_RETRIES 16
+#define PCI_DOE_POLL_INTERVAL (HZ / 128)
+
+/* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
+#define PCI_DOE_TIMEOUT HZ
+
+static irqreturn_t pci_doe_irq_handler(int irq, void *data)
+{
+ struct pci_doe_mb *doe_mb = data;
+ struct pci_dev *pdev = doe_mb->pdev;
+ int offset = doe_mb->cap_offset;
+ u32 val;
+
+ pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
+
+ /* Leave the error case to be handled outside IRQ */
+ if (FIELD_GET(PCI_DOE_STATUS_ERROR, val)) {
+ mod_delayed_work(system_wq, &doe_mb->statemachine, 0);
+ return IRQ_HANDLED;
+ }
+
+ if (FIELD_GET(PCI_DOE_STATUS_INT_STATUS, val)) {
+ pci_write_config_dword(pdev, offset + PCI_DOE_STATUS,
+ PCI_DOE_STATUS_INT_STATUS);
+ mod_delayed_work(system_wq, &doe_mb->statemachine, 0);
+ return IRQ_HANDLED;
+ }
+
+ return IRQ_NONE;
+}
+
+/*
+ * Only called when safe to directly access the DOE from
+ * doe_statemachine_work(). Outside access is not protected. Users who
+ * perform such access are left with the pieces.
+ */
+static void pci_doe_abort_start(struct pci_doe_mb *doe_mb)
+{
+ struct pci_dev *pdev = doe_mb->pdev;
+ int offset = doe_mb->cap_offset;
+ u32 val;
+
+ val = PCI_DOE_CTRL_ABORT;
+ if (doe_mb->irq >= 0)
+ val |= PCI_DOE_CTRL_INT_EN;
+ pci_write_config_dword(pdev, offset + PCI_DOE_CTRL, val);
+
+ doe_mb->timeout_jiffies = jiffies + HZ;
+ schedule_delayed_work(&doe_mb->statemachine, HZ);
+}
+
+static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
+ struct pci_doe_task *task)
+{
+ struct pci_dev *pdev = doe_mb->pdev;
+ int offset = doe_mb->cap_offset;
+ u32 val;
+ int i;
+
+ /*
+ * Check that the DOE busy bit is not set. If it is set, this could
+ * indicate that someone other than Linux (e.g. firmware) is using the
+ * mailbox. Note that firmware and the OS are expected to negotiate
+ * access rights via an as-yet-undefined method.
+ */
+ pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
+ if (FIELD_GET(PCI_DOE_STATUS_BUSY, val))
+ return -EBUSY;
+
+ if (FIELD_GET(PCI_DOE_STATUS_ERROR, val))
+ return -EIO;
+
+ /* Write DOE Header */
+ val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->prot.vid) |
+ FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->prot.type);
+ pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val);
+ /* Length is 2 DW of header + length of payload in DW */
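+ /* e.g. a 12-byte request payload gives a length field of 2 + 3 = 5 DW */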
+ pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
+ FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH,
+ 2 + task->request_pl_sz /
+ sizeof(u32)));
+ for (i = 0; i < task->request_pl_sz / sizeof(u32); i++)
+ pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
+ task->request_pl[i]);
+
+ val = PCI_DOE_CTRL_GO;
+ if (doe_mb->irq >= 0)
+ val |= PCI_DOE_CTRL_INT_EN;
+
+ pci_write_config_dword(pdev, offset + PCI_DOE_CTRL, val);
+ /* Request is sent - now wait for poll or IRQ */
+ return 0;
+}
+
+static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
+{
+ struct pci_dev *pdev = doe_mb->pdev;
+ int offset = doe_mb->cap_offset;
+ size_t length;
+ u32 val;
+ int i;
+
+ /* Read the first dword to get the protocol */
+ pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
+ if ((FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val) != task->prot.vid) ||
+ (FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val) != task->prot.type)) {
+ pci_err(pdev,
+ "DOE [%x] expected [VID, Protocol] = [%04x, %02x], got [%04x, %02x]\n",
+ doe_mb->cap_offset,
+ task->prot.vid, task->prot.type,
+ FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val),
+ FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val));
+ return -EIO;
+ }
+
+ pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
+ /* Read the second dword to get the length */
+ pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
+ pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
+
+ length = FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, val);
+ if (length > SZ_1M || length < 2)
+ return -EIO;
+
+ /* First 2 dwords have already been read */
+ length -= 2;
+ /* Read the rest of the response payload */
+ for (i = 0; i < min(length, task->response_pl_sz / sizeof(u32)); i++) {
+ pci_read_config_dword(pdev, offset + PCI_DOE_READ,
+ &task->response_pl[i]);
+ pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
+ }
+
+ /* Flush excess length */
+ for (; i < length; i++) {
+ pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
+ pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
+ }
+ /* Final error check to pick up on any errors since Data Object Ready */
+ pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
+ if (FIELD_GET(PCI_DOE_STATUS_ERROR, val))
+ return -EIO;
+
+ return min(length, task->response_pl_sz / sizeof(u32)) * sizeof(u32);
+}
+
+static void signal_task_complete(struct pci_doe_task *task, int rv)
+{
+ task->rv = rv;
+ task->complete(task);
+}
+
+static void retire_cur_task(struct pci_doe_mb *doe_mb)
+{
+ mutex_lock(&doe_mb->task_lock);
+ doe_mb->cur_task = NULL;
+ mutex_unlock(&doe_mb->task_lock);
+ wake_up_interruptible(&doe_mb->wq);
+}
+
+static void doe_statemachine_work(struct work_struct *work)
+{
+ struct delayed_work *w = to_delayed_work(work);
+ struct pci_doe_mb *doe_mb = container_of(w, struct pci_doe_mb,
+ statemachine);
+ struct pci_dev *pdev = doe_mb->pdev;
+ int offset = doe_mb->cap_offset;
+ struct pci_doe_task *task;
+ u32 val;
+ int rc;
+
+ mutex_lock(&doe_mb->task_lock);
+ task = doe_mb->cur_task;
+ mutex_unlock(&doe_mb->task_lock);
+
+ if (test_and_clear_bit(PCI_DOE_FLAG_ABORT, &doe_mb->flags)) {
+ /*
+ * Currently only used during init - care needed if
+ * pci_doe_abort() is generally exposed as it would impact
+ * queries in flight.
+ */
+ if (task)
+ pci_err(pdev, "DOE [%x] Aborting with active task!\n",
+ doe_mb->cap_offset);
+ doe_mb->state = DOE_WAIT_ABORT;
+ pci_doe_abort_start(doe_mb);
+ return;
+ }
+
+ switch (doe_mb->state) {
+ case DOE_IDLE:
+ if (task == NULL)
+ return;
+
+ rc = pci_doe_send_req(doe_mb, task);
+
+ /*
+ * The specification does not provide any guidance on how long
+ * some other entity could keep the DOE busy, so try for 1
+ * second then fail. Busy handling is best effort only, because
+ * there is no way of avoiding racing against another user of
+ * the DOE.
+ */
+ if (rc == -EBUSY) {
+ doe_mb->busy_retries++;
+ if (doe_mb->busy_retries == PCI_DOE_BUSY_MAX_RETRIES) {
+ /* Long enough, fail this request */
+ pci_warn(pdev,
+ "DOE [%x] busy for too long (> 1 sec)\n",
+ doe_mb->cap_offset);
+ doe_mb->busy_retries = 0;
+ goto err_busy;
+ }
+ schedule_delayed_work(w, HZ / PCI_DOE_BUSY_MAX_RETRIES);
+ return;
+ }
+ if (rc)
+ goto err_abort;
+ doe_mb->busy_retries = 0;
+
+ doe_mb->state = DOE_WAIT_RESP;
+ doe_mb->timeout_jiffies = jiffies + HZ;
+ /* Now poll or wait for IRQ with timeout */
+ if (doe_mb->irq >= 0)
+ schedule_delayed_work(w, PCI_DOE_TIMEOUT);
+ else
+ schedule_delayed_work(w, PCI_DOE_POLL_INTERVAL);
+ return;
+
+ case DOE_WAIT_RESP:
+ /* Not possible to get here with NULL task */
+ pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
+ if (FIELD_GET(PCI_DOE_STATUS_ERROR, val)) {
+ rc = -EIO;
+ goto err_abort;
+ }
+
+ if (!FIELD_GET(PCI_DOE_STATUS_DATA_OBJECT_READY, val)) {
+ /* If not yet at timeout, reschedule; otherwise abort */
+ if (time_after(jiffies, doe_mb->timeout_jiffies)) {
+ rc = -ETIMEDOUT;
+ goto err_abort;
+ }
+ schedule_delayed_work(w, PCI_DOE_POLL_INTERVAL);
+ return;
+ }
+
+ rc = pci_doe_recv_resp(doe_mb, task);
+ if (rc < 0)
+ goto err_abort;
+
+ doe_mb->state = DOE_IDLE;
+
+ retire_cur_task(doe_mb);
+ /* Set the return value to the length of received payload */
+ signal_task_complete(task, rc);
+
+ return;
+
+ case DOE_WAIT_ABORT:
+ case DOE_WAIT_ABORT_ON_ERR:
+ pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
+
+ if (!FIELD_GET(PCI_DOE_STATUS_ERROR, val) &&
+ !FIELD_GET(PCI_DOE_STATUS_BUSY, val)) {
+ /* Back to normal state - carry on */
+ retire_cur_task(doe_mb);
+
+ /*
+ * For deliberately triggered abort, someone is
+ * waiting.
+ */
+ if (doe_mb->state == DOE_WAIT_ABORT) {
+ if (task)
+ signal_task_complete(task, -EFAULT);
+ complete(&doe_mb->abort_c);
+ }
+
+ doe_mb->state = DOE_IDLE;
+ return;
+ }
+ if (time_after(jiffies, doe_mb->timeout_jiffies)) {
+ /* Task has timed out and is dead - abort */
+ pci_err(pdev, "DOE [%x] ABORT timed out\n",
+ doe_mb->cap_offset);
+ set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
+ retire_cur_task(doe_mb);
+
+ if (doe_mb->state == DOE_WAIT_ABORT) {
+ if (task)
+ signal_task_complete(task, -EFAULT);
+ complete(&doe_mb->abort_c);
+ }
+ }
+ return;
+ }
+
+err_abort:
+ doe_mb->state = DOE_WAIT_ABORT_ON_ERR;
+ pci_doe_abort_start(doe_mb);
+err_busy:
+ signal_task_complete(task, rc);
+ if (doe_mb->state == DOE_IDLE)
+ retire_cur_task(doe_mb);
+}
+
+static void pci_doe_task_complete(struct pci_doe_task *task)
+{
+ complete(task->private);
+}
+
+static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
+ u8 *protocol)
+{
+ u32 request_pl = FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX,
+ *index);
+ u32 response_pl;
+ DECLARE_COMPLETION_ONSTACK(c);
+ struct pci_doe_task task = {
+ .prot.vid = PCI_VENDOR_ID_PCI_SIG,
+ .prot.type = PCI_DOE_PROTOCOL_DISCOVERY,
+ .request_pl = &request_pl,
+ .request_pl_sz = sizeof(request_pl),
+ .response_pl = &response_pl,
+ .response_pl_sz = sizeof(response_pl),
+ .complete = pci_doe_task_complete,
+ .private = &c,
+ };
+ int ret;
+
+ ret = pci_doe_submit_task(doe_mb, &task);
+ if (ret < 0)
+ return ret;
+
+ wait_for_completion(&c);
+
+ if (task.rv != sizeof(response_pl))
+ return -EIO;
+
+ *vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl);
+ *protocol = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL,
+ response_pl);
+ *index = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX,
+ response_pl);
+
+ return 0;
+}
+
+static int pci_doe_cache_protocols(struct pci_doe_mb *doe_mb)
+{
+ u8 index = 0;
+ int num_prots;
+ int rc;
+
+ /* Discovery protocol must always be supported and must report itself */
+ num_prots = 1;
+
+ doe_mb->prots = kcalloc(num_prots, sizeof(*doe_mb->prots), GFP_KERNEL);
+ if (!doe_mb->prots)
+ return -ENOMEM;
+
+ /*
+ * NOTE: doe_mb->prots is freed by pci_doe_free_mb() automatically on
+ * error if pci_doe_cache_protocols() fails past this point.
+ */
+ do {
+ struct pci_doe_protocol *prot;
+
+ prot = &doe_mb->prots[num_prots - 1];
+ rc = pci_doe_discovery(doe_mb, &index, &prot->vid, &prot->type);
+ if (rc)
+ return rc;
+
+ if (index) {
+ struct pci_doe_protocol *prot_new;
+
+ num_prots++;
+ prot_new = krealloc(doe_mb->prots,
+ sizeof(*doe_mb->prots) * num_prots,
+ GFP_KERNEL);
+ if (!prot_new)
+ return -ENOMEM;
+
+ doe_mb->prots = prot_new;
+ }
+ } while (index);
+
+ doe_mb->num_prots = num_prots;
+ return 0;
+}
+
+static int pci_doe_abort(struct pci_doe_mb *doe_mb)
+{
+ reinit_completion(&doe_mb->abort_c);
+ set_bit(PCI_DOE_FLAG_ABORT, &doe_mb->flags);
+ schedule_delayed_work(&doe_mb->statemachine, 0);
+ wait_for_completion(&doe_mb->abort_c);
+
+ if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
+ return -EIO;
+
+ return 0;
+}
+
+static int pci_doe_enable_irq(struct pci_doe_mb *doe_mb, unsigned int irq)
+{
+ struct pci_dev *pdev = doe_mb->pdev;
+ int offset = doe_mb->cap_offset;
+ int rc;
+
+ /*
+ * Enabling bus mastering is required for MSI/MSI-X. It is safe to call
+ * this multiple times, so it is called here to ensure that mastering is
+ * enabled even if the driver has not already done so.
+ */
+ pci_set_master(pdev);
+ rc = pci_request_irq(pdev, irq, pci_doe_irq_handler, NULL, doe_mb,
+ "DOE[%d:%s]", irq, pci_name(pdev));
+ if (rc)
+ return rc;
+
+ doe_mb->irq = irq;
+ pci_write_config_dword(pdev, offset + PCI_DOE_CTRL,
+ PCI_DOE_CTRL_INT_EN);
+ return 0;
+}
+
+static void pci_doe_free_mb(struct pci_doe_mb *doe_mb)
+{
+ if (doe_mb->irq >= 0)
+ pci_free_irq(doe_mb->pdev, doe_mb->irq, doe_mb);
+ kfree(doe_mb->prots);
+ kfree(doe_mb);
+}
+
+/**
+ * pci_doe_get_irq_num() - Return the irq number for the mailbox at offset
+ *
+ * @pdev: The PCI device
+ * @offset: Offset of the DOE mailbox
+ *
+ * RETURNS: irq number on success
+ * -errno if irqs are not supported on this mailbox
+ */
+int pci_doe_get_irq_num(struct pci_dev *pdev, int offset)
+{
+ u32 val;
+
+ pci_read_config_dword(pdev, offset + PCI_DOE_CAP, &val);
+ if (!FIELD_GET(PCI_DOE_CAP_INT, val))
+ return -EOPNOTSUPP;
+
+ return FIELD_GET(PCI_DOE_CAP_IRQ, val);
+}
+EXPORT_SYMBOL_GPL(pci_doe_get_irq_num);
+
+/**
+ * pci_doe_create_mb() - Create a DOE mailbox object
+ *
+ * @pdev: PCI device to create the DOE mailbox for
+ * @cap_offset: Offset of the DOE mailbox
+ * @irq: irq number to use; a negative value means don't use interrupts
+ *
+ * Create a single mailbox object to manage the mailbox protocol at the
+ * cap_offset specified.
+ *
+ * Caller should allocate PCI IRQ vectors before passing a non-negative @irq.
+ *
+ * RETURNS: created mailbox object on success
+ * ERR_PTR(-errno) on failure
+ */
+struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev, u16 cap_offset,
+ int irq)
+{
+ struct pci_doe_mb *doe_mb;
+ int rc;
+
+ doe_mb = kzalloc(sizeof(*doe_mb), GFP_KERNEL);
+ if (!doe_mb)
+ return ERR_PTR(-ENOMEM);
+
+ doe_mb->pdev = pdev;
+ init_completion(&doe_mb->abort_c);
+ doe_mb->irq = -1;
+ doe_mb->cap_offset = cap_offset;
+
+ init_waitqueue_head(&doe_mb->wq);
+ mutex_init(&doe_mb->task_lock);
+ INIT_DELAYED_WORK(&doe_mb->statemachine, doe_statemachine_work);
+ doe_mb->state = DOE_IDLE;
+
+ if (irq >= 0) {
+ rc = pci_doe_enable_irq(doe_mb, irq);
+ if (rc)
+ pci_err(pdev,
+ "DOE [%x] enable requested IRQ (%d) failed : %d\n",
+ doe_mb->cap_offset, irq, rc);
+ }
+
+ /* Reset the mailbox by issuing an abort */
+ rc = pci_doe_abort(doe_mb);
+ if (rc) {
+ pci_err(pdev, "DOE [%x] failed to reset : %d\n",
+ doe_mb->cap_offset, rc);
+ pci_doe_free_mb(doe_mb);
+ return ERR_PTR(rc);
+ }
+
+ rc = pci_doe_cache_protocols(doe_mb);
+ if (rc) {
+ pci_err(pdev, "DOE [%x] failed to cache protocols : %d\n",
+ doe_mb->cap_offset, rc);
+ pci_doe_free_mb(doe_mb);
+ return ERR_PTR(rc);
+ }
+
+ return doe_mb;
+}
+EXPORT_SYMBOL_GPL(pci_doe_create_mb);
+
+/**
+ * pci_doe_supports_prot() - Return if the DOE instance supports the given
+ * protocol
+ * @doe_mb: DOE mailbox capability to query
+ * @vid: Protocol Vendor ID
+ * @type: Protocol type
+ *
+ * RETURNS: True if the DOE mailbox supports the protocol specified
+ */
+bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
+{
+ int i;
+
+ /* The discovery protocol must always be supported */
+ if (vid == PCI_VENDOR_ID_PCI_SIG && type == PCI_DOE_PROTOCOL_DISCOVERY)
+ return true;
+
+ for (i = 0; i < doe_mb->num_prots; i++)
+ if ((doe_mb->prots[i].vid == vid) &&
+ (doe_mb->prots[i].type == type))
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(pci_doe_supports_prot);
+
+/**
+ * pci_doe_submit_task() - Submit a task to be processed by the state machine
+ *
+ * @doe_mb: DOE mailbox capability to submit to
+ * @task: task to be queued
+ *
+ * Submit a DOE task (request/response) to the DOE mailbox to be processed.
+ * Returns upon queueing the task object. If the queue is full this function
+ * will sleep until there is room in the queue.
+ *
+ * task->complete will be called when the state machine is done processing this
+ * task.
+ *
+ * Excess data will be discarded.
+ *
+ * RETURNS: 0 when the task has been successfully queued, -ERRNO on error
+ */
+int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
+{
+ if (!pci_doe_supports_prot(doe_mb, task->prot.vid, task->prot.type))
+ return -EINVAL;
+
+ /* DOE requests must be a whole number of DW */
+ if (task->request_pl_sz % sizeof(u32))
+ return -EINVAL;
+
+again:
+ mutex_lock(&doe_mb->task_lock);
+ if (doe_mb->cur_task) {
+ mutex_unlock(&doe_mb->task_lock);
+ wait_event_interruptible(doe_mb->wq, doe_mb->cur_task == NULL);
+ goto again;
+ }
+
+ if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags)) {
+ mutex_unlock(&doe_mb->task_lock);
+ return -EIO;
+ }
+ doe_mb->cur_task = task;
+ mutex_unlock(&doe_mb->task_lock);
+ schedule_delayed_work(&doe_mb->statemachine, 0);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pci_doe_submit_task);
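+
+/*
+ * A minimal submission sketch, mirroring the pci_doe_discovery() pattern
+ * above; it is illustrative only and not part of this file's API. The vendor
+ * ID, object type, payload sizes and function names are hypothetical:
+ *
+ *	static void my_doe_task_complete(struct pci_doe_task *task)
+ *	{
+ *		complete(task->private);
+ *	}
+ *
+ *	static int my_doe_query(struct pci_doe_mb *doe_mb)
+ *	{
+ *		DECLARE_COMPLETION_ONSTACK(c);
+ *		u32 req[2] = { 0 };
+ *		u32 rsp[8];
+ *		struct pci_doe_task task = {
+ *			.prot.vid = 0x1234,
+ *			.prot.type = 0x02,
+ *			.request_pl = req,
+ *			.request_pl_sz = sizeof(req),
+ *			.response_pl = rsp,
+ *			.response_pl_sz = sizeof(rsp),
+ *			.complete = my_doe_task_complete,
+ *			.private = &c,
+ *		};
+ *		int rc = pci_doe_submit_task(doe_mb, &task);
+ *
+ *		if (rc < 0)
+ *			return rc;
+ *		wait_for_completion(&c);
+ *		return task.rv;
+ *	}
+ */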
+
+/**
+ * pci_doe_destroy_mb() - Destroy a DOE mailbox object created with
+ * pci_doe_create_mb()
+ *
+ * @doe_mb: DOE mailbox capability structure to destroy
+ *
+ * The mailbox becomes invalid and should not be used after this call.
+ */
+void pci_doe_destroy_mb(struct pci_doe_mb *doe_mb)
+{
+ /* abort any work in progress */
+ pci_doe_abort(doe_mb);
+
+ /* halt the state machine */
+ cancel_delayed_work_sync(&doe_mb->statemachine);
+
+ pci_doe_free_mb(doe_mb);
+}
+EXPORT_SYMBOL_GPL(pci_doe_destroy_mb);
new file mode 100644
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Data Object Exchange
+ * PCIe r6.0, sec 6.30 DOE
+ *
+ * Copyright (C) 2021 Huawei
+ * Jonathan Cameron <Jonathan.Cameron@huawei.com>
+ *
+ * Copyright (C) 2022 Intel Corporation
+ * Ira Weiny <ira.weiny@intel.com>
+ */
+
+#ifndef LINUX_PCI_DOE_H
+#define LINUX_PCI_DOE_H
+
+#include <linux/completion.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+
+enum pci_doe_state {
+ DOE_IDLE,
+ DOE_WAIT_RESP,
+ DOE_WAIT_ABORT,
+ DOE_WAIT_ABORT_ON_ERR,
+};
+
+#define PCI_DOE_FLAG_ABORT 0
+#define PCI_DOE_FLAG_DEAD 1
+
+struct pci_doe_protocol {
+ u16 vid;
+ u8 type;
+};
+
+/**
+ * struct pci_doe_task - represents a single query/response
+ *
+ * @prot: DOE Protocol
+ * @request_pl: The request payload
+ * @request_pl_sz: Size of the request payload
+ * @response_pl: The response payload
+ * @response_pl_sz: Size of the response payload
+ * @rv: Return value. Length of received response or error
+ * @complete: Called when task is complete
+ * @private: Private data for the consumer
+ */
+struct pci_doe_task {
+ struct pci_doe_protocol prot;
+ u32 *request_pl;
+ size_t request_pl_sz;
+ u32 *response_pl;
+ size_t response_pl_sz;
+ int rv;
+ void (*complete)(struct pci_doe_task *task);
+ void *private;
+};
+
+/**
+ * struct pci_doe_mb - State for a single DOE mailbox
+ *
+ * This state is used to manage a single DOE mailbox capability. All fields
+ * should be considered opaque to the consumers, and the structure should
+ * only be passed to the helpers below after being created by
+ * pci_doe_create_mb().
+ *
+ * @pdev: PCI device this mailbox belongs to
+ * @abort_c: Completion used for initial abort handling
+ * @irq: Interrupt used for signaling DOE ready or abort
+ * @prots: Array of protocols supported on this DOE
+ * @num_prots: Size of prots array
+ * @cap_offset: Capability offset
+ * @wq: Wait queue to wait on if a query is in progress
+ * @cur_task: Current task the state machine is working on
+ * @task_lock: Protect cur_task
+ * @statemachine: Work item for the DOE state machine
+ * @state: Current state of this DOE
+ * @timeout_jiffies: 1 second after GO set
+ * @busy_retries: Count of retry attempts
+ * @flags: Bit array of PCI_DOE_FLAG_* flags
+ *
+ * Note: @prots can't be allocated as part of the structure because the
+ * number of protocols is not known until this structure is already in use.
+ * However, the single discovery protocol is always required in order to
+ * query for the number of protocols.
+ */
+struct pci_doe_mb {
+ struct pci_dev *pdev;
+ struct completion abort_c;
+ int irq;
+ struct pci_doe_protocol *prots;
+ int num_prots;
+ u16 cap_offset;
+
+ wait_queue_head_t wq;
+ struct pci_doe_task *cur_task;
+ struct mutex task_lock;
+ struct delayed_work statemachine;
+ enum pci_doe_state state;
+ unsigned long timeout_jiffies;
+ unsigned int busy_retries;
+ unsigned long flags;
+};
+
+/**
+ * pci_doe_for_each_off - Iterate each DOE capability
+ * @pdev: struct pci_dev to iterate
+ * @off: u16 set to the config space offset of each mailbox capability found
+ */
+#define pci_doe_for_each_off(pdev, off) \
+ for (off = pci_find_next_ext_capability(pdev, off, \
+ PCI_EXT_CAP_ID_DOE); \
+ off > 0; \
+ off = pci_find_next_ext_capability(pdev, off, \
+ PCI_EXT_CAP_ID_DOE))
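+
+/*
+ * A minimal discovery/creation sketch, assuming a hypothetical driver probe
+ * path where pci_alloc_irq_vectors() has already been called (pass a
+ * negative irq to fall back to polling); my_vid and my_type are
+ * placeholders for the protocol the caller cares about:
+ *
+ *	u16 off = 0;
+ *
+ *	pci_doe_for_each_off(pdev, off) {
+ *		struct pci_doe_mb *mb;
+ *		int irq = pci_doe_get_irq_num(pdev, off);
+ *
+ *		mb = pci_doe_create_mb(pdev, off, irq);
+ *		if (IS_ERR(mb))
+ *			continue;
+ *		if (!pci_doe_supports_prot(mb, my_vid, my_type)) {
+ *			pci_doe_destroy_mb(mb);
+ *			continue;
+ *		}
+ *	}
+ *
+ * The caller would keep each matching mailbox around for later
+ * pci_doe_submit_task() calls and destroy it with pci_doe_destroy_mb()
+ * on teardown.
+ */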
+
+int pci_doe_get_irq_num(struct pci_dev *pdev, int offset);
+struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev, u16 cap_offset,
+ int irq);
+void pci_doe_destroy_mb(struct pci_doe_mb *doe_mb);
+bool pci_doe_supports_prot(struct pci_doe_mb *doe_mb, u16 vid, u8 type);
+int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task);
+
+#endif
@@ -736,7 +736,8 @@
#define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */
#define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */
#define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */
-#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_16GT
+#define PCI_EXT_CAP_ID_DOE 0x2E /* Data Object Exchange */
+#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DOE
#define PCI_EXT_CAP_DSN_SIZEOF 12
#define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
@@ -1102,4 +1103,30 @@
#define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK 0x000000F0
#define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT 4
+/* Data Object Exchange */
+#define PCI_DOE_CAP 0x04 /* DOE Capabilities Register */
+#define PCI_DOE_CAP_INT 0x00000001 /* Interrupt Support */
+#define PCI_DOE_CAP_IRQ 0x00000ffe /* Interrupt Message Number */
+#define PCI_DOE_CTRL 0x08 /* DOE Control Register */
+#define PCI_DOE_CTRL_ABORT 0x00000001 /* DOE Abort */
+#define PCI_DOE_CTRL_INT_EN 0x00000002 /* DOE Interrupt Enable */
+#define PCI_DOE_CTRL_GO 0x80000000 /* DOE Go */
+#define PCI_DOE_STATUS 0x0c /* DOE Status Register */
+#define PCI_DOE_STATUS_BUSY 0x00000001 /* DOE Busy */
+#define PCI_DOE_STATUS_INT_STATUS 0x00000002 /* DOE Interrupt Status */
+#define PCI_DOE_STATUS_ERROR 0x00000004 /* DOE Error */
+#define PCI_DOE_STATUS_DATA_OBJECT_READY 0x80000000 /* Data Object Ready */
+#define PCI_DOE_WRITE 0x10 /* DOE Write Data Mailbox Register */
+#define PCI_DOE_READ 0x14 /* DOE Read Data Mailbox Register */
+
+/* DOE Data Object - note not actually registers */
+#define PCI_DOE_DATA_OBJECT_HEADER_1_VID 0x0000ffff
+#define PCI_DOE_DATA_OBJECT_HEADER_1_TYPE 0x00ff0000
+#define PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH 0x0003ffff
+
+#define PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX 0x000000ff
+#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID 0x0000ffff
+#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL 0x00ff0000
+#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX 0xff000000
+
#endif /* LINUX_PCI_REGS_H */