diff mbox series

[3/3] hw/cxl: introduce CXL type-2 device emulation

Message ID 20241212130422.69380-4-zhiw@nvidia.com (mailing list archive)
State New
Headers show
Series Introduce CXL type-2 device emulation | expand

Commit Message

Zhi Wang Dec. 12, 2024, 1:04 p.m. UTC
From: Zhi Wang <zhiwang@kernel.org>

Introduce a CXL type-2 device emulation that provides a minimum base for
testing kernel CXL core type-2 support and CXL type-2 virtualization. It
is also a good base for introducing more emulated features.

Currently, it only supports:

- Emulating component registers with HDM decoders.
- Volatile memory backend and emulation of region access.

The emulation is intentionally not tightly coupled with the current CXL
type-3 emulation, since many advanced CXL type-3 emulation features are not
implemented in a CXL type-2 device.

Co-developed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Zhi Wang <zhiwang@kernel.org>
---
 MAINTAINERS                    |   1 +
 docs/system/devices/cxl.rst    |  11 ++
 hw/cxl/cxl-component-utils.c   |   2 +
 hw/cxl/cxl-host.c              |  19 +-
 hw/mem/Kconfig                 |   5 +
 hw/mem/cxl_accel.c             | 319 +++++++++++++++++++++++++++++++++
 hw/mem/meson.build             |   1 +
 include/hw/cxl/cxl_component.h |   1 +
 include/hw/cxl/cxl_device.h    |  25 +++
 include/hw/pci/pci_ids.h       |   1 +
 10 files changed, 382 insertions(+), 3 deletions(-)
 create mode 100644 hw/mem/cxl_accel.c
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index aaf0505a21..72a6a505eb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2914,6 +2914,7 @@  R: Fan Ni <fan.ni@samsung.com>
 S: Supported
 F: hw/cxl/
 F: hw/mem/cxl_type3.c
+F: hw/mem/cxl_accel.c
 F: include/hw/cxl/
 F: qapi/cxl.json
 
diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
index 882b036f5e..13cc2417f2 100644
--- a/docs/system/devices/cxl.rst
+++ b/docs/system/devices/cxl.rst
@@ -332,6 +332,17 @@  The same volatile setup may optionally include an LSA region::
   -device cxl-type3,bus=root_port13,volatile-memdev=vmem0,lsa=cxl-lsa0,id=cxl-vmem0 \
   -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
 
+A very simple setup with just one directly attached CXL Type 2 Volatile Memory
+Accelerator device::
+
+  qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \
+  ...
+  -object memory-backend-ram,id=vmem0,share=on,size=256M \
+  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
+  -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
+  -device cxl-accel,bus=root_port13,volatile-memdev=vmem0,id=cxl-accel0 \
+  -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
+
 A setup suitable for 4 way interleave. Only one fixed window provided, to enable 2 way
 interleave across 2 CXL host bridges.  Each host bridge has 2 CXL Root Ports, with
 the CXL Type3 device directly attached (no switches).::
diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c
index 355103d165..717ef117ac 100644
--- a/hw/cxl/cxl-component-utils.c
+++ b/hw/cxl/cxl-component-utils.c
@@ -262,6 +262,7 @@  static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
         write_msk[R_CXL_HDM_DECODER0_CTRL + i * hdm_inc] = 0x13ff;
         if (type == CXL2_DEVICE ||
             type == CXL2_TYPE3_DEVICE ||
+            type == CXL3_TYPE2_DEVICE ||
             type == CXL2_LOGICAL_DEVICE) {
             write_msk[R_CXL_HDM_DECODER0_TARGET_LIST_LO + i * hdm_inc] =
                 0xf0000000;
@@ -293,6 +294,7 @@  void cxl_component_register_init_common(uint32_t *reg_state,
     case CXL2_UPSTREAM_PORT:
     case CXL2_TYPE3_DEVICE:
     case CXL2_LOGICAL_DEVICE:
+    case CXL3_TYPE2_DEVICE:
         /* + HDM */
         caps = 3;
         break;
diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
index e9f2543c43..e603a3f2fc 100644
--- a/hw/cxl/cxl-host.c
+++ b/hw/cxl/cxl-host.c
@@ -201,7 +201,8 @@  static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr)
         return NULL;
     }
 
-    if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) {
+    if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3) ||
+        object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) {
         return d;
     }
 
@@ -256,7 +257,13 @@  static MemTxResult cxl_read_cfmws(void *opaque, hwaddr addr, uint64_t *data,
         return MEMTX_ERROR;
     }
 
-    return cxl_type3_read(d, addr + fw->base, data, size, attrs);
+    if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) {
+        return cxl_type3_read(d, addr + fw->base, data, size, attrs);
+    } else if (object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) {
+        return cxl_accel_read(d, addr + fw->base, data, size, attrs);
+    }
+
+    return MEMTX_ERROR;
 }
 
 static MemTxResult cxl_write_cfmws(void *opaque, hwaddr addr,
@@ -272,7 +279,13 @@  static MemTxResult cxl_write_cfmws(void *opaque, hwaddr addr,
         return MEMTX_OK;
     }
 
-    return cxl_type3_write(d, addr + fw->base, data, size, attrs);
+    if (object_dynamic_cast(OBJECT(d), TYPE_CXL_TYPE3)) {
+        return cxl_type3_write(d, addr + fw->base, data, size, attrs);
+    } else if (object_dynamic_cast(OBJECT(d), TYPE_CXL_ACCEL)) {
+        return cxl_accel_write(d, addr + fw->base, data, size, attrs);
+    }
+
+    return MEMTX_ERROR;
 }
 
 const MemoryRegionOps cfmws_ops = {
diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig
index 73c5ae8ad9..1f7d08c17d 100644
--- a/hw/mem/Kconfig
+++ b/hw/mem/Kconfig
@@ -16,3 +16,8 @@  config CXL_MEM_DEVICE
     bool
     default y if CXL
     select MEM_DEVICE
+
+config CXL_ACCEL_DEVICE
+    bool
+    default y if CXL
+    select MEM_DEVICE
diff --git a/hw/mem/cxl_accel.c b/hw/mem/cxl_accel.c
new file mode 100644
index 0000000000..770072126d
--- /dev/null
+++ b/hw/mem/cxl_accel.c
@@ -0,0 +1,319 @@ 
+/*
+ * CXL accel (type-2) device
+ *
+ * Copyright(C) 2024 NVIDIA Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See the
+ * COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-only
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/error-report.h"
+#include "hw/mem/memory-device.h"
+#include "hw/mem/pc-dimm.h"
+#include "hw/pci/pci.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/range.h"
+#include "sysemu/hostmem.h"
+#include "sysemu/numa.h"
+#include "hw/cxl/cxl.h"
+#include "hw/pci/msix.h"
+
+/*
+ * Populate the CXL device, register locator and Flex Bus port DVSECs of
+ * @acceld through cxl_component_update_dvsec().
+ *
+ * When a volatile memory backend is attached, the range 1 size fields are
+ * derived from the backend size; otherwise they stay zero. The range 1 base
+ * fields are always written as zero here.
+ */
+static void update_dvsecs(CXLAccelDev *acceld)
+{
+    CXLComponentState *cxl_cstate = &acceld->cxl_cstate;
+    uint8_t *dvsec;
+    uint32_t range1_size_hi = 0, range1_size_lo = 0,
+             range1_base_hi = 0, range1_base_lo = 0;
+
+    if (acceld->hostvmem) {
+        /* Upper 32 bits of the backend size. */
+        range1_size_hi = acceld->hostvmem->size >> 32;
+        /*
+         * Bits 31:28 of the backend size combined with fixed low-order bits.
+         * NOTE(review): the low bits presumably encode the valid/active flags
+         * and the media type / memory class fields -- confirm against the
+         * CXL specification.
+         */
+        range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
+                         (acceld->hostvmem->size & 0xF0000000);
+    }
+
+    /* CXL device DVSEC: capability/control/status plus range 1 size/base. */
+    dvsec = (uint8_t *)&(CXLDVSECDevice){
+        .cap = 0x1e,
+        .ctrl = 0x2,
+        .status2 = 0x2,
+        .range1_size_hi = range1_size_hi,
+        .range1_size_lo = range1_size_lo,
+        .range1_base_hi = range1_base_hi,
+        .range1_base_lo = range1_base_lo,
+    };
+    cxl_component_update_dvsec(cxl_cstate, PCIE_CXL_DEVICE_DVSEC_LENGTH,
+                               PCIE_CXL_DEVICE_DVSEC, dvsec);
+
+    /* Register locator DVSEC: points at the component register BAR. */
+    dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){
+        .rsvd         = 0,
+        .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX,
+        .reg0_base_hi = 0,
+    };
+    cxl_component_update_dvsec(cxl_cstate, REG_LOC_DVSEC_LENGTH,
+                               REG_LOC_DVSEC, dvsec);
+
+    /* Flex Bus port DVSEC. */
+    dvsec = (uint8_t *)&(CXLDVSECPortFlexBus){
+        .cap                     = 0x26, /* 68B, IO, Mem, non-MLD */
+        .ctrl                    = 0x02, /* IO always enabled */
+        .status                  = 0x26, /* same as capabilities */
+        /* NOTE(review): meaning of 0xef is undocumented -- confirm */
+        .rcvd_mod_ts_data_phase1 = 0xef,
+    };
+    cxl_component_update_dvsec(cxl_cstate, PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH,
+                               PCIE_FLEXBUS_PORT_DVSEC, dvsec);
+}
+
+/*
+ * Create the three DVSECs of the device (CXL device, register locator and
+ * Flex Bus port, in that order) with no initial body, then fill them in
+ * with update_dvsecs().
+ */
+static void build_dvsecs(CXLAccelDev *acceld)
+{
+    const struct {
+        uint16_t length;
+        uint16_t type;
+        uint8_t rev;
+    } dvsecs[] = {
+        { PCIE_CXL_DEVICE_DVSEC_LENGTH, PCIE_CXL_DEVICE_DVSEC,
+          PCIE_CXL31_DEVICE_DVSEC_REVID },
+        { REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC, REG_LOC_DVSEC_REVID },
+        { PCIE_CXL3_FLEXBUS_PORT_DVSEC_LENGTH, PCIE_FLEXBUS_PORT_DVSEC,
+          PCIE_CXL3_FLEXBUS_PORT_DVSEC_REVID },
+    };
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(dvsecs) / sizeof(dvsecs[0]); idx++) {
+        cxl_component_create_dvsec(&acceld->cxl_cstate, CXL3_TYPE2_DEVICE,
+                                   dvsecs[idx].length, dvsecs[idx].type,
+                                   dvsecs[idx].rev, NULL);
+    }
+
+    update_dvsecs(acceld);
+}
+
+/*
+ * Decode @host_addr for this device: forwards to cxl_host_addr_to_dpa()
+ * with the device's component state and returns its result.
+ */
+static bool cxl_accel_dpa(CXLAccelDev *acceld, hwaddr host_addr, uint64_t *dpa)
+{
+    CXLComponentState *cstate = &acceld->cxl_cstate;
+
+    return cxl_host_addr_to_dpa(cstate, host_addr, dpa);
+}
+
+/*
+ * Translate a host physical address into the address space backing the
+ * device's volatile memory and the device physical address (DPA) within it.
+ *
+ * @acceld: the accelerator device
+ * @host_addr: host physical address of the access
+ * @size: access size in bytes
+ * @as: on success, set to the address space of the volatile memory backend
+ * @dpa_offset: receives the decoded DPA
+ *
+ * Returns 0 on success, -ENODEV if no volatile memory backend (or no backing
+ * memory region) is available, and -EINVAL if the address is not decoded by
+ * the device or the access does not fit inside the backing memory region.
+ */
+static int cxl_accel_hpa_to_as_and_dpa(CXLAccelDev *acceld,
+                                       hwaddr host_addr,
+                                       unsigned int size,
+                                       AddressSpace **as,
+                                       uint64_t *dpa_offset)
+{
+    MemoryRegion *vmr = NULL;
+    uint64_t vmr_size = 0;
+
+    if (!acceld->hostvmem) {
+        return -ENODEV;
+    }
+
+    vmr = host_memory_backend_get_memory(acceld->hostvmem);
+    if (!vmr) {
+        return -ENODEV;
+    }
+
+    vmr_size = memory_region_size(vmr);
+
+    if (!cxl_accel_dpa(acceld, host_addr, dpa_offset)) {
+        return -EINVAL;
+    }
+
+    /*
+     * Reject the access unless all @size bytes lie inside the region, not
+     * just the first one. The subtraction cannot wrap because the first
+     * comparison guarantees *dpa_offset < vmr_size.
+     */
+    if (*dpa_offset >= vmr_size || size > vmr_size - *dpa_offset) {
+        return -EINVAL;
+    }
+
+    *as = &acceld->hostvmem_as;
+    return 0;
+}
+
+/*
+ * Read @size bytes at host physical address @host_addr from the device's
+ * volatile memory into @data.
+ *
+ * Returns MEMTX_ERROR when the address cannot be translated; otherwise the
+ * result of address_space_read() on the backing address space.
+ */
+MemTxResult cxl_accel_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
+                           unsigned size, MemTxAttrs attrs)
+{
+    AddressSpace *target_as = NULL;
+    uint64_t dpa = 0;
+
+    if (cxl_accel_hpa_to_as_and_dpa(CXL_ACCEL(d), host_addr, size,
+                                    &target_as, &dpa) != 0) {
+        return MEMTX_ERROR;
+    }
+
+    return address_space_read(target_as, dpa, attrs, data, size);
+}
+
+/*
+ * Write the low @size bytes of @data to the device's volatile memory at
+ * host physical address @host_addr.
+ *
+ * Returns MEMTX_ERROR when the address cannot be translated; otherwise the
+ * result of address_space_write() on the backing address space.
+ */
+MemTxResult cxl_accel_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
+                            unsigned size, MemTxAttrs attrs)
+{
+    AddressSpace *target_as = NULL;
+    uint64_t dpa = 0;
+
+    if (cxl_accel_hpa_to_as_and_dpa(CXL_ACCEL(d), host_addr, size,
+                                    &target_as, &dpa) != 0) {
+        return MEMTX_ERROR;
+    }
+
+    return address_space_write(target_as, dpa, attrs, &data, size);
+}
+
+/*
+ * Tear down the address space created by setup_memory(). Does nothing when
+ * the device has no volatile memory backend.
+ */
+static void clean_memory(PCIDevice *pci_dev)
+{
+    CXLAccelDev *accel = CXL_ACCEL(pci_dev);
+
+    if (!accel->hostvmem) {
+        return;
+    }
+
+    address_space_destroy(&accel->hostvmem_as);
+}
+
+/*
+ * Claim the optional volatile memory backend and build the address space
+ * used to service accesses to it.
+ *
+ * Returns true on success, including when no backend was configured.
+ * Returns false with @errp set when the backend has no memory region or is
+ * already mapped elsewhere.
+ */
+static bool setup_memory(PCIDevice *pci_dev, Error **errp)
+{
+    CXLAccelDev *accel = CXL_ACCEL(pci_dev);
+    HostMemoryBackend *backend = accel->hostvmem;
+    MemoryRegion *region;
+    char *as_name;
+
+    if (!backend) {
+        return true;
+    }
+
+    region = host_memory_backend_get_memory(backend);
+    if (!region) {
+        error_setg(errp, "volatile memdev must have backing device");
+        return false;
+    }
+
+    if (host_memory_backend_is_mapped(backend)) {
+        error_setg(errp, "memory backend %s can't be used multiple times.",
+                   object_get_canonical_path_component(OBJECT(backend)));
+        return false;
+    }
+
+    memory_region_set_nonvolatile(region, false);
+    memory_region_set_enabled(region, true);
+    host_memory_backend_set_mapped(backend, true);
+
+    as_name = g_strdup("cxl-accel-dpa-vmem-space");
+    address_space_init(&accel->hostvmem_as, region, as_name);
+    g_free(as_name);
+
+    return true;
+}
+
+/*
+ * Initialise the CXL component state of the device: build the DVSECs
+ * starting at config offset 0x100, set up the component register block and
+ * expose it through a 64-bit memory BAR at CXL_COMPONENT_REG_BAR_IDX.
+ */
+static void setup_cxl_regs(PCIDevice *pci_dev)
+{
+    CXLAccelDev *accel = CXL_ACCEL(pci_dev);
+    CXLComponentState *cstate = &accel->cxl_cstate;
+
+    cstate->pdev = pci_dev;
+    cstate->dvsec_offset = 0x100;
+
+    build_dvsecs(accel);
+
+    cxl_component_register_block_init(OBJECT(pci_dev), cstate,
+                                      TYPE_CXL_ACCEL);
+
+    pci_register_bar(pci_dev, CXL_COMPONENT_REG_BAR_IDX,
+                     PCI_BASE_ADDRESS_SPACE_MEMORY |
+                     PCI_BASE_ADDRESS_MEM_TYPE_64,
+                     &cstate->crb.component_registers);
+}
+
+/* Number of MSI-X vectors exposed by the device. */
+#define MSIX_NUM 6
+
+/*
+ * Create the MSI-X capability with MSIX_NUM vectors in an exclusive BAR
+ * (BAR index 4) and mark every vector as in use.
+ *
+ * Returns 0 on success. On failure returns the non-zero code from
+ * msix_init_exclusive_bar(), which is also given @errp so that it can
+ * describe the failure.
+ */
+static int setup_msix(PCIDevice *pci_dev, Error **errp)
+{
+    int i, rc;
+
+    rc = msix_init_exclusive_bar(pci_dev, MSIX_NUM, 4, errp);
+    if (rc) {
+        return rc;
+    }
+
+    for (i = 0; i < MSIX_NUM; i++) {
+        msix_vector_use(pci_dev, i);
+    }
+    return 0;
+}
+
+/*
+ * PCI realize callback for the CXL accelerator device.
+ *
+ * Sets up the volatile memory backend, the PCIe endpoint capability, the
+ * CXL DVSECs/component registers and finally MSI-X. On failure @errp is set
+ * and any address space created by setup_memory() is destroyed again.
+ */
+static void cxl_accel_realize(PCIDevice *pci_dev, Error **errp)
+{
+    ERRP_GUARD();
+    int rc;
+    uint8_t *pci_conf = pci_dev->config;
+
+    if (!setup_memory(pci_dev, errp)) {
+        return;
+    }
+
+    pci_config_set_prog_interface(pci_conf, 0x10);
+    pcie_endpoint_cap_init(pci_dev, 0x80);
+
+    setup_cxl_regs(pci_dev);
+
+    /*
+     * Hand @errp to the MSI-X setup: a realize callback reports failure
+     * only through @errp, so returning early without setting it would make
+     * a failed realize look like a success.
+     */
+    rc = setup_msix(pci_dev, errp);
+    if (rc) {
+        clean_memory(pci_dev);
+        return;
+    }
+}
+
+/* PCI exit callback: release the memory resources held by the device. */
+static void cxl_accel_exit(PCIDevice *pci_dev)
+{
+    clean_memory(pci_dev);
+}
+
+/*
+ * Device reset handler: rewrite the DVSEC contents and re-run the common
+ * initialisation of the cache/mem component registers and their write
+ * masks for a type-2 device.
+ */
+static void cxl_accel_reset(DeviceState *dev)
+{
+    CXLAccelDev *accel = CXL_ACCEL(dev);
+    ComponentRegisters *regs = &accel->cxl_cstate.crb;
+
+    update_dvsecs(accel);
+    cxl_component_register_init_common(regs->cache_mem_registers,
+                                       regs->cache_mem_regs_write_mask,
+                                       CXL3_TYPE2_DEVICE);
+}
+
+/*
+ * User-configurable properties: "volatile-memdev" links the device to the
+ * memory backend stored in CXLAccelDev::hostvmem.
+ */
+static Property cxl_accel_props[] = {
+    DEFINE_PROP_LINK("volatile-memdev", CXLAccelDev, hostvmem,
+                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initialiser: sets the PCI identity of the device and installs the
+ * realize/exit callbacks, the legacy reset handler and the property list.
+ */
+static void cxl_accel_class_init(ObjectClass *oc, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(oc);
+    DeviceClass *dev_class = DEVICE_CLASS(oc);
+
+    /* PCI identity */
+    pci_class->vendor_id = PCI_VENDOR_ID_INTEL;
+    pci_class->device_id = 0xd94;
+    pci_class->revision = 1;
+    pci_class->class_id = PCI_CLASS_CXL_QEMU_ACCEL;
+
+    /* Lifecycle callbacks */
+    pci_class->realize = cxl_accel_realize;
+    pci_class->exit = cxl_accel_exit;
+
+    /* Generic device metadata */
+    dev_class->desc = "CXL Accelerator Device (Type 2)";
+    set_bit(DEVICE_CATEGORY_STORAGE, dev_class->categories);
+    device_class_set_legacy_reset(dev_class, cxl_accel_reset);
+    device_class_set_props(dev_class, cxl_accel_props);
+}
+
+/*
+ * QOM type description for TYPE_CXL_ACCEL: a PCI device that implements
+ * both the CXL device and PCIe device interfaces.
+ */
+static const TypeInfo cxl_accel_dev_info = {
+    .name = TYPE_CXL_ACCEL,
+    .parent = TYPE_PCI_DEVICE,
+    .class_size = sizeof(struct CXLAccelClass),
+    .class_init = cxl_accel_class_init,
+    .instance_size = sizeof(CXLAccelDev),
+    .interfaces = (InterfaceInfo[]) {
+        { INTERFACE_CXL_DEVICE },
+        { INTERFACE_PCIE_DEVICE },
+        {}
+    },
+};
+
+/* Register the CXL accelerator device type. */
+static void cxl_accel_dev_registers(void)
+{
+    type_register_static(&cxl_accel_dev_info);
+}
+
+/* Hook the registration function in through type_init(). */
+type_init(cxl_accel_dev_registers);
diff --git a/hw/mem/meson.build b/hw/mem/meson.build
index 1c1c6da24b..36a395dbb6 100644
--- a/hw/mem/meson.build
+++ b/hw/mem/meson.build
@@ -4,6 +4,7 @@  mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
 mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
 mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
 mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c'))
+mem_ss.add(when: 'CONFIG_CXL_ACCEL_DEVICE', if_true: files('cxl_accel.c'))
 system_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c'))
 
 system_ss.add(when: 'CONFIG_MEM_DEVICE', if_false: files('memory-device-stubs.c'))
diff --git a/include/hw/cxl/cxl_component.h b/include/hw/cxl/cxl_component.h
index 30fe4bfa24..0e78db26b8 100644
--- a/include/hw/cxl/cxl_component.h
+++ b/include/hw/cxl/cxl_component.h
@@ -29,6 +29,7 @@  enum reg_type {
     CXL2_UPSTREAM_PORT,
     CXL2_DOWNSTREAM_PORT,
     CXL3_SWITCH_MAILBOX_CCI,
+    CXL3_TYPE2_DEVICE,
 };
 
 /*
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 561b375dc8..ac26b264da 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -630,6 +630,26 @@  struct CSWMBCCIDev {
     CXLCCI *cci;
 };
 
+/* Per-instance state of the "cxl-accel" (CXL type-2) device. */
+struct CXLAccelDev {
+    /* Private */
+    PCIDevice parent_obj;
+
+    /* Properties */
+    /* Volatile memory backend, set through the "volatile-memdev" property */
+    HostMemoryBackend *hostvmem;
+
+    /* State */
+    /* Address space rooted at the memory region of hostvmem */
+    AddressSpace hostvmem_as;
+    /* CXL component state (DVSECs and component registers) */
+    CXLComponentState cxl_cstate;
+};
+
+/* Class structure of the "cxl-accel" device; adds nothing to its parent. */
+struct CXLAccelClass {
+    /* Private */
+    PCIDeviceClass parent_class;
+};
+
+#define TYPE_CXL_ACCEL "cxl-accel"
+OBJECT_DECLARE_TYPE(CXLAccelDev, CXLAccelClass, CXL_ACCEL)
+
 #define TYPE_CXL_SWITCH_MAILBOX_CCI "cxl-switch-mailbox-cci"
 OBJECT_DECLARE_TYPE(CSWMBCCIDev, CSWMBCCIClass, CXL_SWITCH_MAILBOX_CCI)
 
@@ -638,6 +658,11 @@  MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
 MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
                             unsigned size, MemTxAttrs attrs);
 
+MemTxResult cxl_accel_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
+                           unsigned size, MemTxAttrs attrs);
+MemTxResult cxl_accel_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
+                            unsigned size, MemTxAttrs attrs);
+
 uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds);
 
 void cxl_event_init(CXLDeviceState *cxlds, int start_msg_num);
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index f1a53fea8d..08bc469316 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -55,6 +55,7 @@ 
 #define PCI_CLASS_MEMORY_RAM             0x0500
 #define PCI_CLASS_MEMORY_FLASH           0x0501
 #define PCI_CLASS_MEMORY_CXL             0x0502
+#define PCI_CLASS_CXL_QEMU_ACCEL         0x0503
 #define PCI_CLASS_MEMORY_OTHER           0x0580
 
 #define PCI_BASE_CLASS_BRIDGE            0x06