@@ -14,6 +14,303 @@ that optionally define a device's contribution to an interleaved address
range across multiple devices underneath a host-bridge or interleaved
across host-bridges.
+CXL Bus: Theory of Operation
+============================
+Similar to how a RAID driver takes disk objects and assembles them into a new
+logical device, the CXL subsystem is tasked to take PCIe and ACPI objects and
+assemble them into a CXL.mem decode topology. The need for runtime configuration
+of the CXL.mem topology is also similar to RAID in that different environments
+with the same hardware configuration may decide to assemble the topology in
+contrasting ways. One may choose performance (RAID0) striping memory across
+multiple Host Bridges and endpoints while another may opt for fault tolerance
+and disable any striping in the CXL.mem topology.
+
+Platform firmware enumerates a menu of interleave options at the "CXL root port"
+(Linux term for the top of the CXL decode topology). From there, PCIe topology
+dictates which endpoints can participate in which Host Bridge decode regimes.
+Each PCIe Switch in the path between the root and an endpoint introduces a point
+at which the interleave can be split. For example platform firmware may say at a
+given range only decodes to 1 one Host Bridge, but that Host Bridge may in turn
+interleave cycles across multiple Root Ports. An intervening Switch between a
+port and an endpoint may interleave cycles across multiple Downstream Switch
+Ports, etc.
+
+Here is a sample listing of a CXL topology defined by 'cxl_test'. The 'cxl_test'
+module generates an emulated CXL topology of 2 Host Bridges each with 2 Root
+Ports. Each of those Root Ports are connected to 2-way switches with endpoints
+connected to those downstream ports for a total of 8 endpoints::
+
+ # cxl list -BEMPu -b cxl_test
+ {
+ "bus":"root3",
+ "provider":"cxl_test",
+ "ports:root3":[
+ {
+ "port":"port5",
+ "host":"cxl_host_bridge.1",
+ "ports:port5":[
+ {
+ "port":"port8",
+ "host":"cxl_switch_uport.1",
+ "endpoints:port8":[
+ {
+ "endpoint":"endpoint9",
+ "host":"mem2",
+ "memdev":{
+ "memdev":"mem2",
+ "pmem_size":"256.00 MiB (268.44 MB)",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0x1",
+ "numa_node":1,
+ "host":"cxl_mem.1"
+ }
+ },
+ {
+ "endpoint":"endpoint15",
+ "host":"mem6",
+ "memdev":{
+ "memdev":"mem6",
+ "pmem_size":"256.00 MiB (268.44 MB)",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0x5",
+ "numa_node":1,
+ "host":"cxl_mem.5"
+ }
+ }
+ ]
+ },
+ {
+ "port":"port12",
+ "host":"cxl_switch_uport.3",
+ "endpoints:port12":[
+ {
+ "endpoint":"endpoint17",
+ "host":"mem8",
+ "memdev":{
+ "memdev":"mem8",
+ "pmem_size":"256.00 MiB (268.44 MB)",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0x7",
+ "numa_node":1,
+ "host":"cxl_mem.7"
+ }
+ },
+ {
+ "endpoint":"endpoint13",
+ "host":"mem4",
+ "memdev":{
+ "memdev":"mem4",
+ "pmem_size":"256.00 MiB (268.44 MB)",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0x3",
+ "numa_node":1,
+ "host":"cxl_mem.3"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "port":"port4",
+ "host":"cxl_host_bridge.0",
+ "ports:port4":[
+ {
+ "port":"port6",
+ "host":"cxl_switch_uport.0",
+ "endpoints:port6":[
+ {
+ "endpoint":"endpoint7",
+ "host":"mem1",
+ "memdev":{
+ "memdev":"mem1",
+ "pmem_size":"256.00 MiB (268.44 MB)",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0",
+ "numa_node":0,
+ "host":"cxl_mem.0"
+ }
+ },
+ {
+ "endpoint":"endpoint14",
+ "host":"mem5",
+ "memdev":{
+ "memdev":"mem5",
+ "pmem_size":"256.00 MiB (268.44 MB)",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0x4",
+ "numa_node":0,
+ "host":"cxl_mem.4"
+ }
+ }
+ ]
+ },
+ {
+ "port":"port10",
+ "host":"cxl_switch_uport.2",
+ "endpoints:port10":[
+ {
+ "endpoint":"endpoint16",
+ "host":"mem7",
+ "memdev":{
+ "memdev":"mem7",
+ "pmem_size":"256.00 MiB (268.44 MB)",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0x6",
+ "numa_node":0,
+ "host":"cxl_mem.6"
+ }
+ },
+ {
+ "endpoint":"endpoint11",
+ "host":"mem3",
+ "memdev":{
+ "memdev":"mem3",
+ "pmem_size":"256.00 MiB (268.44 MB)",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0x2",
+ "numa_node":0,
+ "host":"cxl_mem.2"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+
+In that listing each "root", "port", and "endpoint" object correspond a kernel
+'struct cxl_port' object. A 'cxl_port' is a device that can decode CXL.mem to
+its descendants. So "root" claims non-PCIe enumerable platform decode ranges and
+decodes them to "ports", "ports" decode to "endpoints", and "endpoints"
+represent the decode from SPA (System Physical Address) to DPA (Device Physical
+Address).
+
+Continuing the RAID analogy, disks have both topology metadata and on device
+metadata that determine RAID set assembly. CXL Port topology and CXL Port link
+status is metadata for CXL.mem set assembly. The CXL Port topology is enumerated
+by the arrival of a CXL.mem device. I.e. unless and until the PCIe core attaches
+the cxl_pci driver to a CXL Memory Expander there is no role for CXL Port
+objects. Conversely for hot-unplug / removal scenarios, there is no need for
+the Linux PCI core to tear down switch-level CXL resources because the endpoint
+->remove() event cleans up the port data that was established to support that
+Memory Expander.
+
+The port metadata and potential decode schemes that a give memory device may
+participate can be determined via a command like::
+
+ # cxl list -BDMu -d root -m mem3
+ {
+ "bus":"root3",
+ "provider":"cxl_test",
+ "decoders:root3":[
+ {
+ "decoder":"decoder3.1",
+ "resource":"0x8030000000",
+ "size":"512.00 MiB (536.87 MB)",
+ "volatile_capable":true,
+ "nr_targets":2
+ },
+ {
+ "decoder":"decoder3.3",
+ "resource":"0x8060000000",
+ "size":"512.00 MiB (536.87 MB)",
+ "pmem_capable":true,
+ "nr_targets":2
+ },
+ {
+ "decoder":"decoder3.0",
+ "resource":"0x8020000000",
+ "size":"256.00 MiB (268.44 MB)",
+ "volatile_capable":true,
+ "nr_targets":1
+ },
+ {
+ "decoder":"decoder3.2",
+ "resource":"0x8050000000",
+ "size":"256.00 MiB (268.44 MB)",
+ "pmem_capable":true,
+ "nr_targets":1
+ }
+ ],
+ "memdevs:root3":[
+ {
+ "memdev":"mem3",
+ "pmem_size":"256.00 MiB (268.44 MB)",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0x2",
+ "numa_node":0,
+ "host":"cxl_mem.2"
+ }
+ ]
+ }
+
+...which queries the CXL topology to ask "given CXL Memory Expander with a kernel
+device name of 'mem3' which platform level decode ranges may this device
+participate". A given expander can participate in multiple CXL.mem interleave
+sets simultaneously depending on how many decoder resource it has. In this
+example mem3 can participate in one or more of a PMEM interleave that spans to
+Host Bridges, a PMEM interleave that targets a single Host Bridge, a Volatile
+memory interleave that spans 2 Host Bridges, and a Volatile memory interleave
+that only targets a single Host Bridge.
+
+Conversely the memory devices that can participate in a given platform level
+decode scheme can be determined via a command like the following::
+
+ # cxl list -MDu -d 3.2
+ [
+ {
+ "memdevs":[
+ {
+ "memdev":"mem1",
+ "pmem_size":"256.00 MiB (268.44 MB)",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0",
+ "numa_node":0,
+ "host":"cxl_mem.0"
+ },
+ {
+ "memdev":"mem5",
+ "pmem_size":"256.00 MiB (268.44 MB)",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0x4",
+ "numa_node":0,
+ "host":"cxl_mem.4"
+ },
+ {
+ "memdev":"mem7",
+ "pmem_size":"256.00 MiB (268.44 MB)",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0x6",
+ "numa_node":0,
+ "host":"cxl_mem.6"
+ },
+ {
+ "memdev":"mem3",
+ "pmem_size":"256.00 MiB (268.44 MB)",
+ "ram_size":"256.00 MiB (268.44 MB)",
+ "serial":"0x2",
+ "numa_node":0,
+ "host":"cxl_mem.2"
+ }
+ ]
+ },
+ {
+ "root decoders":[
+ {
+ "decoder":"decoder3.2",
+ "resource":"0x8050000000",
+ "size":"256.00 MiB (268.44 MB)",
+ "pmem_capable":true,
+ "nr_targets":1
+ }
+ ]
+ }
+ ]
+
+...where the naming scheme for decoders is "decoder<port_id>.<instance_id>".
+
Driver Infrastructure
=====================
@@ -28,6 +325,11 @@ CXL Memory Device
.. kernel-doc:: drivers/cxl/pci.c
:internal:
+CXL Port
+--------
+.. kernel-doc:: drivers/cxl/port.c
+ :doc: cxl port
+
CXL Core
--------
.. kernel-doc:: drivers/cxl/cxl.h
@@ -77,4 +77,9 @@ config CXL_PMEM
provisioning the persistent memory capacity of CXL memory expanders.
If unsure say 'm'.
+
+config CXL_PORT
+ default CXL_BUS
+ tristate
+
endif
@@ -3,7 +3,9 @@ obj-$(CONFIG_CXL_BUS) += core/
obj-$(CONFIG_CXL_PCI) += cxl_pci.o
obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o
obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o
+obj-$(CONFIG_CXL_PORT) += cxl_port.o
cxl_pci-y := pci.o
cxl_acpi-y := acpi.o
cxl_pmem-y := pmem.o
+cxl_port-y := port.o
@@ -169,7 +169,6 @@ static int add_host_bridge_uport(struct device *match, void *arg)
struct acpi_device *bridge = to_cxl_host_bridge(host, match);
struct acpi_pci_root *pci_root;
struct cxl_dport *dport;
- struct cxl_hdm *cxlhdm;
struct cxl_port *port;
int rc;
@@ -197,28 +196,7 @@ static int add_host_bridge_uport(struct device *match, void *arg)
return PTR_ERR(port);
dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev));
- rc = devm_cxl_port_enumerate_dports(host, port);
- if (rc < 0)
- return rc;
- cxl_device_lock(&port->dev);
- if (rc == 1) {
- rc = devm_cxl_add_passthrough_decoder(host, port);
- goto out;
- }
-
- cxlhdm = devm_cxl_setup_hdm(host, port);
- if (IS_ERR(cxlhdm)) {
- rc = PTR_ERR(cxlhdm);
- goto out;
- }
-
- rc = devm_cxl_enumerate_decoders(host, cxlhdm);
- if (rc)
- dev_err(&port->dev, "Couldn't enumerate decoders (%d)\n", rc);
-
-out:
- cxl_device_unlock(&port->dev);
- return rc;
+ return 0;
}
struct cxl_chbs_context {
@@ -278,9 +256,7 @@ static int add_host_bridge_dport(struct device *match, void *arg)
return 0;
}
- cxl_device_lock(&root_port->dev);
dport = devm_cxl_add_dport(host, root_port, match, uid, ctx.chbcr);
- cxl_device_unlock(&root_port->dev);
if (IS_ERR(dport)) {
dev_err(host, "failed to add downstream port: %s\n",
dev_name(match));
@@ -50,10 +50,8 @@ static int match_add_dports(struct pci_dev *pdev, void *data)
dev_dbg(&port->dev, "failed to find component registers\n");
port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
- cxl_device_lock(&port->dev);
dport = devm_cxl_add_dport(host, port, &pdev->dev, port_num,
cxl_regmap_to_base(pdev, &map));
- cxl_device_unlock(&port->dev);
if (IS_ERR(dport)) {
ctx->error = PTR_ERR(dport);
return PTR_ERR(dport);
@@ -40,6 +40,11 @@ static int cxl_device_id(struct device *dev)
return CXL_DEVICE_NVDIMM_BRIDGE;
if (dev->type == &cxl_nvdimm_type)
return CXL_DEVICE_NVDIMM;
+ if (is_cxl_port(dev)) {
+ if (is_cxl_root(to_cxl_port(dev)))
+ return CXL_DEVICE_ROOT;
+ return CXL_DEVICE_PORT;
+ }
return 0;
}
@@ -300,6 +305,9 @@ static void unregister_port(void *_port)
{
struct cxl_port *port = _port;
+ if (!is_cxl_root(port))
+ device_lock_assert(port->dev.parent);
+
device_unregister(&port->dev);
}
@@ -523,14 +531,33 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new)
return dup ? -EEXIST : 0;
}
+/*
+ * Since root-level CXL dports cannot be enumerated by PCI they are not
+ * enumerated by the common port driver that acquires the port lock over
+ * dport add/remove. Instead, root dports are manually added by a
+ * platform driver and cond_port_lock() is used to take the missing port
+ * lock in that case.
+ */
+static void cond_port_lock(struct cxl_port *port)
+{
+ if (is_cxl_root(port))
+ cxl_device_lock(&port->dev);
+}
+
+static void cond_port_unlock(struct cxl_port *port)
+{
+ if (is_cxl_root(port))
+ cxl_device_unlock(&port->dev);
+}
+
static void cxl_dport_remove(void *data)
{
struct cxl_dport *dport = data;
struct cxl_port *port = dport->port;
- cxl_device_lock(&port->dev);
+ cond_port_lock(port);
list_del_init(&dport->list);
- cxl_device_unlock(&port->dev);
+ cond_port_unlock(port);
put_device(dport->dport);
}
@@ -584,7 +611,9 @@ struct cxl_dport *devm_cxl_add_dport(struct device *host, struct cxl_port *port,
dport->component_reg_phys = component_reg_phys;
dport->port = port;
+ cond_port_lock(port);
rc = add_dport(port, dport);
+ cond_port_unlock(port);
if (rc)
return ERR_PTR(rc);
@@ -881,6 +910,7 @@ static int cxl_bus_probe(struct device *dev)
rc = to_cxl_drv(dev->driver)->probe(dev);
cxl_nested_unlock(dev);
+ dev_dbg(dev, "probe: %d\n", rc);
return rc;
}
@@ -163,6 +163,8 @@ int cxl_map_device_regs(struct pci_dev *pdev,
enum cxl_regloc_type;
int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
struct cxl_register_map *map);
+void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
+ resource_size_t length);
#define CXL_RESOURCE_NONE ((resource_size_t) -1)
#define CXL_TARGET_STRLEN 20
@@ -348,6 +350,8 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
#define CXL_DEVICE_NVDIMM_BRIDGE 1
#define CXL_DEVICE_NVDIMM 2
+#define CXL_DEVICE_PORT 3
+#define CXL_DEVICE_ROOT 4
#define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
#define CXL_MODALIAS_FMT "cxl:t%d"
@@ -2,6 +2,7 @@
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#ifndef __CXL_PCI_H__
#define __CXL_PCI_H__
+#include <linux/pci.h>
#include "cxl.h"
#define CXL_MEMORY_PROGIF 0x10
new file mode 100644
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "cxlmem.h"
+#include "cxlpci.h"
+
+/**
+ * DOC: cxl port
+ *
+ * The port driver enumerates dport via PCI and scans for HDM
+ * (Host-managed-Device-Memory) decoder resources via the
+ * @component_reg_phys value passed in by the agent that registered the
+ * port. All descendant ports of a CXL root port (described by platform
+ * firmware) are managed in this drivers context. Each driver instance
+ * is responsible for tearing down the driver context of immediate
+ * descendant ports. The locking for this is validated by
+ * CONFIG_PROVE_CXL_LOCKING.
+ *
+ * The primary service this driver provides is presenting APIs to other
+ * drivers to utilize the decoders, and indicating to userspace (via bind
+ * status) the connectivity of the CXL.mem protocol throughout the
+ * PCIe topology.
+ */
+
+static int cxl_port_probe(struct device *dev)
+{
+ struct cxl_port *port = to_cxl_port(dev);
+ struct cxl_hdm *cxlhdm;
+ int rc;
+
+ rc = devm_cxl_port_enumerate_dports(dev, port);
+ if (rc < 0)
+ return rc;
+
+ if (rc == 1)
+ return devm_cxl_add_passthrough_decoder(dev, port);
+
+ cxlhdm = devm_cxl_setup_hdm(dev, port);
+ if (IS_ERR(cxlhdm))
+ return PTR_ERR(cxlhdm);
+
+ rc = devm_cxl_enumerate_decoders(dev, cxlhdm);
+ if (rc) {
+ dev_err(&port->dev, "Couldn't enumerate decoders (%d)\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+static struct cxl_driver cxl_port_driver = {
+ .name = "cxl_port",
+ .probe = cxl_port_probe,
+ .id = CXL_DEVICE_PORT,
+};
+
+module_cxl_driver(cxl_port_driver);
+MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(CXL);
+MODULE_ALIAS_CXL(CXL_DEVICE_PORT);
@@ -26,6 +26,12 @@ obj-m += cxl_pmem.o
cxl_pmem-y := $(CXL_SRC)/pmem.o
cxl_pmem-y += config_check.o
+obj-m += cxl_port.o
+
+cxl_port-y := $(CXL_SRC)/port.o
+cxl_port-y += config_check.o
+
+
obj-m += cxl_core.o
cxl_core-y := $(CXL_CORE_SRC)/port.o
@@ -437,10 +437,8 @@ static int mock_cxl_port_enumerate_dports(struct device *host,
if (pdev->dev.parent != port->uport)
continue;
- cxl_device_lock(&port->dev);
dport = devm_cxl_add_dport(host, port, &pdev->dev, pdev->id,
CXL_RESOURCE_NONE);
- cxl_device_unlock(&port->dev);
if (IS_ERR(dport)) {
dev_err(dev, "failed to add dport: %s (%ld)\n",