@@ -687,6 +687,151 @@ static int free_hpa(struct cxl_region *cxlr)
return 0;
}
+struct cxlrd_max_context {
+ struct device *host_bridge;
+ unsigned long flags;
+ resource_size_t max_hpa;
+ struct cxl_root_decoder *cxlrd;
+};
+
+static int find_max_hpa(struct device *dev, void *data)
+{
+ struct cxlrd_max_context *ctx = data;
+ struct cxl_switch_decoder *cxlsd;
+ struct cxl_root_decoder *cxlrd;
+ struct resource *res, *prev;
+ struct cxl_decoder *cxld;
+ resource_size_t max;
+
+ if (!is_root_decoder(dev))
+ return 0;
+
+ cxlrd = to_cxl_root_decoder(dev);
+ cxlsd = &cxlrd->cxlsd;
+ cxld = &cxlsd->cxld;
+ if ((cxld->flags & ctx->flags) != ctx->flags) {
+ dev_dbg(dev, "%s, flags not matching: %08lx vs %08lx\n",
+ __func__, cxld->flags, ctx->flags);
+ return 0;
+ }
+
+ /*
+ * The CXL specs do not forbid an accelerator being part of an
+ * interleaved HPA range, but it is unlikely and because it helps
+ * simplifying the code, we assume this being the case by now.
+ */
+ if (cxld->interleave_ways != 1) {
+ dev_dbg(dev, "%s, interleave_ways not matching\n", __func__);
+ return 0;
+ }
+
+ guard(rwsem_read)(&cxl_region_rwsem);
+ if (ctx->host_bridge != cxlsd->target[0]->dport_dev) {
+ dev_dbg(dev, "%s, host bridge does not match\n", __func__);
+ return 0;
+ }
+
+ /*
+ * Walk the root decoder resource range relying on cxl_region_rwsem to
+ * preclude sibling arrival/departure and find the largest free space
+ * gap.
+ */
+ lockdep_assert_held_read(&cxl_region_rwsem);
+ max = 0;
+ res = cxlrd->res->child;
+ if (!res)
+ max = resource_size(cxlrd->res);
+ else
+ max = 0;
+
+ for (prev = NULL; res; prev = res, res = res->sibling) {
+ struct resource *next = res->sibling;
+ resource_size_t free = 0;
+
+ if (!prev && res->start > cxlrd->res->start) {
+ free = res->start - cxlrd->res->start;
+ max = max(free, max);
+ }
+ if (prev && res->start > prev->end + 1) {
+ free = res->start - prev->end + 1;
+ max = max(free, max);
+ }
+ if (next && res->end + 1 < next->start) {
+ free = next->start - res->end + 1;
+ max = max(free, max);
+ }
+ if (!next && res->end + 1 < cxlrd->res->end + 1) {
+ free = cxlrd->res->end + 1 - res->end + 1;
+ max = max(free, max);
+ }
+ }
+
+ dev_dbg(CXLRD_DEV(cxlrd), "%s, found %pa bytes of free space\n",
+ __func__, &max);
+ if (max > ctx->max_hpa) {
+ if (ctx->cxlrd)
+ put_device(CXLRD_DEV(ctx->cxlrd));
+ get_device(CXLRD_DEV(cxlrd));
+ ctx->cxlrd = cxlrd;
+ ctx->max_hpa = max;
+ dev_dbg(CXLRD_DEV(cxlrd), "%s, found %pa bytes of free space\n",
+ __func__, &max);
+ }
+ return 0;
+}
+
+/**
+ * cxl_get_hpa_freespace - find a root decoder with free capacity per constraints
+ * @endpoint: an endpoint that is mapped by the returned decoder
+ * @flags: CXL_DECODER_F flags for selecting RAM vs PMEM, and HDM-H vs HDM-D[B]
+ * @max_avail_contig: output parameter of max contiguous bytes available in the
+ * returned decoder
+ *
+ * The return tuple of a 'struct cxl_root_decoder' and 'bytes available given
+ * in (@max_avail_contig))' is a point in time snapshot. If by the time the
+ * caller goes to use this root decoder's capacity the capacity is reduced then
+ * caller needs to loop and retry.
+ *
+ * The returned root decoder has an elevated reference count that needs to be
+ * put with put_device(cxlrd_dev(cxlrd)). Locking context is with
+ * cxl_{acquire,release}_endpoint(), that ensures removal of the root decoder
+ * does not race.
+ */
+struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd,
+ unsigned long flags,
+ resource_size_t *max_avail_contig)
+{
+ struct cxl_port *endpoint = cxlmd->endpoint;
+ struct cxlrd_max_context ctx = {
+ .host_bridge = endpoint->host_bridge,
+ .flags = flags,
+ };
+ struct cxl_port *root_port;
+ struct cxl_root *root __free(put_cxl_root) = find_cxl_root(endpoint);
+
+ if (!is_cxl_endpoint(endpoint)) {
+ dev_dbg(&endpoint->dev, "hpa requestor is not an endpoint\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!root) {
+ dev_dbg(&endpoint->dev, "endpoint can not be related to a root port\n");
+ return ERR_PTR(-ENXIO);
+ }
+
+ root_port = &root->port;
+ down_read(&cxl_region_rwsem);
+ device_for_each_child(&root_port->dev, &ctx, find_max_hpa);
+ up_read(&cxl_region_rwsem);
+
+ if (!ctx.cxlrd)
+ return ERR_PTR(-ENOMEM);
+
+ *max_avail_contig = ctx.max_hpa;
+ return ctx.cxlrd;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_get_hpa_freespace, CXL);
+
static ssize_t size_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
@@ -785,6 +785,9 @@ static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
struct cxl_decoder *to_cxl_decoder(struct device *dev);
struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
+
+#define CXLRD_DEV(cxlrd) (&(cxlrd)->cxlsd.cxld.dev)
+
struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev);
bool is_root_decoder(struct device *dev);
bool is_switch_decoder(struct device *dev);
@@ -7,6 +7,10 @@
#include <linux/ioport.h>
#include <linux/pci.h>
+#define CXL_DECODER_F_RAM BIT(0)
+#define CXL_DECODER_F_PMEM BIT(1)
+#define CXL_DECODER_F_TYPE2 BIT(2)
+
enum cxl_resource {
CXL_RES_DPA,
CXL_RES_RAM,
@@ -47,4 +51,8 @@ int cxl_release_resource(struct cxl_dev_state *cxlds, enum cxl_resource type);
void cxl_set_media_ready(struct cxl_dev_state *cxlds);
struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
struct cxl_dev_state *cxlds);
+struct cxl_port;
+struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd,
+ unsigned long flags,
+ resource_size_t *max);
#endif