[v2,RFC,2/7] RDMA/hns: Add method for shrinking DCA memory pool

Message ID 1611394994-50363-3-git-send-email-liweihang@huawei.com
State RFC
Series RDMA/hns: Add support for Dynamic Context Attachment

Commit Message

Weihang Li Jan. 23, 2021, 9:43 a.m. UTC
From: Xi Wang <wangxi11@huawei.com>

If no QP is using a DCA mem object, the userspace driver can destroy it.
So add a new method 'HNS_IB_METHOD_DCA_MEM_SHRINK' to allow the userspace
driver to remove an object from the DCA memory pool.

Once a DCA mem object has been shrunk, the userspace driver can destroy it
with the 'HNS_IB_METHOD_DCA_MEM_DEREG' method and then free the buffer that
was allocated in userspace.

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_dca.c | 142 ++++++++++++++++++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_dca.h |   7 ++
 include/uapi/rdma/hns-abi.h              |   9 ++
 3 files changed, 157 insertions(+), 1 deletion(-)

Patch

diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c
index 2a03cf3..604d6cf 100644
--- a/drivers/infiniband/hw/hns/hns_roce_dca.c
+++ b/drivers/infiniband/hw/hns/hns_roce_dca.c
@@ -35,6 +35,11 @@  struct dca_mem_attr {
 	u32 size;
 };
 
+static inline bool dca_page_is_free(struct hns_dca_page_state *state)
+{
+	return state->buf_id == HNS_DCA_INVALID_BUF_ID;
+}
+
 static inline bool dca_mem_is_free(struct dca_mem *mem)
 {
 	return mem->flags == 0;
@@ -60,6 +65,11 @@  static inline void clr_dca_mem_registered(struct dca_mem *mem)
 	mem->flags &= ~DCA_MEM_FLAGS_REGISTERED;
 }
 
+static inline bool dca_mem_is_available(struct dca_mem *mem)
+{
+	return mem->flags == (DCA_MEM_FLAGS_ALLOCED | DCA_MEM_FLAGS_REGISTERED);
+}
+
 static void free_dca_pages(void *pages)
 {
 	ib_umem_release(pages);
@@ -123,6 +133,41 @@  static struct hns_dca_page_state *alloc_dca_states(void *pages, int count)
 	return states;
 }
 
+#define DCA_MEM_STOP_ITERATE -1
+#define DCA_MEM_NEXT_ITERATE -2
+static void travel_dca_pages(struct hns_roce_dca_ctx *ctx, void *param,
+			     int (*cb)(struct dca_mem *, int, void *))
+{
+	struct dca_mem *mem, *tmp;
+	unsigned long flags;
+	bool avail;
+	int ret;
+	int i;
+
+	spin_lock_irqsave(&ctx->pool_lock, flags);
+	list_for_each_entry_safe(mem, tmp, &ctx->pool, list) {
+		spin_unlock_irqrestore(&ctx->pool_lock, flags);
+
+		spin_lock(&mem->lock);
+		avail = dca_mem_is_available(mem);
+		ret = 0;
+		for (i = 0; avail && i < mem->page_count; i++) {
+			ret = cb(mem, i, param);
+			if (ret == DCA_MEM_STOP_ITERATE ||
+			    ret == DCA_MEM_NEXT_ITERATE)
+				break;
+		}
+		spin_unlock(&mem->lock);
+		spin_lock_irqsave(&ctx->pool_lock, flags);
+
+		if (ret == DCA_MEM_STOP_ITERATE)
+			goto done;
+	}
+
+done:
+	spin_unlock_irqrestore(&ctx->pool_lock, flags);
+}
+
 /* user DCA is managed by ucontext */
 static inline struct hns_roce_dca_ctx *
 to_hr_dca_ctx(struct hns_roce_ucontext *uctx)
@@ -194,6 +239,63 @@  static int register_dca_mem(struct hns_roce_dev *hr_dev,
 	return 0;
 }
 
+struct dca_mem_shrink_attr {
+	u64 shrink_key;
+	u32 shrink_mems;
+};
+
+static int shrink_dca_page_proc(struct dca_mem *mem, int index, void *param)
+{
+	struct dca_mem_shrink_attr *attr = param;
+	struct hns_dca_page_state *state;
+	int i, free_pages;
+
+	free_pages = 0;
+	for (i = 0; i < mem->page_count; i++) {
+		state = &mem->states[i];
+		if (dca_page_is_free(state))
+			free_pages++;
+	}
+
+	/* No pages are in use */
+	if (free_pages == mem->page_count) {
+		/* unregister first empty DCA mem */
+		if (!attr->shrink_mems) {
+			clr_dca_mem_registered(mem);
+			attr->shrink_key = mem->key;
+		}
+
+		attr->shrink_mems++;
+	}
+
+	if (attr->shrink_mems > 1)
+		return DCA_MEM_STOP_ITERATE;
+	else
+		return DCA_MEM_NEXT_ITERATE;
+}
+
+static int shrink_dca_mem(struct hns_roce_dev *hr_dev,
+			  struct hns_roce_ucontext *uctx, u64 reserved_size,
+			  struct hns_dca_shrink_resp *resp)
+{
+	struct hns_roce_dca_ctx *ctx = to_hr_dca_ctx(uctx);
+	struct dca_mem_shrink_attr attr = {};
+	unsigned long flags;
+	bool need_shrink;
+
+	spin_lock_irqsave(&ctx->pool_lock, flags);
+	need_shrink = ctx->free_mems > 0 && ctx->free_size > reserved_size;
+	spin_unlock_irqrestore(&ctx->pool_lock, flags);
+	if (!need_shrink)
+		return 0;
+
+	travel_dca_pages(ctx, &attr, shrink_dca_page_proc);
+	resp->free_mems = attr.shrink_mems;
+	resp->free_key = attr.shrink_key;
+
+	return 0;
+}
+
 static void init_dca_context(struct hns_roce_dca_ctx *ctx)
 {
 	INIT_LIST_HEAD(&ctx->pool);
@@ -361,10 +463,48 @@  DECLARE_UVERBS_NAMED_METHOD_DESTROY(
 	UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, HNS_IB_OBJECT_DCA_MEM,
 			UVERBS_ACCESS_DESTROY, UA_MANDATORY));
 
+static int UVERBS_HANDLER(HNS_IB_METHOD_DCA_MEM_SHRINK)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct hns_roce_ucontext *uctx = uverbs_attr_to_hr_uctx(attrs);
+	struct hns_dca_shrink_resp resp = {};
+	u64 reserved_size = 0;
+	int ret;
+
+	if (uverbs_copy_from(&reserved_size, attrs,
+			     HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE))
+		return -EFAULT;
+
+	ret = shrink_dca_mem(to_hr_dev(uctx->ibucontext.device), uctx,
+			     reserved_size, &resp);
+	if (ret)
+		return ret;
+
+	if (uverbs_copy_to(attrs, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY,
+			   &resp.free_key, sizeof(resp.free_key)) ||
+	    uverbs_copy_to(attrs, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS,
+			   &resp.free_mems, sizeof(resp.free_mems)))
+		return -EFAULT;
+
+	return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+	HNS_IB_METHOD_DCA_MEM_SHRINK,
+	UVERBS_ATTR_IDR(HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE,
+			HNS_IB_OBJECT_DCA_MEM, UVERBS_ACCESS_WRITE,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE,
+			   UVERBS_ATTR_TYPE(u64), UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY,
+			    UVERBS_ATTR_TYPE(u64), UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS,
+			    UVERBS_ATTR_TYPE(u32), UA_MANDATORY));
 DECLARE_UVERBS_NAMED_OBJECT(HNS_IB_OBJECT_DCA_MEM,
 			    UVERBS_TYPE_ALLOC_IDR(dca_cleanup),
 			    &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_REG),
-			    &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DEREG));
+			    &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_DEREG),
+			    &UVERBS_METHOD(HNS_IB_METHOD_DCA_MEM_SHRINK));
 
 static bool dca_is_supported(struct ib_device *device)
 {
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h
index cb3481f..97caf03 100644
--- a/drivers/infiniband/hw/hns/hns_roce_dca.h
+++ b/drivers/infiniband/hw/hns/hns_roce_dca.h
@@ -14,6 +14,13 @@  struct hns_dca_page_state {
 	u32 head : 1; /* This page is the head in a continuous address range. */
 };
 
+struct hns_dca_shrink_resp {
+	u64 free_key; /* key of the free buffer registered by the user */
+	u32 free_mems; /* number of free buffers that no QP is using */
+};
+
+#define HNS_DCA_INVALID_BUF_ID 0UL
+
 void hns_roce_register_udca(struct hns_roce_dev *hr_dev,
 			    struct hns_roce_ucontext *uctx);
 void hns_roce_unregister_udca(struct hns_roce_dev *hr_dev,
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 62cf2bc..58a9d0b 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -107,6 +107,7 @@  enum hns_ib_objects {
 enum hns_ib_dca_mem_methods {
 	HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT),
 	HNS_IB_METHOD_DCA_MEM_DEREG,
+	HNS_IB_METHOD_DCA_MEM_SHRINK,
 };
 
 enum hns_ib_dca_mem_reg_attrs {
@@ -119,4 +120,12 @@  enum hns_ib_dca_mem_reg_attrs {
 enum hns_ib_dca_mem_dereg_attrs {
 	HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
 };
+
+enum hns_ib_dca_mem_shrink_attrs {
+	HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE,
+	HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY,
+	HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS,
+};
+
 #endif /* HNS_ABI_USER_H */