diff mbox

[v1,2/3] Add on-demand paging support

Message ID 1441292199-8371-3-git-send-email-haggaie@mellanox.com (mailing list archive)
State Accepted
Headers show

Commit Message

Haggai Eran Sept. 3, 2015, 2:56 p.m. UTC
The on-demand paging feature allows registering memory regions without pinning
their pages. Unfortunately the feature doesn't work together with all
transports and all operations. This patch adds the ability to report on-demand
paging capabilities through ibv_query_device_ex().

The patch also adds the IBV_ACCESS_ON_DEMAND access flag to allow registration
of on-demand paging enabled memory regions.

Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Haggai Eran <haggaie@mellanox.com>
---
 examples/devinfo.c            | 51 +++++++++++++++++++++++++++++++++++++++++++
 include/infiniband/kern-abi.h | 11 ++++++++++
 include/infiniband/verbs.h    | 25 ++++++++++++++++++++-
 man/ibv_query_device_ex.3     | 23 +++++++++++++++++++
 man/ibv_reg_mr.3              |  2 ++
 src/cmd.c                     | 16 ++++++++++++++
 6 files changed, 127 insertions(+), 1 deletion(-)
diff mbox

Patch

diff --git a/examples/devinfo.c b/examples/devinfo.c
index f8aa9b45838a..a8de9826558e 100644
--- a/examples/devinfo.c
+++ b/examples/devinfo.c
@@ -43,6 +43,7 @@ 
 #include <netinet/in.h>
 #include <endian.h>
 #include <byteswap.h>
+#include <inttypes.h>
 
 #include <infiniband/verbs.h>
 #include <infiniband/driver.h>
@@ -204,6 +205,54 @@  static const char *link_layer_str(uint8_t link_layer)
 	}
 }
 
+void print_odp_trans_caps(uint32_t trans)
+{
+	uint32_t unknown_transport_caps = ~(IBV_ODP_SUPPORT_SEND |
+					    IBV_ODP_SUPPORT_RECV |
+					    IBV_ODP_SUPPORT_WRITE |
+					    IBV_ODP_SUPPORT_READ |
+					    IBV_ODP_SUPPORT_ATOMIC);
+
+	if (!trans) {
+		printf("\t\t\t\t\tNO SUPPORT\n");
+	} else {
+		if (trans & IBV_ODP_SUPPORT_SEND)
+			printf("\t\t\t\t\tSUPPORT_SEND\n");
+		if (trans & IBV_ODP_SUPPORT_RECV)
+			printf("\t\t\t\t\tSUPPORT_RECV\n");
+		if (trans & IBV_ODP_SUPPORT_WRITE)
+			printf("\t\t\t\t\tSUPPORT_WRITE\n");
+		if (trans & IBV_ODP_SUPPORT_READ)
+			printf("\t\t\t\t\tSUPPORT_READ\n");
+		if (trans & IBV_ODP_SUPPORT_ATOMIC)
+			printf("\t\t\t\t\tSUPPORT_ATOMIC\n");
+		if (trans & unknown_transport_caps)
+			printf("\t\t\t\t\tUnknown flags: 0x%" PRIX32 "\n",
+			       trans & unknown_transport_caps);
+	}
+}
+
+void print_odp_caps(const struct ibv_odp_caps *caps)
+{
+	uint64_t unknown_general_caps = ~(IBV_ODP_SUPPORT);
+
+	/* general odp caps */
+	printf("\tgeneral_odp_caps:\n");
+	if (caps->general_caps & IBV_ODP_SUPPORT)
+		printf("\t\t\t\t\tODP_SUPPORT\n");
+	if (caps->general_caps & unknown_general_caps)
+		printf("\t\t\t\t\tUnknown flags: 0x%" PRIX64 "\n",
+		       caps->general_caps & unknown_general_caps);
+
+	/* per-transport odp caps: RC, UC and UD */
+	printf("\trc_odp_caps:\n");
+	print_odp_trans_caps(caps->per_transport_caps.rc_odp_caps);
+	printf("\tuc_odp_caps:\n");
+	print_odp_trans_caps(caps->per_transport_caps.uc_odp_caps);
+	printf("\tud_odp_caps:\n");
+	print_odp_trans_caps(caps->per_transport_caps.ud_odp_caps);
+}
+
 static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 {
 	struct ibv_context *ctx;
@@ -288,6 +337,8 @@  static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port)
 		}
 		printf("\tmax_pkeys:\t\t\t%d\n", device_attr.orig_attr.max_pkeys);
 		printf("\tlocal_ca_ack_delay:\t\t%d\n", device_attr.orig_attr.local_ca_ack_delay);
+
+		print_odp_caps(&device_attr.odp_caps);
 	}
 
 	for (port = 1; port <= device_attr.orig_attr.phys_port_cnt; ++port) {
diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h
index baa897c0d1bf..800c5abab7f8 100644
--- a/include/infiniband/kern-abi.h
+++ b/include/infiniband/kern-abi.h
@@ -252,10 +252,21 @@  struct ibv_query_device_ex {
 	__u32		reserved;
 };
 
+struct ibv_odp_caps_resp {
+	__u64 general_caps;
+	struct {
+		__u32 rc_odp_caps;
+		__u32 uc_odp_caps;
+		__u32 ud_odp_caps;
+	} per_transport_caps;
+	__u32 reserved;
+};
+
 struct ibv_query_device_resp_ex {
 	struct ibv_query_device_resp base;
 	__u32 comp_mask;
 	__u32 response_length;
+	struct ibv_odp_caps_resp odp_caps;
 };
 
 struct ibv_query_port {
diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h
index a3b999eebe47..a32f29095eab 100644
--- a/include/infiniband/verbs.h
+++ b/include/infiniband/verbs.h
@@ -175,9 +175,31 @@  struct ibv_query_device_ex_input {
 	uint32_t		comp_mask;
 };
 
+enum ibv_odp_transport_cap_bits {
+	IBV_ODP_SUPPORT_SEND     = 1 << 0,
+	IBV_ODP_SUPPORT_RECV     = 1 << 1,
+	IBV_ODP_SUPPORT_WRITE    = 1 << 2,
+	IBV_ODP_SUPPORT_READ     = 1 << 3,
+	IBV_ODP_SUPPORT_ATOMIC   = 1 << 4,
+};
+
+struct ibv_odp_caps {
+	uint64_t general_caps;
+	struct {
+		uint32_t rc_odp_caps;
+		uint32_t uc_odp_caps;
+		uint32_t ud_odp_caps;
+	} per_transport_caps;
+};
+
+enum ibv_odp_general_caps {
+	IBV_ODP_SUPPORT = 1 << 0,
+};
+
 struct ibv_device_attr_ex {
 	struct ibv_device_attr	orig_attr;
 	uint32_t		comp_mask;
+	struct ibv_odp_caps	odp_caps;
 };
 
 enum ibv_mtu {
@@ -352,7 +374,8 @@  enum ibv_access_flags {
 	IBV_ACCESS_REMOTE_WRITE		= (1<<1),
 	IBV_ACCESS_REMOTE_READ		= (1<<2),
 	IBV_ACCESS_REMOTE_ATOMIC	= (1<<3),
-	IBV_ACCESS_MW_BIND		= (1<<4)
+	IBV_ACCESS_MW_BIND		= (1<<4),
+	IBV_ACCESS_ON_DEMAND		= (1<<6),
 };
 
 struct ibv_pd {
diff --git a/man/ibv_query_device_ex.3 b/man/ibv_query_device_ex.3
index 6b33f9f92ab1..1f483d276628 100644
--- a/man/ibv_query_device_ex.3
+++ b/man/ibv_query_device_ex.3
@@ -23,8 +23,31 @@  struct ibv_device_attr_ex {
 .in +8
 struct ibv_device_attr orig_attr;
 uint32_t               comp_mask;              /* Compatibility mask that defines which of the following variables are valid */
+struct ibv_odp_caps    odp_caps;               /* On-Demand Paging capabilities */
 .in -8
 };
+
+struct ibv_odp_caps {
+	uint64_t	general_caps;      /* Mask with enum ibv_odp_general_caps */
+	struct {
+		uint32_t	rc_odp_caps;      /* Mask with enum ibv_odp_transport_cap_bits to know which operations are supported. */
+		uint32_t	uc_odp_caps;      /* Mask with enum ibv_odp_transport_cap_bits to know which operations are supported. */
+		uint32_t	ud_odp_caps;      /* Mask with enum ibv_odp_transport_cap_bits to know which operations are supported. */
+	} per_transport_caps;
+};
+
+enum ibv_odp_general_caps {
+        IBV_ODP_SUPPORT = 1 << 0, /* On demand paging is supported */
+};
+
+enum ibv_odp_transport_cap_bits {
+        IBV_ODP_SUPPORT_SEND     = 1 << 0, /* Send operations support on-demand paging */
+        IBV_ODP_SUPPORT_RECV     = 1 << 1, /* Receive operations support on-demand paging */
+        IBV_ODP_SUPPORT_WRITE    = 1 << 2, /* RDMA-Write operations support on-demand paging */
+        IBV_ODP_SUPPORT_READ     = 1 << 3, /* RDMA-Read operations support on-demand paging */
+        IBV_ODP_SUPPORT_ATOMIC   = 1 << 4, /* RDMA-Atomic operations support on-demand paging */
+};
+
 .fi
 .SH "RETURN VALUE"
 .B ibv_query_device_ex()
diff --git a/man/ibv_reg_mr.3 b/man/ibv_reg_mr.3
index 77237716b47c..cf151113070c 100644
--- a/man/ibv_reg_mr.3
+++ b/man/ibv_reg_mr.3
@@ -34,6 +34,8 @@  describes the desired memory protection attributes; it is either 0 or the bitwis
 .B IBV_ACCESS_REMOTE_ATOMIC\fR Enable Remote Atomic Operation Access (if supported)
 .TP
 .B IBV_ACCESS_MW_BIND\fR       Enable Memory Window Binding
+.TP
+.B IBV_ACCESS_ON_DEMAND\fR    Create an on-demand paging MR
 .PP
 If
 .B IBV_ACCESS_REMOTE_WRITE
diff --git a/src/cmd.c b/src/cmd.c
index c1a6883dfd6c..e1914e90e98e 100644
--- a/src/cmd.c
+++ b/src/cmd.c
@@ -172,6 +172,22 @@  int ibv_cmd_query_device_ex(struct ibv_context *context,
 	/* Report back supported comp_mask bits. For now no comp_mask bit is
 	 * defined */
 	attr->comp_mask = resp->comp_mask & 0;
+	if (attr_size >= offsetof(struct ibv_device_attr_ex, odp_caps) +
+			 sizeof(attr->odp_caps)) {
+		if (resp->response_length >=
+		    offsetof(struct ibv_query_device_resp_ex, odp_caps) +
+		    sizeof(resp->odp_caps)) {
+			attr->odp_caps.general_caps = resp->odp_caps.general_caps;
+			attr->odp_caps.per_transport_caps.rc_odp_caps =
+				resp->odp_caps.per_transport_caps.rc_odp_caps;
+			attr->odp_caps.per_transport_caps.uc_odp_caps =
+				resp->odp_caps.per_transport_caps.uc_odp_caps;
+			attr->odp_caps.per_transport_caps.ud_odp_caps =
+				resp->odp_caps.per_transport_caps.ud_odp_caps;
+		} else {
+			memset(&attr->odp_caps, 0, sizeof(attr->odp_caps));
+		}
+	}
 
 	return 0;
 }