From patchwork Thu Aug 27 15:22:34 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Haggai Eran X-Patchwork-Id: 7085681 Return-Path: X-Original-To: patchwork-linux-rdma@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork2.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.136]) by patchwork2.web.kernel.org (Postfix) with ESMTP id 9FBC3BEEC1 for ; Thu, 27 Aug 2015 15:23:09 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id 611C220982 for ; Thu, 27 Aug 2015 15:23:08 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id EB00B209B3 for ; Thu, 27 Aug 2015 15:23:02 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752182AbbH0PXB (ORCPT ); Thu, 27 Aug 2015 11:23:01 -0400 Received: from [193.47.165.129] ([193.47.165.129]:40222 "EHLO mellanox.co.il" rhost-flags-FAIL-FAIL-OK-FAIL) by vger.kernel.org with ESMTP id S1753133AbbH0PXA (ORCPT ); Thu, 27 Aug 2015 11:23:00 -0400 Received: from Internal Mail-Server by MTLPINE1 (envelope-from haggaie@mellanox.com) with ESMTPS (AES256-SHA encrypted); 27 Aug 2015 18:22:35 +0300 Received: from arch003.mtl.labs.mlnx (arch003.mtl.labs.mlnx [10.137.35.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id t7RFMVO2032664; Thu, 27 Aug 2015 18:22:31 +0300 From: Haggai Eran To: Doug Ledford Cc: linux-rdma@vger.kernel.org, Eli Cohen , Matan Barak , Yevgeny Petrilin , Eran Ben Elisha , Moshe Lazer , Haggai Eran , Shachar Raindel , Majd Dibbiny Subject: [PATCH 2/3] Add on-demand paging support Date: Thu, 27 Aug 2015 18:22:34 +0300 Message-Id: <1440688955-7709-3-git-send-email-haggaie@mellanox.com> X-Mailer: git-send-email 1.7.11.2 In-Reply-To: <1440688955-7709-1-git-send-email-haggaie@mellanox.com> References: <1440688955-7709-1-git-send-email-haggaie@mellanox.com> Sender: 
linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Spam-Status: No, score=-6.3 required=5.0 tests=BAYES_00,LONGWORDS, RCVD_IN_DNSWL_HI, RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=ham version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP On-demand paging feature allows registering memory regions without pinning their pages. Unfortunately the feature doesn't work together with all transports and all operations. This patch adds the ability to report on-demand paging capabilities through ibv_query_device_ex. The patch also adds the IBV_ACCESS_ON_DEMAND access flag to allow registration of on-demand paging enabled memory regions. Signed-off-by: Shachar Raindel Signed-off-by: Majd Dibbiny Signed-off-by: Haggai Eran Reviewed-by: Sagi Grimberg --- examples/devinfo.c | 51 +++++++++++++++++++++++++++++++++++++++++++ include/infiniband/kern-abi.h | 12 +++++++++- include/infiniband/verbs.h | 25 ++++++++++++++++++++- man/ibv_query_device_ex.3 | 23 +++++++++++++++++++ man/ibv_reg_mr.3 | 2 ++ src/cmd.c | 11 ++++++++++ 6 files changed, 122 insertions(+), 2 deletions(-) diff --git a/examples/devinfo.c b/examples/devinfo.c index 95e8f83753ca..61cfdf520be6 100644 --- a/examples/devinfo.c +++ b/examples/devinfo.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -204,6 +205,54 @@ static const char *link_layer_str(uint8_t link_layer) } } +void print_odp_trans_caps(uint32_t trans) +{ + uint32_t unknown_transport_caps = ~(IBV_ODP_SUPPORT_SEND | + IBV_ODP_SUPPORT_RECV | + IBV_ODP_SUPPORT_WRITE | + IBV_ODP_SUPPORT_READ | + IBV_ODP_SUPPORT_ATOMIC); + + if (!trans) { + printf("\t\t\t\t\tNO SUPPORT\n"); + } else { + if (trans & IBV_ODP_SUPPORT_SEND) + printf("\t\t\t\t\tSUPPORT_SEND\n"); + if (trans & IBV_ODP_SUPPORT_RECV) + printf("\t\t\t\t\tSUPPORT_RECV\n"); + if (trans & IBV_ODP_SUPPORT_WRITE) + printf("\t\t\t\t\tSUPPORT_WRITE\n"); + 
if (trans & IBV_ODP_SUPPORT_READ) + printf("\t\t\t\t\tSUPPORT_READ\n"); + if (trans & IBV_ODP_SUPPORT_ATOMIC) + printf("\t\t\t\t\tSUPPORT_ATOMIC\n"); + if (trans & unknown_transport_caps) + printf("\t\t\t\t\tUnknown flags: 0x%" PRIX32 "\n", + trans & unknown_transport_caps); + } +} + +void print_odp_caps(struct ibv_odp_caps caps) +{ + uint64_t unknown_general_caps = ~(IBV_ODP_SUPPORT); + + /* general odp caps */ + printf("\tgeneral_odp_caps:\n"); + if (caps.general_caps & IBV_ODP_SUPPORT) + printf("\t\t\t\t\tODP_SUPPORT\n"); + if (caps.general_caps & unknown_general_caps) + printf("\t\t\t\t\tUnknown flags: 0x%" PRIX64 "\n", + caps.general_caps & unknown_general_caps); + + /* RC transport */ + printf("\trc_odp_caps:\n"); + print_odp_trans_caps(caps.per_transport_caps.rc_odp_caps); + printf("\tuc_odp_caps:\n"); + print_odp_trans_caps(caps.per_transport_caps.uc_odp_caps); + printf("\tud_odp_caps:\n"); + print_odp_trans_caps(caps.per_transport_caps.ud_odp_caps); +} + static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port) { struct ibv_context *ctx; @@ -296,6 +345,8 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port) } printf("\tmax_pkeys:\t\t\t%d\n", device_attr.max_pkeys); printf("\tlocal_ca_ack_delay:\t\t%d\n", device_attr.local_ca_ack_delay); + + print_odp_caps(attrx.odp_caps); } for (port = 1; port <= device_attr.phys_port_cnt; ++port) { diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h index af2a1bebf683..1c0d0d30c612 100644 --- a/include/infiniband/kern-abi.h +++ b/include/infiniband/kern-abi.h @@ -254,11 +254,21 @@ struct ibv_query_device_ex { __u32 reserved; }; +struct ibv_odp_caps_resp { + __u64 general_caps; + struct { + __u32 rc_odp_caps; + __u32 uc_odp_caps; + __u32 ud_odp_caps; + } per_transport_caps; + __u32 reserved; +}; + struct ibv_query_device_resp_ex { struct ibv_query_device_resp base; __u32 comp_mask; __u32 response_length; - __u64 reserved[3]; + struct ibv_odp_caps_resp odp_caps; }; struct 
ibv_query_port { diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h index ff806bf8555d..ce56315b236e 100644 --- a/include/infiniband/verbs.h +++ b/include/infiniband/verbs.h @@ -168,9 +168,31 @@ struct ibv_device_attr { uint8_t phys_port_cnt; }; +enum ibv_odp_transport_cap_bits { + IBV_ODP_SUPPORT_SEND = 1 << 0, + IBV_ODP_SUPPORT_RECV = 1 << 1, + IBV_ODP_SUPPORT_WRITE = 1 << 2, + IBV_ODP_SUPPORT_READ = 1 << 3, + IBV_ODP_SUPPORT_ATOMIC = 1 << 4, +}; + +struct ibv_odp_caps { + uint64_t general_caps; + struct { + uint32_t rc_odp_caps; + uint32_t uc_odp_caps; + uint32_t ud_odp_caps; + } per_transport_caps; +}; + +enum ibv_odp_general_caps { + IBV_ODP_SUPPORT = 1 << 0, +}; + struct ibv_device_attr_ex { struct ibv_device_attr orig_attr; uint32_t comp_mask; + struct ibv_odp_caps odp_caps; }; struct ibv_device_attr_ex_resp { @@ -350,7 +372,8 @@ enum ibv_access_flags { IBV_ACCESS_REMOTE_WRITE = (1<<1), IBV_ACCESS_REMOTE_READ = (1<<2), IBV_ACCESS_REMOTE_ATOMIC = (1<<3), - IBV_ACCESS_MW_BIND = (1<<4) + IBV_ACCESS_MW_BIND = (1<<4), + IBV_ACCESS_ON_DEMAND = (1<<6), }; struct ibv_pd { diff --git a/man/ibv_query_device_ex.3 b/man/ibv_query_device_ex.3 index 6b33f9f92ab1..1f483d276628 100644 --- a/man/ibv_query_device_ex.3 +++ b/man/ibv_query_device_ex.3 @@ -23,8 +23,31 @@ struct ibv_device_attr_ex { .in +8 struct ibv_device_attr orig_attr; uint32_t comp_mask; /* Compatibility mask that defines which of the following variables are valid */ +struct ibv_odp_caps odp_caps; /* On-Demand Paging capabilities */ .in -8 }; + +struct ibv_odp_caps { + uint64_t general_caps; /* Mask with enum ibv_odp_general_caps */ + struct { + uint32_t rc_odp_caps; /* Mask with enum ibv_odp_transport_cap_bits to know which operations are supported. */ + uint32_t uc_odp_caps; /* Mask with enum ibv_odp_transport_cap_bits to know which operations are supported. */ + uint32_t ud_odp_caps; /* Mask with enum ibv_odp_transport_cap_bits to know which operations are supported. 
*/ + } per_transport_caps; +}; + +enum ibv_odp_general_caps { + IBV_ODP_SUPPORT = 1 << 0, /* On demand paging is supported */ +}; + +enum ibv_odp_transport_cap_bits { + IBV_ODP_SUPPORT_SEND = 1 << 0, /* Send operations support on-demand paging */ + IBV_ODP_SUPPORT_RECV = 1 << 1, /* Receive operations support on-demand paging */ + IBV_ODP_SUPPORT_WRITE = 1 << 2, /* RDMA-Write operations support on-demand paging */ + IBV_ODP_SUPPORT_READ = 1 << 3, /* RDMA-Read operations support on-demand paging */ + IBV_ODP_SUPPORT_ATOMIC = 1 << 4, /* RDMA-Atomic operations support on-demand paging */ +}; + .fi .SH "RETURN VALUE" .B ibv_query_device_ex() diff --git a/man/ibv_reg_mr.3 b/man/ibv_reg_mr.3 index 77237716b47c..cf151113070c 100644 --- a/man/ibv_reg_mr.3 +++ b/man/ibv_reg_mr.3 @@ -34,6 +34,8 @@ describes the desired memory protection attributes; it is either 0 or the bitwis .B IBV_ACCESS_REMOTE_ATOMIC\fR Enable Remote Atomic Operation Access (if supported) .TP .B IBV_ACCESS_MW_BIND\fR Enable Memory Window Binding +.TP +.B IBV_ACCESS_ON_DEMAND\fR Create an on-demand paging MR .PP If .B IBV_ACCESS_REMOTE_WRITE diff --git a/src/cmd.c b/src/cmd.c index 47f1acd33d68..215dc0159a2c 100644 --- a/src/cmd.c +++ b/src/cmd.c @@ -159,6 +159,17 @@ int ibv_cmd_query_device_ex(struct ibv_context *context, (struct ibv_query_device_resp *)resp, raw_fw_ver); attr->comp_mask = 0; + if (resp->response_length >= sizeof(*resp)) { + attr->odp_caps.general_caps = resp->odp_caps.general_caps; + attr->odp_caps.per_transport_caps.rc_odp_caps = + resp->odp_caps.per_transport_caps.rc_odp_caps; + attr->odp_caps.per_transport_caps.uc_odp_caps = + resp->odp_caps.per_transport_caps.uc_odp_caps; + attr->odp_caps.per_transport_caps.ud_odp_caps = + resp->odp_caps.per_transport_caps.ud_odp_caps; + } else { + memset(&attr->odp_caps, 0, sizeof(attr->odp_caps)); + } return 0; }