diff mbox series

[RFC] Reduce smbdirect send/receive SGE sizes

Message ID 6f6a2e87-4764-114b-2780-184c845a84c2@talpey.com (mailing list archive)
State New, archived
Headers show
Series [RFC] Reduce smbdirect send/receive SGE sizes | expand

Commit Message

Tom Talpey Aug. 24, 2022, 3:27 p.m. UTC
Reduce the number of SGEs requested by the smbdirect layer.
Previously 16 for both send and receive, these values were
unnecessarily high, and failed on the siw provider.

Also reword the fast-register page array maximum size, which
incorrectly stated it was related to SGE. Consider reducing
this (very) large value later.

This patch is UNTESTED and RFC at this time. 5 send SGEs and
1 receive SGE are expected to succeed with no loss of function.
It should fix operation on softiWARP, and improve efficiency on
all providers.

I don't have sufficient RDMA hardware or workload infrastructure.
Can anyone help test?

Signed-off-by: Tom Talpey <tom@talpey.com>

  fs/cifs/smbdirect.c | 24 ++++++++++--------------
  fs/cifs/smbdirect.h | 14 +++++++++-----
  2 files changed, 19 insertions(+), 19 deletions(-)

  	 * If payload is less than or equal to the threshold,
@@ -225,21 +225,25 @@ struct smbd_buffer_descriptor_v1 {
  	__le32 length;
  } __packed;

-/* Default maximum number of SGEs in a RDMA send/recv */
-#define SMBDIRECT_MAX_SGE	16
+/* Maximum number of SGEs needed by smbdirect.c in a send work request */
+#define SMBDIRECT_MAX_SEND_SGE	5
+
  /* The context for a SMBD request */
  struct smbd_request {
  	struct smbd_connection *info;
  	struct ib_cqe cqe;

-	/* the SGE entries for this packet */
-	struct ib_sge sge[SMBDIRECT_MAX_SGE];
+	/* the SGE entries for this work request */
+	struct ib_sge sge[SMBDIRECT_MAX_SEND_SGE];
  	int num_sge;

  	/* SMBD packet header follows this structure */
  	u8 packet[];
  };

+/* Maximum number of SGEs needed by smbdirect.c in a receive work 
request */
+#define SMBDIRECT_MAX_RECV_SGE	1
+
  /* The context for a SMBD response */
  struct smbd_response {
  	struct smbd_connection *info;

Comments

Long Li Aug. 27, 2022, 12:38 a.m. UTC | #1
> Subject: [RFC PATCH] Reduce smbdirect send/receive SGE sizes
> 
> Reduce the number of SGEs requested by the smbdirect layer.
> Previously 16 for both send and receive, these values were unnecessarily
> high, and failed on the siw provider.
> 
> Also reword the fast-register page array maximum size, which incorrectly
> stated it was related to SGE. Consider reducing this (very) large value later.
> 
> This patch is UNTESTED and RFC at this time. 5 send SGEs and
> 1 receive SGE are expected to succeed with no loss of function.
> It should fix operation on softiWARP, and improve efficiency on all providers.
> 
> I don't have sufficient RDMA hardware or workload infrastructure.
> Can anyone help test?
> 
> Signed-off-by: Tom Talpey <tom@talpey.com>
> 
>   fs/cifs/smbdirect.c | 24 ++++++++++--------------
>   fs/cifs/smbdirect.h | 14 +++++++++-----
>   2 files changed, 19 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index
> 5fbbec22bcc8..d96d420a1589 100644
> --- a/fs/cifs/smbdirect.c
> +++ b/fs/cifs/smbdirect.c
> @@ -99,7 +99,7 @@ int smbd_keep_alive_interval = 120;
>    * User configurable initial values for RDMA transport
>    * The actual values used may be lower and are limited to hardware
> capabilities
>    */
> -/* Default maximum number of SGEs in a RDMA write/read */
> +/* Default maximum number of pages in a single RDMA write/read */
>   int smbd_max_frmr_depth = 2048;
> 
>   /* If payload is less than this byte, use RDMA send/recv not read/write */
> @@ -1017,9 +1017,9 @@ static int smbd_post_send_data(
>   {
>   	int i;
>   	u32 data_length = 0;
> -	struct scatterlist sgl[SMBDIRECT_MAX_SGE];
> +	struct scatterlist sgl[SMBDIRECT_MAX_SEND_SGE];
> 
> -	if (n_vec > SMBDIRECT_MAX_SGE) {
> +	if (n_vec > SMBDIRECT_MAX_SEND_SGE) {
>   		cifs_dbg(VFS, "Can't fit data to SGL, n_vec=%d\n", n_vec);
>   		return -EINVAL;
>   	}
> @@ -1562,17 +1562,13 @@ static struct smbd_connection
> *_smbd_get_connection(
>   	info->max_receive_size = smbd_max_receive_size;
>   	info->keep_alive_interval = smbd_keep_alive_interval;
> 
> -	if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE)
> {
> +	if (info->id->device->attrs.max_send_sge <
> SMBDIRECT_MAX_SEND_SGE ||
> +	    info->id->device->attrs.max_recv_sge <
> SMBDIRECT_MAX_RECV_SGE) {
>   		log_rdma_event(ERR,
> -			"warning: device max_send_sge = %d too small\n",
> -			info->id->device->attrs.max_send_sge);
> -		log_rdma_event(ERR, "Queue Pair creation may fail\n");
> -	}
> -	if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) {
> -		log_rdma_event(ERR,
> -			"warning: device max_recv_sge = %d too small\n",
> +			"device max_send_sge/max_recv_sge = %d/%d too
> small\n",
> +			info->id->device->attrs.max_send_sge,
>   			info->id->device->attrs.max_recv_sge);
> -		log_rdma_event(ERR, "Queue Pair creation may fail\n");
> +		goto config_failed;
>   	}
> 
>   	info->send_cq = NULL;
> @@ -1598,8 +1594,8 @@ static struct smbd_connection
> *_smbd_get_connection(
>   	qp_attr.qp_context = info;
>   	qp_attr.cap.max_send_wr = info->send_credit_target;
>   	qp_attr.cap.max_recv_wr = info->receive_credit_max;
> -	qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SGE;
> -	qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_SGE;
> +	qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE;
> +	qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE;
>   	qp_attr.cap.max_inline_data = 0;
>   	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
>   	qp_attr.qp_type = IB_QPT_RC;
> diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index
> a87fca82a796..2a9b2b12d6aa 100644
> --- a/fs/cifs/smbdirect.h
> +++ b/fs/cifs/smbdirect.h
> @@ -91,7 +91,7 @@ struct smbd_connection {
>   	/* Memory registrations */
>   	/* Maximum number of RDMA read/write outstanding on this
> connection */
>   	int responder_resources;
> -	/* Maximum number of SGEs in a RDMA write/read */
> +	/* Maximum number of pages in a single RDMA write/read on this
> connection */
>   	int max_frmr_depth;
>   	/*
>   	 * If payload is less than or equal to the threshold, @@ -225,21
> +225,25 @@ struct smbd_buffer_descriptor_v1 {
>   	__le32 length;
>   } __packed;
> 
> -/* Default maximum number of SGEs in a RDMA send/recv */
> -#define SMBDIRECT_MAX_SGE	16
> +/* Maximum number of SGEs needed by smbdirect.c in a send work
> request */
> +#define SMBDIRECT_MAX_SEND_SGE	5

I see the following comment in fs/cifs/smb2pdu.h:
/*
 * Maximum number of iovs we need for an open/create request.
 * [0] : struct smb2_create_req
 * [1] : path
 * [2] : lease context
 * [3] : durable context
 * [4] : posix context
 * [5] : time warp context
 * [6] : query id context
 * [7] : compound padding
 */
#define SMB2_CREATE_IOV_SIZE 8

Maybe using 8 is safer?

> +
>   /* The context for a SMBD request */
>   struct smbd_request {
>   	struct smbd_connection *info;
>   	struct ib_cqe cqe;
> 
> -	/* the SGE entries for this packet */
> -	struct ib_sge sge[SMBDIRECT_MAX_SGE];
> +	/* the SGE entries for this work request */
> +	struct ib_sge sge[SMBDIRECT_MAX_SEND_SGE];
>   	int num_sge;
> 
>   	/* SMBD packet header follows this structure */
>   	u8 packet[];
>   };
> 
> +/* Maximum number of SGEs needed by smbdirect.c in a receive work
> request */
> +#define SMBDIRECT_MAX_RECV_SGE	1
> +
>   /* The context for a SMBD response */
>   struct smbd_response {
>   	struct smbd_connection *info;
Tom Talpey Aug. 27, 2022, 12:45 p.m. UTC | #2
On 8/26/2022 8:38 PM, Long Li wrote:...
>> -/* Default maximum number of SGEs in a RDMA send/recv */
>> -#define SMBDIRECT_MAX_SGE	16
>> +/* Maximum number of SGEs needed by smbdirect.c in a send work
>> request */
>> +#define SMBDIRECT_MAX_SEND_SGE	5
> 
> I see the following comment in fs/cifs/smb2pdu.h:
> /*
>   * Maximum number of iovs we need for an open/create request.
>   * [0] : struct smb2_create_req
>   * [1] : path
>   * [2] : lease context
>   * [3] : durable context
>   * [4] : posix context
>   * [5] : time warp context
>   * [6] : query id context
>   * [7] : compound padding
>   */
> #define SMB2_CREATE_IOV_SIZE 8
> 
> Maybe using 8 is safer?

Thanks for taking a look!

SMB Direct re-frames the SMB3 request and the rq_iov's don't
always marshal the same way when they're transmitted over
RDMA. Sometimes they go in different packets, or get turned
into RDMA channel descriptors. So there isn't always a 1:1
mapping.

Also, I haven't perfectly convinced myself this comment is
correct. There is a good bit of optional context insertion,
and some buffer sharing, in SMB2_open_init(). And don't
forget SMB2_TRANSFORMs, compounds, and other SMB3 goop.

Either way, I don't think a "safer" number is a good idea.
It's just kicking the can down the road, and btw "8" still
won't fix the softiWARP compatibility issue. The best fix
may be to refactor smbdirect.c to build the requests more
compactly, so a crisp marshaling strategy can work, always.

I'm scraping some test gear together and I'll be taking a
deeper look next week. Maybe get some focused testing when
we're at the upcoming SDC IOLab.

Tom.
diff mbox series

Patch

diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 5fbbec22bcc8..d96d420a1589 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -99,7 +99,7 @@  int smbd_keep_alive_interval = 120;
   * User configurable initial values for RDMA transport
   * The actual values used may be lower and are limited to hardware 
capabilities
   */
-/* Default maximum number of SGEs in a RDMA write/read */
+/* Default maximum number of pages in a single RDMA write/read */
  int smbd_max_frmr_depth = 2048;

  /* If payload is less than this byte, use RDMA send/recv not read/write */
@@ -1017,9 +1017,9 @@  static int smbd_post_send_data(
  {
  	int i;
  	u32 data_length = 0;
-	struct scatterlist sgl[SMBDIRECT_MAX_SGE];
+	struct scatterlist sgl[SMBDIRECT_MAX_SEND_SGE];

-	if (n_vec > SMBDIRECT_MAX_SGE) {
+	if (n_vec > SMBDIRECT_MAX_SEND_SGE) {
  		cifs_dbg(VFS, "Can't fit data to SGL, n_vec=%d\n", n_vec);
  		return -EINVAL;
  	}
@@ -1562,17 +1562,13 @@  static struct smbd_connection *_smbd_get_connection(
  	info->max_receive_size = smbd_max_receive_size;
  	info->keep_alive_interval = smbd_keep_alive_interval;

-	if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE) {
+	if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE ||
+	    info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) {
  		log_rdma_event(ERR,
-			"warning: device max_send_sge = %d too small\n",
-			info->id->device->attrs.max_send_sge);
-		log_rdma_event(ERR, "Queue Pair creation may fail\n");
-	}
-	if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) {
-		log_rdma_event(ERR,
-			"warning: device max_recv_sge = %d too small\n",
+			"device max_send_sge/max_recv_sge = %d/%d too small\n",
+			info->id->device->attrs.max_send_sge,
  			info->id->device->attrs.max_recv_sge);
-		log_rdma_event(ERR, "Queue Pair creation may fail\n");
+		goto config_failed;
  	}

  	info->send_cq = NULL;
@@ -1598,8 +1594,8 @@  static struct smbd_connection *_smbd_get_connection(
  	qp_attr.qp_context = info;
  	qp_attr.cap.max_send_wr = info->send_credit_target;
  	qp_attr.cap.max_recv_wr = info->receive_credit_max;
-	qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SGE;
-	qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_SGE;
+	qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE;
+	qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE;
  	qp_attr.cap.max_inline_data = 0;
  	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
  	qp_attr.qp_type = IB_QPT_RC;
diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h
index a87fca82a796..2a9b2b12d6aa 100644
--- a/fs/cifs/smbdirect.h
+++ b/fs/cifs/smbdirect.h
@@ -91,7 +91,7 @@  struct smbd_connection {
  	/* Memory registrations */
  	/* Maximum number of RDMA read/write outstanding on this connection */
  	int responder_resources;
-	/* Maximum number of SGEs in a RDMA write/read */
+	/* Maximum number of pages in a single RDMA write/read on this 
connection */
  	int max_frmr_depth;
  	/*