Message ID | 20190220061145.28241-2-lsahlber@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Simplify handling of credits in compound_send_recv() | expand |
вт, 19 февр. 2019 г. в 22:12, Ronnie Sahlberg <lsahlber@redhat.com>: > > Change wait_for_free_request to allow specifying how many requests(credits) > to wait for. > Pass 1 from all callsites. > > This should not change any behavior but will allow future callers to > atomically wait for >1 requests. > > Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com> > --- > fs/cifs/cifsglob.h | 4 ++-- > fs/cifs/smb2ops.c | 2 +- > fs/cifs/transport.c | 25 ++++++++++++++----------- > 3 files changed, 17 insertions(+), 14 deletions(-) > > diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h > index 5bf463c2d9dc..1b3f62eee4d0 100644 > --- a/fs/cifs/cifsglob.h > +++ b/fs/cifs/cifsglob.h > @@ -732,13 +732,13 @@ in_flight(struct TCP_Server_Info *server) > } > > static inline bool > -has_credits(struct TCP_Server_Info *server, int *credits) > +has_credits(struct TCP_Server_Info *server, int *credits, int num_credits) > { > int num; > spin_lock(&server->req_lock); > num = *credits; > spin_unlock(&server->req_lock); > - return num > 0; > + return num >= num_credits; > } > > static inline void > diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c > index 080929a64c64..372a3563b79b 100644 > --- a/fs/cifs/smb2ops.c > +++ b/fs/cifs/smb2ops.c > @@ -185,7 +185,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, > spin_unlock(&server->req_lock); > cifs_num_waiters_inc(server); > rc = wait_event_killable(server->request_q, > - has_credits(server, &server->credits)); > + has_credits(server, &server->credits, 1)); > cifs_num_waiters_dec(server); > if (rc) > return rc; > diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c > index d4f1224fb7cb..8ce5b6e787c6 100644 > --- a/fs/cifs/transport.c > +++ b/fs/cifs/transport.c > @@ -478,7 +478,8 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, > > static int > wait_for_free_credits(struct TCP_Server_Info *server, const int timeout, > - int *credits, unsigned int *instance) > + int num_credits, int *credits, > + unsigned int *instance) > { > int rc; > > @@ -495,11 +496,11 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int timeout, > } > > while (1) { > - if (*credits <= 0) { > + if (*credits < num_credits) { > spin_unlock(&server->req_lock); > cifs_num_waiters_inc(server); > rc = wait_event_killable(server->request_q, > - has_credits(server, credits)); > + has_credits(server, credits, num_credits)); > cifs_num_waiters_dec(server); > if (rc) > return rc; > @@ -517,8 +518,8 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int timeout, > > /* update # of requests on the wire to server */ > if (timeout != CIFS_BLOCKING_OP) { > - *credits -= 1; > - server->in_flight++; > + *credits -= num_credits; > + server->in_flight += num_credits; > *instance = server->reconnect_instance; > } > spin_unlock(&server->req_lock); > @@ -530,7 +531,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int timeout, > > static int > wait_for_free_request(struct TCP_Server_Info *server, const int timeout, > - const int optype, unsigned int *instance) > + int num, const int optype, unsigned int *instance) Waiting for more than one credit without a timeout is a source of possibility to be stuck. We already have 'timeout' argument which contains some part of flags passed to transport layer from PDU. We might consider renaming 'optype' argument into flags and use 'timeout' with its true meaning - timeout of the waiting for credits. Once implemented, we can define some default period of time which we can *afford* to wait for credits and then fall back to sequential code if we timed out. I also suggest to add another function wait_for_compound_credits and make wait_for_free_request calls it - doesn't require changes to all the places where the latter is being used. > { > int *val; > > @@ -538,7 +539,7 @@ wait_for_free_request(struct TCP_Server_Info *server, const int timeout, > /* Since an echo is already inflight, no need to wait to send another */ > if (*val <= 0 && optype == CIFS_ECHO_OP) > return -EAGAIN; > - return wait_for_free_credits(server, timeout, val, instance); > + return wait_for_free_credits(server, timeout, num, val, instance); > } > > int > @@ -646,7 +647,8 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, > optype = flags & CIFS_OP_MASK; > > if ((flags & CIFS_HAS_CREDITS) == 0) { > - rc = wait_for_free_request(server, timeout, optype, &instance); > + rc = wait_for_free_request(server, timeout, 1, optype, > + &instance); > if (rc) > return rc; > credits.value = 1; > @@ -923,7 +925,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, > * Ensure we obtain 1 credit per request in the compound chain. > */ > for (i = 0; i < num_rqst; i++) { > - rc = wait_for_free_request(ses->server, timeout, optype, > + rc = wait_for_free_request(ses->server, timeout, 1, optype, > &instance); > > if (rc == 0) { > @@ -1212,7 +1214,8 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, > return -EIO; > } > > - rc = wait_for_free_request(ses->server, timeout, 0, &credits.instance); > + rc = wait_for_free_request(ses->server, timeout, 1, 0, > + &credits.instance); > if (rc) > return rc; > > @@ -1354,7 +1357,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, > return -EIO; > } > > - rc = wait_for_free_request(ses->server, CIFS_BLOCKING_OP, 0, > + rc = wait_for_free_request(ses->server, CIFS_BLOCKING_OP, 1, 0, > &instance); > if (rc) > return rc; > -- > 2.13.6 > -- Best regards, Pavel Shilovsky
On Thu, Feb 21, 2019 at 11:39 AM Pavel Shilovsky <piastryyy@gmail.com> wrote: > > вт, 19 февр. 2019 г. в 22:12, Ronnie Sahlberg <lsahlber@redhat.com>: > > > > Change wait_for_free_request to allow specifying how many requests(credits) > > to wait for. > > Pass 1 from all callsites. > > > > This should not change any behavior but will allow future callers to > > atomically wait for >1 requests. > > > > Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com> > > --- > > fs/cifs/cifsglob.h | 4 ++-- > > fs/cifs/smb2ops.c | 2 +- > > fs/cifs/transport.c | 25 ++++++++++++++----------- > > 3 files changed, 17 insertions(+), 14 deletions(-) > > > > diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h > > index 5bf463c2d9dc..1b3f62eee4d0 100644 > > --- a/fs/cifs/cifsglob.h > > +++ b/fs/cifs/cifsglob.h > > @@ -732,13 +732,13 @@ in_flight(struct TCP_Server_Info *server) > > } > > > > static inline bool > > -has_credits(struct TCP_Server_Info *server, int *credits) > > +has_credits(struct TCP_Server_Info *server, int *credits, int num_credits) > > { > > int num; > > spin_lock(&server->req_lock); > > num = *credits; > > spin_unlock(&server->req_lock); > > - return num > 0; > > + return num >= num_credits; > > } > > > > static inline void > > diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c > > index 080929a64c64..372a3563b79b 100644 > > --- a/fs/cifs/smb2ops.c > > +++ b/fs/cifs/smb2ops.c > > @@ -185,7 +185,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, > > spin_unlock(&server->req_lock); > > cifs_num_waiters_inc(server); > > rc = wait_event_killable(server->request_q, > > - has_credits(server, &server->credits)); > > + has_credits(server, &server->credits, 1)); > > cifs_num_waiters_dec(server); > > if (rc) > > return rc; > > diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c > > index d4f1224fb7cb..8ce5b6e787c6 100644 > > --- a/fs/cifs/transport.c > > +++ b/fs/cifs/transport.c > > @@ -478,7 +478,8 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, > > > > static int > > wait_for_free_credits(struct TCP_Server_Info *server, const int timeout, > > - int *credits, unsigned int *instance) > > + int num_credits, int *credits, > > + unsigned int *instance) > > { > > int rc; > > > > @@ -495,11 +496,11 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int timeout, > > } > > > > while (1) { > > - if (*credits <= 0) { > > + if (*credits < num_credits) { > > spin_unlock(&server->req_lock); > > cifs_num_waiters_inc(server); > > rc = wait_event_killable(server->request_q, > > - has_credits(server, credits)); > > + has_credits(server, credits, num_credits)); > > cifs_num_waiters_dec(server); > > if (rc) > > return rc; > > @@ -517,8 +518,8 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int timeout, > > > > /* update # of requests on the wire to server */ > > if (timeout != CIFS_BLOCKING_OP) { > > - *credits -= 1; > > - server->in_flight++; > > + *credits -= num_credits; > > + server->in_flight += num_credits; > > *instance = server->reconnect_instance; > > } > > spin_unlock(&server->req_lock); > > @@ -530,7 +531,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int timeout, > > > > static int > > wait_for_free_request(struct TCP_Server_Info *server, const int timeout, > > - const int optype, unsigned int *instance) > > + int num, const int optype, unsigned int *instance) > > Waiting for more than one credit without a timeout is a source of > possibility to be stuck. We already have 'timeout' argument which > contains some part of flags passed to transport layer from PDU. We > might consider renaming 'optype' argument into flags and use 'timeout' > with its true meaning - timeout of the waiting for credits. > > Once implemented, we can define some default period of time which we > can *afford* to wait for credits and then fall back to sequential code > if we timed out. > > I also suggest to add another function wait_for_compound_credits and > make wait_for_free_request calls it - doesn't require changes to all > the places where the latter is being used. Thanks, I am reworking it and will resend shortly. Passing flags down into these functions instead of timeout/optype makes sense and I will do that. I am not sure we need to add a timeout to represent a deadline after we return -EAGAIN or similar. But for sure we need to prevent waiting atomically for >1 credits to block forever. This could happen in two scenarios, the first would be if the number of free credits are persistently pegged at <=1 credits due to persistent and massive number of threads causing single credits SMB2 I/O. I.e. we are constantly starved for multi-credit requests. The second would be if we get stuck at <=1 available credits even when the session is idle. This would be a bug and the best is probably to let the reconnect eventually happen and thus force the credits to be in sync again between server and client. So I don't think we need t do anything explicit for that case here. The first case is a genuine case, but I think we can handle that in a different way instead of switching back to a "grab one credit at a time" like we have now. In that scenario we would have a different problem that is there could be so many concurrent calls for compounded operations requiring >=3 credits each that we eventually consume MAX_CREDITS number of credits across these threads. Each thread having already allocated one of more credits less that it needs for the full compound and the the threads are now all deadlocked each waiting for one or more credits that will never become available. Making the allocation of credits for a compound allocate all of them atomically solves this type of deadlock, but would be prone to the credits starvation issue. The way I propose to solve the credits starvation problem for compounds is something like this (I haven't coded it yet so I dont know exactly what it will look like) * After the session has been fully established, we should be able to assume we WILL have a whole bunch of credits available. Modulo a hypothetical situation where the sever will just never grant us more than 1 or 2 credits in total. I don't think this is a reasonable situation we need to worry too much about and maybe the right thing to do here is just to block all compound operations until we get a session reconnect where hopefully the server will be more reasonable. * We can define a new MAX_CREDITS_FOR_COMPOUND which would be the largest number of credits that we will ever create a single compound for. I think that would be 4 at this stage. * In the loop in wait_for_free_credits(), If this is a request for a single credit AND iff it is a normal operation, i.e. &CIFS_OP_MASK == 0 then we will NOT grant the single credit but continue looping. I.e. basically reserving the last MAX_CREDITS_FOR_COMPOUND credits before we completely run out to only be available for compounds. That way we do guarantee that compound requests will be making progress and will not be completely starved by non-compound requests.. > > > { > > int *val; > > > > @@ -538,7 +539,7 @@ wait_for_free_request(struct TCP_Server_Info *server, const int timeout, > > /* Since an echo is already inflight, no need to wait to send another */ > > if (*val <= 0 && optype == CIFS_ECHO_OP) > > return -EAGAIN; > > - return wait_for_free_credits(server, timeout, val, instance); > > + return wait_for_free_credits(server, timeout, num, val, instance); > > } > > > > int > > @@ -646,7 +647,8 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, > > optype = flags & CIFS_OP_MASK; > > > > if ((flags & CIFS_HAS_CREDITS) == 0) { > > - rc = wait_for_free_request(server, timeout, optype, &instance); > > + rc = wait_for_free_request(server, timeout, 1, optype, > > + &instance); > > if (rc) > > return rc; > > credits.value = 1; > > @@ -923,7 +925,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, > > * Ensure we obtain 1 credit per request in the compound chain. > > */ > > for (i = 0; i < num_rqst; i++) { > > - rc = wait_for_free_request(ses->server, timeout, optype, > > + rc = wait_for_free_request(ses->server, timeout, 1, optype, > > &instance); > > > > if (rc == 0) { > > @@ -1212,7 +1214,8 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, > > return -EIO; > > } > > > > - rc = wait_for_free_request(ses->server, timeout, 0, &credits.instance); > > + rc = wait_for_free_request(ses->server, timeout, 1, 0, > > + &credits.instance); > > if (rc) > > return rc; > > > > @@ -1354,7 +1357,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, > > return -EIO; > > } > > > > - rc = wait_for_free_request(ses->server, CIFS_BLOCKING_OP, 0, > > + rc = wait_for_free_request(ses->server, CIFS_BLOCKING_OP, 1, 0, > > &instance); > > if (rc) > > return rc; > > -- > > 2.13.6 > > > > -- > Best regards, > Pavel Shilovsky
On Wed, Feb 27, 2019 at 09:12 PM, ronnie sahlberg <ronniesahlberg@gmail.com> wrote: > > On Thu, Feb 21, 2019 at 11:39 AM Pavel Shilovsky <piastryyy@gmail.com> wrote: > > ... > > > > Waiting for more than one credit without a timeout is a source of > > possibility to be stuck. We already have 'timeout' argument which > > contains some part of flags passed to transport layer from PDU. We > > might consider renaming 'optype' argument into flags and use 'timeout' > > with its true meaning - timeout of the waiting for credits. > > > > Once implemented, we can define some default period of time which we > > can *afford* to wait for credits and then fall back to sequential code > > if we timed out. > > > > I also suggest to add another function wait_for_compound_credits and > > make wait_for_free_request calls it - doesn't require changes to all > > the places where the latter is being used. > > Thanks, I am reworking it and will resend shortly. > Passing flags down into these functions instead of timeout/optype > makes sense and I will do that. > > I am not sure we need to add a timeout to represent a deadline after > we return -EAGAIN or similar. > But for sure we need to prevent waiting atomically for >1 credits to > block forever. (adding Jeremy to comment and cc'ing samba-technical) Thanks for taking a look at this. If we don't set a timeout on waiting for several credits at once, we may end up waiting forever. It is only a matter of what is a possibility to be stuck. We can try to guess if the server is going to grant us more or not in future by analyzing the number of requests in flight or other parameters but we can't say for sure because the spec doesn't say anything about minimal number of credits. This means that under certain conditions the server may end up giving us a very few credits that won't be enough to do compounding. Those conditions might be different but from the top of the head I can give an example of a highly loaded file server with thousands of active connections that experiences CPU and/or memory pressure and decides to limit the number of credits per connection. That's why I do think that the ability to set a timeout on waiting is essential. We can start with something between 100ms and 1s (and even making it auto-tunable - let's say a couple round trips to the server) and then decide what is the best (or even make it configurable). > This could happen in two scenarios, the first would be if the number > of free credits are persistently pegged at <=1 credits > due to persistent and massive number of threads causing single credits > SMB2 I/O. > I.e. we are constantly starved for multi-credit requests. > > The second would be if we get stuck at <=1 available credits even when > the session is idle. > This would be a bug and the best is probably to let the reconnect > eventually happen and thus force the credits to be in sync again > between server and client. > So I don't think we need t do anything explicit for that case here. > > The first case is a genuine case, but I think we can handle that in a > different way instead of switching back to a "grab one credit at a > time" like we have now. > In that scenario we would have a different problem that is there > could be so many concurrent calls for compounded operations requiring > >=3 credits each > that we eventually consume MAX_CREDITS number of credits across these > threads. Each thread having already allocated one of more credits less > that it needs for the full compound > and the the threads are now all deadlocked each waiting for one or > more credits that will never become available. > Making the allocation of credits for a compound allocate all of them > atomically solves this type of deadlock, but would be prone to the > credits starvation issue. The approach of grabbing one credit in a time was a temporary work around that didn't require a lot of code changes and was easier to be pushed to stable kernels. There are no doubts that it left a possibility to be stuck and we need a proper solution to handle compound requests. > The way I propose to solve the credits starvation problem for > compounds is something like this (I haven't coded it yet so I dont > know exactly what it will look like) > * After the session has been fully established, we should be able to > assume we WILL have a whole bunch of credits available. > Modulo a hypothetical situation where the sever will just never grant > us more than 1 or 2 credits in total. I don't think this is a > reasonable situation we need to worry too much about and maybe the > right thing to do here is just to block all compound > operations until we get a session reconnect where hopefully the server > will be more reasonable. > * We can define a new MAX_CREDITS_FOR_COMPOUND which would be the > largest number of credits that we will ever create a single compound > for. > I think that would be 4 at this stage. Note, that we reserve 1 credit for echo request and 1 credit for oplock/lease break ack, so those operations won't be stuck waiting for credits. If the maximum number of requests in the compounded chain is 4, then 5 credits granted by the server will make our client useless for certain operations. > * In the loop in wait_for_free_credits(), If this is a request for a > single credit AND iff it is a normal operation, i.e. &CIFS_OP_MASK == > 0 > then we will NOT grant the single credit but continue looping. > > I.e. basically reserving the last MAX_CREDITS_FOR_COMPOUND credits > before we completely run out to only be available for compounds. > That way we do guarantee that compound requests will be making > progress and will not be completely starved by non-compound requests.. > Trying to reserve MAX_CREDITS_FOR_COMPOUNDING increases latency of single-credit requests. In some workloads compounding won't be even used (let's say it is pure IO workload), so instead of using all the bandwidth available to the client (and the credit bandwidth might be pretty narrow for highly loaded servers) we will end up using only a part of it. But it is not only about latency of a single-credit request, it is also about latency of a compounded request itself. Instead of waiting for all the credits for a long period of time we can start making progress by doing sequential operations. If the round trip time is small, the penalty of indefinite waiting instead of going sequential will be rather substantial. That's why waiting only for some reasonable period of time seems like a good option to try: if we have already waited for some reasonable time, let's stop waiting, return special error code and let the upper layer process the request sequentially. So, I think that compounding is a great optimization and it is important to try using it as much as possible across the client code. In the same time, the SMB3 client should be capable to handle any workload or scenario that doesn't contradict the spec. Thoughts? -- Best regards, Pavel Shilovsky
On Wed, Mar 6, 2019 at 12:15 PM Pavel Shilovsky <piastryyy@gmail.com> wrote: > > On Wed, Feb 27, 2019 at 09:12 PM, ronnie sahlberg > <ronniesahlberg@gmail.com> wrote: > > > > On Thu, Feb 21, 2019 at 11:39 AM Pavel Shilovsky <piastryyy@gmail.com> wrote: > > > > ... > > > > > > Waiting for more than one credit without a timeout is a source of > > > possibility to be stuck. We already have 'timeout' argument which > > > contains some part of flags passed to transport layer from PDU. We > > > might consider renaming 'optype' argument into flags and use 'timeout' > > > with its true meaning - timeout of the waiting for credits. > > > > > > Once implemented, we can define some default period of time which we > > > can *afford* to wait for credits and then fall back to sequential code > > > if we timed out. > > > > > > I also suggest to add another function wait_for_compound_credits and > > > make wait_for_free_request calls it - doesn't require changes to all > > > the places where the latter is being used. > > > > Thanks, I am reworking it and will resend shortly. > > Passing flags down into these functions instead of timeout/optype > > makes sense and I will do that. > > > > I am not sure we need to add a timeout to represent a deadline after > > we return -EAGAIN or similar. > > But for sure we need to prevent waiting atomically for >1 credits to > > block forever. > > (adding Jeremy to comment and cc'ing samba-technical) > > Thanks for taking a look at this. > > If we don't set a timeout on waiting for several credits at once, we > may end up waiting forever. It is only a matter of what is a > possibility to be stuck. We can try to guess if the server is going to > grant us more or not in future by analyzing the number of requests in > flight or other parameters but we can't say for sure because the spec > doesn't say anything about minimal number of credits. This means that > under certain conditions the server may end up giving us a very few > credits that won't be enough to do compounding. > > Those conditions might be different but from the top of the head I can > give an example of a highly loaded file server with thousands of > active connections that experiences CPU and/or memory pressure and > decides to limit the number of credits per connection. That's why I do > think that the ability to set a timeout on waiting is essential. We > can start with something between 100ms and 1s (and even making it > auto-tunable - let's say a couple round trips to the server) and then > decide what is the best (or even make it configurable). > > > This could happen in two scenarios, the first would be if the number > > of free credits are persistently pegged at <=1 credits > > due to persistent and massive number of threads causing single credits > > SMB2 I/O. > > I.e. we are constantly starved for multi-credit requests. > > > > The second would be if we get stuck at <=1 available credits even when > > the session is idle. > > This would be a bug and the best is probably to let the reconnect > > eventually happen and thus force the credits to be in sync again > > between server and client. > > So I don't think we need t do anything explicit for that case here. > > > > The first case is a genuine case, but I think we can handle that in a > > different way instead of switching back to a "grab one credit at a > > time" like we have now. > > In that scenario we would have a different problem that is there > > could be so many concurrent calls for compounded operations requiring > > >=3 credits each > > that we eventually consume MAX_CREDITS number of credits across these > > threads. Each thread having already allocated one of more credits less > > that it needs for the full compound > > and the the threads are now all deadlocked each waiting for one or > > more credits that will never become available. > > Making the allocation of credits for a compound allocate all of them > > atomically solves this type of deadlock, but would be prone to the > > credits starvation issue. > > The approach of grabbing one credit in a time was a temporary work > around that didn't require a lot of code changes and was easier to be > pushed to stable kernels. There are no doubts that it left a > possibility to be stuck and we need a proper solution to handle > compound requests. > > > The way I propose to solve the credits starvation problem for > > compounds is something like this (I haven't coded it yet so I dont > > know exactly what it will look like) > > * After the session has been fully established, we should be able to > > assume we WILL have a whole bunch of credits available. > > Modulo a hypothetical situation where the sever will just never grant > > us more than 1 or 2 credits in total. I don't think this is a > > reasonable situation we need to worry too much about and maybe the > > right thing to do here is just to block all compound > > operations until we get a session reconnect where hopefully the server > > will be more reasonable. > > * We can define a new MAX_CREDITS_FOR_COMPOUND which would be the > > largest number of credits that we will ever create a single compound > > for. > > I think that would be 4 at this stage. > > Note, that we reserve 1 credit for echo request and 1 credit for > oplock/lease break ack, so those operations won't be stuck waiting for > credits. If the maximum number of requests in the compounded chain is > 4, then 5 credits granted by the server will make our client useless > for certain operations. > > > * In the loop in wait_for_free_credits(), If this is a request for a > > single credit AND iff it is a normal operation, i.e. &CIFS_OP_MASK == > > 0 > > then we will NOT grant the single credit but continue looping. > > > > I.e. basically reserving the last MAX_CREDITS_FOR_COMPOUND credits > > before we completely run out to only be available for compounds. > > That way we do guarantee that compound requests will be making > > progress and will not be completely starved by non-compound requests.. > > > > Trying to reserve MAX_CREDITS_FOR_COMPOUNDING increases latency of > single-credit requests. In some workloads compounding won't be even > used (let's say it is pure IO workload), so instead of using all the > bandwidth available to the client (and the credit bandwidth might be > pretty narrow for highly loaded servers) we will end up using only a > part of it. > > But it is not only about latency of a single-credit request, it is > also about latency of a compounded request itself. Instead of waiting > for all the credits for a long period of time we can start making > progress by doing sequential operations. If the round trip time is > small, the penalty of indefinite waiting instead of going sequential > will be rather substantial. That's why waiting only for some > reasonable period of time seems like a good option to try: if we have > already waited for some reasonable time, let's stop waiting, return > special error code and let the upper layer process the request > sequentially. > > So, I think that compounding is a great optimization and it is > important to try using it as much as possible across the client code. > In the same time, the SMB3 client should be capable to handle any > workload or scenario that doesn't contradict the spec. > > Thoughts? Thanks. I have added a, for now, fixed timeout with a rather large value and logging a message when we give up waiting for a multi-credit request. These messages should be good for identifying if/when this happens. As for switching from multi-credit compound sequences to to non-compounded serialized code, I don't think we should do that because we would have to duplicate all these codepaths. And only to solve a pathological issue where performance would already be so poor that the share is basically inoperable IMHO. This would only be an issue IFF we have a server that for very long timeperiods pegs us with less than a handful of credits even on an idle session. I think it is fair to wait very long, even indefinitely in this case. What we might do is eventually perform a full reconnect and hope for a miracle or log something to point we need to re-balance our servers. Again, I am not sure if we should switched to single threaded un-compouded mode in that situation. I would consider it a pathological case where performance and behavior would have degraded so much that the smb share is almost inoperable. I would consider this similar to behavior where due to packetloss TCP pegs us permanently at a congestion window of 3kb. The solution there is not better packet-loss recovery in TCP or tuning re-transmission timeouts but fix the underlying network issue and prevent this pathological case from happening in the first place. Those are my thoughts. This would be a case where we can log what is wrong but the fix should be to prevent the server from doing this in the first place. regards ronnie sahlberg > > > -- > Best regards, > Pavel Shilovsky
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 5bf463c2d9dc..1b3f62eee4d0 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -732,13 +732,13 @@ in_flight(struct TCP_Server_Info *server) } static inline bool -has_credits(struct TCP_Server_Info *server, int *credits) +has_credits(struct TCP_Server_Info *server, int *credits, int num_credits) { int num; spin_lock(&server->req_lock); num = *credits; spin_unlock(&server->req_lock); - return num > 0; + return num >= num_credits; } static inline void diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 080929a64c64..372a3563b79b 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -185,7 +185,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size, spin_unlock(&server->req_lock); cifs_num_waiters_inc(server); rc = wait_event_killable(server->request_q, - has_credits(server, &server->credits)); + has_credits(server, &server->credits, 1)); cifs_num_waiters_dec(server); if (rc) return rc; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index d4f1224fb7cb..8ce5b6e787c6 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -478,7 +478,8 @@ smb_send(struct TCP_Server_Info *server, struct smb_hdr *smb_buffer, static int wait_for_free_credits(struct TCP_Server_Info *server, const int timeout, - int *credits, unsigned int *instance) + int num_credits, int *credits, + unsigned int *instance) { int rc; @@ -495,11 +496,11 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int timeout, } while (1) { - if (*credits <= 0) { + if (*credits < num_credits) { spin_unlock(&server->req_lock); cifs_num_waiters_inc(server); rc = wait_event_killable(server->request_q, - has_credits(server, credits)); + has_credits(server, credits, num_credits)); cifs_num_waiters_dec(server); if (rc) return rc; @@ -517,8 +518,8 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int timeout, /* update # of requests on the wire to server */ if (timeout != CIFS_BLOCKING_OP) { - *credits -= 1; - server->in_flight++; + *credits -= num_credits; + server->in_flight += num_credits; *instance = server->reconnect_instance; } spin_unlock(&server->req_lock); @@ -530,7 +531,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int timeout, static int wait_for_free_request(struct TCP_Server_Info *server, const int timeout, - const int optype, unsigned int *instance) + int num, const int optype, unsigned int *instance) { int *val; @@ -538,7 +539,7 @@ wait_for_free_request(struct TCP_Server_Info *server, const int timeout, /* Since an echo is already inflight, no need to wait to send another */ if (*val <= 0 && optype == CIFS_ECHO_OP) return -EAGAIN; - return wait_for_free_credits(server, timeout, val, instance); + return wait_for_free_credits(server, timeout, num, val, instance); } int @@ -646,7 +647,8 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst, optype = flags & CIFS_OP_MASK; if ((flags & CIFS_HAS_CREDITS) == 0) { - rc = wait_for_free_request(server, timeout, optype, &instance); + rc = wait_for_free_request(server, timeout, 1, optype, + &instance); if (rc) return rc; credits.value = 1; @@ -923,7 +925,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, * Ensure we obtain 1 credit per request in the compound chain. */ for (i = 0; i < num_rqst; i++) { - rc = wait_for_free_request(ses->server, timeout, optype, + rc = wait_for_free_request(ses->server, timeout, 1, optype, &instance); if (rc == 0) { @@ -1212,7 +1214,8 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, return -EIO; } - rc = wait_for_free_request(ses->server, timeout, 0, &credits.instance); + rc = wait_for_free_request(ses->server, timeout, 1, 0, + &credits.instance); if (rc) return rc; @@ -1354,7 +1357,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, return -EIO; } - rc = wait_for_free_request(ses->server, CIFS_BLOCKING_OP, 0, + rc = wait_for_free_request(ses->server, CIFS_BLOCKING_OP, 1, 0, &instance); if (rc) return rc;
Change wait_for_free_request to allow specifying how many requests(credits) to wait for. Pass 1 from all callsites. This should not change any behavior but will allow future callers to atomically wait for >1 requests. Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com> --- fs/cifs/cifsglob.h | 4 ++-- fs/cifs/smb2ops.c | 2 +- fs/cifs/transport.c | 25 ++++++++++++++----------- 3 files changed, 17 insertions(+), 14 deletions(-)