diff mbox series

[8/8] sunrpc: Drop the connection when the server drops a request

Message ID 20200301232145.1465430-9-trond.myklebust@hammerspace.com (mailing list archive)
State New, archived
Headers show
Series Bugfixes and tracing enhancements for knfsd | expand

Commit Message

Trond Myklebust March 1, 2020, 11:21 p.m. UTC
If a server wants to drop a request, then it should also drop the
connection, in order to let the client know.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/svc_xprt.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

Comments

David Wysochanski March 2, 2020, 4:27 p.m. UTC | #1
On Sun, Mar 1, 2020 at 6:25 PM Trond Myklebust <trondmy@gmail.com> wrote:
>
> If a server wants to drop a request, then it should also drop the
> connection, in order to let the client know.
>
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  net/sunrpc/svc_xprt.c | 10 ++++++++++
>  1 file changed, 10 insertions(+)
>
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index de3c077733a7..83a527e56c87 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -873,6 +873,13 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
>  }
>  EXPORT_SYMBOL_GPL(svc_recv);
>
> +static void svc_drop_connection(struct svc_xprt *xprt)
> +{
> +       if (test_bit(XPT_TEMP, &xprt->xpt_flags) &&
> +           !test_and_set_bit(XPT_CLOSE, &xprt->xpt_flags))
> +               svc_xprt_enqueue(xprt);
> +}
> +
>  /*
>   * Drop request
>   */
> @@ -880,6 +887,8 @@ void svc_drop(struct svc_rqst *rqstp)
>  {
>         trace_svc_drop(rqstp);
>         dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
> +       /* Close the connection when dropping a request */
> +       svc_drop_connection(rqstp->rq_xprt);
>         svc_xprt_release(rqstp);
>  }
>  EXPORT_SYMBOL_GPL(svc_drop);
> @@ -1148,6 +1157,7 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
>         if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
>                 spin_unlock(&xprt->xpt_lock);
>                 dprintk("revisit canceled\n");
> +               svc_drop_connection(xprt);
>                 svc_xprt_put(xprt);
>                 trace_svc_drop_deferred(dr);
>                 kfree(dr);
> --
> 2.24.1
>

Trond, back in 2014 you had this NFSv4 only patch that took a more
surgical approach:
https://marc.info/?l=linux-nfs&m=141414531832768&w=2

It looks like discussion died out on it after it was ineffective to
solve a different problem.
Is there a reason why you don't want to do that approach now?
Trond Myklebust March 2, 2020, 8:12 p.m. UTC | #2
On Mon, 2020-03-02 at 11:27 -0500, David Wysochanski wrote:
> On Sun, Mar 1, 2020 at 6:25 PM Trond Myklebust <trondmy@gmail.com>
> wrote:
> > If a server wants to drop a request, then it should also drop the
> > connection, in order to let the client know.
> > 
> > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > ---
> >  net/sunrpc/svc_xprt.c | 10 ++++++++++
> >  1 file changed, 10 insertions(+)
> > 
> > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> > index de3c077733a7..83a527e56c87 100644
> > --- a/net/sunrpc/svc_xprt.c
> > +++ b/net/sunrpc/svc_xprt.c
> > @@ -873,6 +873,13 @@ int svc_recv(struct svc_rqst *rqstp, long
> > timeout)
> >  }
> >  EXPORT_SYMBOL_GPL(svc_recv);
> > 
> > +static void svc_drop_connection(struct svc_xprt *xprt)
> > +{
> > +       if (test_bit(XPT_TEMP, &xprt->xpt_flags) &&
> > +           !test_and_set_bit(XPT_CLOSE, &xprt->xpt_flags))
> > +               svc_xprt_enqueue(xprt);
> > +}
> > +
> >  /*
> >   * Drop request
> >   */
> > @@ -880,6 +887,8 @@ void svc_drop(struct svc_rqst *rqstp)
> >  {
> >         trace_svc_drop(rqstp);
> >         dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
> > +       /* Close the connection when dropping a request */
> > +       svc_drop_connection(rqstp->rq_xprt);
> >         svc_xprt_release(rqstp);
> >  }
> >  EXPORT_SYMBOL_GPL(svc_drop);
> > @@ -1148,6 +1157,7 @@ static void svc_revisit(struct
> > cache_deferred_req *dreq, int too_many)
> >         if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
> >                 spin_unlock(&xprt->xpt_lock);
> >                 dprintk("revisit canceled\n");
> > +               svc_drop_connection(xprt);
> >                 svc_xprt_put(xprt);
> >                 trace_svc_drop_deferred(dr);
> >                 kfree(dr);
> > --
> > 2.24.1
> > 
> 
> Trond, back in 2014 you had this NFSv4 only patch that took a more
> surgical approach:
> https://marc.info/?l=linux-nfs&m=141414531832768&w=2
> 
> It looks like discussion died out on it after it was ineffective to
> solve a different problem.
> Is there a reason why you don't want to do that approach now?
> 

Let me resend this patch with a better proposal. I think the main 2
problems here are really

   1. the svc_revisit() case, where we cancel the revisit. That case
      affects all versions of NFS, and can lead to performance issues.
   2. the NFSv2,v3,v4.0 replay cache, where dropping the replay (e.g.
      after a connection breakage) can cause a performance hit, and for
      something like TCP, which has long (usually 60 second) timeouts it
      could cause the replay to be delayed until after the reply gets
      kicked out of the cache. This is the case where NFSv4.0 can probably
      end up hanging, since the replay won't be forthcoming until a new
      connection breakage occurs.

I think (1) is best served by a patch like this one.
Perhaps (2) is better served by adopting the svc_defer() mechanism?

Hmm... Perhaps 2 patches are in order...
Chuck Lever III March 11, 2020, 3:26 p.m. UTC | #3
> On Mar 2, 2020, at 3:12 PM, Trond Myklebust <trondmy@hammerspace.com> wrote:
> 
> On Mon, 2020-03-02 at 11:27 -0500, David Wysochanski wrote:
>> On Sun, Mar 1, 2020 at 6:25 PM Trond Myklebust <trondmy@gmail.com>
>> wrote:
>>> If a server wants to drop a request, then it should also drop the
>>> connection, in order to let the client know.
>>> 
>>> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
>>> ---
>>> net/sunrpc/svc_xprt.c | 10 ++++++++++
>>> 1 file changed, 10 insertions(+)
>>> 
>>> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
>>> index de3c077733a7..83a527e56c87 100644
>>> --- a/net/sunrpc/svc_xprt.c
>>> +++ b/net/sunrpc/svc_xprt.c
>>> @@ -873,6 +873,13 @@ int svc_recv(struct svc_rqst *rqstp, long
>>> timeout)
>>> }
>>> EXPORT_SYMBOL_GPL(svc_recv);
>>> 
>>> +static void svc_drop_connection(struct svc_xprt *xprt)
>>> +{
>>> +       if (test_bit(XPT_TEMP, &xprt->xpt_flags) &&
>>> +           !test_and_set_bit(XPT_CLOSE, &xprt->xpt_flags))
>>> +               svc_xprt_enqueue(xprt);
>>> +}
>>> +
>>> /*
>>>  * Drop request
>>>  */
>>> @@ -880,6 +887,8 @@ void svc_drop(struct svc_rqst *rqstp)
>>> {
>>>        trace_svc_drop(rqstp);
>>>        dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
>>> +       /* Close the connection when dropping a request */
>>> +       svc_drop_connection(rqstp->rq_xprt);
>>>        svc_xprt_release(rqstp);
>>> }
>>> EXPORT_SYMBOL_GPL(svc_drop);
>>> @@ -1148,6 +1157,7 @@ static void svc_revisit(struct
>>> cache_deferred_req *dreq, int too_many)
>>>        if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
>>>                spin_unlock(&xprt->xpt_lock);
>>>                dprintk("revisit canceled\n");
>>> +               svc_drop_connection(xprt);
>>>                svc_xprt_put(xprt);
>>>                trace_svc_drop_deferred(dr);
>>>                kfree(dr);
>>> --
>>> 2.24.1
>>> 
>> 
>> Trond, back in 2014 you had this NFSv4 only patch that took a more
>> surgical approach:
>> https://marc.info/?l=linux-nfs&m=141414531832768&w=2
>> 
>> It looks like discussion died out on it after it was ineffective to
>> solve a different problem.
>> Is there a reason why you don't want to do that approach now?
>> 
> 
> Let me resend this patch with a better proposal.

Hey Trond, any progress here?


> I think the main 2
> problems here are really
> 
>   1. the svc_revisit() case, where we cancel the revisit. That case
>      affects all versions of NFS, and can lead to performance issues.
>   2. the NFSv2,v3,v4.0 replay cache, where dropping the replay (e.g.
>      after a connection breakage) can cause a performance hit, and for
>      something like TCP, which has long (usually 60 second) timeouts it
>      could cause the replay to be delayed until after the reply gets
>      kicked out of the cache. This is the case where NFSv4.0 can probably
>      end up hanging, since the replay won't be forthcoming until a new
>      connection breakage occurs.
> 
> I think (1) is best served by a patch like this one.
> Perhaps (2) is better served by adopting the svc_defer() mechanism?
> 
> Hmm... Perhaps 2 patches are in order...

--
Chuck Lever
diff mbox series

Patch

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index de3c077733a7..83a527e56c87 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -873,6 +873,13 @@  int svc_recv(struct svc_rqst *rqstp, long timeout)
 }
 EXPORT_SYMBOL_GPL(svc_recv);
 
+static void svc_drop_connection(struct svc_xprt *xprt)
+{
+	if (test_bit(XPT_TEMP, &xprt->xpt_flags) &&
+	    !test_and_set_bit(XPT_CLOSE, &xprt->xpt_flags))
+		svc_xprt_enqueue(xprt);
+}
+
 /*
  * Drop request
  */
@@ -880,6 +887,8 @@  void svc_drop(struct svc_rqst *rqstp)
 {
 	trace_svc_drop(rqstp);
 	dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
+	/* Close the connection when dropping a request */
+	svc_drop_connection(rqstp->rq_xprt);
 	svc_xprt_release(rqstp);
 }
 EXPORT_SYMBOL_GPL(svc_drop);
@@ -1148,6 +1157,7 @@  static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
 	if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
 		spin_unlock(&xprt->xpt_lock);
 		dprintk("revisit canceled\n");
+		svc_drop_connection(xprt);
 		svc_xprt_put(xprt);
 		trace_svc_drop_deferred(dr);
 		kfree(dr);