diff mbox series

[RFC] nfs: make DELEGRETURN try harder to determine if a delegation has been revoked

Message ID 20181005133429.79332-1-aweits@rit.edu (mailing list archive)
State New, archived
Headers show
Series [RFC] nfs: make DELEGRETURN try harder to determine if a delegation has been revoked | expand

Commit Message

Andrew W Elble Oct. 5, 2018, 1:34 p.m. UTC
It's possible for DELEGRETURN to run into corner cases where a
delegation has been revoked by the server, but that fact is being
hidden by an error on the PUTFH/DELEGRETURN. So, in all error
cases where revoked status isn't immediately obvious, do a
TEST_STATEID to see if it is indeed revoked.

Signed-off-by: Andrew Elble <aweits@rit.edu>
---
 fs/nfs/nfs4proc.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

Comments

Trond Myklebust Oct. 5, 2018, 1:44 p.m. UTC | #1
On Fri, 2018-10-05 at 09:34 -0400, Andrew Elble wrote:
> It's possible for DELEGRETURN to run into corner cases where a
> delegation has been revoked by the server, but that fact is being
> hidden by an error on the PUTFH/DELEGRETURN. So, in all error
> cases where revoked status isn't immediately obvious, do a
> TEST_STATEID to see if it is indeed revoked.
> 
> Signed-off-by: Andrew Elble <aweits@rit.edu>
> ---
>  fs/nfs/nfs4proc.c | 20 +++++++++++++++++++-
>  1 file changed, 19 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 8220a168282e..3f38d281a814 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -386,6 +386,16 @@ static void nfs4_free_revoked_stateid(struct
> nfs_server *server,
>  	__nfs4_free_revoked_stateid(server, &tmp, cred);
>  }
>  
> +static void nfs4_free_possibly_revoked_stateid(struct nfs_server
> *server,
> +					const nfs4_stateid *stateid,
> +					struct rpc_cred *cred)
> +{
> +	nfs4_stateid tmp;
> +
> +	nfs4_stateid_copy(&tmp, stateid);
> +	nfs4_test_and_free_stateid(server, &tmp, cred);
> +}
> +
>  static long nfs4_update_delay(long *timeout)
>  {
>  	long ret;
> @@ -6035,9 +6045,13 @@ static void nfs4_delegreturn_done(struct
> rpc_task *task, void *calldata)
>  		nfs4_free_revoked_stateid(data->res.server,
>  				data->args.stateid,
>  				task->tk_msg.rpc_cred);
> -		/* Fallthrough */
> +		task->tk_status = 0;
> +		break;
>  	case -NFS4ERR_BAD_STATEID:
>  	case -NFS4ERR_STALE_STATEID:
> +		nfs4_free_possibly_revoked_stateid(data->res.server,
> +						data->args.stateid,
> +						task->tk_msg.rpc_cred);
>  		task->tk_status = 0;
>  		break;
>  	case -NFS4ERR_OLD_STATEID:
> @@ -6058,6 +6072,10 @@ static void nfs4_delegreturn_done(struct
> rpc_task *task, void *calldata)
>  				&exception);
>  		if (exception.retry)
>  			goto out_restart;
> +
> +		nfs4_free_possibly_revoked_stateid(data->res.server,
> +						data->args.stateid,
> +						task->tk_msg.rpc_cred);
>  	}
>  	data->rpc_status = task->tk_status;
>  	return;

NACK. We don't ever want to run synchronous RPC calls from inside an
rpciod context. There be deadlocks...
Trond Myklebust Oct. 5, 2018, 2:05 p.m. UTC | #2
On Fri, 2018-10-05 at 13:44 +0000, Trond Myklebust wrote:
> On Fri, 2018-10-05 at 09:34 -0400, Andrew Elble wrote:
> > It's possible for DELEGRETURN to run into corner cases where a
> > delegation has been revoked by the server, but that fact is being
> > hidden by an error on the PUTFH/DELEGRETURN. So, in all error
> > cases where revoked status isn't immediately obvious, do a
> > TEST_STATEID to see if it is indeed revoked.
> > 
> > Signed-off-by: Andrew Elble <aweits@rit.edu>
> > ---
> >  fs/nfs/nfs4proc.c | 20 +++++++++++++++++++-
> >  1 file changed, 19 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index 8220a168282e..3f38d281a814 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -386,6 +386,16 @@ static void nfs4_free_revoked_stateid(struct
> > nfs_server *server,
> >  	__nfs4_free_revoked_stateid(server, &tmp, cred);
> >  }
> >  
> > +static void nfs4_free_possibly_revoked_stateid(struct nfs_server
> > *server,
> > +					const nfs4_stateid *stateid,
> > +					struct rpc_cred *cred)
> > +{
> > +	nfs4_stateid tmp;
> > +
> > +	nfs4_stateid_copy(&tmp, stateid);
> > +	nfs4_test_and_free_stateid(server, &tmp, cred);
> > +}
> > +
> >  static long nfs4_update_delay(long *timeout)
> >  {
> >  	long ret;
> > @@ -6035,9 +6045,13 @@ static void nfs4_delegreturn_done(struct
> > rpc_task *task, void *calldata)
> >  		nfs4_free_revoked_stateid(data->res.server,
> >  				data->args.stateid,
> >  				task->tk_msg.rpc_cred);
> > -		/* Fallthrough */
> > +		task->tk_status = 0;
> > +		break;
> >  	case -NFS4ERR_BAD_STATEID:
> >  	case -NFS4ERR_STALE_STATEID:
> > +		nfs4_free_possibly_revoked_stateid(data->res.server,
> > +						data->args.stateid,
> > +						task->tk_msg.rpc_cred);
> >  		task->tk_status = 0;
> >  		break;
> >  	case -NFS4ERR_OLD_STATEID:
> > @@ -6058,6 +6072,10 @@ static void nfs4_delegreturn_done(struct
> > rpc_task *task, void *calldata)
> >  				&exception);
> >  		if (exception.retry)
> >  			goto out_restart;
> > +
> > +		nfs4_free_possibly_revoked_stateid(data->res.server,
> > +						data->args.stateid,
> > +						task->tk_msg.rpc_cred);
> >  	}
> >  	data->rpc_status = task->tk_status;
> >  	return;
> 
> NACK. We don't ever want to run synchronous RPC calls from inside an
> rpciod context. There be deadlocks...

So, my question is why would we need to change nfs4_delegreturn_done at
all? It should already be sending a FREE_STATEID when the server
returns NFS4ERR_EXPIRED or NFS4ERR_DELEG_REVOKED thanks to the call to
nfs4_free_revoked_stateid().

If the server is returning anything other than those 2 errors for a
stateid that is pending a FREE_STATEID from the client, then that
server is broken.
Andrew W Elble Oct. 5, 2018, 3:25 p.m. UTC | #3
>> NACK. We don't ever want to run synchronous RPC calls from inside an
>> rpciod context. There be deadlocks...

ahhh yes, I missed the fact that nfs4_free_revoked_stateid() is
safe in calling nfs4_test_and_free_stateid() because it is bypassing the
TEST_STATEID by setting stateid->type = NFS4_REVOKED_STATEID_TYPE.

> So, my question is why would we need to change nfs4_delegreturn_done at
> all? It should already be sending a FREE_STATEID when the server
> returns NFS4ERR_EXPIRED or NFS4ERR_DELEG_REVOKED thanks to the call to
> nfs4_free_revoked_stateid().
>
> If the server is returning anything other than those 2 errors for a
> stateid that is pending a FREE_STATEID from the client, then that
> server is broken.

My intent (with the BAD/STALE_STATEID part at least)
was to fail safe even in the presence of a broken server.

As to the other bit, the hypothesis is a bad response to PUTFH. (I
cannot prove that I or others have seen this, BTW)

What is the appropriate client/server response(s) if the delegation is
revoked and the PUTFH fails? The server should send the error on the
PUTFH before evaluating the DELEGRETURN, correct?

Thanks,

Andy
Trond Myklebust Oct. 5, 2018, 4:10 p.m. UTC | #4
On Fri, 2018-10-05 at 11:25 -0400, Andrew W Elble wrote:
> > > NACK. We don't ever want to run synchronous RPC calls from inside
> > > an
> > > rpciod context. There be deadlocks...
> 
> ahhh yes, I missed the fact that nfs4_free_revoked_stateid() is
> safe in calling nfs4_test_and_free_stateid() because it is bypassing
> the
> TEST_STATEID by setting stateid->type = NFS4_REVOKED_STATEID_TYPE.
> 
> > So, my question is why would we need to change
> > nfs4_delegreturn_done at
> > all? It should already be sending a FREE_STATEID when the server
> > returns NFS4ERR_EXPIRED or NFS4ERR_DELEG_REVOKED thanks to the call
> > to
> > nfs4_free_revoked_stateid().
> > 
> > If the server is returning anything other than those 2 errors for a
> > stateid that is pending a FREE_STATEID from the client, then that
> > server is broken.
> 
> My intent (with the BAD/STALE_STATEID part at least)
> was to fail safe even in the presence of a broken server.

I don't see how that would be useful. If the server is so broken that
it is returning NFS4ERR_BAD_STATEID in a situation where it expects the
client to send a FREE_STATEID, then it can't be trusted to do the right
thing for calls to TEST_STATEID either.

IOW: This isn't a fixable situation on the client.

> As to the other bit, the hypothesis is a bad response to PUTFH. (I
> cannot prove that I or others have seen this, BTW)
> 
> What is the appropriate client/server response(s) if the delegation
> is
> revoked and the PUTFH fails? The server should send the error on the
> PUTFH before evaluating the DELEGRETURN, correct?
> 

You're talking about the case where PUTFH returns NFS4ERR_STALE? Why
should the client be required free state for a file that has been
removed? There is nothing in RFC5661 that says that it has to, and it
wouldn't make any sense to impose such a requirement either. Once the
filehandle is stale, any locks protecting the file contents are a moot
point.
diff mbox series

Patch

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8220a168282e..3f38d281a814 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -386,6 +386,16 @@  static void nfs4_free_revoked_stateid(struct nfs_server *server,
 	__nfs4_free_revoked_stateid(server, &tmp, cred);
 }
 
+static void nfs4_free_possibly_revoked_stateid(struct nfs_server *server,
+					const nfs4_stateid *stateid,
+					struct rpc_cred *cred)
+{
+	nfs4_stateid tmp;
+
+	nfs4_stateid_copy(&tmp, stateid);
+	nfs4_test_and_free_stateid(server, &tmp, cred);
+}
+
 static long nfs4_update_delay(long *timeout)
 {
 	long ret;
@@ -6035,9 +6045,13 @@  static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 		nfs4_free_revoked_stateid(data->res.server,
 				data->args.stateid,
 				task->tk_msg.rpc_cred);
-		/* Fallthrough */
+		task->tk_status = 0;
+		break;
 	case -NFS4ERR_BAD_STATEID:
 	case -NFS4ERR_STALE_STATEID:
+		nfs4_free_possibly_revoked_stateid(data->res.server,
+						data->args.stateid,
+						task->tk_msg.rpc_cred);
 		task->tk_status = 0;
 		break;
 	case -NFS4ERR_OLD_STATEID:
@@ -6058,6 +6072,10 @@  static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 				&exception);
 		if (exception.retry)
 			goto out_restart;
+
+		nfs4_free_possibly_revoked_stateid(data->res.server,
+						data->args.stateid,
+						task->tk_msg.rpc_cred);
 	}
 	data->rpc_status = task->tk_status;
 	return;