diff mbox series

[v2] NFSv4.x recover from pre-mature loss of openstateid

Message ID 20191218211327.30362-1-olga.kornievskaia@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v2] NFSv4.x recover from pre-mature loss of openstateid | expand

Commit Message

Olga Kornievskaia Dec. 18, 2019, 9:13 p.m. UTC
From: Olga Kornievskaia <kolga@netapp.com>

Ever since commit 0e0cb35b417f, it's possible to lose an open stateid
while retrying a CLOSE due to ERR_OLD_STATEID. Once that happens,
operations that require an open stateid fail with EAGAIN, which is
propagated to the application; tests like generic/446 and generic/168
then fail with "Resource temporarily unavailable".

Instead of returning this error, initiate state recovery when possible to
recover the open stateid and then try calling nfs4_select_rw_stateid()
again.

Fixes: 0e0cb35b417f ("NFSv4: Handle NFS4ERR_OLD_STATEID in CLOSE/OPEN_DOWNGRADE")
Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
 fs/nfs/nfs42proc.c | 36 ++++++++++++++++++++++++++++--------
 fs/nfs/nfs4proc.c  |  2 ++
 fs/nfs/pnfs.c      |  2 +-
 3 files changed, 31 insertions(+), 9 deletions(-)

Comments

Calum Mackay Dec. 18, 2019, 9:25 p.m. UTC | #1
hi Olga…

On 18/12/2019 9:13 pm, Olga Kornievskaia wrote:
> From: Olga Kornievskaia <kolga@netapp.com>
> 
> Ever since the commit 0e0cb35b417f, it's possible to lose an open stateid
> while retrying a CLOSE due to ERR_OLD_STATEID. Once that happens,
> operations that require openstateid fail with EAGAIN which is propagated
> to the application then tests like generic/446 and generic/168 fail with
> "Resource temporarily unavailable".
> 
> Instead of returning this error, initiate state recovery when possible to
> recover the open stateid and then try calling nfs4_select_rw_stateid()
> again.
> 
> Fixes: 0e0cb35b417f ("NFSv4: Handle NFS4ERR_OLD_STATEID in CLOSE/OPEN_DOWNGRADE")
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> ---
>   fs/nfs/nfs42proc.c | 36 ++++++++++++++++++++++++++++--------
>   fs/nfs/nfs4proc.c  |  2 ++
>   fs/nfs/pnfs.c      |  2 +-
>   3 files changed, 31 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> index 1fe83e0f663e..9637aad36bdc 100644
> --- a/fs/nfs/nfs42proc.c
> +++ b/fs/nfs/nfs42proc.c
> @@ -61,8 +61,11 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
>   
>   	status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context,
>   			lock, FMODE_WRITE);
> -	if (status)
> +	if (status) {
> +		if (status == -EAGAIN)
> +			status = -NFS4ERR_BAD_STATEID;
>   		return status;
> +	}
>   
>   	res.falloc_fattr = nfs_alloc_fattr();
>   	if (!res.falloc_fattr)
> @@ -287,8 +290,11 @@ static ssize_t _nfs42_proc_copy(struct file *src,
>   	} else {
>   		status = nfs4_set_rw_stateid(&args->src_stateid,
>   				src_lock->open_context, src_lock, FMODE_READ);
> -		if (status)
> +		if (status) {
> +			if (status == -EAGAIN)
> +				status = -NFS4ERR_BAD_STATEID;
>   			return status;
> +		}
>   	}
>   	status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping,
>   			pos_src, pos_src + (loff_t)count - 1);
> @@ -297,8 +303,11 @@ static ssize_t _nfs42_proc_copy(struct file *src,
>   
>   	status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context,
>   				     dst_lock, FMODE_WRITE);
> -	if (status)
> +	if (status) {
> +		if (status == -EAGAIN)
> +			status = -NFS4ERR_BAD_STATEID;
>   		return status;
> +	}
>   
>   	status = nfs_sync_inode(dst_inode);
>   	if (status)
> @@ -546,8 +555,11 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
>   	status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx,
>   				     FMODE_READ);
>   	nfs_put_lock_context(l_ctx);
> -	if (status)
> +	if (status) {
> +		if (status == -EAGAIN)
> +			status = -NFS4ERR_BAD_STATEID;
>   		return status;
> +	}
>   
>   	status = nfs4_call_sync(src_server->client, src_server, &msg,
>   				&args->cna_seq_args, &res->cnr_seq_res, 0);
> @@ -618,8 +630,11 @@ static loff_t _nfs42_proc_llseek(struct file *filep,
>   
>   	status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context,
>   			lock, FMODE_READ);
> -	if (status)
> +	if (status) {
> +		if (status == -EAGAIN)
> +			status = -NFS4ERR_BAD_STATEID;
>   		return status;
> +	}
>   
>   	status = nfs_filemap_write_and_wait_range(inode->i_mapping,
>   			offset, LLONG_MAX);
> @@ -994,13 +1009,18 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
>   
>   	status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
>   			src_lock, FMODE_READ);
> -	if (status)
> +	if (status) {
> +		if (status == -EAGAIN)
> +			status = -NFS4ERR_BAD_STATEID;
>   		return status;
> -
> +	}
>   	status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
>   			dst_lock, FMODE_WRITE);
> -	if (status)
> +	if (status) {
> +		if (status == -EAGAIN)
> +			status = -NFS4ERR_BAD_STATEID;
>   		return status;
> +	}
>   
>   	res.dst_fattr = nfs_alloc_fattr();
>   	if (!res.dst_fattr)
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 76d37161409a..f9bb4b43a519 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -3239,6 +3239,8 @@ static int _nfs4_do_setattr(struct inode *inode,
>   		nfs_put_lock_context(l_ctx);
>   		if (status == -EIO)
>   			return -EBADF;
> +		else if (status == -EAGAIN)
> +			goto zero_stateid;
>   	} else {
>   zero_stateid:
>   		nfs4_stateid_copy(&arg->stateid, &zero_stateid);
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index cec3070ab577..fc36a60bf4ec 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1998,7 +1998,7 @@ pnfs_update_layout(struct inode *ino,
>   			trace_pnfs_update_layout(ino, pos, count,
>   					iomode, lo, lseg,
>   					PNFS_UPDATE_LAYOUT_INVALID_OPEN);
> -			if (status != -EAGAIN)
> +			if (status != -EAGAIN && status != -EAGAIN)

that doesn't look quite right? Both sides of the && compare status
against -EAGAIN, so the second test is redundant.

>   				goto out_unlock;
>   			spin_unlock(&ino->i_lock);
>   			nfs4_schedule_stateid_recovery(server, ctx->state);
>
Olga Kornievskaia Dec. 18, 2019, 9:31 p.m. UTC | #2
On Wed, Dec 18, 2019 at 4:25 PM Calum Mackay <calum.mackay@oracle.com> wrote:
>
> hi Olga…
>
> On 18/12/2019 9:13 pm, Olga Kornievskaia wrote:
> > From: Olga Kornievskaia <kolga@netapp.com>
> >
> > Ever since the commit 0e0cb35b417f, it's possible to lose an open stateid
> > while retrying a CLOSE due to ERR_OLD_STATEID. Once that happens,
> > operations that require openstateid fail with EAGAIN which is propagated
> > to the application then tests like generic/446 and generic/168 fail with
> > "Resource temporarily unavailable".
> >
> > Instead of returning this error, initiate state recovery when possible to
> > recover the open stateid and then try calling nfs4_select_rw_stateid()
> > again.
> >
> > Fixes: 0e0cb35b417f ("NFSv4: Handle NFS4ERR_OLD_STATEID in CLOSE/OPEN_DOWNGRADE")
> > Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
> > ---
> >   fs/nfs/nfs42proc.c | 36 ++++++++++++++++++++++++++++--------
> >   fs/nfs/nfs4proc.c  |  2 ++
> >   fs/nfs/pnfs.c      |  2 +-
> >   3 files changed, 31 insertions(+), 9 deletions(-)
> >
> > diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> > index 1fe83e0f663e..9637aad36bdc 100644
> > --- a/fs/nfs/nfs42proc.c
> > +++ b/fs/nfs/nfs42proc.c
> > @@ -61,8 +61,11 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
> >
> >       status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context,
> >                       lock, FMODE_WRITE);
> > -     if (status)
> > +     if (status) {
> > +             if (status == -EAGAIN)
> > +                     status = -NFS4ERR_BAD_STATEID;
> >               return status;
> > +     }
> >
> >       res.falloc_fattr = nfs_alloc_fattr();
> >       if (!res.falloc_fattr)
> > @@ -287,8 +290,11 @@ static ssize_t _nfs42_proc_copy(struct file *src,
> >       } else {
> >               status = nfs4_set_rw_stateid(&args->src_stateid,
> >                               src_lock->open_context, src_lock, FMODE_READ);
> > -             if (status)
> > +             if (status) {
> > +                     if (status == -EAGAIN)
> > +                             status = -NFS4ERR_BAD_STATEID;
> >                       return status;
> > +             }
> >       }
> >       status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping,
> >                       pos_src, pos_src + (loff_t)count - 1);
> > @@ -297,8 +303,11 @@ static ssize_t _nfs42_proc_copy(struct file *src,
> >
> >       status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context,
> >                                    dst_lock, FMODE_WRITE);
> > -     if (status)
> > +     if (status) {
> > +             if (status == -EAGAIN)
> > +                     status = -NFS4ERR_BAD_STATEID;
> >               return status;
> > +     }
> >
> >       status = nfs_sync_inode(dst_inode);
> >       if (status)
> > @@ -546,8 +555,11 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
> >       status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx,
> >                                    FMODE_READ);
> >       nfs_put_lock_context(l_ctx);
> > -     if (status)
> > +     if (status) {
> > +             if (status == -EAGAIN)
> > +                     status = -NFS4ERR_BAD_STATEID;
> >               return status;
> > +     }
> >
> >       status = nfs4_call_sync(src_server->client, src_server, &msg,
> >                               &args->cna_seq_args, &res->cnr_seq_res, 0);
> > @@ -618,8 +630,11 @@ static loff_t _nfs42_proc_llseek(struct file *filep,
> >
> >       status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context,
> >                       lock, FMODE_READ);
> > -     if (status)
> > +     if (status) {
> > +             if (status == -EAGAIN)
> > +                     status = -NFS4ERR_BAD_STATEID;
> >               return status;
> > +     }
> >
> >       status = nfs_filemap_write_and_wait_range(inode->i_mapping,
> >                       offset, LLONG_MAX);
> > @@ -994,13 +1009,18 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
> >
> >       status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
> >                       src_lock, FMODE_READ);
> > -     if (status)
> > +     if (status) {
> > +             if (status == -EAGAIN)
> > +                     status = -NFS4ERR_BAD_STATEID;
> >               return status;
> > -
> > +     }
> >       status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
> >                       dst_lock, FMODE_WRITE);
> > -     if (status)
> > +     if (status) {
> > +             if (status == -EAGAIN)
> > +                     status = -NFS4ERR_BAD_STATEID;
> >               return status;
> > +     }
> >
> >       res.dst_fattr = nfs_alloc_fattr();
> >       if (!res.dst_fattr)
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index 76d37161409a..f9bb4b43a519 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -3239,6 +3239,8 @@ static int _nfs4_do_setattr(struct inode *inode,
> >               nfs_put_lock_context(l_ctx);
> >               if (status == -EIO)
> >                       return -EBADF;
> > +             else if (status == -EAGAIN)
> > +                     goto zero_stateid;
> >       } else {
> >   zero_stateid:
> >               nfs4_stateid_copy(&arg->stateid, &zero_stateid);
> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> > index cec3070ab577..fc36a60bf4ec 100644
> > --- a/fs/nfs/pnfs.c
> > +++ b/fs/nfs/pnfs.c
> > @@ -1998,7 +1998,7 @@ pnfs_update_layout(struct inode *ino,
> >                       trace_pnfs_update_layout(ino, pos, count,
> >                                       iomode, lo, lseg,
> >                                       PNFS_UPDATE_LAYOUT_INVALID_OPEN);
> > -                     if (status != -EAGAIN)
> > +                     if (status != -EAGAIN && status != -EAGAIN)
>
> that doesn't look quite right?

*face palm* thanks. Will fix that.

>
> >                               goto out_unlock;
> >                       spin_unlock(&ino->i_lock);
> >                       nfs4_schedule_stateid_recovery(server, ctx->state);
> >
>
>
diff mbox series

Patch

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 1fe83e0f663e..9637aad36bdc 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -61,8 +61,11 @@  static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
 
 	status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context,
 			lock, FMODE_WRITE);
-	if (status)
+	if (status) {
+		if (status == -EAGAIN)
+			status = -NFS4ERR_BAD_STATEID;
 		return status;
+	}
 
 	res.falloc_fattr = nfs_alloc_fattr();
 	if (!res.falloc_fattr)
@@ -287,8 +290,11 @@  static ssize_t _nfs42_proc_copy(struct file *src,
 	} else {
 		status = nfs4_set_rw_stateid(&args->src_stateid,
 				src_lock->open_context, src_lock, FMODE_READ);
-		if (status)
+		if (status) {
+			if (status == -EAGAIN)
+				status = -NFS4ERR_BAD_STATEID;
 			return status;
+		}
 	}
 	status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping,
 			pos_src, pos_src + (loff_t)count - 1);
@@ -297,8 +303,11 @@  static ssize_t _nfs42_proc_copy(struct file *src,
 
 	status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context,
 				     dst_lock, FMODE_WRITE);
-	if (status)
+	if (status) {
+		if (status == -EAGAIN)
+			status = -NFS4ERR_BAD_STATEID;
 		return status;
+	}
 
 	status = nfs_sync_inode(dst_inode);
 	if (status)
@@ -546,8 +555,11 @@  static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
 	status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx,
 				     FMODE_READ);
 	nfs_put_lock_context(l_ctx);
-	if (status)
+	if (status) {
+		if (status == -EAGAIN)
+			status = -NFS4ERR_BAD_STATEID;
 		return status;
+	}
 
 	status = nfs4_call_sync(src_server->client, src_server, &msg,
 				&args->cna_seq_args, &res->cnr_seq_res, 0);
@@ -618,8 +630,11 @@  static loff_t _nfs42_proc_llseek(struct file *filep,
 
 	status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context,
 			lock, FMODE_READ);
-	if (status)
+	if (status) {
+		if (status == -EAGAIN)
+			status = -NFS4ERR_BAD_STATEID;
 		return status;
+	}
 
 	status = nfs_filemap_write_and_wait_range(inode->i_mapping,
 			offset, LLONG_MAX);
@@ -994,13 +1009,18 @@  static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
 
 	status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
 			src_lock, FMODE_READ);
-	if (status)
+	if (status) {
+		if (status == -EAGAIN)
+			status = -NFS4ERR_BAD_STATEID;
 		return status;
-
+	}
 	status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
 			dst_lock, FMODE_WRITE);
-	if (status)
+	if (status) {
+		if (status == -EAGAIN)
+			status = -NFS4ERR_BAD_STATEID;
 		return status;
+	}
 
 	res.dst_fattr = nfs_alloc_fattr();
 	if (!res.dst_fattr)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 76d37161409a..f9bb4b43a519 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3239,6 +3239,8 @@  static int _nfs4_do_setattr(struct inode *inode,
 		nfs_put_lock_context(l_ctx);
 		if (status == -EIO)
 			return -EBADF;
+		else if (status == -EAGAIN)
+			goto zero_stateid;
 	} else {
 zero_stateid:
 		nfs4_stateid_copy(&arg->stateid, &zero_stateid);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cec3070ab577..fc36a60bf4ec 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1998,7 +1998,7 @@  pnfs_update_layout(struct inode *ino,
 			trace_pnfs_update_layout(ino, pos, count,
 					iomode, lo, lseg,
 					PNFS_UPDATE_LAYOUT_INVALID_OPEN);
-			if (status != -EAGAIN)
+			if (status != -EAGAIN && status != -EAGAIN)
 				goto out_unlock;
 			spin_unlock(&ino->i_lock);
 			nfs4_schedule_stateid_recovery(server, ctx->state);