diff mbox series

[07/12] NFS: support parallel updates in the one directory.

Message ID 165516230200.21248.14713533079253477888.stgit@noble.brown (mailing list archive)
State New, archived
Headers show
Series Allow concurrent directory updates. | expand

Commit Message

NeilBrown June 13, 2022, 11:18 p.m. UTC
NFS can easily support parallel updates as the locking is done on the
server, so this patch enables parallel updates for NFS.

NFS unlink needs to block concurrent open()s once it decides to actually
unlink the file, rather than rename it to .nfsXXXX (aka sillyrename).
It currently does this by temporarily unhashing the dentry and relying
on the exclusive lock on the directory to block a ->lookup().  That
doesn't work now that unlink uses a shared lock, so an alternate
approach is needed.

__nfs_lookup_revalidate (->d_revalidate) now blocks if DCACHE_PAR_UPDATE
is set, and if nfs_unlink() happens to be called with an exclusive lock
and DCACHE_PAR_UPDATE is not set, it gets set during the potential race window.

I'd rather use some other indicator in the dentry to tell
__nfs_lookup_revalidate() to wait, but we are nearly out of d_flags bits,
and NFS doesn't have a general-purpose d_fsdata.

NFS "silly-rename" may now be called with only a shared lock on the
directory, so it needs a bit of extra care to get exclusive access to
the new name. d_lock_update_nested() and d_unlock_update() help here.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfs/dir.c    |   29 +++++++++++++++++++++++------
 fs/nfs/inode.c  |    2 ++
 fs/nfs/unlink.c |    5 ++++-
 3 files changed, 29 insertions(+), 7 deletions(-)
diff mbox series

Patch

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a8ecdd527662..54c2c7adcd56 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1778,6 +1778,9 @@  __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
 	int ret;
 
 	if (flags & LOOKUP_RCU) {
+		if (dentry->d_flags & DCACHE_PAR_UPDATE)
+			/* Pending unlink */
+			return -ECHILD;
 		parent = READ_ONCE(dentry->d_parent);
 		dir = d_inode_rcu(parent);
 		if (!dir)
@@ -1786,6 +1789,9 @@  __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
 		if (parent != READ_ONCE(dentry->d_parent))
 			return -ECHILD;
 	} else {
+		/* Wait for unlink to complete */
+		wait_var_event(&dentry->d_flags,
+			       !(dentry->d_flags & DCACHE_PAR_UPDATE));
 		parent = dget_parent(dentry);
 		ret = reval(d_inode(parent), dentry, flags);
 		dput(parent);
@@ -2453,7 +2459,7 @@  static int nfs_safe_remove(struct dentry *dentry)
 int nfs_unlink(struct inode *dir, struct dentry *dentry)
 {
 	int error;
-	int need_rehash = 0;
+	bool did_set_par_update = false;
 
 	dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
 		dir->i_ino, dentry);
@@ -2468,15 +2474,26 @@  int nfs_unlink(struct inode *dir, struct dentry *dentry)
 		error = nfs_sillyrename(dir, dentry);
 		goto out;
 	}
-	if (!d_unhashed(dentry)) {
-		__d_drop(dentry);
-		need_rehash = 1;
+	/* We must prevent any concurrent open until the unlink
+	 * completes.  ->d_revalidate will wait for DCACHE_PAR_UPDATE
+	 * to clear, but if this happens to be a non-parallel update, we
+	 * still want to block opens.  So set DCACHE_PAR_UPDATE
+	 * temporarily.
+	 */
+	if (!(dentry->d_flags & DCACHE_PAR_UPDATE)) {
+		/* Must have exclusive lock on parent */
+		did_set_par_update = true;
+		dentry->d_flags |= DCACHE_PAR_UPDATE;
 	}
+
 	spin_unlock(&dentry->d_lock);
 	error = nfs_safe_remove(dentry);
 	nfs_dentry_remove_handle_error(dir, dentry, error);
-	if (need_rehash)
-		d_rehash(dentry);
+	if (did_set_par_update) {
+		spin_lock(&dentry->d_lock);
+		dentry->d_flags &= ~DCACHE_PAR_UPDATE;
+		spin_unlock(&dentry->d_lock);
+	}
 out:
 	trace_nfs_unlink_exit(dir, dentry, error);
 	return error;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b4e46b0ffa2d..cea2554710d2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -481,6 +481,8 @@  nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 
 		/* We can't support update_atime(), since the server will reset it */
 		inode->i_flags |= S_NOATIME|S_NOCMTIME;
+		/* Parallel updates to directories are trivial */
+		inode->i_flags |= S_PAR_UPDATE;
 		inode->i_mode = fattr->mode;
 		nfsi->cache_validity = 0;
 		if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 9697cd5d2561..52a20eb6131c 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -462,6 +462,7 @@  nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 	sdentry = NULL;
 	do {
 		int slen;
+		d_unlock_update(sdentry);
 		dput(sdentry);
 		sillycounter++;
 		slen = scnprintf(silly, sizeof(silly),
@@ -479,7 +480,8 @@  nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 		 */
 		if (IS_ERR(sdentry))
 			goto out;
-	} while (d_inode(sdentry) != NULL); /* need negative lookup */
+	} while (!d_lock_update_nested(sdentry, NULL, NULL,
+				       SINGLE_DEPTH_NESTING));
 
 	ihold(inode);
 
@@ -524,6 +526,7 @@  nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 	rpc_put_task(task);
 out_dput:
 	iput(inode);
+	d_unlock_update(sdentry);
 	dput(sdentry);
 out:
 	return error;