Message ID | 20250217053727.3368579-4-neilb@suse.de (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | change ->mkdir() and vfs_mkdir() to return a dentry | expand |
On Mon, 2025-02-17 at 16:30 +1100, NeilBrown wrote: > vfs_mkdir() does not currently guarantee to leave the child dentry > hashed or make it positive on success. It may leave it unhashed and > negative and then the caller needs to perform a lookup to find the > target dentry, if it needs it. > > This patch changes vfs_mkdir() to return the dentry provided by the > filesystems which is hashed and positive when provided. This reduces > the need for lookup code which is now included in vfs_mkdir() rather > than at various call-sites. The included lookup does not include the > permission check that some of the existing code included in e.g. > lookup_one_len(). This should not be necessary for lookup up a > directory which has just be successfully created. > > If a different dentry is returned, the first one is put. If necessary > the fact that it is new can be determined by comparing pointers. A new > dentry will certainly have a new pointer (as the old is put after the > new is obtained). > > The dentry returned by vfs_mkdir(), when not an error, *is* now > guaranteed to be hashed and positive. > > A few callers do not need the dentry, but will now sometimes perform the > lookup anyway. This should be cheap except possibly in the case of cifs. > > Signed-off-by: NeilBrown <neilb@suse.de> > --- > drivers/base/devtmpfs.c | 7 ++-- > fs/cachefiles/namei.c | 10 +++--- > fs/ecryptfs/inode.c | 14 +++++--- > fs/init.c | 7 ++-- > fs/namei.c | 70 +++++++++++++++++++++++++++++++--------- > fs/nfsd/nfs4recover.c | 7 ++-- > fs/nfsd/vfs.c | 28 +++++----------- > fs/overlayfs/dir.c | 37 +++------------------ > fs/overlayfs/overlayfs.h | 15 ++++----- > fs/overlayfs/super.c | 7 ++-- > fs/smb/server/vfs.c | 31 ++++++------------ > fs/xfs/scrub/orphanage.c | 9 +++--- > include/linux/fs.h | 4 +-- > 13 files changed, 121 insertions(+), 125 deletions(-) > > diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c > index c9e34842139f..8ec756b5dec4 100644 > --- a/drivers/base/devtmpfs.c > +++ b/drivers/base/devtmpfs.c > @@ -160,18 +160,17 @@ static int dev_mkdir(const char *name, umode_t mode) > { > struct dentry *dentry; > struct path path; > - int err; > > dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); > if (IS_ERR(dentry)) > return PTR_ERR(dentry); > > - err = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode); > - if (!err) > + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode); > + if (!IS_ERR(dentry)) > /* mark as kernel-created inode */ > d_inode(dentry)->i_private = &thread; > done_path_create(&path, dentry); > - return err; > + return PTR_ERR_OR_ZERO(dentry); > } > > static int create_path(const char *nodepath) > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > index 7cf59713f0f7..8a8337d1be05 100644 > --- a/fs/cachefiles/namei.c > +++ b/fs/cachefiles/namei.c > @@ -95,7 +95,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > /* search the current directory for the element name */ > inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); > > -retry: > ret = cachefiles_inject_read_error(); > if (ret == 0) > subdir = lookup_one_len(dirname, dir, strlen(dirname)); > @@ -128,10 +127,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > ret = security_path_mkdir(&path, subdir, 0700); > if (ret < 0) > goto mkdir_error; > - ret = cachefiles_inject_write_error(); > - if (ret == 0) > - ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); > - if (ret < 0) { > + subdir = ERR_PTR(cachefiles_inject_write_error()); > + if (!IS_ERR(subdir)) > + subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); > + ret = PTR_ERR(subdir); > + if (IS_ERR(subdir)) { > trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, > cachefiles_trace_mkdir_error); > goto mkdir_error; > diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c > index 6315dd194228..51a5c54eb740 100644 > --- a/fs/ecryptfs/inode.c > +++ b/fs/ecryptfs/inode.c > @@ -511,10 +511,16 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > struct inode *lower_dir; > > rc = lock_parent(dentry, &lower_dentry, &lower_dir); > - if (!rc) > - rc = vfs_mkdir(&nop_mnt_idmap, lower_dir, > - lower_dentry, mode); > - if (rc || d_really_is_negative(lower_dentry)) > + if (rc) > + goto out; > + > + lower_dentry = vfs_mkdir(&nop_mnt_idmap, lower_dir, > + lower_dentry, mode); > + rc = PTR_ERR(lower_dentry); > + if (IS_ERR(lower_dentry)) > + goto out; > + rc = 0; > + if (d_unhashed(lower_dentry)) > goto out; > rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); > if (rc) > diff --git a/fs/init.c b/fs/init.c > index e9387b6c4f30..eef5124885e3 100644 > --- a/fs/init.c > +++ b/fs/init.c > @@ -230,9 +230,12 @@ int __init init_mkdir(const char *pathname, umode_t mode) > return PTR_ERR(dentry); > mode = mode_strip_umask(d_inode(path.dentry), mode); > error = security_path_mkdir(&path, dentry, mode); > - if (!error) > - error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, > + if (!error) { > + dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, > dentry, mode); > + if (IS_ERR(dentry)) > + error = PTR_ERR(dentry); > + } > done_path_create(&path, dentry); > return error; > } > diff --git a/fs/namei.c b/fs/namei.c > index 19d5ea340a18..f76fee6df369 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -4132,7 +4132,8 @@ EXPORT_SYMBOL(kern_path_create); > > void done_path_create(struct path *path, struct dentry *dentry) > { > - dput(dentry); > + if (!IS_ERR(dentry)) > + dput(dentry); > inode_unlock(path->dentry->d_inode); > mnt_drop_write(path->mnt); > path_put(path); > @@ -4278,7 +4279,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d > } > > /** > - * vfs_mkdir - create directory > + * vfs_mkdir - create directory returning correct dentry is possible > * @idmap: idmap of the mount the inode was found from > * @dir: inode of the parent directory > * @dentry: dentry of the child directory > @@ -4291,9 +4292,20 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d > * care to map the inode according to @idmap before checking permissions. > * On non-idmapped mounts or if permission checking is to be performed on the > * raw inode simply pass @nop_mnt_idmap. > + * > + * In the event that the filesystem does not use the *@dentry but leaves it > + * negative or unhashes it and possibly splices a different one returning it, > + * the original dentry is dput() and the alternate is returned. > + * > + * If the file-system reports success but leaves the dentry unhashed or > + * negative, a lookup is performed and providing that is positive and > + * a directory, it will be returned. If the lookup is not successful > + * the original dentry is unhashed and returned. > + * > + * In case of an error the dentry is dput() and an ERR_PTR() is returned. > */ > -int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > - struct dentry *dentry, umode_t mode) > +struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > + struct dentry *dentry, umode_t mode) > { > int error; > unsigned max_links = dir->i_sb->s_max_links; > @@ -4301,30 +4313,54 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > > error = may_create(idmap, dir, dentry); > if (error) > - return error; > + goto err; > > + error = -EPERM; > if (!dir->i_op->mkdir) > - return -EPERM; > + goto err; > > mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); > error = security_inode_mkdir(dir, dentry, mode); > if (error) > - return error; > + goto err; > > + error = -EMLINK; > if (max_links && dir->i_nlink >= max_links) > - return -EMLINK; > + goto err; > > de = dir->i_op->mkdir(idmap, dir, dentry, mode); > + error = PTR_ERR(de); > if (IS_ERR(de)) > - return PTR_ERR(de); > + goto err; > + if (!de && (d_unhashed(dentry) || d_is_negative(dentry))) { Would it be better to push this down into the callers that need it and make returning a hashed, positive dentry a requirement for the mkdir operation? You could just turn this block of code into a helper function that those four filesystems could call, which would mean that you could avoid the d_unhashed() and d_is_negative() checks on the other filesystems. > + /* A few filesystems leave the dentry negative on success, > + * currently cifs(with posix extensions), kernfs, tracefs, hostfs. > + * They rely on a subsequent lookup which we perform here. > + */ > + const struct qstr *d_name = (void*)&dentry->d_name; > + de = lookup_dcache(d_name, dentry->d_parent, 0); > + if (!de) > + de = __lookup_slow(d_name, dentry->d_parent, 0); > + if (IS_ERR(de)) > + de = NULL; > + else if (unlikely(d_is_negative(de) || !d_is_dir(de))) { > + dput(de); > + de = NULL; > + } > + if (!de) > + /* Ensure caller can easily detect that dentry is not useful */ > + d_drop(dentry); > + } > if (de) { > - fsnotify_mkdir(dir, de); > - /* Cannot return de yet */ > - dput(de); > - } else > - fsnotify_mkdir(dir, dentry); > + dput(dentry); > + dentry = de; > + } > + fsnotify_mkdir(dir, dentry); > + return dentry; > > - return 0; > +err: > + dput(dentry); > + return ERR_PTR(error); > } > EXPORT_SYMBOL(vfs_mkdir); > > @@ -4344,8 +4380,10 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode) > error = security_path_mkdir(&path, dentry, > mode_strip_umask(path.dentry->d_inode, mode)); > if (!error) { > - error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, > + dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, > dentry, mode); > + if (IS_ERR(dentry)) > + error = PTR_ERR(dentry); > } > done_path_create(&path, dentry); > if (retry_estale(error, lookup_flags)) { > diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c > index 28f4d5311c40..c1d9bd07285f 100644 > --- a/fs/nfsd/nfs4recover.c > +++ b/fs/nfsd/nfs4recover.c > @@ -233,9 +233,12 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) > * as well be forgiving and just succeed silently. > */ > goto out_put; > - status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); > + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); > + if (IS_ERR(dentry)) > + status = PTR_ERR(dentry); > out_put: > - dput(dentry); > + if (!status) > + dput(dentry); > out_unlock: > inode_unlock(d_inode(dir)); > if (status == 0) { > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > index 29cb7b812d71..205b07f21e8d 100644 > --- a/fs/nfsd/vfs.c > +++ b/fs/nfsd/vfs.c > @@ -1461,7 +1461,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, > struct inode *dirp; > struct iattr *iap = attrs->na_iattr; > __be32 err; > - int host_err; > + int host_err = 0; > > dentry = fhp->fh_dentry; > dirp = d_inode(dentry); > @@ -1488,26 +1488,13 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, > nfsd_check_ignore_resizing(iap); > break; > case S_IFDIR: > - host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); > - if (!host_err && unlikely(d_unhashed(dchild))) { > - struct dentry *d; > - d = lookup_one_len(dchild->d_name.name, > - dchild->d_parent, > - dchild->d_name.len); > - if (IS_ERR(d)) { > - host_err = PTR_ERR(d); > - break; > - } > - if (unlikely(d_is_negative(d))) { > - dput(d); > - err = nfserr_serverfault; > - goto out; > - } > + dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); > + if (IS_ERR(dchild)) > + host_err = PTR_ERR(dchild); > + else if (unlikely(dchild != resfhp->fh_dentry)) { > dput(resfhp->fh_dentry); > - resfhp->fh_dentry = dget(d); > + resfhp->fh_dentry = dget(dchild); > err = fh_update(resfhp); > - dput(dchild); > - dchild = d; > if (err) > goto out; > } > @@ -1530,7 +1517,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, > err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs); > > out: > - dput(dchild); > + if (!IS_ERR(dchild)) > + dput(dchild); > return err; > > out_nfserr: > diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c > index 21c3aaf7b274..fe493f3ed6b6 100644 > --- a/fs/overlayfs/dir.c > +++ b/fs/overlayfs/dir.c > @@ -138,37 +138,6 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, > goto out; > } > > -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, > - struct dentry **newdentry, umode_t mode) > -{ > - int err; > - struct dentry *d, *dentry = *newdentry; > - > - err = ovl_do_mkdir(ofs, dir, dentry, mode); > - if (err) > - return err; > - > - if (likely(!d_unhashed(dentry))) > - return 0; > - > - /* > - * vfs_mkdir() may succeed and leave the dentry passed > - * to it unhashed and negative. If that happens, try to > - * lookup a new hashed and positive dentry. > - */ > - d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent, > - dentry->d_name.len); > - if (IS_ERR(d)) { > - pr_warn("failed lookup after mkdir (%pd2, err=%i).\n", > - dentry, err); > - return PTR_ERR(d); > - } > - dput(dentry); > - *newdentry = d; > - > - return 0; > -} > - > struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, > struct dentry *newdentry, struct ovl_cattr *attr) > { > @@ -191,7 +160,8 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, > > case S_IFDIR: > /* mkdir is special... */ > - err = ovl_mkdir_real(ofs, dir, &newdentry, attr->mode); > + newdentry = ovl_do_mkdir(ofs, dir, newdentry, attr->mode); > + err = PTR_ERR_OR_ZERO(newdentry); > break; > > case S_IFCHR: > @@ -219,7 +189,8 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, > } > out: > if (err) { > - dput(newdentry); > + if (!IS_ERR(newdentry)) > + dput(newdentry); > return ERR_PTR(err); > } > return newdentry; > diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h > index 0021e2025020..6f2f8f4cfbbc 100644 > --- a/fs/overlayfs/overlayfs.h > +++ b/fs/overlayfs/overlayfs.h > @@ -241,13 +241,14 @@ static inline int ovl_do_create(struct ovl_fs *ofs, > return err; > } > > -static inline int ovl_do_mkdir(struct ovl_fs *ofs, > - struct inode *dir, struct dentry *dentry, > - umode_t mode) > +static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs, > + struct inode *dir, > + struct dentry *dentry, > + umode_t mode) > { > - int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); > - pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); > - return err; > + dentry = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); > + pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(dentry)); > + return dentry; > } > > static inline int ovl_do_mknod(struct ovl_fs *ofs, > @@ -838,8 +839,6 @@ struct ovl_cattr { > > #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) > > -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, > - struct dentry **newdentry, umode_t mode); > struct dentry *ovl_create_real(struct ovl_fs *ofs, > struct inode *dir, struct dentry *newdentry, > struct ovl_cattr *attr); > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c > index 86ae6f6da36b..a381765802e9 100644 > --- a/fs/overlayfs/super.c > +++ b/fs/overlayfs/super.c > @@ -327,9 +327,10 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, > goto retry; > } > > - err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode); > - if (err) > - goto out_dput; > + work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode); > + err = PTR_ERR(work); > + if (IS_ERR(work)) > + goto out_err; > > /* Weird filesystem returning with hashed negative (kernfs)? */ > err = -EINVAL; > diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c > index fe29acef5872..d682443b14ac 100644 > --- a/fs/smb/server/vfs.c > +++ b/fs/smb/server/vfs.c > @@ -206,7 +206,7 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) > { > struct mnt_idmap *idmap; > struct path path; > - struct dentry *dentry; > + struct dentry *dentry, *d; > int err; > > dentry = ksmbd_vfs_kern_path_create(work, name, > @@ -222,31 +222,18 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) > > idmap = mnt_idmap(path.mnt); > mode |= S_IFDIR; > - err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); > - if (!err && d_unhashed(dentry)) { > - struct dentry *d; > - > - d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent, > - dentry->d_name.len); > - if (IS_ERR(d)) { > - err = PTR_ERR(d); > - goto out_err; > - } > - if (unlikely(d_is_negative(d))) { > - dput(d); > - err = -ENOENT; > - goto out_err; > - } > - > + d = dentry; > + entry = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); > + err = PTR_ERR(dentry); > + if (!IS_ERR(dentry) && dentry != d) > ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d)); > - dput(d); > - } > > -out_err: > done_path_create(&path, dentry); > - if (err) > + if (IS_ERR(dentry)) { > pr_err("mkdir(%s): creation failed (err:%d)\n", name, err); > - return err; > + return err; > + } > + return 0; > } > > static ssize_t ksmbd_vfs_getcasexattr(struct mnt_idmap *idmap, > diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c > index c287c755f2c5..3537f3cca6d5 100644 > --- a/fs/xfs/scrub/orphanage.c > +++ b/fs/xfs/scrub/orphanage.c > @@ -167,10 +167,11 @@ xrep_orphanage_create( > * directory to control access to a file we put in here. > */ > if (d_really_is_negative(orphanage_dentry)) { > - error = vfs_mkdir(&nop_mnt_idmap, root_inode, orphanage_dentry, > - 0750); > - if (error) > - goto out_dput_orphanage; > + orphanage_dentry = vfs_mkdir(&nop_mnt_idmap, root_inode, > + orphanage_dentry, 0750); > + error = PTR_ERR(orphanage_dentry); > + if (IS_ERR(orphanage_dentry)) > + goto out_unlock_root; > } > > /* Not a directory? Bail out. */ > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 45269608ee23..beae24bc990d 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1981,8 +1981,8 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap, > */ > int vfs_create(struct mnt_idmap *, struct inode *, > struct dentry *, umode_t, bool); > -int vfs_mkdir(struct mnt_idmap *, struct inode *, > - struct dentry *, umode_t); > +struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, > + struct dentry *, umode_t); > int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, > umode_t, dev_t); > int vfs_symlink(struct mnt_idmap *, struct inode *,
On Wed, 19 Feb 2025, Jeff Layton wrote: > On Mon, 2025-02-17 at 16:30 +1100, NeilBrown wrote: > > vfs_mkdir() does not currently guarantee to leave the child dentry > > hashed or make it positive on success. It may leave it unhashed and > > negative and then the caller needs to perform a lookup to find the > > target dentry, if it needs it. > > > > This patch changes vfs_mkdir() to return the dentry provided by the > > filesystems which is hashed and positive when provided. This reduces > > the need for lookup code which is now included in vfs_mkdir() rather > > than at various call-sites. The included lookup does not include the > > permission check that some of the existing code included in e.g. > > lookup_one_len(). This should not be necessary for lookup up a > > directory which has just be successfully created. > > > > If a different dentry is returned, the first one is put. If necessary > > the fact that it is new can be determined by comparing pointers. A new > > dentry will certainly have a new pointer (as the old is put after the > > new is obtained). > > > > The dentry returned by vfs_mkdir(), when not an error, *is* now > > guaranteed to be hashed and positive. > > > > A few callers do not need the dentry, but will now sometimes perform the > > lookup anyway. This should be cheap except possibly in the case of cifs. > > > > Signed-off-by: NeilBrown <neilb@suse.de> > > --- > > drivers/base/devtmpfs.c | 7 ++-- > > fs/cachefiles/namei.c | 10 +++--- > > fs/ecryptfs/inode.c | 14 +++++--- > > fs/init.c | 7 ++-- > > fs/namei.c | 70 +++++++++++++++++++++++++++++++--------- > > fs/nfsd/nfs4recover.c | 7 ++-- > > fs/nfsd/vfs.c | 28 +++++----------- > > fs/overlayfs/dir.c | 37 +++------------------ > > fs/overlayfs/overlayfs.h | 15 ++++----- > > fs/overlayfs/super.c | 7 ++-- > > fs/smb/server/vfs.c | 31 ++++++------------ > > fs/xfs/scrub/orphanage.c | 9 +++--- > > include/linux/fs.h | 4 +-- > > 13 files changed, 121 insertions(+), 125 deletions(-) > > > > diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c > > index c9e34842139f..8ec756b5dec4 100644 > > --- a/drivers/base/devtmpfs.c > > +++ b/drivers/base/devtmpfs.c > > @@ -160,18 +160,17 @@ static int dev_mkdir(const char *name, umode_t mode) > > { > > struct dentry *dentry; > > struct path path; > > - int err; > > > > dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); > > if (IS_ERR(dentry)) > > return PTR_ERR(dentry); > > > > - err = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode); > > - if (!err) > > + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode); > > + if (!IS_ERR(dentry)) > > /* mark as kernel-created inode */ > > d_inode(dentry)->i_private = &thread; > > done_path_create(&path, dentry); > > - return err; > > + return PTR_ERR_OR_ZERO(dentry); > > } > > > > static int create_path(const char *nodepath) > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c > > index 7cf59713f0f7..8a8337d1be05 100644 > > --- a/fs/cachefiles/namei.c > > +++ b/fs/cachefiles/namei.c > > @@ -95,7 +95,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > /* search the current directory for the element name */ > > inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); > > > > -retry: > > ret = cachefiles_inject_read_error(); > > if (ret == 0) > > subdir = lookup_one_len(dirname, dir, strlen(dirname)); > > @@ -128,10 +127,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, > > ret = security_path_mkdir(&path, subdir, 0700); > > if (ret < 0) > > goto mkdir_error; > > - ret = cachefiles_inject_write_error(); > > - if (ret == 0) > > - ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); > > - if (ret < 0) { > > + subdir = ERR_PTR(cachefiles_inject_write_error()); > > + if (!IS_ERR(subdir)) > > + subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); > > + ret = PTR_ERR(subdir); > > + if (IS_ERR(subdir)) { > > trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, > > cachefiles_trace_mkdir_error); > > goto mkdir_error; > > diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c > > index 6315dd194228..51a5c54eb740 100644 > > --- a/fs/ecryptfs/inode.c > > +++ b/fs/ecryptfs/inode.c > > @@ -511,10 +511,16 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > > struct inode *lower_dir; > > > > rc = lock_parent(dentry, &lower_dentry, &lower_dir); > > - if (!rc) > > - rc = vfs_mkdir(&nop_mnt_idmap, lower_dir, > > - lower_dentry, mode); > > - if (rc || d_really_is_negative(lower_dentry)) > > + if (rc) > > + goto out; > > + > > + lower_dentry = vfs_mkdir(&nop_mnt_idmap, lower_dir, > > + lower_dentry, mode); > > + rc = PTR_ERR(lower_dentry); > > + if (IS_ERR(lower_dentry)) > > + goto out; > > + rc = 0; > > + if (d_unhashed(lower_dentry)) > > goto out; > > rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); > > if (rc) > > diff --git a/fs/init.c b/fs/init.c > > index e9387b6c4f30..eef5124885e3 100644 > > --- a/fs/init.c > > +++ b/fs/init.c > > @@ -230,9 +230,12 @@ int __init init_mkdir(const char *pathname, umode_t mode) > > return PTR_ERR(dentry); > > mode = mode_strip_umask(d_inode(path.dentry), mode); > > error = security_path_mkdir(&path, dentry, mode); > > - if (!error) > > - error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, > > + if (!error) { > > + dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, > > dentry, mode); > > + if (IS_ERR(dentry)) > > + error = PTR_ERR(dentry); > > + } > > done_path_create(&path, dentry); > > return error; > > } > > diff --git a/fs/namei.c b/fs/namei.c > > index 19d5ea340a18..f76fee6df369 100644 > > --- a/fs/namei.c > > +++ b/fs/namei.c > > @@ -4132,7 +4132,8 @@ EXPORT_SYMBOL(kern_path_create); > > > > void done_path_create(struct path *path, struct dentry *dentry) > > { > > - dput(dentry); > > + if (!IS_ERR(dentry)) > > + dput(dentry); > > inode_unlock(path->dentry->d_inode); > > mnt_drop_write(path->mnt); > > path_put(path); > > @@ -4278,7 +4279,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d > > } > > > > /** > > - * vfs_mkdir - create directory > > + * vfs_mkdir - create directory returning correct dentry is possible > > * @idmap: idmap of the mount the inode was found from > > * @dir: inode of the parent directory > > * @dentry: dentry of the child directory > > @@ -4291,9 +4292,20 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d > > * care to map the inode according to @idmap before checking permissions. > > * On non-idmapped mounts or if permission checking is to be performed on the > > * raw inode simply pass @nop_mnt_idmap. > > + * > > + * In the event that the filesystem does not use the *@dentry but leaves it > > + * negative or unhashes it and possibly splices a different one returning it, > > + * the original dentry is dput() and the alternate is returned. > > + * > > + * If the file-system reports success but leaves the dentry unhashed or > > + * negative, a lookup is performed and providing that is positive and > > + * a directory, it will be returned. If the lookup is not successful > > + * the original dentry is unhashed and returned. > > + * > > + * In case of an error the dentry is dput() and an ERR_PTR() is returned. > > */ > > -int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > > - struct dentry *dentry, umode_t mode) > > +struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > > + struct dentry *dentry, umode_t mode) > > { > > int error; > > unsigned max_links = dir->i_sb->s_max_links; > > @@ -4301,30 +4313,54 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, > > > > error = may_create(idmap, dir, dentry); > > if (error) > > - return error; > > + goto err; > > > > + error = -EPERM; > > if (!dir->i_op->mkdir) > > - return -EPERM; > > + goto err; > > > > mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); > > error = security_inode_mkdir(dir, dentry, mode); > > if (error) > > - return error; > > + goto err; > > > > + error = -EMLINK; > > if (max_links && dir->i_nlink >= max_links) > > - return -EMLINK; > > + goto err; > > > > de = dir->i_op->mkdir(idmap, dir, dentry, mode); > > + error = PTR_ERR(de); > > if (IS_ERR(de)) > > - return PTR_ERR(de); > > + goto err; > > + if (!de && (d_unhashed(dentry) || d_is_negative(dentry))) { > > Would it be better to push this down into the callers that need it and > make returning a hashed, positive dentry a requirement for the mkdir > operation? nit: I think you mean callees, not callers ? > > You could just turn this block of code into a helper function that > those four filesystems could call, which would mean that you could > avoid the d_unhashed() and d_is_negative() checks on the other > filesystems. yes and no. or maybe... yes: I would rally like to require that ->mkdir always provided a hashed positive dentry on success. no: I would not do it by asking them to use this case, as each filesystem could more easily do it with internal interfaces more simply yes: I guess we could impose this code as a first-cut and encourage each fs to convert it to better internal code no: it isn't always possible. cifs is the main problem (that I'm aware of). cifs is a network filesystem but doesn't (I think) have a concept comparable with the NFS filehandle. I think some handle is involved when a file is open, but not otherwise. The only handle you can use on a non-open file is the path name. This is actually much the same as the POSIX user-space interface. It means that if you "mkdir" and directory and then want to act on it, you need to give the name again, and some other process might have removed, or moved the directory and possibly put something else under the same name between the lookup and the subsequent act. So inside cifs it can perform a remote lookup for the name after the mkdir and might find a directory and can reasonable expect it to be the same directory and can fill in some inode information. But it might find a file, or nothing... How much does this matter? Is POSIX doesn't allow a 'stat' after 'mkdir' to guarantee to hit the same object, why do we need it in the kernel? 1/ nfsd needs to reply to "mkdir" with a filehandle for the created directory. This is actually optional in v3 and v4. So providing we get the filesys to return a good dentry whenever it can, we don't really need the lookup when the filesys honestly cannot provide something. 2/ ksmbd wants to effect an "inherit_owner" request to set the owner of the child to match the parent ... though it only writes the uid, it doesn't call setattr, so that doesn't seem all that important for the filesystems which would be problematic 3/ cachefiles ... I've messed up that part of the patch! It wants to start creating files in the directory. I now think it would be safest for cachefiles to refused to work on filesystems which cannot give an inode back from a mkdir request. The code in question is quite able to fail of something looks wrong. The inode being NULL should just be another wrong thing. So thanks for asking. I think it is important for ->mkdir to be able to return a good dentry if it is possible, but we must accept that sometimes it isn't possible and I now don't think it is worthwhile for the in-kernel clients to try a lookup in those cases. Filesystems can be encouraged to do the best they can and clients shouldn't assume they can do any better. I'll revise my last patch. Thanks, NeilBrown
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index c9e34842139f..8ec756b5dec4 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -160,18 +160,17 @@ static int dev_mkdir(const char *name, umode_t mode) { struct dentry *dentry; struct path path; - int err; dentry = kern_path_create(AT_FDCWD, name, &path, LOOKUP_DIRECTORY); if (IS_ERR(dentry)) return PTR_ERR(dentry); - err = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode); - if (!err) + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode); + if (!IS_ERR(dentry)) /* mark as kernel-created inode */ d_inode(dentry)->i_private = &thread; done_path_create(&path, dentry); - return err; + return PTR_ERR_OR_ZERO(dentry); } static int create_path(const char *nodepath) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 7cf59713f0f7..8a8337d1be05 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -95,7 +95,6 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, /* search the current directory for the element name */ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); -retry: ret = cachefiles_inject_read_error(); if (ret == 0) subdir = lookup_one_len(dirname, dir, strlen(dirname)); @@ -128,10 +127,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, ret = security_path_mkdir(&path, subdir, 0700); if (ret < 0) goto mkdir_error; - ret = cachefiles_inject_write_error(); - if (ret == 0) - ret = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); - if (ret < 0) { + subdir = ERR_PTR(cachefiles_inject_write_error()); + if (!IS_ERR(subdir)) + subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); + ret = PTR_ERR(subdir); + if (IS_ERR(subdir)) { trace_cachefiles_vfs_error(NULL, d_inode(dir), ret, cachefiles_trace_mkdir_error); goto mkdir_error; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 6315dd194228..51a5c54eb740 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -511,10 +511,16 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct inode *lower_dir; rc = lock_parent(dentry, &lower_dentry, &lower_dir); - if (!rc) - rc = vfs_mkdir(&nop_mnt_idmap, lower_dir, - lower_dentry, mode); - if (rc || d_really_is_negative(lower_dentry)) + if (rc) + goto out; + + lower_dentry = vfs_mkdir(&nop_mnt_idmap, lower_dir, + lower_dentry, mode); + rc = PTR_ERR(lower_dentry); + if (IS_ERR(lower_dentry)) + goto out; + rc = 0; + if (d_unhashed(lower_dentry)) goto out; rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); if (rc) diff --git a/fs/init.c b/fs/init.c index e9387b6c4f30..eef5124885e3 100644 --- a/fs/init.c +++ b/fs/init.c @@ -230,9 +230,12 @@ int __init init_mkdir(const char *pathname, umode_t mode) return PTR_ERR(dentry); mode = mode_strip_umask(d_inode(path.dentry), mode); error = security_path_mkdir(&path, dentry, mode); - if (!error) - error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, + if (!error) { + dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, mode); + if (IS_ERR(dentry)) + error = PTR_ERR(dentry); + } done_path_create(&path, dentry); return error; } diff --git a/fs/namei.c b/fs/namei.c index 19d5ea340a18..f76fee6df369 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4132,7 +4132,8 @@ EXPORT_SYMBOL(kern_path_create); void done_path_create(struct path *path, struct dentry *dentry) { - dput(dentry); + if (!IS_ERR(dentry)) + dput(dentry); inode_unlock(path->dentry->d_inode); mnt_drop_write(path->mnt); path_put(path); @@ -4278,7 +4279,7 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d } /** - * vfs_mkdir - create directory + * vfs_mkdir - create directory returning correct dentry is possible * @idmap: idmap of the mount the inode was found from * @dir: inode of the parent directory * @dentry: dentry of the child directory @@ -4291,9 +4292,20 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. + * + * In the event that the filesystem does not use the *@dentry but leaves it + * negative or unhashes it and possibly splices a different one returning it, + * the original dentry is dput() and the alternate is returned. + * + * If the file-system reports success but leaves the dentry unhashed or + * negative, a lookup is performed and providing that is positive and + * a directory, it will be returned. If the lookup is not successful + * the original dentry is unhashed and returned. + * + * In case of an error the dentry is dput() and an ERR_PTR() is returned. */ -int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode) +struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { int error; unsigned max_links = dir->i_sb->s_max_links; @@ -4301,30 +4313,54 @@ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, error = may_create(idmap, dir, dentry); if (error) - return error; + goto err; + error = -EPERM; if (!dir->i_op->mkdir) - return -EPERM; + goto err; mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); error = security_inode_mkdir(dir, dentry, mode); if (error) - return error; + goto err; + error = -EMLINK; if (max_links && dir->i_nlink >= max_links) - return -EMLINK; + goto err; de = dir->i_op->mkdir(idmap, dir, dentry, mode); + error = PTR_ERR(de); if (IS_ERR(de)) - return PTR_ERR(de); + goto err; + if (!de && (d_unhashed(dentry) || d_is_negative(dentry))) { + /* A few filesystems leave the dentry negative on success, + * currently cifs(with posix extensions), kernfs, tracefs, hostfs. + * They rely on a subsequent lookup which we perform here. + */ + const struct qstr *d_name = (void*)&dentry->d_name; + de = lookup_dcache(d_name, dentry->d_parent, 0); + if (!de) + de = __lookup_slow(d_name, dentry->d_parent, 0); + if (IS_ERR(de)) + de = NULL; + else if (unlikely(d_is_negative(de) || !d_is_dir(de))) { + dput(de); + de = NULL; + } + if (!de) + /* Ensure caller can easily detect that dentry is not useful */ + d_drop(dentry); + } if (de) { - fsnotify_mkdir(dir, de); - /* Cannot return de yet */ - dput(de); - } else - fsnotify_mkdir(dir, dentry); + dput(dentry); + dentry = de; + } + fsnotify_mkdir(dir, dentry); + return dentry; - return 0; +err: + dput(dentry); + return ERR_PTR(error); } EXPORT_SYMBOL(vfs_mkdir); @@ -4344,8 +4380,10 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode) error = security_path_mkdir(&path, dentry, mode_strip_umask(path.dentry->d_inode, mode)); if (!error) { - error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, + dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, mode); + if (IS_ERR(dentry)) + error = PTR_ERR(dentry); } done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 28f4d5311c40..c1d9bd07285f 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -233,9 +233,12 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) * as well be forgiving and just succeed silently. */ goto out_put; - status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); + if (IS_ERR(dentry)) + status = PTR_ERR(dentry); out_put: - dput(dentry); + if (!status) + dput(dentry); out_unlock: inode_unlock(d_inode(dir)); if (status == 0) { diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 29cb7b812d71..205b07f21e8d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1461,7 +1461,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, struct inode *dirp; struct iattr *iap = attrs->na_iattr; __be32 err; - int host_err; + int host_err = 0; dentry = fhp->fh_dentry; dirp = d_inode(dentry); @@ -1488,26 +1488,13 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_check_ignore_resizing(iap); break; case S_IFDIR: - host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); - if (!host_err && unlikely(d_unhashed(dchild))) { - struct dentry *d; - d = lookup_one_len(dchild->d_name.name, - dchild->d_parent, - dchild->d_name.len); - if (IS_ERR(d)) { - host_err = PTR_ERR(d); - break; - } - if (unlikely(d_is_negative(d))) { - dput(d); - err = nfserr_serverfault; - goto out; - } + dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); + if (IS_ERR(dchild)) + host_err = PTR_ERR(dchild); + else if (unlikely(dchild != resfhp->fh_dentry)) { dput(resfhp->fh_dentry); - resfhp->fh_dentry = dget(d); + resfhp->fh_dentry = dget(dchild); err = fh_update(resfhp); - dput(dchild); - dchild = d; if (err) goto out; } @@ -1530,7 +1517,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs); out: - dput(dchild); + if (!IS_ERR(dchild)) + dput(dchild); return err; out_nfserr: diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 21c3aaf7b274..fe493f3ed6b6 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -138,37 +138,6 @@ int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir, goto out; } -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, - struct dentry **newdentry, umode_t mode) -{ - int err; - struct dentry *d, *dentry = *newdentry; - - err = ovl_do_mkdir(ofs, dir, dentry, mode); - if (err) - return err; - - if (likely(!d_unhashed(dentry))) - return 0; - - /* - * vfs_mkdir() may succeed and leave the dentry passed - * to it unhashed and negative. If that happens, try to - * lookup a new hashed and positive dentry. - */ - d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent, - dentry->d_name.len); - if (IS_ERR(d)) { - pr_warn("failed lookup after mkdir (%pd2, err=%i).\n", - dentry, err); - return PTR_ERR(d); - } - dput(dentry); - *newdentry = d; - - return 0; -} - struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, struct dentry *newdentry, struct ovl_cattr *attr) { @@ -191,7 +160,8 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, case S_IFDIR: /* mkdir is special... */ - err = ovl_mkdir_real(ofs, dir, &newdentry, attr->mode); + newdentry = ovl_do_mkdir(ofs, dir, newdentry, attr->mode); + err = PTR_ERR_OR_ZERO(newdentry); break; case S_IFCHR: @@ -219,7 +189,8 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, } out: if (err) { - dput(newdentry); + if (!IS_ERR(newdentry)) + dput(newdentry); return ERR_PTR(err); } return newdentry; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 0021e2025020..6f2f8f4cfbbc 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -241,13 +241,14 @@ static inline int ovl_do_create(struct ovl_fs *ofs, return err; } -static inline int ovl_do_mkdir(struct ovl_fs *ofs, - struct inode *dir, struct dentry *dentry, - umode_t mode) +static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs, + struct inode *dir, + struct dentry *dentry, + umode_t mode) { - int err = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); - pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); - return err; + dentry = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); + pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(dentry)); + return dentry; } static inline int ovl_do_mknod(struct ovl_fs *ofs, @@ -838,8 +839,6 @@ struct ovl_cattr { #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) -int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir, - struct dentry **newdentry, umode_t mode); struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir, struct dentry *newdentry, struct ovl_cattr *attr); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 86ae6f6da36b..a381765802e9 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -327,9 +327,10 @@ static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, goto retry; } - err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode); - if (err) - goto out_dput; + work = ovl_do_mkdir(ofs, dir, work, attr.ia_mode); + err = PTR_ERR(work); + if (IS_ERR(work)) + goto out_err; /* Weird filesystem returning with hashed negative (kernfs)? */ err = -EINVAL; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index fe29acef5872..d682443b14ac 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -206,7 +206,7 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) { struct mnt_idmap *idmap; struct path path; - struct dentry *dentry; + struct dentry *dentry, *d; int err; dentry = ksmbd_vfs_kern_path_create(work, name, @@ -222,31 +222,18 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) idmap = mnt_idmap(path.mnt); mode |= S_IFDIR; - err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); - if (!err && d_unhashed(dentry)) { - struct dentry *d; - - d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent, - dentry->d_name.len); - if (IS_ERR(d)) { - err = PTR_ERR(d); - goto out_err; - } - if (unlikely(d_is_negative(d))) { - dput(d); - err = -ENOENT; - goto out_err; - } - + d = dentry; + entry = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); + err = PTR_ERR(dentry); + if (!IS_ERR(dentry) && dentry != d) ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d)); - dput(d); - } -out_err: done_path_create(&path, dentry); - if (err) + if (IS_ERR(dentry)) { pr_err("mkdir(%s): creation failed (err:%d)\n", name, err); - return err; + return err; + } + return 0; } static ssize_t ksmbd_vfs_getcasexattr(struct mnt_idmap *idmap, diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index c287c755f2c5..3537f3cca6d5 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -167,10 +167,11 @@ xrep_orphanage_create( * directory to control access to a file we put in here. */ if (d_really_is_negative(orphanage_dentry)) { - error = vfs_mkdir(&nop_mnt_idmap, root_inode, orphanage_dentry, - 0750); - if (error) - goto out_dput_orphanage; + orphanage_dentry = vfs_mkdir(&nop_mnt_idmap, root_inode, + orphanage_dentry, 0750); + error = PTR_ERR(orphanage_dentry); + if (IS_ERR(orphanage_dentry)) + goto out_unlock_root; } /* Not a directory? Bail out. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 45269608ee23..beae24bc990d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1981,8 +1981,8 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap, */ int vfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); -int vfs_mkdir(struct mnt_idmap *, struct inode *, - struct dentry *, umode_t); +struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, + struct dentry *, umode_t); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); int vfs_symlink(struct mnt_idmap *, struct inode *,
vfs_mkdir() does not currently guarantee to leave the child dentry hashed or make it positive on success. It may leave it unhashed and negative and then the caller needs to perform a lookup to find the target dentry, if it needs it. This patch changes vfs_mkdir() to return the dentry provided by the filesystems which is hashed and positive when provided. This reduces the need for lookup code which is now included in vfs_mkdir() rather than at various call-sites. The included lookup does not include the permission check that some of the existing code included in e.g. lookup_one_len(). This should not be necessary for lookup up a directory which has just be successfully created. If a different dentry is returned, the first one is put. If necessary the fact that it is new can be determined by comparing pointers. A new dentry will certainly have a new pointer (as the old is put after the new is obtained). The dentry returned by vfs_mkdir(), when not an error, *is* now guaranteed to be hashed and positive. A few callers do not need the dentry, but will now sometimes perform the lookup anyway. This should be cheap except possibly in the case of cifs. Signed-off-by: NeilBrown <neilb@suse.de> --- drivers/base/devtmpfs.c | 7 ++-- fs/cachefiles/namei.c | 10 +++--- fs/ecryptfs/inode.c | 14 +++++--- fs/init.c | 7 ++-- fs/namei.c | 70 +++++++++++++++++++++++++++++++--------- fs/nfsd/nfs4recover.c | 7 ++-- fs/nfsd/vfs.c | 28 +++++----------- fs/overlayfs/dir.c | 37 +++------------------ fs/overlayfs/overlayfs.h | 15 ++++----- fs/overlayfs/super.c | 7 ++-- fs/smb/server/vfs.c | 31 ++++++------------ fs/xfs/scrub/orphanage.c | 9 +++--- include/linux/fs.h | 4 +-- 13 files changed, 121 insertions(+), 125 deletions(-)