Message ID | 168780370085.2142.4461798321197359310.stgit@manet.1015granger.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | shmemfs stable directory offsets | expand |
On 6/26/23 20:21, Chuck Lever wrote: > From: Chuck Lever <chuck.lever@oracle.com> > > The current cursor-based directory offset mechanism doesn't work > when a tmpfs filesystem is exported via NFS. This is because NFS > clients do not open directories. Each server-side READDIR operation > has to open the directory, read it, then close it. The cursor state > for that directory, being associated strictly with the opened > struct file, is thus discarded after each NFS READDIR operation. > > Directory offsets are cached not only by NFS clients, but also by > user space libraries on those clients. Essentially there is no way > to invalidate those caches when directory offsets have changed on > an NFS server after the offset-to-dentry mapping changes. Thus the > whole application stack depends on unchanging directory offsets. > > The solution we've come up with is to make the directory offset for > each file in a tmpfs filesystem stable for the life of the directory > entry it represents. > > shmem_readdir() and shmem_dir_llseek() now use an xarray to map each > directory offset (an loff_t integer) to the memory address of a > struct dentry. > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com> > --- > mm/shmem.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 47 insertions(+), 7 deletions(-) > > diff --git a/mm/shmem.c b/mm/shmem.c > index 721f9fd064aa..89012f3583b1 100644 > --- a/mm/shmem.c > +++ b/mm/shmem.c > @@ -2410,7 +2410,8 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block > /* Some things misbehave if size == 0 on a directory */ > inode->i_size = 2 * BOGO_DIRENT_SIZE; > inode->i_op = &shmem_dir_inode_operations; > - inode->i_fop = &simple_dir_operations; > + inode->i_fop = &stable_dir_operations; > + stable_offset_init(inode); > break; > case S_IFLNK: > /* > @@ -2950,7 +2951,10 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir, > if (error && error != -EOPNOTSUPP) > goto out_iput; > > - error = 0; > + error = stable_offset_add(dir, dentry); > + if (error) > + goto out_iput; > + > dir->i_size += BOGO_DIRENT_SIZE; > dir->i_ctime = dir->i_mtime = current_time(dir); > inode_inc_iversion(dir); > @@ -3027,6 +3031,13 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr > goto out; > } > > + ret = stable_offset_add(dir, dentry); > + if (ret) { > + if (inode->i_nlink) > + shmem_free_inode(inode->i_sb); > + goto out; > + } > + > dir->i_size += BOGO_DIRENT_SIZE; > inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); > inode_inc_iversion(dir); > @@ -3045,6 +3056,8 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry) > if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) > shmem_free_inode(inode->i_sb); > > + stable_offset_remove(dir, dentry); > + > dir->i_size -= BOGO_DIRENT_SIZE; > inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); > inode_inc_iversion(dir); > @@ -3103,24 +3116,41 @@ static int shmem_rename2(struct mnt_idmap *idmap, > { > struct inode *inode = d_inode(old_dentry); > int they_are_dirs = S_ISDIR(inode->i_mode); > + int error; > > if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) > return -EINVAL; > > - if (flags & RENAME_EXCHANGE) > + if (flags & RENAME_EXCHANGE) { > + error = stable_offset_add(new_dir, old_dentry); > + if (error) > + return error; Won't this fail in stable_offset_add() with -EBUSY, because dentry->d_offset is set? > + error = stable_offset_add(old_dir, new_dentry); > + if (error) { > + stable_offset_remove(new_dir, old_dentry); > + return error; > + } > + stable_offset_remove(old_dir, old_dentry); > + stable_offset_remove(new_dir, new_dentry); Assuming stable_offset_add() would have succeeded (somehow), old_dentry and new_dentry would have gotten reset their dentry->d_offset? > + > + /* Always returns zero */ > return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); > + } Hmm, let's assume simple_rename_exchange() fails, stable entries are now the other way around? I actually start to wonder if we need to introduce error injection for RENAME_EXCHANGE, I think it the most complex part in the series. > > if (!simple_empty(new_dentry)) > return -ENOTEMPTY; > > if (flags & RENAME_WHITEOUT) { > - int error; > - > error = shmem_whiteout(idmap, old_dir, old_dentry); > if (error) > return error; > } > > + stable_offset_remove(old_dir, old_dentry); > + error = stable_offset_add(new_dir, old_dentry); > + if (error) > + return error; > + > if (d_really_is_positive(new_dentry)) { > (void) shmem_unlink(new_dir, new_dentry); > if (they_are_dirs) { > @@ -3164,19 +3194,23 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, > if (error && error != -EOPNOTSUPP) > goto out_iput; > > + error = stable_offset_add(dir, dentry); > + if (error) > + goto out_iput; > + > inode->i_size = len-1; > if (len <= SHORT_SYMLINK_LEN) { > inode->i_link = kmemdup(symname, len, GFP_KERNEL); > if (!inode->i_link) { > error = -ENOMEM; > - goto out_iput; > + goto out_remove_offset; > } > inode->i_op = &shmem_short_symlink_operations; > } else { > inode_nohighmem(inode); > error = shmem_get_folio(inode, 0, &folio, SGP_WRITE); > if (error) > - goto out_iput; > + goto out_remove_offset; > inode->i_mapping->a_ops = &shmem_aops; > inode->i_op = &shmem_symlink_inode_operations; > memcpy(folio_address(folio), symname, len); > @@ -3185,12 +3219,16 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, > folio_unlock(folio); > folio_put(folio); > } > + > dir->i_size += BOGO_DIRENT_SIZE; > dir->i_ctime = dir->i_mtime = current_time(dir); > inode_inc_iversion(dir); > d_instantiate(dentry, inode); > dget(dentry); > return 0; > + > +out_remove_offset: > + stable_offset_remove(dir, dentry); > out_iput: > iput(inode); > return error; > @@ -3920,6 +3958,8 @@ static void shmem_destroy_inode(struct inode *inode) > { > if (S_ISREG(inode->i_mode)) > mpol_free_shared_policy(&SHMEM_I(inode)->policy); > + if (S_ISDIR(inode->i_mode)) > + stable_offset_destroy(inode); > } > > static void shmem_init_inode(void *foo) > >
> On Jun 27, 2023, at 10:06 AM, Bernd Schubert <bernd.schubert@fastmail.fm> wrote: > > > > On 6/26/23 20:21, Chuck Lever wrote: >> From: Chuck Lever <chuck.lever@oracle.com> >> The current cursor-based directory offset mechanism doesn't work >> when a tmpfs filesystem is exported via NFS. This is because NFS >> clients do not open directories. Each server-side READDIR operation >> has to open the directory, read it, then close it. The cursor state >> for that directory, being associated strictly with the opened >> struct file, is thus discarded after each NFS READDIR operation. >> Directory offsets are cached not only by NFS clients, but also by >> user space libraries on those clients. Essentially there is no way >> to invalidate those caches when directory offsets have changed on >> an NFS server after the offset-to-dentry mapping changes. Thus the >> whole application stack depends on unchanging directory offsets. >> The solution we've come up with is to make the directory offset for >> each file in a tmpfs filesystem stable for the life of the directory >> entry it represents. >> shmem_readdir() and shmem_dir_llseek() now use an xarray to map each >> directory offset (an loff_t integer) to the memory address of a >> struct dentry. >> Signed-off-by: Chuck Lever <chuck.lever@oracle.com> >> --- >> mm/shmem.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++------- >> 1 file changed, 47 insertions(+), 7 deletions(-) >> diff --git a/mm/shmem.c b/mm/shmem.c >> index 721f9fd064aa..89012f3583b1 100644 >> --- a/mm/shmem.c >> +++ b/mm/shmem.c >> @@ -2410,7 +2410,8 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block >> /* Some things misbehave if size == 0 on a directory */ >> inode->i_size = 2 * BOGO_DIRENT_SIZE; >> inode->i_op = &shmem_dir_inode_operations; >> - inode->i_fop = &simple_dir_operations; >> + inode->i_fop = &stable_dir_operations; >> + stable_offset_init(inode); >> break; >> case S_IFLNK: >> /* >> @@ -2950,7 +2951,10 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir, >> if (error && error != -EOPNOTSUPP) >> goto out_iput; >> - error = 0; >> + error = stable_offset_add(dir, dentry); >> + if (error) >> + goto out_iput; >> + >> dir->i_size += BOGO_DIRENT_SIZE; >> dir->i_ctime = dir->i_mtime = current_time(dir); >> inode_inc_iversion(dir); >> @@ -3027,6 +3031,13 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr >> goto out; >> } >> + ret = stable_offset_add(dir, dentry); >> + if (ret) { >> + if (inode->i_nlink) >> + shmem_free_inode(inode->i_sb); >> + goto out; >> + } >> + >> dir->i_size += BOGO_DIRENT_SIZE; >> inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); >> inode_inc_iversion(dir); >> @@ -3045,6 +3056,8 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry) >> if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) >> shmem_free_inode(inode->i_sb); >> + stable_offset_remove(dir, dentry); >> + >> dir->i_size -= BOGO_DIRENT_SIZE; >> inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); >> inode_inc_iversion(dir); >> @@ -3103,24 +3116,41 @@ static int shmem_rename2(struct mnt_idmap *idmap, >> { >> struct inode *inode = d_inode(old_dentry); >> int they_are_dirs = S_ISDIR(inode->i_mode); >> + int error; >> if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) >> return -EINVAL; >> - if (flags & RENAME_EXCHANGE) >> + if (flags & RENAME_EXCHANGE) { >> + error = stable_offset_add(new_dir, old_dentry); >> + if (error) >> + return error; > > Won't this fail in stable_offset_add() with -EBUSY, because dentry->d_offset is set? > >> + error = stable_offset_add(old_dir, new_dentry); >> + if (error) { >> + stable_offset_remove(new_dir, old_dentry); >> + return error; >> + } >> + stable_offset_remove(old_dir, old_dentry); >> + stable_offset_remove(new_dir, new_dentry); > > Assuming stable_offset_add() would have succeeded (somehow), old_dentry and new_dentry would have gotten reset their dentry->d_offset? Probably. I can have another look. >> + >> + /* Always returns zero */ >> return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); >> + } > > Hmm, let's assume simple_rename_exchange() fails, stable entries are now the other way around? Today it never fails. We can add an assertion here. Otherwise cleaning up after a simple_rename_exchange() failure is going to be even more hairy. > I actually start to wonder if we need to introduce error injection for RENAME_EXCHANGE, I think it the most complex part in the series. And it's the least frequently used mode of rename. >> if (!simple_empty(new_dentry)) >> return -ENOTEMPTY; >> if (flags & RENAME_WHITEOUT) { >> - int error; >> - >> error = shmem_whiteout(idmap, old_dir, old_dentry); >> if (error) >> return error; >> } >> + stable_offset_remove(old_dir, old_dentry); >> + error = stable_offset_add(new_dir, old_dentry); >> + if (error) >> + return error; >> + >> if (d_really_is_positive(new_dentry)) { >> (void) shmem_unlink(new_dir, new_dentry); >> if (they_are_dirs) { >> @@ -3164,19 +3194,23 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, >> if (error && error != -EOPNOTSUPP) >> goto out_iput; >> + error = stable_offset_add(dir, dentry); >> + if (error) >> + goto out_iput; >> + >> inode->i_size = len-1; >> if (len <= SHORT_SYMLINK_LEN) { >> inode->i_link = kmemdup(symname, len, GFP_KERNEL); >> if (!inode->i_link) { >> error = -ENOMEM; >> - goto out_iput; >> + goto out_remove_offset; >> } >> inode->i_op = &shmem_short_symlink_operations; >> } else { >> inode_nohighmem(inode); >> error = shmem_get_folio(inode, 0, &folio, SGP_WRITE); >> if (error) >> - goto out_iput; >> + goto out_remove_offset; >> inode->i_mapping->a_ops = &shmem_aops; >> inode->i_op = &shmem_symlink_inode_operations; >> memcpy(folio_address(folio), symname, len); >> @@ -3185,12 +3219,16 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, >> folio_unlock(folio); >> folio_put(folio); >> } >> + >> dir->i_size += BOGO_DIRENT_SIZE; >> dir->i_ctime = dir->i_mtime = current_time(dir); >> inode_inc_iversion(dir); >> d_instantiate(dentry, inode); >> dget(dentry); >> return 0; >> + >> +out_remove_offset: >> + stable_offset_remove(dir, dentry); >> out_iput: >> iput(inode); >> return error; >> @@ -3920,6 +3958,8 @@ static void shmem_destroy_inode(struct inode *inode) >> { >> if (S_ISREG(inode->i_mode)) >> mpol_free_shared_policy(&SHMEM_I(inode)->policy); >> + if (S_ISDIR(inode->i_mode)) >> + stable_offset_destroy(inode); >> } >> static void shmem_init_inode(void *foo) -- Chuck Lever
On 6/27/23 16:11, Chuck Lever III wrote: > > >> On Jun 27, 2023, at 10:06 AM, Bernd Schubert <bernd.schubert@fastmail.fm> wrote: >> >> >> >> On 6/26/23 20:21, Chuck Lever wrote: >>> From: Chuck Lever <chuck.lever@oracle.com> >>> The current cursor-based directory offset mechanism doesn't work >>> when a tmpfs filesystem is exported via NFS. This is because NFS >>> clients do not open directories. Each server-side READDIR operation >>> has to open the directory, read it, then close it. The cursor state >>> for that directory, being associated strictly with the opened >>> struct file, is thus discarded after each NFS READDIR operation. >>> Directory offsets are cached not only by NFS clients, but also by >>> user space libraries on those clients. Essentially there is no way >>> to invalidate those caches when directory offsets have changed on >>> an NFS server after the offset-to-dentry mapping changes. Thus the >>> whole application stack depends on unchanging directory offsets. >>> The solution we've come up with is to make the directory offset for >>> each file in a tmpfs filesystem stable for the life of the directory >>> entry it represents. >>> shmem_readdir() and shmem_dir_llseek() now use an xarray to map each >>> directory offset (an loff_t integer) to the memory address of a >>> struct dentry. >>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com> >>> --- >>> mm/shmem.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++------- >>> 1 file changed, 47 insertions(+), 7 deletions(-) >>> diff --git a/mm/shmem.c b/mm/shmem.c >>> index 721f9fd064aa..89012f3583b1 100644 >>> --- a/mm/shmem.c >>> +++ b/mm/shmem.c >>> @@ -2410,7 +2410,8 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block >>> /* Some things misbehave if size == 0 on a directory */ >>> inode->i_size = 2 * BOGO_DIRENT_SIZE; >>> inode->i_op = &shmem_dir_inode_operations; >>> - inode->i_fop = &simple_dir_operations; >>> + inode->i_fop = &stable_dir_operations; >>> + stable_offset_init(inode); >>> break; >>> case S_IFLNK: >>> /* >>> @@ -2950,7 +2951,10 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir, >>> if (error && error != -EOPNOTSUPP) >>> goto out_iput; >>> - error = 0; >>> + error = stable_offset_add(dir, dentry); >>> + if (error) >>> + goto out_iput; >>> + >>> dir->i_size += BOGO_DIRENT_SIZE; >>> dir->i_ctime = dir->i_mtime = current_time(dir); >>> inode_inc_iversion(dir); >>> @@ -3027,6 +3031,13 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr >>> goto out; >>> } >>> + ret = stable_offset_add(dir, dentry); >>> + if (ret) { >>> + if (inode->i_nlink) >>> + shmem_free_inode(inode->i_sb); >>> + goto out; >>> + } >>> + >>> dir->i_size += BOGO_DIRENT_SIZE; >>> inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); >>> inode_inc_iversion(dir); >>> @@ -3045,6 +3056,8 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry) >>> if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) >>> shmem_free_inode(inode->i_sb); >>> + stable_offset_remove(dir, dentry); >>> + >>> dir->i_size -= BOGO_DIRENT_SIZE; >>> inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); >>> inode_inc_iversion(dir); >>> @@ -3103,24 +3116,41 @@ static int shmem_rename2(struct mnt_idmap *idmap, >>> { >>> struct inode *inode = d_inode(old_dentry); >>> int they_are_dirs = S_ISDIR(inode->i_mode); >>> + int error; >>> if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) >>> return -EINVAL; >>> - if (flags & RENAME_EXCHANGE) >>> + if (flags & RENAME_EXCHANGE) { >>> + error = stable_offset_add(new_dir, old_dentry); >>> + if (error) >>> + return error; >> >> Won't this fail in stable_offset_add() with -EBUSY, because dentry->d_offset is set? >> >>> + error = stable_offset_add(old_dir, new_dentry); >>> + if (error) { >>> + stable_offset_remove(new_dir, old_dentry); >>> + return error; >>> + } >>> + stable_offset_remove(old_dir, old_dentry); >>> + stable_offset_remove(new_dir, new_dentry); >> >> Assuming stable_offset_add() would have succeeded (somehow), old_dentry and new_dentry would have gotten reset their dentry->d_offset? > > Probably. I can have another look. > > >>> + >>> + /* Always returns zero */ >>> return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); >>> + } >> >> Hmm, let's assume simple_rename_exchange() fails, stable entries are now the other way around? > > Today it never fails. We can add an assertion here. > > Otherwise cleaning up after a simple_rename_exchange() failure is going to be even more hair > > Hmm, then add in the commit message that error handling for RENAME_EXCHANGE is not perfect? >> I actually start to wonder if we need to introduce error injection for RENAME_EXCHANGE, I think it the most complex part in the series. > > And it's the least frequently used mode of rename. > > >>> if (!simple_empty(new_dentry)) >>> return -ENOTEMPTY; >>> if (flags & RENAME_WHITEOUT) { >>> - int error; >>> - >>> error = shmem_whiteout(idmap, old_dir, old_dentry); >>> if (error) >>> return error; >>> } >>> + stable_offset_remove(old_dir, old_dentry); >>> + error = stable_offset_add(new_dir, old_dentry); >>> + if (error) >>> + return error; >>> + >>> if (d_really_is_positive(new_dentry)) { >>> (void) shmem_unlink(new_dir, new_dentry); >>> if (they_are_dirs) { >>> @@ -3164,19 +3194,23 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, >>> if (error && error != -EOPNOTSUPP) >>> goto out_iput; >>> + error = stable_offset_add(dir, dentry); >>> + if (error) >>> + goto out_iput; >>> + >>> inode->i_size = len-1; >>> if (len <= SHORT_SYMLINK_LEN) { >>> inode->i_link = kmemdup(symname, len, GFP_KERNEL); >>> if (!inode->i_link) { >>> error = -ENOMEM; >>> - goto out_iput; >>> + goto out_remove_offset; >>> } >>> inode->i_op = &shmem_short_symlink_operations; >>> } else { >>> inode_nohighmem(inode); >>> error = shmem_get_folio(inode, 0, &folio, SGP_WRITE); >>> if (error) >>> - goto out_iput; >>> + goto out_remove_offset; >>> inode->i_mapping->a_ops = &shmem_aops; >>> inode->i_op = &shmem_symlink_inode_operations; >>> memcpy(folio_address(folio), symname, len); >>> @@ -3185,12 +3219,16 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, >>> folio_unlock(folio); >>> folio_put(folio); >>> } >>> + >>> dir->i_size += BOGO_DIRENT_SIZE; >>> dir->i_ctime = dir->i_mtime = current_time(dir); >>> inode_inc_iversion(dir); >>> d_instantiate(dentry, inode); >>> dget(dentry); >>> return 0; >>> + >>> +out_remove_offset: >>> + stable_offset_remove(dir, dentry); >>> out_iput: >>> iput(inode); >>> return error; >>> @@ -3920,6 +3958,8 @@ static void shmem_destroy_inode(struct inode *inode) >>> { >>> if (S_ISREG(inode->i_mode)) >>> mpol_free_shared_policy(&SHMEM_I(inode)->policy); >>> + if (S_ISDIR(inode->i_mode)) >>> + stable_offset_destroy(inode); >>> } >>> static void shmem_init_inode(void *foo) > > > -- > Chuck Lever > >
diff --git a/mm/shmem.c b/mm/shmem.c index 721f9fd064aa..89012f3583b1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2410,7 +2410,8 @@ static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block /* Some things misbehave if size == 0 on a directory */ inode->i_size = 2 * BOGO_DIRENT_SIZE; inode->i_op = &shmem_dir_inode_operations; - inode->i_fop = &simple_dir_operations; + inode->i_fop = &stable_dir_operations; + stable_offset_init(inode); break; case S_IFLNK: /* @@ -2950,7 +2951,10 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir, if (error && error != -EOPNOTSUPP) goto out_iput; - error = 0; + error = stable_offset_add(dir, dentry); + if (error) + goto out_iput; + dir->i_size += BOGO_DIRENT_SIZE; dir->i_ctime = dir->i_mtime = current_time(dir); inode_inc_iversion(dir); @@ -3027,6 +3031,13 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr goto out; } + ret = stable_offset_add(dir, dentry); + if (ret) { + if (inode->i_nlink) + shmem_free_inode(inode->i_sb); + goto out; + } + dir->i_size += BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); inode_inc_iversion(dir); @@ -3045,6 +3056,8 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry) if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) shmem_free_inode(inode->i_sb); + stable_offset_remove(dir, dentry); + dir->i_size -= BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); inode_inc_iversion(dir); @@ -3103,24 +3116,41 @@ static int shmem_rename2(struct mnt_idmap *idmap, { struct inode *inode = d_inode(old_dentry); int they_are_dirs = S_ISDIR(inode->i_mode); + int error; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; - if (flags & RENAME_EXCHANGE) + if (flags & RENAME_EXCHANGE) { + error = stable_offset_add(new_dir, old_dentry); + if (error) + return error; + error = stable_offset_add(old_dir, new_dentry); + if (error) { + stable_offset_remove(new_dir, old_dentry); + return error; + } + stable_offset_remove(old_dir, old_dentry); + stable_offset_remove(new_dir, new_dentry); + + /* Always returns zero */ return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); + } if (!simple_empty(new_dentry)) return -ENOTEMPTY; if (flags & RENAME_WHITEOUT) { - int error; - error = shmem_whiteout(idmap, old_dir, old_dentry); if (error) return error; } + stable_offset_remove(old_dir, old_dentry); + error = stable_offset_add(new_dir, old_dentry); + if (error) + return error; + if (d_really_is_positive(new_dentry)) { (void) shmem_unlink(new_dir, new_dentry); if (they_are_dirs) { @@ -3164,19 +3194,23 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, if (error && error != -EOPNOTSUPP) goto out_iput; + error = stable_offset_add(dir, dentry); + if (error) + goto out_iput; + inode->i_size = len-1; if (len <= SHORT_SYMLINK_LEN) { inode->i_link = kmemdup(symname, len, GFP_KERNEL); if (!inode->i_link) { error = -ENOMEM; - goto out_iput; + goto out_remove_offset; } inode->i_op = &shmem_short_symlink_operations; } else { inode_nohighmem(inode); error = shmem_get_folio(inode, 0, &folio, SGP_WRITE); if (error) - goto out_iput; + goto out_remove_offset; inode->i_mapping->a_ops = &shmem_aops; inode->i_op = &shmem_symlink_inode_operations; memcpy(folio_address(folio), symname, len); @@ -3185,12 +3219,16 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, folio_unlock(folio); folio_put(folio); } + dir->i_size += BOGO_DIRENT_SIZE; dir->i_ctime = dir->i_mtime = current_time(dir); inode_inc_iversion(dir); d_instantiate(dentry, inode); dget(dentry); return 0; + +out_remove_offset: + stable_offset_remove(dir, dentry); out_iput: iput(inode); return error; @@ -3920,6 +3958,8 @@ static void shmem_destroy_inode(struct inode *inode) { if (S_ISREG(inode->i_mode)) mpol_free_shared_policy(&SHMEM_I(inode)->policy); + if (S_ISDIR(inode->i_mode)) + stable_offset_destroy(inode); } static void shmem_init_inode(void *foo)