Message ID | 20240206142453.1906268-10-amir73il@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | FUSE passthrough for file io | expand |
On 2/6/24 09:24, Amir Goldstein wrote: > After passthrough read/write, we invalidate a/c/mtime/size attributes > if the backing inode attributes differ from FUSE inode attributes. > > Do the same in fuse_getattr() and after detach of backing inode, so that > passthrough mmap read/write changes to a/c/mtime/size attribute of the > backing inode will be propagated to the FUSE inode. > > The rules of invalidating a/c/mtime/size attributes with writeback cache > are more complicated, so for now, writeback cache and passthrough cannot > be enabled on the same filesystem. > > Signed-off-by: Amir Goldstein <amir73il@gmail.com> > --- > fs/fuse/dir.c | 4 ++++ > fs/fuse/fuse_i.h | 2 ++ > fs/fuse/inode.c | 4 ++++ > fs/fuse/iomode.c | 5 +++- > fs/fuse/passthrough.c | 55 ++++++++++++++++++++++++++++++++++++------- > 5 files changed, 61 insertions(+), 9 deletions(-) > > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c > index 95330c2ca3d8..7f9d002b8f23 100644 > --- a/fs/fuse/dir.c > +++ b/fs/fuse/dir.c > @@ -2118,6 +2118,10 @@ static int fuse_getattr(struct mnt_idmap *idmap, > return -EACCES; > } > > + /* Maybe update/invalidate attributes from backing inode */ > + if (fuse_inode_backing(get_fuse_inode(inode))) > + fuse_backing_update_attr_mask(inode, request_mask); > + > return fuse_update_get_attr(inode, NULL, stat, request_mask, flags); > } > > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > index 98f878a52af1..4b011d31012f 100644 > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -1456,6 +1456,8 @@ void fuse_backing_files_init(struct fuse_conn *fc); > void fuse_backing_files_free(struct fuse_conn *fc); > int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map); > int fuse_backing_close(struct fuse_conn *fc, int backing_id); > +void fuse_backing_update_attr(struct inode *inode, struct fuse_backing *fb); > +void fuse_backing_update_attr_mask(struct inode *inode, u32 request_mask); > > struct fuse_backing *fuse_passthrough_open(struct file *file, > struct inode *inode, > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c > index c26a84439934..c68f005b6e86 100644 > --- a/fs/fuse/inode.c > +++ b/fs/fuse/inode.c > @@ -1302,9 +1302,13 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, > * on a stacked fs (e.g. overlayfs) themselves and with > * max_stack_depth == 1, FUSE fs can be stacked as the > * underlying fs of a stacked fs (e.g. overlayfs). > + * > + * For now, writeback cache and passthrough cannot be > + * enabled on the same filesystem. > */ > if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) && > (flags & FUSE_PASSTHROUGH) && > + !fc->writeback_cache && > arg->max_stack_depth > 0 && > arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH) { > fc->passthrough = 1; > diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c > index c545058a01e1..96eb311fe7bd 100644 > --- a/fs/fuse/iomode.c > +++ b/fs/fuse/iomode.c > @@ -157,8 +157,11 @@ void fuse_file_uncached_io_end(struct inode *inode) > spin_unlock(&fi->lock); > if (!uncached_io) > wake_up(&fi->direct_io_waitq); > - if (oldfb) > + if (oldfb) { > + /* Maybe update attributes after detaching backing inode */ > + fuse_backing_update_attr(inode, oldfb); > fuse_backing_put(oldfb); > + } > } > > /* > diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c > index 260e76fc72d5..c1bb80a6e536 100644 > --- a/fs/fuse/passthrough.c > +++ b/fs/fuse/passthrough.c > @@ -11,11 +11,8 @@ > #include <linux/backing-file.h> > #include <linux/splice.h> > > -static void fuse_file_accessed(struct file *file) > +static void fuse_backing_accessed(struct inode *inode, struct fuse_backing *fb) > { > - struct inode *inode = file_inode(file); > - struct fuse_inode *fi = get_fuse_inode(inode); > - struct fuse_backing *fb = fuse_inode_backing(fi); > struct inode *backing_inode = file_inode(fb->file); > struct timespec64 atime = inode_get_atime(inode); > struct timespec64 batime = inode_get_atime(backing_inode); > @@ -25,11 +22,8 @@ static void fuse_file_accessed(struct file *file) > fuse_invalidate_atime(inode); > } > > -static void fuse_file_modified(struct file *file) > +static void fuse_backing_modified(struct inode *inode, struct fuse_backing *fb) > { > - struct inode *inode = file_inode(file); > - struct fuse_inode *fi = get_fuse_inode(inode); > - struct fuse_backing *fb = fuse_inode_backing(fi); > struct inode *backing_inode = file_inode(fb->file); > struct timespec64 ctime = inode_get_ctime(inode); > struct timespec64 mtime = inode_get_mtime(inode); > @@ -42,6 +36,51 @@ static void fuse_file_modified(struct file *file) > fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); > } > > +/* Called from fuse_file_uncached_io_end() after detach of backing inode */ > +void fuse_backing_update_attr(struct inode *inode, struct fuse_backing *fb) > +{ > + fuse_backing_modified(inode, fb); > + fuse_backing_accessed(inode, fb); > +} > + > +/* Called from fuse_getattr() - may race with detach of backing inode */ > +void fuse_backing_update_attr_mask(struct inode *inode, u32 request_mask) > +{ > + struct fuse_inode *fi = get_fuse_inode(inode); > + struct fuse_backing *fb; > + > + rcu_read_lock(); > + fb = fuse_backing_get(fuse_inode_backing(fi)); > + rcu_read_unlock(); > + if (!fb) > + return; > + > + if (request_mask & FUSE_STATX_MODSIZE) > + fuse_backing_modified(inode, fb); > + if (request_mask & STATX_ATIME) > + fuse_backing_accessed(inode, fb); > + > + fuse_backing_put(fb); > +} > + > +static void fuse_file_accessed(struct file *file) > +{ > + struct inode *inode = file_inode(file); > + struct fuse_inode *fi = get_fuse_inode(inode); > + struct fuse_backing *fb = fuse_inode_backing(fi); > + > + fuse_backing_accessed(inode, fb); > +} > + > +static void fuse_file_modified(struct file *file) > +{ > + struct inode *inode = file_inode(file); > + struct fuse_inode *fi = get_fuse_inode(inode); > + struct fuse_backing *fb = fuse_inode_backing(fi); > + > + fuse_backing_modified(inode, fb); > +} > + > ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *iter) > { > struct file *file = iocb->ki_filp; I noticed this patch doesn't seem to have made it into 6.9 like the rest of these passthrough patches -- may I ask why it didn't make it? I think it still makes sense but I might be missing some change between what's in 6.9 and this version of the patches. Thanks! Sweet Tea
On 4/2/24 22:13, Sweet Tea Dorminy wrote: > > > On 2/6/24 09:24, Amir Goldstein wrote: >> After passthrough read/write, we invalidate a/c/mtime/size attributes >> if the backing inode attributes differ from FUSE inode attributes. >> >> Do the same in fuse_getattr() and after detach of backing inode, so that >> passthrough mmap read/write changes to a/c/mtime/size attribute of the >> backing inode will be propagated to the FUSE inode. >> >> The rules of invalidating a/c/mtime/size attributes with writeback cache >> are more complicated, so for now, writeback cache and passthrough cannot >> be enabled on the same filesystem. >> >> Signed-off-by: Amir Goldstein <amir73il@gmail.com> >> --- >> fs/fuse/dir.c | 4 ++++ >> fs/fuse/fuse_i.h | 2 ++ >> fs/fuse/inode.c | 4 ++++ >> fs/fuse/iomode.c | 5 +++- >> fs/fuse/passthrough.c | 55 ++++++++++++++++++++++++++++++++++++------- >> 5 files changed, 61 insertions(+), 9 deletions(-) >> >> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c >> index 95330c2ca3d8..7f9d002b8f23 100644 >> --- a/fs/fuse/dir.c >> +++ b/fs/fuse/dir.c >> @@ -2118,6 +2118,10 @@ static int fuse_getattr(struct mnt_idmap *idmap, >> return -EACCES; >> } >> + /* Maybe update/invalidate attributes from backing inode */ >> + if (fuse_inode_backing(get_fuse_inode(inode))) >> + fuse_backing_update_attr_mask(inode, request_mask); >> + >> return fuse_update_get_attr(inode, NULL, stat, request_mask, >> flags); >> } >> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h >> index 98f878a52af1..4b011d31012f 100644 >> --- a/fs/fuse/fuse_i.h >> +++ b/fs/fuse/fuse_i.h >> @@ -1456,6 +1456,8 @@ void fuse_backing_files_init(struct fuse_conn *fc); >> void fuse_backing_files_free(struct fuse_conn *fc); >> int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map >> *map); >> int fuse_backing_close(struct fuse_conn *fc, int backing_id); >> +void fuse_backing_update_attr(struct inode *inode, struct >> fuse_backing *fb); >> +void fuse_backing_update_attr_mask(struct inode *inode, u32 >> request_mask); >> struct fuse_backing *fuse_passthrough_open(struct file *file, >> struct inode *inode, >> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c >> index c26a84439934..c68f005b6e86 100644 >> --- a/fs/fuse/inode.c >> +++ b/fs/fuse/inode.c >> @@ -1302,9 +1302,13 @@ static void process_init_reply(struct >> fuse_mount *fm, struct fuse_args *args, >> * on a stacked fs (e.g. overlayfs) themselves and with >> * max_stack_depth == 1, FUSE fs can be stacked as the >> * underlying fs of a stacked fs (e.g. overlayfs). >> + * >> + * For now, writeback cache and passthrough cannot be >> + * enabled on the same filesystem. >> */ >> if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) && >> (flags & FUSE_PASSTHROUGH) && >> + !fc->writeback_cache && >> arg->max_stack_depth > 0 && >> arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH) { >> fc->passthrough = 1; >> diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c >> index c545058a01e1..96eb311fe7bd 100644 >> --- a/fs/fuse/iomode.c >> +++ b/fs/fuse/iomode.c >> @@ -157,8 +157,11 @@ void fuse_file_uncached_io_end(struct inode *inode) >> spin_unlock(&fi->lock); >> if (!uncached_io) >> wake_up(&fi->direct_io_waitq); >> - if (oldfb) >> + if (oldfb) { >> + /* Maybe update attributes after detaching backing inode */ >> + fuse_backing_update_attr(inode, oldfb); >> fuse_backing_put(oldfb); >> + } >> } >> /* >> diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c >> index 260e76fc72d5..c1bb80a6e536 100644 >> --- a/fs/fuse/passthrough.c >> +++ b/fs/fuse/passthrough.c >> @@ -11,11 +11,8 @@ >> #include <linux/backing-file.h> >> #include <linux/splice.h> >> -static void fuse_file_accessed(struct file *file) >> +static void fuse_backing_accessed(struct inode *inode, struct >> fuse_backing *fb) >> { >> - struct inode *inode = file_inode(file); >> - struct fuse_inode *fi = get_fuse_inode(inode); >> - struct fuse_backing *fb = fuse_inode_backing(fi); >> struct inode *backing_inode = file_inode(fb->file); >> struct timespec64 atime = inode_get_atime(inode); >> struct timespec64 batime = inode_get_atime(backing_inode); >> @@ -25,11 +22,8 @@ static void fuse_file_accessed(struct file *file) >> fuse_invalidate_atime(inode); >> } >> -static void fuse_file_modified(struct file *file) >> +static void fuse_backing_modified(struct inode *inode, struct >> fuse_backing *fb) >> { >> - struct inode *inode = file_inode(file); >> - struct fuse_inode *fi = get_fuse_inode(inode); >> - struct fuse_backing *fb = fuse_inode_backing(fi); >> struct inode *backing_inode = file_inode(fb->file); >> struct timespec64 ctime = inode_get_ctime(inode); >> struct timespec64 mtime = inode_get_mtime(inode); >> @@ -42,6 +36,51 @@ static void fuse_file_modified(struct file *file) >> fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); >> } >> +/* Called from fuse_file_uncached_io_end() after detach of backing >> inode */ >> +void fuse_backing_update_attr(struct inode *inode, struct >> fuse_backing *fb) >> +{ >> + fuse_backing_modified(inode, fb); >> + fuse_backing_accessed(inode, fb); >> +} >> + >> +/* Called from fuse_getattr() - may race with detach of backing inode */ >> +void fuse_backing_update_attr_mask(struct inode *inode, u32 >> request_mask) >> +{ >> + struct fuse_inode *fi = get_fuse_inode(inode); >> + struct fuse_backing *fb; >> + >> + rcu_read_lock(); >> + fb = fuse_backing_get(fuse_inode_backing(fi)); >> + rcu_read_unlock(); >> + if (!fb) >> + return; >> + >> + if (request_mask & FUSE_STATX_MODSIZE) >> + fuse_backing_modified(inode, fb); >> + if (request_mask & STATX_ATIME) >> + fuse_backing_accessed(inode, fb); >> + >> + fuse_backing_put(fb); >> +} >> + >> +static void fuse_file_accessed(struct file *file) >> +{ >> + struct inode *inode = file_inode(file); >> + struct fuse_inode *fi = get_fuse_inode(inode); >> + struct fuse_backing *fb = fuse_inode_backing(fi); >> + >> + fuse_backing_accessed(inode, fb); >> +} >> + >> +static void fuse_file_modified(struct file *file) >> +{ >> + struct inode *inode = file_inode(file); >> + struct fuse_inode *fi = get_fuse_inode(inode); >> + struct fuse_backing *fb = fuse_inode_backing(fi); >> + >> + fuse_backing_modified(inode, fb); >> +} >> + >> ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct >> iov_iter *iter) >> { >> struct file *file = iocb->ki_filp; > > I noticed this patch doesn't seem to have made it into 6.9 like the rest > of these passthrough patches -- may I ask why it didn't make it? I think > it still makes sense but I might be missing some change between what's > in 6.9 and this version of the patches. > > Thanks! > > Sweet Tea See here please https://lore.kernel.org/all/CAOQ4uxj8Az6VEZ-Ky5gs33gc0N9hjv4XqL6XC_kc+vsVpaBCOg@mail.gmail.com/ Bernd
On Wed, Apr 3, 2024 at 12:18 AM Bernd Schubert <bernd.schubert@fastmail.fm> wrote: > > > > On 4/2/24 22:13, Sweet Tea Dorminy wrote: > > > > > > On 2/6/24 09:24, Amir Goldstein wrote: > >> After passthrough read/write, we invalidate a/c/mtime/size attributes > >> if the backing inode attributes differ from FUSE inode attributes. > >> > >> Do the same in fuse_getattr() and after detach of backing inode, so that > >> passthrough mmap read/write changes to a/c/mtime/size attribute of the > >> backing inode will be propagated to the FUSE inode. > >> > >> The rules of invalidating a/c/mtime/size attributes with writeback cache > >> are more complicated, so for now, writeback cache and passthrough cannot > >> be enabled on the same filesystem. > >> > >> Signed-off-by: Amir Goldstein <amir73il@gmail.com> > >> --- > >> fs/fuse/dir.c | 4 ++++ > >> fs/fuse/fuse_i.h | 2 ++ > >> fs/fuse/inode.c | 4 ++++ > >> fs/fuse/iomode.c | 5 +++- > >> fs/fuse/passthrough.c | 55 ++++++++++++++++++++++++++++++++++++------- > >> 5 files changed, 61 insertions(+), 9 deletions(-) > >> > >> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c > >> index 95330c2ca3d8..7f9d002b8f23 100644 > >> --- a/fs/fuse/dir.c > >> +++ b/fs/fuse/dir.c > >> @@ -2118,6 +2118,10 @@ static int fuse_getattr(struct mnt_idmap *idmap, > >> return -EACCES; > >> } > >> + /* Maybe update/invalidate attributes from backing inode */ > >> + if (fuse_inode_backing(get_fuse_inode(inode))) > >> + fuse_backing_update_attr_mask(inode, request_mask); > >> + > >> return fuse_update_get_attr(inode, NULL, stat, request_mask, > >> flags); > >> } > >> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > >> index 98f878a52af1..4b011d31012f 100644 > >> --- a/fs/fuse/fuse_i.h > >> +++ b/fs/fuse/fuse_i.h > >> @@ -1456,6 +1456,8 @@ void fuse_backing_files_init(struct fuse_conn *fc); > >> void fuse_backing_files_free(struct fuse_conn *fc); > >> int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map > >> *map); > >> int fuse_backing_close(struct fuse_conn *fc, int backing_id); > >> +void fuse_backing_update_attr(struct inode *inode, struct > >> fuse_backing *fb); > >> +void fuse_backing_update_attr_mask(struct inode *inode, u32 > >> request_mask); > >> struct fuse_backing *fuse_passthrough_open(struct file *file, > >> struct inode *inode, > >> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c > >> index c26a84439934..c68f005b6e86 100644 > >> --- a/fs/fuse/inode.c > >> +++ b/fs/fuse/inode.c > >> @@ -1302,9 +1302,13 @@ static void process_init_reply(struct > >> fuse_mount *fm, struct fuse_args *args, > >> * on a stacked fs (e.g. overlayfs) themselves and with > >> * max_stack_depth == 1, FUSE fs can be stacked as the > >> * underlying fs of a stacked fs (e.g. overlayfs). > >> + * > >> + * For now, writeback cache and passthrough cannot be > >> + * enabled on the same filesystem. > >> */ > >> if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) && > >> (flags & FUSE_PASSTHROUGH) && > >> + !fc->writeback_cache && > >> arg->max_stack_depth > 0 && > >> arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH) { > >> fc->passthrough = 1; > >> diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c > >> index c545058a01e1..96eb311fe7bd 100644 > >> --- a/fs/fuse/iomode.c > >> +++ b/fs/fuse/iomode.c > >> @@ -157,8 +157,11 @@ void fuse_file_uncached_io_end(struct inode *inode) > >> spin_unlock(&fi->lock); > >> if (!uncached_io) > >> wake_up(&fi->direct_io_waitq); > >> - if (oldfb) > >> + if (oldfb) { > >> + /* Maybe update attributes after detaching backing inode */ > >> + fuse_backing_update_attr(inode, oldfb); > >> fuse_backing_put(oldfb); > >> + } > >> } > >> /* > >> diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c > >> index 260e76fc72d5..c1bb80a6e536 100644 > >> --- a/fs/fuse/passthrough.c > >> +++ b/fs/fuse/passthrough.c > >> @@ -11,11 +11,8 @@ > >> #include <linux/backing-file.h> > >> #include <linux/splice.h> > >> -static void fuse_file_accessed(struct file *file) > >> +static void fuse_backing_accessed(struct inode *inode, struct > >> fuse_backing *fb) > >> { > >> - struct inode *inode = file_inode(file); > >> - struct fuse_inode *fi = get_fuse_inode(inode); > >> - struct fuse_backing *fb = fuse_inode_backing(fi); > >> struct inode *backing_inode = file_inode(fb->file); > >> struct timespec64 atime = inode_get_atime(inode); > >> struct timespec64 batime = inode_get_atime(backing_inode); > >> @@ -25,11 +22,8 @@ static void fuse_file_accessed(struct file *file) > >> fuse_invalidate_atime(inode); > >> } > >> -static void fuse_file_modified(struct file *file) > >> +static void fuse_backing_modified(struct inode *inode, struct > >> fuse_backing *fb) > >> { > >> - struct inode *inode = file_inode(file); > >> - struct fuse_inode *fi = get_fuse_inode(inode); > >> - struct fuse_backing *fb = fuse_inode_backing(fi); > >> struct inode *backing_inode = file_inode(fb->file); > >> struct timespec64 ctime = inode_get_ctime(inode); > >> struct timespec64 mtime = inode_get_mtime(inode); > >> @@ -42,6 +36,51 @@ static void fuse_file_modified(struct file *file) > >> fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); > >> } > >> +/* Called from fuse_file_uncached_io_end() after detach of backing > >> inode */ > >> +void fuse_backing_update_attr(struct inode *inode, struct > >> fuse_backing *fb) > >> +{ > >> + fuse_backing_modified(inode, fb); > >> + fuse_backing_accessed(inode, fb); > >> +} > >> + > >> +/* Called from fuse_getattr() - may race with detach of backing inode */ > >> +void fuse_backing_update_attr_mask(struct inode *inode, u32 > >> request_mask) > >> +{ > >> + struct fuse_inode *fi = get_fuse_inode(inode); > >> + struct fuse_backing *fb; > >> + > >> + rcu_read_lock(); > >> + fb = fuse_backing_get(fuse_inode_backing(fi)); > >> + rcu_read_unlock(); > >> + if (!fb) > >> + return; > >> + > >> + if (request_mask & FUSE_STATX_MODSIZE) > >> + fuse_backing_modified(inode, fb); > >> + if (request_mask & STATX_ATIME) > >> + fuse_backing_accessed(inode, fb); > >> + > >> + fuse_backing_put(fb); > >> +} > >> + > >> +static void fuse_file_accessed(struct file *file) > >> +{ > >> + struct inode *inode = file_inode(file); > >> + struct fuse_inode *fi = get_fuse_inode(inode); > >> + struct fuse_backing *fb = fuse_inode_backing(fi); > >> + > >> + fuse_backing_accessed(inode, fb); > >> +} > >> + > >> +static void fuse_file_modified(struct file *file) > >> +{ > >> + struct inode *inode = file_inode(file); > >> + struct fuse_inode *fi = get_fuse_inode(inode); > >> + struct fuse_backing *fb = fuse_inode_backing(fi); > >> + > >> + fuse_backing_modified(inode, fb); > >> +} > >> + > >> ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct > >> iov_iter *iter) > >> { > >> struct file *file = iocb->ki_filp; > > > > I noticed this patch doesn't seem to have made it into 6.9 like the rest > > of these passthrough patches -- may I ask why it didn't make it? I think > > it still makes sense but I might be missing some change between what's > > in 6.9 and this version of the patches. > > > > Thanks! > > > > Sweet Tea > > See here please > https://lore.kernel.org/all/CAOQ4uxj8Az6VEZ-Ky5gs33gc0N9hjv4XqL6XC_kc+vsVpaBCOg@mail.gmail.com/ > > FWIW, I intend to take a swing at getattr() passthrough "as soon as I can". Sweet Tea, Can you please explain the workload where you find that this patch is needed? Is your workload using mmap writes? requires a long attribute cache timeout? Does your workload involve mixing passthrough IO and direct/cached IO on the same inode at different times or by different open fd's? I would like to know, so I can tell you if getattr() passthrough design is going to help your use case. For example, my current getattr() passthrough design (in my head) will not allow opening the inode in cached IO mode from lookup time until evict/forget, unlike the current read/write passthrough, which is from first open to last close. Thanks, Amir.
> Sweet Tea, > > Can you please explain the workload where you find that this patch is needed? I was researching before sending out my own version of attr passthrough - it seemed like a step in the direction, but then the code in-tree wasn't the same. > Is your workload using mmap writes? requires a long attribute cache timeout? > Does your workload involve mixing passthrough IO and direct/cached IO > on the same inode at different times or by different open fd's? > > I would like to know, so I can tell you if getattr() passthrough design is > going to help your use case. > > For example, my current getattr() passthrough design (in my head) > will not allow opening the inode in cached IO mode from lookup time > until evict/forget, unlike the current read/write passthrough, which is > from first open to last close. I think the things I'd been working on is very similar. Two possible HSM variants, both focused on doing passthrough IO with minimal involvement from the fuse server in at least some cases. One would be using passthrough for temporary ingestion of some memory state for a workload, user writes files and the FUSE server can choose to passthrough them to local storage temporarily or to send them to remote storage -- as ingestion requires pausing the workload and is therefore very expensive, I'd like to pass through attr updates to the backing file so that there are minimal roundtrips to the fuse server during write. Later the HSM would move the files to remote storage, or delete them. One would be using passthrough for binaries -- providing specific sets of mostly binaries with some tracking on open/close, so the HSM can delete unused sets. Again the goal is to avoid metadata query roundtrips to userspace for speed; we don't expect a file open in passthrough mode to be opened again for FUSE-server-mediated IO until the passthrough version is closed. Thanks! Sweet Tea
On Fri, Apr 5, 2024 at 11:52 PM Sweet Tea Dorminy <sweettea-kernel@dorminy.me> wrote: > > > > Sweet Tea, > > > > Can you please explain the workload where you find that this patch is needed? > > I was researching before sending out my own version of attr passthrough > - it seemed like a step in the direction, but then the code in-tree > wasn't the same. > FYI, I have pushed a WIP branch with some patches in the general direction of getattr() passthrough: https://github.com/amir73il/linux/commits/fuse-backing-inode-wip/ It is not at all functional and probably not working - I only verified that it does not explode when I run xfstests, but passthrough_hp does not yet have an API to enable getattr() passthrough. I am posting this branch here so that we can compare notes and so that you can learn it before we meet in LSFMM. I wanted to give some ideas for API and implementation. the main thing I added is the ability to declare the passthrough ops in a mask with the backing file setup: --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -1076,9 +1076,21 @@ struct fuse_notify_retrieve_in { struct fuse_backing_map { int32_t fd; uint32_t flags; - uint64_t padding; + uint64_t ops_mask; }; +#define FUSE_PASSTHROUGH_OP(op) (1ULL << ((op) - 1)) + +/* These passthrough operations are implied by FOPEN_PASSTHROUGH */ +#define FUSE_PASSTHROUGH_RW_OPS \ + (FUSE_PASSTHROUGH_OP(FUSE_READ) | FUSE_PASSTHROUGH_OP(FUSE_WRITE)) + +#define FUSE_BACKING_MAP_OP(map, op) \ + ((map)->ops_mask & FUSE_PASSTHROUGH_OP(op)) + +#define FUSE_BACKING_MAP_VALID_OPS \ + (FUSE_PASSTHROUGH_RW_OPS) + Which is later extended to support also + /* Inode passthrough operations for backing file attached on lookup */ + #define FUSE_PASSTHROUGH_INODE_OPS \ + (FUSE_PASSTHROUGH_OP(FUSE_GETATTR) | \ + FUSE_PASSTHROUGH_OP(FUSE_GETXATTR) | \ + FUSE_PASSTHROUGH_OP(FUSE_LISTXATTR) | \ + FUSE_PASSTHROUGH_OP(FUSE_STATX)) The idea is that these would be setup during FUSE_LOOKUP response. Let me know what you think. Thanks, Amir.
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 95330c2ca3d8..7f9d002b8f23 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -2118,6 +2118,10 @@ static int fuse_getattr(struct mnt_idmap *idmap, return -EACCES; } + /* Maybe update/invalidate attributes from backing inode */ + if (fuse_inode_backing(get_fuse_inode(inode))) + fuse_backing_update_attr_mask(inode, request_mask); + return fuse_update_get_attr(inode, NULL, stat, request_mask, flags); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 98f878a52af1..4b011d31012f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1456,6 +1456,8 @@ void fuse_backing_files_init(struct fuse_conn *fc); void fuse_backing_files_free(struct fuse_conn *fc); int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map); int fuse_backing_close(struct fuse_conn *fc, int backing_id); +void fuse_backing_update_attr(struct inode *inode, struct fuse_backing *fb); +void fuse_backing_update_attr_mask(struct inode *inode, u32 request_mask); struct fuse_backing *fuse_passthrough_open(struct file *file, struct inode *inode, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index c26a84439934..c68f005b6e86 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1302,9 +1302,13 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, * on a stacked fs (e.g. overlayfs) themselves and with * max_stack_depth == 1, FUSE fs can be stacked as the * underlying fs of a stacked fs (e.g. overlayfs). + * + * For now, writeback cache and passthrough cannot be + * enabled on the same filesystem. */ if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) && (flags & FUSE_PASSTHROUGH) && + !fc->writeback_cache && arg->max_stack_depth > 0 && arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH) { fc->passthrough = 1; diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c index c545058a01e1..96eb311fe7bd 100644 --- a/fs/fuse/iomode.c +++ b/fs/fuse/iomode.c @@ -157,8 +157,11 @@ void fuse_file_uncached_io_end(struct inode *inode) spin_unlock(&fi->lock); if (!uncached_io) wake_up(&fi->direct_io_waitq); - if (oldfb) + if (oldfb) { + /* Maybe update attributes after detaching backing inode */ + fuse_backing_update_attr(inode, oldfb); fuse_backing_put(oldfb); + } } /* diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c index 260e76fc72d5..c1bb80a6e536 100644 --- a/fs/fuse/passthrough.c +++ b/fs/fuse/passthrough.c @@ -11,11 +11,8 @@ #include <linux/backing-file.h> #include <linux/splice.h> -static void fuse_file_accessed(struct file *file) +static void fuse_backing_accessed(struct inode *inode, struct fuse_backing *fb) { - struct inode *inode = file_inode(file); - struct fuse_inode *fi = get_fuse_inode(inode); - struct fuse_backing *fb = fuse_inode_backing(fi); struct inode *backing_inode = file_inode(fb->file); struct timespec64 atime = inode_get_atime(inode); struct timespec64 batime = inode_get_atime(backing_inode); @@ -25,11 +22,8 @@ static void fuse_file_accessed(struct file *file) fuse_invalidate_atime(inode); } -static void fuse_file_modified(struct file *file) +static void fuse_backing_modified(struct inode *inode, struct fuse_backing *fb) { - struct inode *inode = file_inode(file); - struct fuse_inode *fi = get_fuse_inode(inode); - struct fuse_backing *fb = fuse_inode_backing(fi); struct inode *backing_inode = file_inode(fb->file); struct timespec64 ctime = inode_get_ctime(inode); struct timespec64 mtime = inode_get_mtime(inode); @@ -42,6 +36,51 @@ static void fuse_file_modified(struct file *file) fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); } +/* Called from fuse_file_uncached_io_end() after detach of backing inode */ +void fuse_backing_update_attr(struct inode *inode, struct fuse_backing *fb) +{ + fuse_backing_modified(inode, fb); + fuse_backing_accessed(inode, fb); +} + +/* Called from fuse_getattr() - may race with detach of backing inode */ +void fuse_backing_update_attr_mask(struct inode *inode, u32 request_mask) +{ + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_backing *fb; + + rcu_read_lock(); + fb = fuse_backing_get(fuse_inode_backing(fi)); + rcu_read_unlock(); + if (!fb) + return; + + if (request_mask & FUSE_STATX_MODSIZE) + fuse_backing_modified(inode, fb); + if (request_mask & STATX_ATIME) + fuse_backing_accessed(inode, fb); + + fuse_backing_put(fb); +} + +static void fuse_file_accessed(struct file *file) +{ + struct inode *inode = file_inode(file); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_backing *fb = fuse_inode_backing(fi); + + fuse_backing_accessed(inode, fb); +} + +static void fuse_file_modified(struct file *file) +{ + struct inode *inode = file_inode(file); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_backing *fb = fuse_inode_backing(fi); + + fuse_backing_modified(inode, fb); +} + ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp;
After passthrough read/write, we invalidate a/c/mtime/size attributes if the backing inode attributes differ from FUSE inode attributes. Do the same in fuse_getattr() and after detach of backing inode, so that passthrough mmap read/write changes to a/c/mtime/size attribute of the backing inode will be propagated to the FUSE inode. The rules of invalidating a/c/mtime/size attributes with writeback cache are more complicated, so for now, writeback cache and passthrough cannot be enabled on the same filesystem. Signed-off-by: Amir Goldstein <amir73il@gmail.com> --- fs/fuse/dir.c | 4 ++++ fs/fuse/fuse_i.h | 2 ++ fs/fuse/inode.c | 4 ++++ fs/fuse/iomode.c | 5 +++- fs/fuse/passthrough.c | 55 ++++++++++++++++++++++++++++++++++++------- 5 files changed, 61 insertions(+), 9 deletions(-)