Message ID | 3f19cd8daab0dc3c4d0381019ce61cd106970097.1708709155.git.john@groves.net |
---|---|
State | Superseded |
Headers | show |
Series | Introduce the famfs shared-memory file system | expand |
On Fri, 23 Feb 2024 11:41:58 -0600 John Groves <John@Groves.net> wrote: > This commit introduces the famfs file_operations. We call > thp_get_unmapped_area() to force PMD page alignment. Our read and > write handlers (famfs_dax_read_iter() and famfs_dax_write_iter()) > call dax_iomap_rw() to do the work. > > famfs_file_invalid() checks for various ways a famfs file can be > in an invalid state so we can fail I/O or fault resolution in those > cases. Those cases include the following: > > * No famfs metadata > * file i_size does not match the originally allocated size > * file is not flagged as DAX > * errors were detected previously on the file > > An invalid file can often be fixed by replaying the log, or by > umount/mount/log replay - all of which are user space operations. > > Signed-off-by: John Groves <john@groves.net> > --- > fs/famfs/famfs_file.c | 136 ++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 136 insertions(+) > > diff --git a/fs/famfs/famfs_file.c b/fs/famfs/famfs_file.c > index fc667d5f7be8..5228e9de1e3b 100644 > --- a/fs/famfs/famfs_file.c > +++ b/fs/famfs/famfs_file.c > @@ -19,6 +19,142 @@ > #include <uapi/linux/famfs_ioctl.h> > #include "famfs_internal.h" > > +/********************************************************************* > + * file_operations > + */ > + > +/* Reject I/O to files that aren't in a valid state */ > +static ssize_t > +famfs_file_invalid(struct inode *inode) > +{ > + size_t i_size = i_size_read(inode); > + struct famfs_file_meta *meta = inode->i_private; > + > + if (!meta) { > + pr_err("%s: un-initialized famfs file\n", __func__); > + return -EIO; > + } > + if (i_size != meta->file_size) { > + pr_err("%s: something changed the size from %ld to %ld\n", > + __func__, meta->file_size, i_size); > + meta->error = 1; > + return -ENXIO; > + } > + if (!IS_DAX(inode)) { > + pr_err("%s: inode %llx IS_DAX is false\n", __func__, (u64)inode); > + meta->error = 1; > + return -ENXIO; > + } > + if (meta->error) { > + 
pr_err("%s: previously detected metadata errors\n", __func__);
> + meta->error = 1;

Already set? If treating it as only a boolean, maybe make it one?

> + return -EIO;
> + }
> + return 0;
> +}
On 24/02/26 01:32PM, Jonathan Cameron wrote: > On Fri, 23 Feb 2024 11:41:58 -0600 > John Groves <John@Groves.net> wrote: > > > This commit introduces the famfs file_operations. We call > > thp_get_unmapped_area() to force PMD page alignment. Our read and > > write handlers (famfs_dax_read_iter() and famfs_dax_write_iter()) > > call dax_iomap_rw() to do the work. > > > > famfs_file_invalid() checks for various ways a famfs file can be > > in an invalid state so we can fail I/O or fault resolution in those > > cases. Those cases include the following: > > > > * No famfs metadata > > * file i_size does not match the originally allocated size > > * file is not flagged as DAX > > * errors were detected previously on the file > > > > An invalid file can often be fixed by replaying the log, or by > > umount/mount/log replay - all of which are user space operations. > > > > Signed-off-by: John Groves <john@groves.net> > > --- > > fs/famfs/famfs_file.c | 136 ++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 136 insertions(+) > > > > diff --git a/fs/famfs/famfs_file.c b/fs/famfs/famfs_file.c > > index fc667d5f7be8..5228e9de1e3b 100644 > > --- a/fs/famfs/famfs_file.c > > +++ b/fs/famfs/famfs_file.c > > @@ -19,6 +19,142 @@ > > #include <uapi/linux/famfs_ioctl.h> > > #include "famfs_internal.h" > > > > +/********************************************************************* > > + * file_operations > > + */ > > + > > +/* Reject I/O to files that aren't in a valid state */ > > +static ssize_t > > +famfs_file_invalid(struct inode *inode) > > +{ > > + size_t i_size = i_size_read(inode); > > + struct famfs_file_meta *meta = inode->i_private; > > + > > + if (!meta) { > > + pr_err("%s: un-initialized famfs file\n", __func__); > > + return -EIO; > > + } > > + if (i_size != meta->file_size) { > > + pr_err("%s: something changed the size from %ld to %ld\n", > > + __func__, meta->file_size, i_size); > > + meta->error = 1; > > + return -ENXIO; > > + } > > + if (!IS_DAX(inode)) 
{
> > + pr_err("%s: inode %llx IS_DAX is false\n", __func__, (u64)inode);
> > + meta->error = 1;
> > + return -ENXIO;
> > + }
> > + if (meta->error) {
> > + pr_err("%s: previously detected metadata errors\n", __func__);
> > + meta->error = 1;
>
> Already set? If treating it as only a boolean, maybe make it one?

Done, thanks

John
diff --git a/fs/famfs/famfs_file.c b/fs/famfs/famfs_file.c index fc667d5f7be8..5228e9de1e3b 100644 --- a/fs/famfs/famfs_file.c +++ b/fs/famfs/famfs_file.c @@ -19,6 +19,142 @@ #include <uapi/linux/famfs_ioctl.h> #include "famfs_internal.h" +/********************************************************************* + * file_operations + */ + +/* Reject I/O to files that aren't in a valid state */ +static ssize_t +famfs_file_invalid(struct inode *inode) +{ + size_t i_size = i_size_read(inode); + struct famfs_file_meta *meta = inode->i_private; + + if (!meta) { + pr_err("%s: un-initialized famfs file\n", __func__); + return -EIO; + } + if (i_size != meta->file_size) { + pr_err("%s: something changed the size from %ld to %ld\n", + __func__, meta->file_size, i_size); + meta->error = 1; + return -ENXIO; + } + if (!IS_DAX(inode)) { + pr_err("%s: inode %llx IS_DAX is false\n", __func__, (u64)inode); + meta->error = 1; + return -ENXIO; + } + if (meta->error) { + pr_err("%s: previously detected metadata errors\n", __func__); + meta->error = 1; + return -EIO; + } + return 0; +} + +static ssize_t +famfs_dax_read_iter( + struct kiocb *iocb, + struct iov_iter *to) +{ + struct inode *inode = iocb->ki_filp->f_mapping->host; + size_t i_size = i_size_read(inode); + size_t count = iov_iter_count(to); + size_t max_count; + ssize_t rc; + + rc = famfs_file_invalid(inode); + if (rc) + return rc; + + max_count = max_t(size_t, 0, i_size - iocb->ki_pos); + + if (count > max_count) + iov_iter_truncate(to, max_count); + + if (!iov_iter_count(to)) + return 0; + + rc = dax_iomap_rw(iocb, to, &famfs_iomap_ops); + + file_accessed(iocb->ki_filp); + return rc; +} + +/** + * famfs_write_iter() + * + * We need our own write-iter in order to prevent append + */ +static ssize_t +famfs_dax_write_iter( + struct kiocb *iocb, + struct iov_iter *from) +{ + struct inode *inode = iocb->ki_filp->f_mapping->host; + size_t i_size = i_size_read(inode); + size_t count = iov_iter_count(from); + size_t max_count; + 
ssize_t rc; + + rc = famfs_file_invalid(inode); + if (rc) + return rc; + + /* Starting offset of write is: iocb->ki_pos + * length is iov_iter_count(from) + */ + max_count = max_t(size_t, 0, i_size - iocb->ki_pos); + + /* If write would go past EOF, truncate it to end at EOF since famfs does not + * alloc-on-write + */ + if (count > max_count) + iov_iter_truncate(from, max_count); + + if (!iov_iter_count(from)) + return 0; + + return dax_iomap_rw(iocb, from, &famfs_iomap_ops); +} + +static int +famfs_file_mmap( + struct file *file, + struct vm_area_struct *vma) +{ + struct inode *inode = file_inode(file); + ssize_t rc; + + rc = famfs_file_invalid(inode); + if (rc) + return (int)rc; + + file_accessed(file); + vma->vm_ops = &famfs_file_vm_ops; + vm_flags_set(vma, VM_HUGEPAGE); + return 0; +} + +const struct file_operations famfs_file_operations = { + .owner = THIS_MODULE, + + /* Custom famfs operations */ + .write_iter = famfs_dax_write_iter, + .read_iter = famfs_dax_read_iter, + .mmap = famfs_file_mmap, + + /* Force PMD alignment for mmap */ + .get_unmapped_area = thp_get_unmapped_area, + + /* Generic Operations */ + .fsync = noop_fsync, + .splice_read = filemap_splice_read, + .splice_write = iter_file_splice_write, + .llseek = generic_file_llseek, +}; + /********************************************************************* * iomap_operations *
This commit introduces the famfs file_operations. We call
thp_get_unmapped_area() to force PMD page alignment. Our read and
write handlers (famfs_dax_read_iter() and famfs_dax_write_iter())
call dax_iomap_rw() to do the work.

famfs_file_invalid() checks for various ways a famfs file can be
in an invalid state so we can fail I/O or fault resolution in those
cases. Those cases include the following:

* No famfs metadata
* file i_size does not match the originally allocated size
* file is not flagged as DAX
* errors were detected previously on the file

An invalid file can often be fixed by replaying the log, or by
umount/mount/log replay - all of which are user space operations.

Signed-off-by: John Groves <john@groves.net>
---
 fs/famfs/famfs_file.c | 136 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 136 insertions(+)