@@ -8,6 +8,7 @@ obj-$(CONFIG_CUSE) += cuse.o
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
+fuse-y += iomode.o
fuse-$(CONFIG_FUSE_DAX) += dax.o
virtiofs-y := virtio_fs.o
@@ -113,6 +113,9 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
struct fuse_release_args *ra = ff->release_args;
struct fuse_args *args = (ra ? &ra->args : NULL);
+ if (ra && ra->inode)
+ fuse_file_io_release(ff, ra->inode);
+
if (!args) {
/* Do nothing when server does not implement 'open' */
} else if (sync) {
@@ -204,6 +207,11 @@ int fuse_finish_open(struct inode *inode, struct file *file)
{
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = get_fuse_conn(inode);
+ int err;
+
+ err = fuse_file_io_open(file, inode);
+ if (err)
+ return err;
if (ff->open_flags & FOPEN_STREAM)
stream_open(inode, file);
@@ -2507,6 +2515,7 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fm->fc;
+ int rc;
/* DAX mmap is superior to direct_io mmap */
if (FUSE_IS_DAX(file_inode(file)))
@@ -2522,6 +2531,13 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
invalidate_inode_pages2(file->f_mapping);
+ /*
+ * First mmap of direct_io file enters caching inode io mode.
+ */
+ rc = fuse_file_io_mmap(ff, file_inode(file));
+ if (rc)
+ return rc;
+
if (!(vma->vm_flags & VM_MAYSHARE)) {
/* MAP_PRIVATE */
return generic_file_mmap(file, vma);
@@ -3294,6 +3310,7 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
fi->writectr = 0;
+ fi->iocachectr = 0;
init_waitqueue_head(&fi->page_waitq);
fi->writepages = RB_ROOT;
@@ -111,7 +111,7 @@ struct fuse_inode {
u64 attr_version;
union {
- /* Write related fields (regular file only) */
+ /* read/write io cache (regular file only) */
struct {
/* Files usable in writepage. Protected by fi->lock */
struct list_head write_files;
@@ -123,6 +123,9 @@ struct fuse_inode {
* (FUSE_NOWRITE) means more writes are blocked */
int writectr;
+ /** Number of files/maps using page cache */
+ int iocachectr;
+
/* Waitq for writepage completion */
wait_queue_head_t page_waitq;
@@ -187,6 +190,8 @@ enum {
FUSE_I_BAD,
/* Has btime */
FUSE_I_BTIME,
+ /* Wants or already has page cache IO */
+ FUSE_I_CACHE_IO_MODE,
};
struct fuse_conn;
@@ -246,6 +251,9 @@ struct fuse_file {
/** Has flock been performed on this file? */
bool flock:1;
+
+ /** Was file opened for io? */
+ bool io_opened:1;
};
/** One input argument of a request */
@@ -1346,8 +1354,17 @@ int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa);
int fuse_fileattr_set(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
-/* file.c */
+/* iomode.c */
+int fuse_file_cached_io_start(struct inode *inode);
+void fuse_file_cached_io_end(struct inode *inode);
+int fuse_file_uncached_io_start(struct inode *inode);
+void fuse_file_uncached_io_end(struct inode *inode);
+int fuse_file_io_open(struct file *file, struct inode *inode);
+int fuse_file_io_mmap(struct fuse_file *ff, struct inode *inode);
+void fuse_file_io_release(struct fuse_file *ff, struct inode *inode);
+
+/* file.c */
struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
unsigned int open_flags, bool isdir);
void fuse_file_release(struct inode *inode, struct fuse_file *ff,
new file mode 100644
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * FUSE inode io modes.
+ *
+ * Copyright (c) 2024 CTERA Networks.
+ */
+
+#include "fuse_i.h"
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+
+/*
+ * Request an open in caching mode.
+ * Return 0 if in caching mode.
+ */
+static int fuse_inode_get_io_cache(struct fuse_inode *fi)
+{
+ assert_spin_locked(&fi->lock);
+ if (fi->iocachectr < 0)
+ return -ETXTBSY;
+ if (fi->iocachectr++ == 0)
+ set_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
+ return 0;
+}
+
+/*
+ * Release an open in caching mode.
+ * Return 0 if in neutral (direct io) mode.
+ */
+static int fuse_inode_put_io_cache(struct fuse_inode *fi)
+{
+ assert_spin_locked(&fi->lock);
+ if (WARN_ON(fi->iocachectr <= 0))
+ return -EIO;
+ if (--fi->iocachectr == 0)
+ clear_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
+ return fi->iocachectr;
+}
+
+/*
+ * Requets to deny new opens in caching mode.
+ * Return 0 if denying new opens in caching mode.
+ */
+static int fuse_inode_deny_io_cache(struct fuse_inode *fi)
+{
+ assert_spin_locked(&fi->lock);
+ if (fi->iocachectr > 0)
+ return -ETXTBSY;
+ fi->iocachectr--;
+ return 0;
+}
+
+/*
+ * Release a request to deny open in caching mode.
+ * Return 0 if allowing new opens in caching mode.
+ */
+static int fuse_inode_allow_io_cache(struct fuse_inode *fi)
+{
+ assert_spin_locked(&fi->lock);
+ if (WARN_ON(fi->iocachectr >= 0))
+ return -EIO;
+ fi->iocachectr++;
+ return -fi->iocachectr;
+}
+
+/* Start cached io mode where parallel dio writes are not allowed */
+int fuse_file_cached_io_start(struct inode *inode)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ int err;
+
+ spin_lock(&fi->lock);
+ err = fuse_inode_get_io_cache(fi);
+ spin_unlock(&fi->lock);
+ return err;
+}
+
+void fuse_file_cached_io_end(struct inode *inode)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fi->lock);
+ fuse_inode_put_io_cache(get_fuse_inode(inode));
+ spin_unlock(&fi->lock);
+}
+
+/* Start strictly uncached io mode where cache access is not allowed */
+int fuse_file_uncached_io_start(struct inode *inode)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ int err;
+
+ spin_lock(&fi->lock);
+ err = fuse_inode_deny_io_cache(fi);
+ spin_unlock(&fi->lock);
+ return err;
+}
+
+void fuse_file_uncached_io_end(struct inode *inode)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fi->lock);
+ fuse_inode_allow_io_cache(fi);
+ spin_unlock(&fi->lock);
+}
+
+/* Open flags to determine regular file io mode */
+#define FOPEN_IO_MODE_MASK \
+ (FOPEN_DIRECT_IO | FOPEN_CACHE_IO)
+
+/* Request access to submit new io to inode via open file */
+int fuse_file_io_open(struct file *file, struct inode *inode)
+{
+ struct fuse_file *ff = file->private_data;
+ int iomode_flags = ff->open_flags & FOPEN_IO_MODE_MASK;
+ int err;
+
+ err = -EBADF;
+ if (WARN_ON(!S_ISREG(inode->i_mode)))
+ goto fail;
+
+ err = -EBUSY;
+ if (WARN_ON(ff->io_opened))
+ goto fail;
+
+ /*
+ * io modes are not relevant with DAX and with server that does not
+ * implement open.
+ */
+ err = -EINVAL;
+ if (FUSE_IS_DAX(inode) || !ff->release_args) {
+ if (iomode_flags)
+ goto fail;
+ return 0;
+ }
+
+ /*
+ * FOPEN_CACHE_IO is an internal flag that is set on file not open in
+ * direct io mode and it cannot be set explicitly by the server.
+ * This includes a file open with O_DIRECT, but server did not specify
+ * FOPEN_DIRECT_IO. In this case, a later fcntl() could remove O_DIRECT,
+ * so we put the inode in caching mode to prevent parallel dio.
+ * FOPEN_PARALLEL_DIRECT_WRITES requires FOPEN_DIRECT_IO.
+ */
+ if (ff->open_flags & FOPEN_CACHE_IO) {
+ goto fail;
+ } else if (!(ff->open_flags & FOPEN_DIRECT_IO)) {
+ ff->open_flags |= FOPEN_CACHE_IO;
+ ff->open_flags &= ~FOPEN_PARALLEL_DIRECT_WRITES;
+ }
+
+ /*
+ * First caching file open enters caching inode io mode.
+ * First parallel dio open denies caching inode io mode.
+ */
+ err = 0;
+ if (ff->open_flags & FOPEN_CACHE_IO)
+ err = fuse_file_cached_io_start(inode);
+ else if (ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES)
+ err = fuse_file_uncached_io_start(inode);
+ if (err)
+ goto fail;
+
+ ff->io_opened = true;
+ return 0;
+
+fail:
+ pr_debug("failed to open file in requested io mode (open_flags=0x%x, err=%i).\n",
+ ff->open_flags, err);
+ /*
+ * The file open mode determines the inode io mode.
+ * Using incorrect open mode is a server mistake, which results in
+ * user visible failure of open() with EIO error.
+ */
+ return -EIO;
+}
+
+/* Request access to submit new io to inode via mmap */
+int fuse_file_io_mmap(struct fuse_file *ff, struct inode *inode)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ int err = 0;
+
+ /* There are no io modes if server does not implement open */
+ if (!ff->release_args)
+ return 0;
+
+ if (WARN_ON(!ff->io_opened))
+ return -ENODEV;
+
+ spin_lock(&fi->lock);
+ /* First mmap of direct_io file enters caching inode io mode */
+ if (!(ff->open_flags & FOPEN_CACHE_IO)) {
+ err = fuse_inode_get_io_cache(fi);
+ if (!err)
+ ff->open_flags |= FOPEN_CACHE_IO;
+ }
+ spin_unlock(&fi->lock);
+
+ return err;
+}
+
+/* No more pending io and no new io possible to inode via open/mmapped file */
+void fuse_file_io_release(struct fuse_file *ff, struct inode *inode)
+{
+ if (!ff->io_opened)
+ return;
+
+ /* Last caching file close exits caching inode io mode */
+ if (ff->open_flags & FOPEN_CACHE_IO)
+ fuse_file_cached_io_end(inode);
+
+ ff->io_opened = false;
+}
@@ -353,6 +353,7 @@ struct fuse_file_lock {
* FOPEN_STREAM: the file is stream-like (no file position at all)
* FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE)
* FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode
+ * FOPEN_CACHE_IO: internal flag for mmap of direct_io (resereved for future use)
*/
#define FOPEN_DIRECT_IO (1 << 0)
#define FOPEN_KEEP_CACHE (1 << 1)
@@ -361,6 +362,7 @@ struct fuse_file_lock {
#define FOPEN_STREAM (1 << 4)
#define FOPEN_NOFLUSH (1 << 5)
#define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6)
+#define FOPEN_CACHE_IO (1 << 7)
/**
* INIT request/reply flags