@@ -280,7 +280,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
loop_iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
file_start_write(file);
- bw = vfs_iter_write(file, &i, ppos, 0);
+ bw = vfs_iter_write(file, &i, ppos, _RWF_DEFAULT);
file_end_write(file);
if (likely(bw == bvec->bv_len))
@@ -356,7 +356,8 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq,
rq_for_each_segment(bvec, rq, iter) {
loop_iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
- len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
+ len = vfs_iter_read(lo->lo_backing_file, &i, &pos,
+ _RWF_DEFAULT);
if (len < 0)
return len;
@@ -397,7 +398,8 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq,
b.bv_len = bvec.bv_len;
loop_iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
- len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
+ len = vfs_iter_read(lo->lo_backing_file, &i, &pos,
+ _RWF_DEFAULT);
if (len < 0) {
ret = len;
goto out_free_page;
@@ -307,7 +307,7 @@ static ssize_t ashmem_read_iter(struct kiocb *iocb, struct iov_iter *iter)
* ashmem_release is called.
*/
mutex_unlock(&ashmem_mutex);
- ret = vfs_iter_read(asma->file, iter, &iocb->ki_pos, 0);
+ ret = vfs_iter_read(asma->file, iter, &iocb->ki_pos, _RWF_DEFAULT);
mutex_lock(&ashmem_mutex);
if (ret > 0)
asma->file->f_pos = iocb->ki_pos;
@@ -355,9 +355,9 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
iov_iter_bvec(&iter, READ, bvec, sgl_nents, len);
if (is_write)
- ret = vfs_iter_write(fd, &iter, &pos, 0);
+ ret = vfs_iter_write(fd, &iter, &pos, _RWF_DEFAULT);
else
- ret = vfs_iter_read(fd, &iter, &pos, 0);
+ ret = vfs_iter_read(fd, &iter, &pos, _RWF_DEFAULT);
if (is_write) {
if (ret < 0 || ret != data_length) {
@@ -491,7 +491,7 @@ fd_execute_write_same(struct se_cmd *cmd)
}
iov_iter_bvec(&iter, READ, bvec, nolb, len);
- ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos, 0);
+ ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos, _RWF_DEFAULT);
kfree(bvec);
if (ret < 0 || ret != len) {
@@ -35,7 +35,8 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
- return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos, 0);
+ return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos,
+ _RWF_DEFAULT);
}
static ssize_t
@@ -52,7 +53,8 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
host_file = cfi->cfi_container;
file_start_write(host_file);
inode_lock(coda_inode);
- ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0);
+ ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos,
+ _RWF_DEFAULT);
coda_inode->i_size = file_inode(host_file)->i_size;
coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
coda_inode->i_mtime = coda_inode->i_ctime = current_time(coda_inode);
@@ -943,7 +943,7 @@ __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
trace_nfsd_read_vector(rqstp, fhp, offset, *count);
iov_iter_kvec(&iter, READ, vec, vlen, *count);
- host_err = vfs_iter_read(file, &iter, &offset, 0);
+ host_err = vfs_iter_read(file, &iter, &offset, _RWF_DEFAULT);
return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
}
@@ -996,7 +996,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
int use_wgather;
loff_t pos = offset;
unsigned int pflags = current->flags;
- rwf_t flags = 0;
+ rwf_t flags = rwf_flags(file);
trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);
@@ -174,23 +174,6 @@ static void ovl_file_accessed(struct file *file)
touch_atime(&file->f_path);
}
-static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
-{
- int ifl = iocb->ki_flags;
- rwf_t flags = 0;
-
- if (ifl & IOCB_NOWAIT)
- flags |= RWF_NOWAIT;
- if (ifl & IOCB_HIPRI)
- flags |= RWF_HIPRI;
- if (ifl & IOCB_DSYNC)
- flags |= RWF_DSYNC;
- if (ifl & IOCB_SYNC)
- flags |= RWF_SYNC;
-
- return flags;
-}
-
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
@@ -207,7 +190,7 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
old_cred = ovl_override_creds(file_inode(file)->i_sb);
ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
+ rwf_from_iocb_flags(iocb));
revert_creds(old_cred);
ovl_file_accessed(file);
@@ -242,7 +225,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
old_cred = ovl_override_creds(file_inode(file)->i_sb);
file_start_write(real.file);
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb));
+ rwf_from_iocb_flags(iocb));
file_end_write(real.file);
revert_creds(old_cred);
@@ -670,6 +670,8 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
ssize_t ret;
init_sync_kiocb(&kiocb, filp);
+ if (flags == _RWF_DEFAULT)
+ flags = rwf_flags(filp);
ret = kiocb_set_rw_flags(&kiocb, flags);
if (ret)
return ret;
@@ -690,7 +692,7 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
{
ssize_t ret = 0;
- if (flags & ~RWF_HIPRI)
+ if ((flags != _RWF_DEFAULT) && (flags & ~RWF_HIPRI))
return -EOPNOTSUPP;
while (iov_iter_count(iter)) {
@@ -1101,13 +1103,13 @@ static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen)
{
- return do_readv(fd, vec, vlen, 0);
+ return do_readv(fd, vec, vlen, _RWF_DEFAULT);
}
SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen)
{
- return do_writev(fd, vec, vlen, 0);
+ return do_writev(fd, vec, vlen, _RWF_DEFAULT);
}
SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
@@ -1115,7 +1117,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
{
loff_t pos = pos_from_hilo(pos_h, pos_l);
- return do_preadv(fd, vec, vlen, pos, 0);
+ return do_preadv(fd, vec, vlen, pos, _RWF_DEFAULT);
}
SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
@@ -1124,6 +1126,9 @@ SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
{
loff_t pos = pos_from_hilo(pos_h, pos_l);
+ if (flags == _RWF_DEFAULT)
+ return -EOPNOTSUPP;
+
if (pos == -1)
return do_readv(fd, vec, vlen, flags);
@@ -1135,7 +1140,7 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
{
loff_t pos = pos_from_hilo(pos_h, pos_l);
- return do_pwritev(fd, vec, vlen, pos, 0);
+ return do_pwritev(fd, vec, vlen, pos, _RWF_DEFAULT);
}
SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
@@ -1144,6 +1149,9 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
{
loff_t pos = pos_from_hilo(pos_h, pos_l);
+ if (flags == _RWF_DEFAULT)
+ return -EOPNOTSUPP;
+
if (pos == -1)
return do_writev(fd, vec, vlen, flags);
@@ -1221,7 +1229,7 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
const struct compat_iovec __user *,vec,
unsigned long, vlen, loff_t, pos)
{
- return do_compat_preadv64(fd, vec, vlen, pos, 0);
+ return do_compat_preadv64(fd, vec, vlen, pos, _RWF_DEFAULT);
}
#endif
@@ -1231,7 +1239,7 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
- return do_compat_preadv64(fd, vec, vlen, pos, 0);
+ return do_compat_preadv64(fd, vec, vlen, pos, _RWF_DEFAULT);
}
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
@@ -1239,6 +1247,9 @@ COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
const struct compat_iovec __user *,vec,
unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
+ if (flags == _RWF_DEFAULT)
+ return -EOPNOTSUPP;
+
if (pos == -1)
return do_compat_readv(fd, vec, vlen, flags);
@@ -1253,6 +1264,9 @@ COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+ if (flags == _RWF_DEFAULT)
+ return -EOPNOTSUPP;
+
if (pos == -1)
return do_compat_readv(fd, vec, vlen, flags);
@@ -1303,7 +1317,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
const struct compat_iovec __user *, vec,
compat_ulong_t, vlen)
{
- return do_compat_writev(fd, vec, vlen, 0);
+ return do_compat_writev(fd, vec, vlen, _RWF_DEFAULT);
}
static long do_compat_pwritev64(unsigned long fd,
@@ -1330,7 +1344,7 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
const struct compat_iovec __user *,vec,
unsigned long, vlen, loff_t, pos)
{
- return do_compat_pwritev64(fd, vec, vlen, pos, 0);
+ return do_compat_pwritev64(fd, vec, vlen, pos, _RWF_DEFAULT);
}
#endif
@@ -1340,7 +1354,7 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
- return do_compat_pwritev64(fd, vec, vlen, pos, 0);
+ return do_compat_pwritev64(fd, vec, vlen, pos, _RWF_DEFAULT);
}
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
@@ -1348,6 +1362,9 @@ COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
const struct compat_iovec __user *,vec,
unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
+ if (flags == _RWF_DEFAULT)
+ return -EOPNOTSUPP;
+
if (pos == -1)
return do_compat_writev(fd, vec, vlen, flags);
@@ -1361,6 +1378,9 @@ COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+ if (flags == _RWF_DEFAULT)
+ return -EOPNOTSUPP;
+
if (pos == -1)
return do_compat_writev(fd, vec, vlen, flags);
@@ -302,6 +302,12 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
iov_iter_pipe(&to, READ, pipe, len);
idx = to.idx;
init_sync_kiocb(&kiocb, in);
+ /*
+ * SPLICE_F_NONBLOCK is only used on the pipe/socket end???
+ *
+ * Don't set IOCB_NOWAIT based on it here (O_NONBLOCK might set
+ * IOCB_NOWAIT though).
+ */
kiocb.ki_pos = *ppos;
ret = call_read_iter(in, &kiocb, &to);
if (ret > 0) {
@@ -355,7 +361,8 @@ static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
old_fs = get_fs();
set_fs(KERNEL_DS);
/* The cast to a user pointer is valid due to the set_fs() */
- res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0);
+ res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos,
+ _RWF_DEFAULT);
set_fs(old_fs);
return res;
@@ -742,7 +749,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
}
iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left);
- ret = vfs_iter_write(out, &from, &sd.pos, 0);
+ ret = vfs_iter_write(out, &from, &sd.pos, _RWF_DEFAULT);
if (ret <= 0)
break;
@@ -83,6 +83,12 @@ extern int sysctl_protected_fifos;
extern int sysctl_protected_regular;
typedef __kernel_rwf_t rwf_t;
+/*
+ * kernel internal only value to signal using default flags from file;
+ * can't be combined with other flags and only used to simplify kernel
+ * APIs.
+ */
+#define _RWF_DEFAULT ((__force __kernel_rwf_t)-1)
struct buffer_head;
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
@@ -3332,11 +3338,21 @@ static inline int iocb_flags(struct file *file)
res |= IOCB_DSYNC;
if (file->f_flags & __O_SYNC)
res |= IOCB_SYNC;
+ if ((file->f_flags & O_NONBLOCK) && (file->f_mode & FMODE_NOWAIT))
+ res |= IOCB_NOWAIT;
return res;
}
+/* does NOT handle _RWF_DEFAULT */
static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
{
+ /*
+ * unset all flags controlled through RWF, right now all but:
+ * IOCB_EVENTFD, IOCB_DIRECT, IOCB_WRITE
+ */
+ ki->ki_flags &= ~(IOCB_APPEND | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC |
+ IOCB_NOWAIT);
+
if (unlikely(flags & ~RWF_SUPPORTED))
return -EOPNOTSUPP;
@@ -3356,6 +3372,41 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
return 0;
}
+static inline rwf_t rwf_flags(struct file *file)
+{
+ rwf_t flags = 0;
+
+ if (file->f_flags & O_APPEND)
+ flags |= RWF_APPEND;
+ if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))
+ flags |= RWF_DSYNC;
+ if (file->f_flags & __O_SYNC)
+ flags |= RWF_SYNC;
+ if ((file->f_flags & O_NONBLOCK) && (file->f_mode & FMODE_NOWAIT))
+ flags |= RWF_NOWAIT;
+
+ return flags;
+}
+
+static inline rwf_t rwf_from_iocb_flags(struct kiocb *iocb)
+{
+ int ifl = iocb->ki_flags;
+ rwf_t flags = 0;
+
+ if (ifl & IOCB_APPEND)
+ flags |= RWF_APPEND;
+ if (ifl & IOCB_NOWAIT)
+ flags |= RWF_NOWAIT;
+ if (ifl & IOCB_HIPRI)
+ flags |= RWF_HIPRI;
+ if (ifl & IOCB_DSYNC)
+ flags |= RWF_DSYNC;
+ if (ifl & IOCB_SYNC)
+ flags |= RWF_SYNC;
+
+ return flags;
+}
+
static inline ino_t parent_ino(struct dentry *dentry)
{
ino_t res;
Mapping between IOCB, RWF and file flags: IOCB_APPEND | RWF_APPEND | O_APPEND IOCB_HIPRI | RWF_HIPRI | - IOCB_DSYNC | RWF_DSYNC | O_DSYNC || IS_SYNC(f_mapping->host) IOCB_SYNC | RWF_SYNC | __O_SYNC IOCB_NOWAIT | RWF_NOWAIT | O_NONBLOCK && (f_mode & FMODE_NOWAIT) IOCB_DIRECT | - | io_is_direct() Most internal kernel functions taking rwf_t flags support taking _RWF_DEFAULT instead of individual flags to signal "use default flags from file flags"; APIs exposed to userspace should return EOPNOTSUPP when they are passed _RWF_DEFAULT. Also convert O_NONBLOCK file flag to IOCB_NOWAIT if supported by the file (i.e. FMODE_NOWAIT is set), so read_iter/write_iter implementations can consistently check for IOCB_NOWAIT instead of O_NONBLOCK. Signed-off-by: Stefan Bühler <source@stbuehler.de> --- drivers/block/loop.c | 8 +++-- drivers/staging/android/ashmem.c | 2 +- drivers/target/target_core_file.c | 6 ++-- fs/coda/file.c | 6 ++-- fs/nfsd/vfs.c | 4 +-- fs/overlayfs/file.c | 21 ++----------- fs/read_write.c | 40 ++++++++++++++++++------ fs/splice.c | 11 +++++-- include/linux/fs.h | 51 +++++++++++++++++++++++++++++++ 9 files changed, 107 insertions(+), 42 deletions(-)