@@ -1125,7 +1125,7 @@ static void update_time_for_write(struct inode *inode)
}
static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
- size_t count)
+ size_t count, unsigned int *ilock_flags)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
@@ -1145,9 +1145,17 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
return -EAGAIN;
- ret = file_remove_privs(file);
- if (ret)
- return ret;
+ ret = file_needs_remove_privs(file);
+ if (ret) {
+ if (ilock_flags && *ilock_flags & BTRFS_ILOCK_SHARED) {
+ *ilock_flags &= ~BTRFS_ILOCK_SHARED;
+ return -EAGAIN;
+ }
+
+ ret = file_remove_privs(file);
+ if (ret)
+ return ret;
+ }
/*
* We reserve space for updating the inode when we reserve space for the
@@ -1204,7 +1212,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
if (ret <= 0)
goto out;
- ret = btrfs_write_check(iocb, i, ret);
+ ret = btrfs_write_check(iocb, i, ret, NULL);
if (ret < 0)
goto out;
@@ -1462,13 +1470,16 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
ssize_t err;
unsigned int ilock_flags = 0;
struct iomap_dio *dio;
+ bool has_shared_lock;
if (iocb->ki_flags & IOCB_NOWAIT)
ilock_flags |= BTRFS_ILOCK_TRY;
/* If the write DIO is within EOF, use a shared lock */
- if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode))
+ if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode)) {
ilock_flags |= BTRFS_ILOCK_SHARED;
+ has_shared_lock = true;
+ }
relock:
err = btrfs_inode_lock(BTRFS_I(inode), ilock_flags);
@@ -1481,8 +1492,17 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
return err;
}
- err = btrfs_write_check(iocb, from, err);
+ /* might unset BTRFS_ILOCK_SHARED */
+ err = btrfs_write_check(iocb, from, err, &ilock_flags);
if (err < 0) {
+ if (err == -EAGAIN && has_shared_lock &&
+ !(ilock_flags & BTRFS_ILOCK_SHARED)) {
+ btrfs_inode_unlock(BTRFS_I(inode),
+ ilock_flags | BTRFS_ILOCK_SHARED);
+ has_shared_lock = false;
+ goto relock;
+ }
+
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
goto out;
}
@@ -1496,6 +1516,7 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
pos + iov_iter_count(from) > i_size_read(inode)) {
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
ilock_flags &= ~BTRFS_ILOCK_SHARED;
+ has_shared_lock = false;
goto relock;
}
@@ -1632,7 +1653,7 @@ static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
if (ret || encoded->len == 0)
goto out;
- ret = btrfs_write_check(iocb, from, encoded->len);
+ ret = btrfs_write_check(iocb, from, encoded->len, NULL);
if (ret < 0)
goto out;
file_remove_privs() might call into notify_change(), which requires holding an exclusive lock. In order to keep the shared lock for most IOs, the write path now first checks whether privilege changes are needed, then switches to the exclusive lock, rechecks, and only then calls file_remove_privs(). This makes use of the newly exported function file_needs_remove_privs(). The file_remove_privs() code path is not optimized, under the assumption that it is a rare call (file_remove_privs() calls file_needs_remove_privs() a second time). Fixes: e9adabb9712e ("btrfs: use shared lock for direct writes within EOF") Cc: Goldwyn Rodrigues <rgoldwyn@suse.com> Cc: Miklos Szeredi <miklos@szeredi.hu> Cc: Dharmendra Singh <dsingh@ddn.com> Cc: Chris Mason <clm@fb.com> Cc: Josef Bacik <josef@toxicpanda.com> Cc: David Sterba <dsterba@suse.com> Cc: linux-btrfs@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Bernd Schubert <bschubert@ddn.com> --- fs/btrfs/file.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-)