diff mbox series

[f2fs-dev,4/4] f2fs: atomic: fix to forbid dio in atomic_file

Message ID 20240625031351.3586955-4-chao@kernel.org (mailing list archive)
State Accepted
Commit 374a8881ce4ccf787f5381a39f825cb17a3f6b14
Headers show
Series [f2fs-dev,1/4] f2fs: atomic: fix to avoid racing w/ GC | expand

Commit Message

Chao Yu June 25, 2024, 3:13 a.m. UTC
atomic write can only be used via buffered IO, let's fail direct IO on
atomic_file and return -EOPNOTSUPP.

Signed-off-by: Chao Yu <chao@kernel.org>
---
 fs/f2fs/file.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

Comments

Sunmin Jeong July 2, 2024, 12:13 p.m. UTC | #1
Hello

I also have been thinking about the GC of the atomic file these days. I
read your patches for atomic write, but I think there are some corner
cases that can't be resolved with them.

First is when the GC of the atomic file occurs right after the writeback
of newly updated page. Since the atomic page flag is cleared at the end
of f2fs_do_write_data_page, the GC thread will set the page dirty and
let it be written to the atomic file, which will cause the data
corruption of the original inode.

Second is the foreground GC of atomic file. Although your patch can
distinguish whether pages should be written to the original inode or cow
inode, it can't handle the case when the atomic page needs to be
migrated but updated page already exists in the page cache as below.

// atomic file's 1st old block is a and new block is b.
// b is in the page cache
GC thread(FG_GC)
  - select A as a victim segment
  do_garbage_collect
    - iget atomic file's inode for block a
    move_data_page
      f2fs_do_write_data_page
        - use dn of cow inode since b has atomic flag
    - seg_freed is 0 since block a is still valid
    - goto gc_more and A is selected as victim again

Third is a race condition between GC of cow file and writeback thread of
atomic file. Since there are two page caches for one dnode, I think we
need to consider the race condition between them such as the case
between the file inode and the meta inode.

I submitted a patch set for atomic write, so could you review it?
The patch links are as below.
https://sourceforge.net/p/linux-f2fs/mailman/message/58790988/
https://sourceforge.net/p/linux-f2fs/mailman/message/58790989/

Thanks



> atomic write can only be used via buffered IO, let's fail direct IO on
> atomic_file and return -EOPNOTSUPP.
> 
> Signed-off-by: Chao Yu <chao@kernel.org>
> ---
>  fs/f2fs/file.c | 15 +++++++++++++++
>  1 file changed, 15 insertions(+)
> 
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index
> 0355cb054521..a527de1e7a2f 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -2150,6 +2150,7 @@ static int f2fs_ioc_start_atomic_write(struct file
> *filp, bool truncate)
>  		goto out;
> 
>  	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
> +	f2fs_down_write(&fi->i_gc_rwsem[READ]);
> 
>  	/*
>  	 * Should wait end_io to count F2FS_WB_CP_DATA correctly by @@ -
> 2209,6 +2210,7 @@ static int f2fs_ioc_start_atomic_write(struct file
*filp,
> bool truncate)
>  	}
>  	f2fs_i_size_write(fi->cow_inode, isize);
> 
> +	f2fs_up_write(&fi->i_gc_rwsem[READ]);
>  	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
> 
>  	f2fs_update_time(sbi, REQ_TIME);
> @@ -4537,6 +4539,13 @@ static ssize_t f2fs_dio_read_iter(struct kiocb
> *iocb, struct iov_iter *to)
>  		f2fs_down_read(&fi->i_gc_rwsem[READ]);
>  	}
> 
> +	/* dio is not compatible w/ atomic file */
> +	if (f2fs_is_atomic_file(inode)) {
> +		f2fs_up_read(&fi->i_gc_rwsem[READ]);
> +		ret = -EOPNOTSUPP;
> +		goto out;
> +	}
> +
>  	/*
>  	 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead
> of
>  	 * the higher-level function iomap_dio_rw() in order to ensure that
> the @@ -4948,6 +4957,12 @@ static ssize_t f2fs_file_write_iter(struct
> kiocb *iocb, struct iov_iter *from)
>  	/* Determine whether we will do a direct write or a buffered write.
> */
>  	dio = f2fs_should_use_dio(inode, iocb, from);
> 
> +	/* dio is not compatible w/ atomic write */
> +	if (dio && f2fs_is_atomic_file(inode)) {
> +		ret = -EOPNOTSUPP;
> +		goto out_unlock;
> +	}
> +
>  	/* Possibly preallocate the blocks for the write. */
>  	target_size = iocb->ki_pos + iov_iter_count(from);
>  	preallocated = f2fs_preallocate_blocks(iocb, from, dio);
> --
> 2.40.1
> 
> 
> 
> _______________________________________________
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
diff mbox series

Patch

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0355cb054521..a527de1e7a2f 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2150,6 +2150,7 @@  static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
 		goto out;
 
 	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
+	f2fs_down_write(&fi->i_gc_rwsem[READ]);
 
 	/*
 	 * Should wait end_io to count F2FS_WB_CP_DATA correctly by
@@ -2209,6 +2210,7 @@  static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
 	}
 	f2fs_i_size_write(fi->cow_inode, isize);
 
+	f2fs_up_write(&fi->i_gc_rwsem[READ]);
 	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
 
 	f2fs_update_time(sbi, REQ_TIME);
@@ -4537,6 +4539,13 @@  static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		f2fs_down_read(&fi->i_gc_rwsem[READ]);
 	}
 
+	/* dio is not compatible w/ atomic file */
+	if (f2fs_is_atomic_file(inode)) {
+		f2fs_up_read(&fi->i_gc_rwsem[READ]);
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
 	/*
 	 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
 	 * the higher-level function iomap_dio_rw() in order to ensure that the
@@ -4948,6 +4957,12 @@  static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	/* Determine whether we will do a direct write or a buffered write. */
 	dio = f2fs_should_use_dio(inode, iocb, from);
 
+	/* dio is not compatible w/ atomic write */
+	if (dio && f2fs_is_atomic_file(inode)) {
+		ret = -EOPNOTSUPP;
+		goto out_unlock;
+	}
+
 	/* Possibly preallocate the blocks for the write. */
 	target_size = iocb->ki_pos + iov_iter_count(from);
 	preallocated = f2fs_preallocate_blocks(iocb, from, dio);