diff mbox series

[2/2] zonefs: Fix O_APPEND async write handling

Message ID 20210315034919.87980-3-damien.lemoal@wdc.com (mailing list archive)
State New, archived
Headers show
Series zonefs fixes | expand

Commit Message

Damien Le Moal March 15, 2021, 3:49 a.m. UTC
zonefs updates the size of a sequential zone file inode only on
completion of direct writes. When executing asynchronous append writes
(with a file open with O_APPEND or using RWF_APPEND), the use of the
current inode size in generic_write_checks() to set an iocb offset thus
leads to unaligned write if an application issues an append write
operation with another write already being executed.

Fix this problem by introducing zonefs_write_checks() as a modified
version of generic_write_checks() using the file inode wp_offset for an
append write iocb offset. Also introduce zonefs_write_check_limits() to
replace generic_write_check_limits() call. This zonefs special helper
makes sure that the maximum file limit used is the maximum size of the
file being accessed.

Since zonefs_write_checks() already truncates the iov_iter, the calls
to iov_iter_truncate() in zonefs_file_dio_write() and
zonefs_file_buffered_write() are removed.

Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system")
Cc: <stable@vger.kernel.org>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
---
 fs/zonefs/super.c | 76 ++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 66 insertions(+), 10 deletions(-)

Comments

Johannes Thumshirn March 15, 2021, 7:14 a.m. UTC | #1
Looks good,
Reviewed-by: Johannes Thumshirn <jhannes.thumshirn@wdc.com>
kernel test robot March 15, 2021, 7:15 a.m. UTC | #2
Hi Damien,

I love your patch! Perhaps something to improve:

[auto build test WARNING on linus/master]
[also build test WARNING on v5.12-rc3 next-20210315]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Damien-Le-Moal/zonefs-fixes/20210315-115123
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 1e28eed17697bcf343c6743f0028cc3b5dd88bf0
config: x86_64-randconfig-a013-20210315 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project a28facba1ccdc957f386b7753f4958307f1bfde8)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # https://github.com/0day-ci/linux/commit/1e8bb76723bbf4072c032334104026a611b2634e
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Damien-Le-Moal/zonefs-fixes/20210315-115123
        git checkout 1e8bb76723bbf4072c032334104026a611b2634e
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> fs/zonefs/super.c:844:6: warning: variable 'ret' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
           if (count <= 0)
               ^~~~~~~~~~
   fs/zonefs/super.c:881:9: note: uninitialized use occurs here
           return ret;
                  ^~~
   fs/zonefs/super.c:844:2: note: remove the 'if' if its condition is always false
           if (count <= 0)
           ^~~~~~~~~~~~~~~
   fs/zonefs/super.c:825:13: note: initialize the variable 'ret' to silence this warning
           ssize_t ret, count;
                      ^
                       = 0
   1 warning generated.


vim +844 fs/zonefs/super.c

   807	
   808	/*
   809	 * Handle direct writes. For sequential zone files, this is the only possible
   810	 * write path. For these files, check that the user is issuing writes
   811	 * sequentially from the end of the file. This code assumes that the block layer
   812	 * delivers write requests to the device in sequential order. This is always the
   813	 * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE
   814	 * elevator feature is being used (e.g. mq-deadline). The block layer always
   815	 * automatically select such an elevator for zoned block devices during the
   816	 * device initialization.
   817	 */
   818	static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
   819	{
   820		struct inode *inode = file_inode(iocb->ki_filp);
   821		struct zonefs_inode_info *zi = ZONEFS_I(inode);
   822		struct super_block *sb = inode->i_sb;
   823		bool sync = is_sync_kiocb(iocb);
   824		bool append = false;
   825		ssize_t ret, count;
   826	
   827		/*
   828		 * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
   829		 * as this can cause write reordering (e.g. the first aio gets EAGAIN
   830		 * on the inode lock but the second goes through but is now unaligned).
   831		 */
   832		if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !sync &&
   833		    (iocb->ki_flags & IOCB_NOWAIT))
   834			return -EOPNOTSUPP;
   835	
   836		if (iocb->ki_flags & IOCB_NOWAIT) {
   837			if (!inode_trylock(inode))
   838				return -EAGAIN;
   839		} else {
   840			inode_lock(inode);
   841		}
   842	
   843		count = zonefs_write_checks(iocb, from);
 > 844		if (count <= 0)
   845			goto inode_unlock;
   846	
   847		if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
   848			ret = -EINVAL;
   849			goto inode_unlock;
   850		}
   851	
   852		/* Enforce sequential writes (append only) in sequential zones */
   853		if (zi->i_ztype == ZONEFS_ZTYPE_SEQ) {
   854			mutex_lock(&zi->i_truncate_mutex);
   855			if (iocb->ki_pos != zi->i_wpoffset) {
   856				mutex_unlock(&zi->i_truncate_mutex);
   857				ret = -EINVAL;
   858				goto inode_unlock;
   859			}
   860			mutex_unlock(&zi->i_truncate_mutex);
   861			append = sync;
   862		}
   863	
   864		if (append)
   865			ret = zonefs_file_dio_append(iocb, from);
   866		else
   867			ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
   868					   &zonefs_write_dio_ops, 0);
   869		if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
   870		    (ret > 0 || ret == -EIOCBQUEUED)) {
   871			if (ret > 0)
   872				count = ret;
   873			mutex_lock(&zi->i_truncate_mutex);
   874			zi->i_wpoffset += count;
   875			mutex_unlock(&zi->i_truncate_mutex);
   876		}
   877	
   878	inode_unlock:
   879		inode_unlock(inode);
   880	
   881		return ret;
   882	}
   883	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Johannes Thumshirn March 15, 2021, 7:21 a.m. UTC | #3
On 15/03/2021 08:16, kernel test robot wrote:
> 818	static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
>    819	{
>    820		struct inode *inode = file_inode(iocb->ki_filp);
>    821		struct zonefs_inode_info *zi = ZONEFS_I(inode);
>    822		struct super_block *sb = inode->i_sb;
>    823		bool sync = is_sync_kiocb(iocb);
>    824		bool append = false;
>    825		ssize_t ret, count;

>    843		count = zonefs_write_checks(iocb, from);
>  > 844		if (count <= 0)
>    845			goto inode_unlock;

Args that needs to be:
			if (count <= 0) {
				ret = count;
				goto inode_unlock;
			}

Sorry for not spotting it.

>    878	inode_unlock:
>    879		inode_unlock(inode);
>    880	
>    881		return ret;
>    882	}
>    883
Damien Le Moal March 15, 2021, 7:22 a.m. UTC | #4
On 2021/03/15 16:21, Johannes Thumshirn wrote:
> On 15/03/2021 08:16, kernel test robot wrote:
>> 818	static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
>>    819	{
>>    820		struct inode *inode = file_inode(iocb->ki_filp);
>>    821		struct zonefs_inode_info *zi = ZONEFS_I(inode);
>>    822		struct super_block *sb = inode->i_sb;
>>    823		bool sync = is_sync_kiocb(iocb);
>>    824		bool append = false;
>>    825		ssize_t ret, count;
> 
>>    843		count = zonefs_write_checks(iocb, from);
>>  > 844		if (count <= 0)
>>    845			goto inode_unlock;
> 
> Args that needs to be:
> 			if (count <= 0) {
> 				ret = count;
> 				goto inode_unlock;
> 			}
> 
> Sorry for not spotting it.

Yep. Sending v2. Weird that gcc does not complain on my local compile...

> 
>>    878	inode_unlock:
>>    879		inode_unlock(inode);
>>    880	
>>    881		return ret;
>>    882	}
>>    883	
>
Nathan Chancellor March 15, 2021, 5:08 p.m. UTC | #5
On Mon, Mar 15, 2021 at 07:22:56AM +0000, Damien Le Moal wrote:
> On 2021/03/15 16:21, Johannes Thumshirn wrote:
> > On 15/03/2021 08:16, kernel test robot wrote:
> >> 818	static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
> >>    819	{
> >>    820		struct inode *inode = file_inode(iocb->ki_filp);
> >>    821		struct zonefs_inode_info *zi = ZONEFS_I(inode);
> >>    822		struct super_block *sb = inode->i_sb;
> >>    823		bool sync = is_sync_kiocb(iocb);
> >>    824		bool append = false;
> >>    825		ssize_t ret, count;
> > 
> >>    843		count = zonefs_write_checks(iocb, from);
> >>  > 844		if (count <= 0)
> >>    845			goto inode_unlock;
> > 
> > Args that needs to be:
> > 			if (count <= 0) {
> > 				ret = count;
> > 				goto inode_unlock;
> > 			}
> > 
> > Sorry for not spotting it.
> 
> Yep. Sending v2. Weird that gcc does not complain on my local compile...

Unfortunately, GCC's version of this warning was disabled for default
compiles by Linus in commit 78a5255ffb6a ("Stop the ad-hoc games with
-Wno-maybe-initialized"). W=2 is required, which can be quite noisy from
my understanding. KCFLAGS=-Wmaybe-uninitialized is a good option.

Cheers,
Nathan

> > 
> >>    878	inode_unlock:
> >>    879		inode_unlock(inode);
> >>    880	
> >>    881		return ret;
> >>    882	}
> >>    883	
> > 
> 
> -- 
> Damien Le Moal
> Western Digital Research
Damien Le Moal March 16, 2021, 1:11 a.m. UTC | #6
On 2021/03/16 2:09, Nathan Chancellor wrote:
> On Mon, Mar 15, 2021 at 07:22:56AM +0000, Damien Le Moal wrote:
>> On 2021/03/15 16:21, Johannes Thumshirn wrote:
>>> On 15/03/2021 08:16, kernel test robot wrote:
>>>> 818	static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
>>>>    819	{
>>>>    820		struct inode *inode = file_inode(iocb->ki_filp);
>>>>    821		struct zonefs_inode_info *zi = ZONEFS_I(inode);
>>>>    822		struct super_block *sb = inode->i_sb;
>>>>    823		bool sync = is_sync_kiocb(iocb);
>>>>    824		bool append = false;
>>>>    825		ssize_t ret, count;
>>>
>>>>    843		count = zonefs_write_checks(iocb, from);
>>>>  > 844		if (count <= 0)
>>>>    845			goto inode_unlock;
>>>
>>> Args that needs to be:
>>> 			if (count <= 0) {
>>> 				ret = count;
>>> 				goto inode_unlock;
>>> 			}
>>>
>>> Sorry for not spotting it.
>>
>> Yep. Sending v2. Weird that gcc does not complain on my local compile...
> 
> Unfortunately, GCC's version of this warning was disabled for default
> compiles by Linus in commit 78a5255ffb6a ("Stop the ad-hoc games with
> -Wno-maybe-initialized"). W=2 is required, which can be quite noisy from
> my understanding. KCFLAGS=-Wmaybe-uninitialized is a good option.

I was not aware of that change. Thanks for the information !
diff mbox series

Patch

diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index a3d074f98660..81836a5b436e 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -743,6 +743,68 @@  static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
 	return ret;
 }
 
+/*
+ * Do not exceed the LFS limits nor the file zone size. If pos is under the
+ * limit it becomes a short access. If it exceeds the limit, return -EFBIG.
+ */
+static loff_t zonefs_write_check_limits(struct file *file, loff_t pos,
+					loff_t count)
+{
+	struct inode *inode = file_inode(file);
+	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	loff_t limit = rlimit(RLIMIT_FSIZE);
+	loff_t max_size = zi->i_max_size;
+
+	if (limit != RLIM_INFINITY) {
+		if (pos >= limit) {
+			send_sig(SIGXFSZ, current, 0);
+			return -EFBIG;
+		}
+		count = min(count, limit - pos);
+	}
+
+	if (!(file->f_flags & O_LARGEFILE))
+		max_size = min_t(loff_t, MAX_NON_LFS, max_size);
+
+	if (unlikely(pos >= max_size))
+		return -EFBIG;
+
+	return min(count, max_size - pos);
+}
+
+static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file_inode(file);
+	struct zonefs_inode_info *zi = ZONEFS_I(inode);
+	loff_t count;
+
+	if (IS_SWAPFILE(inode))
+		return -ETXTBSY;
+
+	if (!iov_iter_count(from))
+		return 0;
+
+	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+		return -EINVAL;
+
+	if (iocb->ki_flags & IOCB_APPEND) {
+		if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
+			return -EINVAL;
+		mutex_lock(&zi->i_truncate_mutex);
+		iocb->ki_pos = zi->i_wpoffset;
+		mutex_unlock(&zi->i_truncate_mutex);
+	}
+
+	count = zonefs_write_check_limits(file, iocb->ki_pos,
+					  iov_iter_count(from));
+	if (count < 0)
+		return count;
+
+	iov_iter_truncate(from, count);
+	return iov_iter_count(from);
+}
+
 /*
  * Handle direct writes. For sequential zone files, this is the only possible
  * write path. For these files, check that the user is issuing writes
@@ -760,8 +822,7 @@  static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
 	struct super_block *sb = inode->i_sb;
 	bool sync = is_sync_kiocb(iocb);
 	bool append = false;
-	size_t count;
-	ssize_t ret;
+	ssize_t ret, count;
 
 	/*
 	 * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
@@ -779,13 +840,10 @@  static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
 		inode_lock(inode);
 	}
 
-	ret = generic_write_checks(iocb, from);
-	if (ret <= 0)
+	count = zonefs_write_checks(iocb, from);
+	if (count <= 0)
 		goto inode_unlock;
 
-	iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos);
-	count = iov_iter_count(from);
-
 	if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
 		ret = -EINVAL;
 		goto inode_unlock;
@@ -844,12 +902,10 @@  static ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
 		inode_lock(inode);
 	}
 
-	ret = generic_write_checks(iocb, from);
+	ret = zonefs_write_checks(iocb, from);
 	if (ret <= 0)
 		goto inode_unlock;
 
-	iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos);
-
 	ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops);
 	if (ret > 0)
 		iocb->ki_pos += ret;