diff mbox series

[RFC,v2,1/5] nfs: Fix write to swapfile failure due to generic_write_checks()

Message ID 162879972678.3306668.10709543333474121000.stgit@warthog.procyon.org.uk (mailing list archive)
State New
Headers show
Series mm: Fix NFS swapfiles and use DIO for swapfiles | expand

Commit Message

David Howells Aug. 12, 2021, 8:22 p.m. UTC
Trying to use a swapfile on NFS results in every DIO write failing with
ETXTBSY because generic_write_checks(), as called by nfs_direct_write()
from nfs_direct_IO(), forbids writes to swapfiles.

Fix this by introducing a new kiocb flag, IOCB_SWAP, that's set by the swap
code to indicate that the swapper is doing this operation and so overrule
the check in generic_write_checks().

Without this patch, the following is seen:

	Write error on dio swapfile (3800334336)

Altering __swap_writepage() to show the error shows:

	Write error (-26) on dio swapfile (3800334336)

Tested by swapping off all swap partitions and then swapping on a prepared
NFS file (CONFIG_NFS_SWAP=y is also needed).  Enough copies of the
following program then need to be run to force swapping to occur (at least
one per gigabyte of RAM):

	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>
	#include <sys/mman.h>
	int main()
	{
		unsigned int pid = getpid(), iterations = 0;
		size_t i, j, size = 1024 * 1024 * 1024;
		char *p;
		bool mismatch;
		p = malloc(size);
		if (!p) {
			perror("malloc");
			exit(1);
		}
		srand(pid);
		for (i = 0; i < size; i += 4)
			*(unsigned int *)(p + i) = rand();
		do {
			for (j = 0; j < 16; j++) {
				for (i = 0; i < size; i += 4096)
					*(unsigned int *)(p + i) += 1;
				iterations++;
			}
			mismatch = false;
			srand(pid);
			for (i = 0; i < size; i += 4) {
				unsigned int r = rand();
				unsigned int v = *(unsigned int *)(p + i);
				if (i % 4096 == 0)
					v -= iterations;
				if (v != r) {
					fprintf(stderr, "mismatch %zx: %x != %x (diff %x)\n",
						i, v, r, v - r);
					mismatch = true;
				}
			}
		} while (!mismatch);
		exit(1);
	}


Fixes: dc617f29dbe5 ("vfs: don't allow writes to swap files")
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Darrick J. Wong <darrick.wong@oracle.com>
cc: Christoph Hellwig <hch@lst.de>
cc: Trond Myklebust <trond.myklebust@primarydata.com>
cc: linux-nfs@vger.kernel.org
---

 fs/read_write.c    |    2 +-
 include/linux/fs.h |    1 +
 mm/page_io.c       |    7 ++++---
 3 files changed, 6 insertions(+), 4 deletions(-)

Comments

Matthew Wilcox Aug. 13, 2021, 3:09 a.m. UTC | #1
On Thu, Aug 12, 2021 at 09:22:06PM +0100, David Howells wrote:
> Trying to use a swapfile on NFS results in every DIO write failing with
> ETXTBSY because generic_write_checks(), as called by nfs_direct_write()
> from nfs_direct_IO(), forbids writes to swapfiles.

Why does nfs_direct_write() call generic_write_checks()?

ie call generic_write_checks() earlier, and only swap would bypass them.

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 2e894fec036b..7e2ca6b5fc5f 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -905,9 +905,6 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
 	dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
 		file, iov_iter_count(iter), (long long) iocb->ki_pos);
 
-	result = generic_write_checks(iocb, iter);
-	if (result <= 0)
-		return result;
 	count = result;
 	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
 
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 1fef107961bc..91b2e3214836 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -611,6 +611,10 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 	errseq_t since;
 	int error;
 
+	result = generic_write_checks(iocb, from);
+	if (result < 0)
+		return result;
+
 	result = nfs_key_timeout_notify(file, inode);
 	if (result)
 		return result;
@@ -621,8 +625,6 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 	dprintk("NFS: write(%pD2, %zu@%Ld)\n",
 		file, iov_iter_count(from), (long long) iocb->ki_pos);
 
-	if (IS_SWAPFILE(inode))
-		goto out_swapfile;
 	/*
 	 * O_APPEND implies that we must revalidate the file length.
 	 */
@@ -636,7 +638,6 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 
 	since = filemap_sample_wb_err(file->f_mapping);
 	nfs_start_io_write(inode);
-	result = generic_write_checks(iocb, from);
 	if (result > 0) {
 		current->backing_dev_info = inode_to_bdi(inode);
 		result = generic_perform_write(file, from, iocb->ki_pos);
@@ -677,10 +678,6 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
 out:
 	return result;
-
-out_swapfile:
-	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
-	return -ETXTBSY;
 }
 EXPORT_SYMBOL_GPL(nfs_file_write);
Christoph Hellwig Aug. 13, 2021, 7:02 a.m. UTC | #2
On Fri, Aug 13, 2021 at 04:09:39AM +0100, Matthew Wilcox wrote:
> On Thu, Aug 12, 2021 at 09:22:06PM +0100, David Howells wrote:
> > Trying to use a swapfile on NFS results in every DIO write failing with
> > ETXTBSY because generic_write_checks(), as called by nfs_direct_write()
> > from nfs_direct_IO(), forbids writes to swapfiles.
> 
> Why does nfs_direct_write() call generic_write_checks()?
> 
> ie call generic_write_checks() earlier, and only swap would bypass them.

Yes, something like that is a good idea probably.  Additionally I'd like
to move to a separate of for swap I/O ASAP given that NFS only
implemens ->direct_IO for swap.
NeilBrown Aug. 19, 2021, 10:38 p.m. UTC | #3
On Fri, 13 Aug 2021, David Howells wrote:
> Trying to use a swapfile on NFS results in every DIO write failing with
> ETXTBSY because generic_write_checks(), as called by nfs_direct_write()
> from nfs_direct_IO(), forbids writes to swapfiles.

Could we just remove this check from generic_write_checks(), and instead
call deny_write_access() in swap_on?
Then user-space wouldn't be able to open a swap-file for write, so there
would be no need to check on every single write.

NeilBrown


> 
> Fix this by introducing a new kiocb flag, IOCB_SWAP, that's set by the swap
> code to indicate that the swapper is doing this operation and so overrule
> the check in generic_write_checks().
> 
> Without this patch, the following is seen:
> 
> 	Write error on dio swapfile (3800334336)
> 
> Altering __swap_writepage() to show the error shows:
> 
> 	Write error (-26) on dio swapfile (3800334336)
> 
> Tested by swapping off all swap partitions and then swapping on a prepared
> NFS file (CONFIG_NFS_SWAP=y is also needed).  Enough copies of the
> following program then need to be run to force swapping to occur (at least
> one per gigabyte of RAM):
> 
> 	#include <stdbool.h>
> 	#include <stdio.h>
> 	#include <stdlib.h>
> 	#include <unistd.h>
> 	#include <sys/mman.h>
> 	int main()
> 	{
> 		unsigned int pid = getpid(), iterations = 0;
> 		size_t i, j, size = 1024 * 1024 * 1024;
> 		char *p;
> 		bool mismatch;
> 		p = malloc(size);
> 		if (!p) {
> 			perror("malloc");
> 			exit(1);
> 		}
> 		srand(pid);
> 		for (i = 0; i < size; i += 4)
> 			*(unsigned int *)(p + i) = rand();
> 		do {
> 			for (j = 0; j < 16; j++) {
> 				for (i = 0; i < size; i += 4096)
> 					*(unsigned int *)(p + i) += 1;
> 				iterations++;
> 			}
> 			mismatch = false;
> 			srand(pid);
> 			for (i = 0; i < size; i += 4) {
> 				unsigned int r = rand();
> 				unsigned int v = *(unsigned int *)(p + i);
> 				if (i % 4096 == 0)
> 					v -= iterations;
> 				if (v != r) {
> 					fprintf(stderr, "mismatch %zx: %x != %x (diff %x)\n",
> 						i, v, r, v - r);
> 					mismatch = true;
> 				}
> 			}
> 		} while (!mismatch);
> 		exit(1);
> 	}
> 
> 
> Fixes: dc617f29dbe5 ("vfs: don't allow writes to swap files")
> Signed-off-by: David Howells <dhowells@redhat.com>
> cc: Darrick J. Wong <darrick.wong@oracle.com>
> cc: Christoph Hellwig <hch@lst.de>
> cc: Trond Myklebust <trond.myklebust@primarydata.com>
> cc: linux-nfs@vger.kernel.org
> ---
> 
>  fs/read_write.c    |    2 +-
>  include/linux/fs.h |    1 +
>  mm/page_io.c       |    7 ++++---
>  3 files changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/read_write.c b/fs/read_write.c
> index 9db7adf160d2..daef721ca67e 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1646,7 +1646,7 @@ ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
>  	loff_t count;
>  	int ret;
>  
> -	if (IS_SWAPFILE(inode))
> +	if (IS_SWAPFILE(inode) && !(iocb->ki_flags & IOCB_SWAP))
>  		return -ETXTBSY;
>  
>  	if (!iov_iter_count(from))
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 640574294216..b3e6a20f28ef 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -319,6 +319,7 @@ enum rw_hint {
>  /* iocb->ki_waitq is valid */
>  #define IOCB_WAITQ		(1 << 19)
>  #define IOCB_NOIO		(1 << 20)
> +#define IOCB_SWAP		(1 << 21)	/* This is a swap request */
>  
>  struct kiocb {
>  	struct file		*ki_filp;
> diff --git a/mm/page_io.c b/mm/page_io.c
> index d597bc6e6e45..edb72bf624d2 100644
> --- a/mm/page_io.c
> +++ b/mm/page_io.c
> @@ -303,7 +303,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
>  
>  		iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
>  		init_sync_kiocb(&kiocb, swap_file);
> -		kiocb.ki_pos = page_file_offset(page);
> +		kiocb.ki_pos	= page_file_offset(page);
> +		kiocb.ki_flags	= IOCB_DIRECT | IOCB_WRITE | IOCB_SWAP;
>  
>  		set_page_writeback(page);
>  		unlock_page(page);
> @@ -324,8 +325,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
>  			 */
>  			set_page_dirty(page);
>  			ClearPageReclaim(page);
> -			pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
> -					   page_file_offset(page));
> +			pr_err_ratelimited("Write error (%d) on dio swapfile (%llu)\n",
> +					   ret, page_file_offset(page));
>  		}
>  		end_page_writeback(page);
>  		return ret;
> 
> 
>
diff mbox series

Patch

diff --git a/fs/read_write.c b/fs/read_write.c
index 9db7adf160d2..daef721ca67e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1646,7 +1646,7 @@  ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 	loff_t count;
 	int ret;
 
-	if (IS_SWAPFILE(inode))
+	if (IS_SWAPFILE(inode) && !(iocb->ki_flags & IOCB_SWAP))
 		return -ETXTBSY;
 
 	if (!iov_iter_count(from))
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 640574294216..b3e6a20f28ef 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -319,6 +319,7 @@  enum rw_hint {
 /* iocb->ki_waitq is valid */
 #define IOCB_WAITQ		(1 << 19)
 #define IOCB_NOIO		(1 << 20)
+#define IOCB_SWAP		(1 << 21)	/* This is a swap request */
 
 struct kiocb {
 	struct file		*ki_filp;
diff --git a/mm/page_io.c b/mm/page_io.c
index d597bc6e6e45..edb72bf624d2 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -303,7 +303,8 @@  int __swap_writepage(struct page *page, struct writeback_control *wbc,
 
 		iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
 		init_sync_kiocb(&kiocb, swap_file);
-		kiocb.ki_pos = page_file_offset(page);
+		kiocb.ki_pos	= page_file_offset(page);
+		kiocb.ki_flags	= IOCB_DIRECT | IOCB_WRITE | IOCB_SWAP;
 
 		set_page_writeback(page);
 		unlock_page(page);
@@ -324,8 +325,8 @@  int __swap_writepage(struct page *page, struct writeback_control *wbc,
 			 */
 			set_page_dirty(page);
 			ClearPageReclaim(page);
-			pr_err_ratelimited("Write error on dio swapfile (%llu)\n",
-					   page_file_offset(page));
+			pr_err_ratelimited("Write error (%d) on dio swapfile (%llu)\n",
+					   ret, page_file_offset(page));
 		}
 		end_page_writeback(page);
 		return ret;