Message ID | 162879972678.3306668.10709543333474121000.stgit@warthog.procyon.org.uk (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm: Fix NFS swapfiles and use DIO for swapfiles | expand |
On Thu, Aug 12, 2021 at 09:22:06PM +0100, David Howells wrote: > Trying to use a swapfile on NFS results in every DIO write failing with > ETXTBSY because generic_write_checks(), as called by nfs_direct_write() > from nfs_direct_IO(), forbids writes to swapfiles. Why does nfs_direct_write() call generic_write_checks()? ie call generic_write_checks() earlier, and only swap would bypass them. diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2e894fec036b..7e2ca6b5fc5f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -905,9 +905,6 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", file, iov_iter_count(iter), (long long) iocb->ki_pos); - result = generic_write_checks(iocb, iter); - if (result <= 0) - return result; count = result; nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 1fef107961bc..91b2e3214836 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -611,6 +611,10 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) errseq_t since; int error; + result = generic_write_checks(iocb, from); + if (result < 0) + return result; + result = nfs_key_timeout_notify(file, inode); if (result) return result; @@ -621,8 +625,6 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) dprintk("NFS: write(%pD2, %zu@%Ld)\n", file, iov_iter_count(from), (long long) iocb->ki_pos); - if (IS_SWAPFILE(inode)) - goto out_swapfile; /* * O_APPEND implies that we must revalidate the file length. 
*/ @@ -636,7 +638,6 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) since = filemap_sample_wb_err(file->f_mapping); nfs_start_io_write(inode); - result = generic_write_checks(iocb, from); if (result > 0) { current->backing_dev_info = inode_to_bdi(inode); result = generic_perform_write(file, from, iocb->ki_pos); @@ -677,10 +678,6 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); out: return result; - -out_swapfile: - printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); - return -ETXTBSY; } EXPORT_SYMBOL_GPL(nfs_file_write);
On Fri, Aug 13, 2021 at 04:09:39AM +0100, Matthew Wilcox wrote: > On Thu, Aug 12, 2021 at 09:22:06PM +0100, David Howells wrote: > > Trying to use a swapfile on NFS results in every DIO write failing with > > ETXTBSY because generic_write_checks(), as called by nfs_direct_write() > > from nfs_direct_IO(), forbids writes to swapfiles. > > Why does nfs_direct_write() call generic_write_checks()? > > ie call generic_write_checks() earlier, and only swap would bypass them. Yes, something like that is a good idea probably. Additionally I'd like to move to a separate op for swap I/O ASAP given that NFS only implements ->direct_IO for swap.
On Fri, 13 Aug 2021, David Howells wrote: > Trying to use a swapfile on NFS results in every DIO write failing with > ETXTBSY because generic_write_checks(), as called by nfs_direct_write() > from nfs_direct_IO(), forbids writes to swapfiles. Could we just remove this check from generic_write_checks(), and instead call deny_write_access() in swap_on? Then user-space wouldn't be able to open a swap-file for write, so there would be no need to check on every single write. NeilBrown > > Fix this by introducing a new kiocb flag, IOCB_SWAP, that's set by the swap > code to indicate that the swapper is doing this operation and so overrule > the check in generic_write_checks(). > > Without this patch, the following is seen: > > Write error on dio swapfile (3800334336) > > Altering __swap_writepage() to show the error shows: > > Write error (-26) on dio swapfile (3800334336) > > Tested by swapping off all swap partitions and then swapping on a prepared > NFS file (CONFIG_NFS_SWAP=y is also needed). 
Enough copies of the > following program then need to be run to force swapping to occur (at least > one per gigabyte of RAM): > > #include <stdbool.h> > #include <stdio.h> > #include <stdlib.h> > #include <unistd.h> > #include <sys/mman.h> > int main() > { > unsigned int pid = getpid(), iterations = 0; > size_t i, j, size = 1024 * 1024 * 1024; > char *p; > bool mismatch; > p = malloc(size); > if (!p) { > perror("malloc"); > exit(1); > } > srand(pid); > for (i = 0; i < size; i += 4) > *(unsigned int *)(p + i) = rand(); > do { > for (j = 0; j < 16; j++) { > for (i = 0; i < size; i += 4096) > *(unsigned int *)(p + i) += 1; > iterations++; > } > mismatch = false; > srand(pid); > for (i = 0; i < size; i += 4) { > unsigned int r = rand(); > unsigned int v = *(unsigned int *)(p + i); > if (i % 4096 == 0) > v -= iterations; > if (v != r) { > fprintf(stderr, "mismatch %zx: %x != %x (diff %x)\n", > i, v, r, v - r); > mismatch = true; > } > } > } while (!mismatch); > exit(1); > } > > > Fixes: dc617f29dbe5 ("vfs: don't allow writes to swap files") > Signed-off-by: David Howells <dhowells@redhat.com> > cc: Darrick J. 
Wong <darrick.wong@oracle.com> > cc: Christoph Hellwig <hch@lst.de> > cc: Trond Myklebust <trond.myklebust@primarydata.com> > cc: linux-nfs@vger.kernel.org > --- > > fs/read_write.c | 2 +- > include/linux/fs.h | 1 + > mm/page_io.c | 7 ++++--- > 3 files changed, 6 insertions(+), 4 deletions(-) > > diff --git a/fs/read_write.c b/fs/read_write.c > index 9db7adf160d2..daef721ca67e 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -1646,7 +1646,7 @@ ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) > loff_t count; > int ret; > > - if (IS_SWAPFILE(inode)) > + if (IS_SWAPFILE(inode) && !(iocb->ki_flags & IOCB_SWAP)) > return -ETXTBSY; > > if (!iov_iter_count(from)) > diff --git a/include/linux/fs.h b/include/linux/fs.h > index 640574294216..b3e6a20f28ef 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -319,6 +319,7 @@ enum rw_hint { > /* iocb->ki_waitq is valid */ > #define IOCB_WAITQ (1 << 19) > #define IOCB_NOIO (1 << 20) > +#define IOCB_SWAP (1 << 21) /* This is a swap request */ > > struct kiocb { > struct file *ki_filp; > diff --git a/mm/page_io.c b/mm/page_io.c > index d597bc6e6e45..edb72bf624d2 100644 > --- a/mm/page_io.c > +++ b/mm/page_io.c > @@ -303,7 +303,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, > > iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE); > init_sync_kiocb(&kiocb, swap_file); > - kiocb.ki_pos = page_file_offset(page); > + kiocb.ki_pos = page_file_offset(page); > + kiocb.ki_flags = IOCB_DIRECT | IOCB_WRITE | IOCB_SWAP; > > set_page_writeback(page); > unlock_page(page); > @@ -324,8 +325,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, > */ > set_page_dirty(page); > ClearPageReclaim(page); > - pr_err_ratelimited("Write error on dio swapfile (%llu)\n", > - page_file_offset(page)); > + pr_err_ratelimited("Write error (%d) on dio swapfile (%llu)\n", > + ret, page_file_offset(page)); > } > end_page_writeback(page); > return ret; > > >
diff --git a/fs/read_write.c b/fs/read_write.c index 9db7adf160d2..daef721ca67e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1646,7 +1646,7 @@ ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) loff_t count; int ret; - if (IS_SWAPFILE(inode)) + if (IS_SWAPFILE(inode) && !(iocb->ki_flags & IOCB_SWAP)) return -ETXTBSY; if (!iov_iter_count(from)) diff --git a/include/linux/fs.h b/include/linux/fs.h index 640574294216..b3e6a20f28ef 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -319,6 +319,7 @@ enum rw_hint { /* iocb->ki_waitq is valid */ #define IOCB_WAITQ (1 << 19) #define IOCB_NOIO (1 << 20) +#define IOCB_SWAP (1 << 21) /* This is a swap request */ struct kiocb { struct file *ki_filp; diff --git a/mm/page_io.c b/mm/page_io.c index d597bc6e6e45..edb72bf624d2 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -303,7 +303,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE); init_sync_kiocb(&kiocb, swap_file); - kiocb.ki_pos = page_file_offset(page); + kiocb.ki_pos = page_file_offset(page); + kiocb.ki_flags = IOCB_DIRECT | IOCB_WRITE | IOCB_SWAP; set_page_writeback(page); unlock_page(page); @@ -324,8 +325,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, */ set_page_dirty(page); ClearPageReclaim(page); - pr_err_ratelimited("Write error on dio swapfile (%llu)\n", - page_file_offset(page)); + pr_err_ratelimited("Write error (%d) on dio swapfile (%llu)\n", + ret, page_file_offset(page)); } end_page_writeback(page); return ret;
Trying to use a swapfile on NFS results in every DIO write failing with ETXTBSY because generic_write_checks(), as called by nfs_direct_write() from nfs_direct_IO(), forbids writes to swapfiles. Fix this by introducing a new kiocb flag, IOCB_SWAP, that's set by the swap code to indicate that the swapper is doing this operation and so overrule the check in generic_write_checks(). Without this patch, the following is seen: Write error on dio swapfile (3800334336) Altering __swap_writepage() to show the error shows: Write error (-26) on dio swapfile (3800334336) Tested by swapping off all swap partitions and then swapping on a prepared NFS file (CONFIG_NFS_SWAP=y is also needed). Enough copies of the following program then need to be run to force swapping to occur (at least one per gigabyte of RAM): #include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <sys/mman.h> int main() { unsigned int pid = getpid(), iterations = 0; size_t i, j, size = 1024 * 1024 * 1024; char *p; bool mismatch; p = malloc(size); if (!p) { perror("malloc"); exit(1); } srand(pid); for (i = 0; i < size; i += 4) *(unsigned int *)(p + i) = rand(); do { for (j = 0; j < 16; j++) { for (i = 0; i < size; i += 4096) *(unsigned int *)(p + i) += 1; iterations++; } mismatch = false; srand(pid); for (i = 0; i < size; i += 4) { unsigned int r = rand(); unsigned int v = *(unsigned int *)(p + i); if (i % 4096 == 0) v -= iterations; if (v != r) { fprintf(stderr, "mismatch %zx: %x != %x (diff %x)\n", i, v, r, v - r); mismatch = true; } } } while (!mismatch); exit(1); } Fixes: dc617f29dbe5 ("vfs: don't allow writes to swap files") Signed-off-by: David Howells <dhowells@redhat.com> cc: Darrick J. Wong <darrick.wong@oracle.com> cc: Christoph Hellwig <hch@lst.de> cc: Trond Myklebust <trond.myklebust@primarydata.com> cc: linux-nfs@vger.kernel.org --- fs/read_write.c | 2 +- include/linux/fs.h | 1 + mm/page_io.c | 7 ++++--- 3 files changed, 6 insertions(+), 4 deletions(-)