Message ID | 20191212190133.18473-6-axboe@kernel.dk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Support for RWF_UNCACHED | expand |
On Thu, Dec 12, 2019 at 12:01:33PM -0700, Jens Axboe wrote: > This adds support for RWF_UNCACHED for file systems using iomap to > perform buffered writes. We use the generic infrastructure for this, > by tracking pages we created and calling write_drop_cached_pages() > to issue writeback and prune those pages. > > Signed-off-by: Jens Axboe <axboe@kernel.dk> > --- > fs/iomap/apply.c | 24 ++++++++++++++++++++++++ > fs/iomap/buffered-io.c | 23 +++++++++++++++++++---- > include/linux/iomap.h | 5 +++++ > 3 files changed, 48 insertions(+), 4 deletions(-) > > diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c > index e76148db03b8..11b6812f7b37 100644 > --- a/fs/iomap/apply.c > +++ b/fs/iomap/apply.c > @@ -92,5 +92,29 @@ iomap_apply(struct iomap_data *data, const struct iomap_ops *ops, > data->flags, &iomap); > } > > + if (written && (data->flags & IOMAP_UNCACHED)) { Hmmm... why is a chunk of buffered write(?) code landing in the iomap apply function? The #define for IOMAP_UNCACHED doesn't have a comment, so I don't know what this is supposed to mean. Judging from the one place it gets set in the buffered write function I gather that this is how you implement the "write through page cache and immediately unmap the page if it wasn't there before" behavior? So based on that, I think you want ... if IOMAP_WRITE && _UNCACHED && !_DIRECT && written > 0: flush and invalidate Since direct writes are never going to create page cache, right? And in that case, why not put this at the end of iomap_write_actor? (Sorry if this came up in the earlier discussions, I've been busy this week and still have a long way to go for catching up...) 
> + struct address_space *mapping = data->inode->i_mapping; > + > + end = data->pos + written; > + ret = filemap_write_and_wait_range(mapping, data->pos, end); > + if (ret) > + goto out; > + > + /* > + * No pages were created for this range, we're done > + */ > + if (!(iomap.flags & IOMAP_F_PAGE_CREATE)) > + goto out; > + > + /* > + * Try to invalidate cache pages for the range we just wrote. > + * We don't care if invalidation fails as the write has still > + * worked and leaving clean uptodate pages in the page cache > + * isn't a corruption vector for uncached IO. > + */ > + invalidate_inode_pages2_range(mapping, > + data->pos >> PAGE_SHIFT, end >> PAGE_SHIFT); > + } > +out: > return written ? written : ret; > } > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c > index 0a1a195ed1cc..df9d6002858e 100644 > --- a/fs/iomap/buffered-io.c > +++ b/fs/iomap/buffered-io.c > @@ -659,6 +659,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, > struct page **pagep, struct iomap *iomap, struct iomap *srcmap) > { > const struct iomap_page_ops *page_ops = iomap->page_ops; > + unsigned aop_flags; > struct page *page; > int status = 0; > > @@ -675,8 +676,11 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, > return status; > } > > + aop_flags = AOP_FLAG_NOFS; > + if (flags & IOMAP_UNCACHED) > + aop_flags |= AOP_FLAG_UNCACHED; > page = grab_cache_page_write_begin(inode->i_mapping, pos >> PAGE_SHIFT, > - AOP_FLAG_NOFS); > + aop_flags); > if (!page) { > status = -ENOMEM; > goto out_no_page; > @@ -818,6 +822,7 @@ iomap_write_actor(const struct iomap_data *data, struct iomap *iomap, > { > struct inode *inode = data->inode; > struct iov_iter *i = data->priv; > + unsigned flags = data->flags; > loff_t length = data->len; > loff_t pos = data->pos; > long status = 0; > @@ -851,10 +856,17 @@ iomap_write_actor(const struct iomap_data *data, struct iomap *iomap, > break; > } > > - status = 
iomap_write_begin(inode, pos, bytes, 0, &page, iomap, > - srcmap); > - if (unlikely(status)) > +retry: > + status = iomap_write_begin(inode, pos, bytes, flags, > + &page, iomap, srcmap); > + if (unlikely(status)) { > + if (status == -ENOMEM && (flags & IOMAP_UNCACHED)) { > + iomap->flags |= IOMAP_F_PAGE_CREATE; > + flags &= ~IOMAP_UNCACHED; > + goto retry; > + } > break; > + } > > if (mapping_writably_mapped(inode->i_mapping)) > flush_dcache_page(page); > @@ -907,6 +919,9 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter, > }; > loff_t ret = 0, written = 0; > > + if (iocb->ki_flags & IOCB_UNCACHED) > + data.flags |= IOMAP_UNCACHED; > + > while (iov_iter_count(iter)) { > data.len = iov_iter_count(iter); > ret = iomap_apply(&data, ops, iomap_write_actor); > diff --git a/include/linux/iomap.h b/include/linux/iomap.h > index 30f40145a9e9..30bb248e1d0d 100644 > --- a/include/linux/iomap.h > +++ b/include/linux/iomap.h > @@ -48,12 +48,16 @@ struct vm_fault; > * > * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of > * buffer heads for this mapping. > + * > + * IOMAP_F_PAGE_CREATE indicates that pages had to be allocated to satisfy > + * this operation. > */ > #define IOMAP_F_NEW 0x01 > #define IOMAP_F_DIRTY 0x02 > #define IOMAP_F_SHARED 0x04 > #define IOMAP_F_MERGED 0x08 > #define IOMAP_F_BUFFER_HEAD 0x10 > +#define IOMAP_F_PAGE_CREATE 0x20 I think these new flags need an update to the _STRINGS arrays in fs/iomap/trace.h. > > /* > * Flags set by the core iomap code during operations: > @@ -121,6 +125,7 @@ struct iomap_page_ops { > #define IOMAP_FAULT (1 << 3) /* mapping for page fault */ > #define IOMAP_DIRECT (1 << 4) /* direct I/O */ > #define IOMAP_NOWAIT (1 << 5) /* do not block */ > +#define IOMAP_UNCACHED (1 << 6) No comment? --D > > struct iomap_ops { > /* > -- > 2.24.1 >
On 12/12/19 7:26 PM, Darrick J. Wong wrote: > On Thu, Dec 12, 2019 at 12:01:33PM -0700, Jens Axboe wrote: >> This adds support for RWF_UNCACHED for file systems using iomap to >> perform buffered writes. We use the generic infrastructure for this, >> by tracking pages we created and calling write_drop_cached_pages() >> to issue writeback and prune those pages. >> >> Signed-off-by: Jens Axboe <axboe@kernel.dk> >> --- >> fs/iomap/apply.c | 24 ++++++++++++++++++++++++ >> fs/iomap/buffered-io.c | 23 +++++++++++++++++++---- >> include/linux/iomap.h | 5 +++++ >> 3 files changed, 48 insertions(+), 4 deletions(-) >> >> diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c >> index e76148db03b8..11b6812f7b37 100644 >> --- a/fs/iomap/apply.c >> +++ b/fs/iomap/apply.c >> @@ -92,5 +92,29 @@ iomap_apply(struct iomap_data *data, const struct iomap_ops *ops, >> data->flags, &iomap); >> } >> >> + if (written && (data->flags & IOMAP_UNCACHED)) { > > Hmmm... why is a chunk of buffered write(?) code landing in the iomap > apply function? I'm going to say that Dave suggested it ;-) > The #define for IOMAP_UNCACHED doesn't have a comment, so I don't know > what this is supposed to mean. Judging from the one place it gets set > in the buffered write function I gather that this is how you implement > the "write through page cache and immediately unmap the page if it > wasn't there before" behavior? > > So based on that, I think you want ... > > if IOMAP_WRITE && _UNCACHED && !_DIRECT && written > 0: > flush and invalidate Looking at the comments, I did think it was just for writes, but it looks generic. I'll take the blame for that, we should only call into that sync-and-invalidate code for buffered writes. I'll make that change. > Since direct writes are never going to create page cache, right? If they do, it's handled separately. > And in that case, why not put this at the end of iomap_write_actor? 
> > (Sorry if this came up in the earlier discussions, I've been busy this > week and still have a long way to go for catching up...) It did come up, the idea is to do it for the full range, not per chunk. Does that help? >> diff --git a/include/linux/iomap.h b/include/linux/iomap.h >> index 30f40145a9e9..30bb248e1d0d 100644 >> --- a/include/linux/iomap.h >> +++ b/include/linux/iomap.h >> @@ -48,12 +48,16 @@ struct vm_fault; >> * >> * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of >> * buffer heads for this mapping. >> + * >> + * IOMAP_F_PAGE_CREATE indicates that pages had to be allocated to satisfy >> + * this operation. >> */ >> #define IOMAP_F_NEW 0x01 >> #define IOMAP_F_DIRTY 0x02 >> #define IOMAP_F_SHARED 0x04 >> #define IOMAP_F_MERGED 0x08 >> #define IOMAP_F_BUFFER_HEAD 0x10 >> +#define IOMAP_F_PAGE_CREATE 0x20 > > I think these new flags need an update to the _STRINGS arrays in > fs/iomap/trace.h. I'll add that. >> /* >> * Flags set by the core iomap code during operations: >> @@ -121,6 +125,7 @@ struct iomap_page_ops { >> #define IOMAP_FAULT (1 << 3) /* mapping for page fault */ >> #define IOMAP_DIRECT (1 << 4) /* direct I/O */ >> #define IOMAP_NOWAIT (1 << 5) /* do not block */ >> +#define IOMAP_UNCACHED (1 << 6) > > No comment? Definitely, I'll add a comment. Thanks for taking a look! I'll incorporate your suggestions.
diff --git a/fs/iomap/apply.c b/fs/iomap/apply.c index e76148db03b8..11b6812f7b37 100644 --- a/fs/iomap/apply.c +++ b/fs/iomap/apply.c @@ -92,5 +92,29 @@ iomap_apply(struct iomap_data *data, const struct iomap_ops *ops, data->flags, &iomap); } + if (written && (data->flags & IOMAP_UNCACHED)) { + struct address_space *mapping = data->inode->i_mapping; + + end = data->pos + written; + ret = filemap_write_and_wait_range(mapping, data->pos, end); + if (ret) + goto out; + + /* + * No pages were created for this range, we're done + */ + if (!(iomap.flags & IOMAP_F_PAGE_CREATE)) + goto out; + + /* + * Try to invalidate cache pages for the range we just wrote. + * We don't care if invalidation fails as the write has still + * worked and leaving clean uptodate pages in the page cache + * isn't a corruption vector for uncached IO. + */ + invalidate_inode_pages2_range(mapping, + data->pos >> PAGE_SHIFT, end >> PAGE_SHIFT); + } +out: return written ? written : ret; } diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 0a1a195ed1cc..df9d6002858e 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -659,6 +659,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, struct page **pagep, struct iomap *iomap, struct iomap *srcmap) { const struct iomap_page_ops *page_ops = iomap->page_ops; + unsigned aop_flags; struct page *page; int status = 0; @@ -675,8 +676,11 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, return status; } + aop_flags = AOP_FLAG_NOFS; + if (flags & IOMAP_UNCACHED) + aop_flags |= AOP_FLAG_UNCACHED; page = grab_cache_page_write_begin(inode->i_mapping, pos >> PAGE_SHIFT, - AOP_FLAG_NOFS); + aop_flags); if (!page) { status = -ENOMEM; goto out_no_page; @@ -818,6 +822,7 @@ iomap_write_actor(const struct iomap_data *data, struct iomap *iomap, { struct inode *inode = data->inode; struct iov_iter *i = data->priv; + unsigned flags = data->flags; loff_t length = data->len; 
loff_t pos = data->pos; long status = 0; @@ -851,10 +856,17 @@ iomap_write_actor(const struct iomap_data *data, struct iomap *iomap, break; } - status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, - srcmap); - if (unlikely(status)) +retry: + status = iomap_write_begin(inode, pos, bytes, flags, + &page, iomap, srcmap); + if (unlikely(status)) { + if (status == -ENOMEM && (flags & IOMAP_UNCACHED)) { + iomap->flags |= IOMAP_F_PAGE_CREATE; + flags &= ~IOMAP_UNCACHED; + goto retry; + } break; + } if (mapping_writably_mapped(inode->i_mapping)) flush_dcache_page(page); @@ -907,6 +919,9 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *iter, }; loff_t ret = 0, written = 0; + if (iocb->ki_flags & IOCB_UNCACHED) + data.flags |= IOMAP_UNCACHED; + while (iov_iter_count(iter)) { data.len = iov_iter_count(iter); ret = iomap_apply(&data, ops, iomap_write_actor); diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 30f40145a9e9..30bb248e1d0d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -48,12 +48,16 @@ struct vm_fault; * * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of * buffer heads for this mapping. + * + * IOMAP_F_PAGE_CREATE indicates that pages had to be allocated to satisfy + * this operation. */ #define IOMAP_F_NEW 0x01 #define IOMAP_F_DIRTY 0x02 #define IOMAP_F_SHARED 0x04 #define IOMAP_F_MERGED 0x08 #define IOMAP_F_BUFFER_HEAD 0x10 +#define IOMAP_F_PAGE_CREATE 0x20 /* * Flags set by the core iomap code during operations: @@ -121,6 +125,7 @@ struct iomap_page_ops { #define IOMAP_FAULT (1 << 3) /* mapping for page fault */ #define IOMAP_DIRECT (1 << 4) /* direct I/O */ #define IOMAP_NOWAIT (1 << 5) /* do not block */ +#define IOMAP_UNCACHED (1 << 6) struct iomap_ops { /*
This adds support for RWF_UNCACHED for file systems using iomap to
perform buffered writes. We use the generic infrastructure for this,
by tracking pages we created and calling write_drop_cached_pages()
to issue writeback and prune those pages.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/iomap/apply.c       | 24 ++++++++++++++++++++++++
 fs/iomap/buffered-io.c | 23 +++++++++++++++++++----
 include/linux/iomap.h  |  5 +++++
 3 files changed, 48 insertions(+), 4 deletions(-)