Message ID | 20220601210141.3773402-7-shr@fb.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | io-uring/xfs: support async buffered writes | expand |
On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote: > Change the signature of iomap_write_iter() to return an error code. In > case we cannot allocate a page in iomap_write_begin(), we will not retry > the memory alloction in iomap_write_begin(). loff_t can already represent an error code. And it's already used like that. > @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) > length -= status; > } while (iov_iter_count(i) && length); > > - return written ? written : status; > + *processed = written ? written : error; > + return error; I think the change you really want is: if (status == -EAGAIN) return -EAGAIN; if (written) return written; return status; > @@ -843,12 +845,15 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, > .flags = IOMAP_WRITE, > }; > int ret; > + int error = 0; > > if (iocb->ki_flags & IOCB_NOWAIT) > iter.flags |= IOMAP_NOWAIT; > > - while ((ret = iomap_iter(&iter, ops)) > 0) > - iter.processed = iomap_write_iter(&iter, i); > + while ((ret = iomap_iter(&iter, ops)) > 0) { > + if (error != -EAGAIN) > + error = iomap_write_iter(&iter, i, &iter.processed); > + } You don't need to change any of this. Look at how iomap_iter_advance() works.
On 6/2/22 5:38 AM, Matthew Wilcox wrote: > On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote: >> Change the signature of iomap_write_iter() to return an error code. In >> case we cannot allocate a page in iomap_write_begin(), we will not retry >> the memory alloction in iomap_write_begin(). > > loff_t can already represent an error code. And it's already used like > that. > >> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) >> length -= status; >> } while (iov_iter_count(i) && length); >> >> - return written ? written : status; >> + *processed = written ? written : error; >> + return error; > > I think the change you really want is: > > if (status == -EAGAIN) > return -EAGAIN; > if (written) > return written; > return status; > Correct, I made the above change. >> @@ -843,12 +845,15 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, >> .flags = IOMAP_WRITE, >> }; >> int ret; >> + int error = 0; >> >> if (iocb->ki_flags & IOCB_NOWAIT) >> iter.flags |= IOMAP_NOWAIT; >> >> - while ((ret = iomap_iter(&iter, ops)) > 0) >> - iter.processed = iomap_write_iter(&iter, i); >> + while ((ret = iomap_iter(&iter, ops)) > 0) { >> + if (error != -EAGAIN) >> + error = iomap_write_iter(&iter, i, &iter.processed); >> + } > > You don't need to change any of this. Look at how iomap_iter_advance() > works. >
On 6/2/22 5:38 AM, Matthew Wilcox wrote: > On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote: >> Change the signature of iomap_write_iter() to return an error code. In >> case we cannot allocate a page in iomap_write_begin(), we will not retry >> the memory alloction in iomap_write_begin(). > > loff_t can already represent an error code. And it's already used like > that. > >> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) >> length -= status; >> } while (iov_iter_count(i) && length); >> >> - return written ? written : status; >> + *processed = written ? written : error; >> + return error; > > I think the change you really want is: > > if (status == -EAGAIN) > return -EAGAIN; > if (written) > return written; > return status; > I think the change needs to be: - return written ? written : status; + if (status == -EAGAIN) { + iov_iter_revert(i, written); + return -EAGAIN; + } + if (written) + return written; + return status; >> @@ -843,12 +845,15 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, >> .flags = IOMAP_WRITE, >> }; >> int ret; >> + int error = 0; >> >> if (iocb->ki_flags & IOCB_NOWAIT) >> iter.flags |= IOMAP_NOWAIT; >> >> - while ((ret = iomap_iter(&iter, ops)) > 0) >> - iter.processed = iomap_write_iter(&iter, i); >> + while ((ret = iomap_iter(&iter, ops)) > 0) { >> + if (error != -EAGAIN) >> + error = iomap_write_iter(&iter, i, &iter.processed); >> + } > > You don't need to change any of this. Look at how iomap_iter_advance() > works. >
On Mon, Jun 06, 2022 at 09:39:03AM -0700, Stefan Roesch wrote: > > > On 6/2/22 5:38 AM, Matthew Wilcox wrote: > > On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote: > >> Change the signature of iomap_write_iter() to return an error code. In > >> case we cannot allocate a page in iomap_write_begin(), we will not retry > >> the memory alloction in iomap_write_begin(). > > > > loff_t can already represent an error code. And it's already used like > > that. > > > >> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) > >> length -= status; > >> } while (iov_iter_count(i) && length); > >> > >> - return written ? written : status; > >> + *processed = written ? written : error; > >> + return error; > > > > I think the change you really want is: > > > > if (status == -EAGAIN) > > return -EAGAIN; > > if (written) > > return written; > > return status; > > > > I think the change needs to be: > > - return written ? written : status; > + if (status == -EAGAIN) { > + iov_iter_revert(i, written); > + return -EAGAIN; > + } > + if (written) > + return written; > + return status; Ah yes, I think you're right. Does it work to leave everything the way it is, call back into the iomap_write_iter() after having done a short write, get the -EAGAIN at that point and pass the already-advanced iterator to the worker thread? I haven't looked into the details of how that works, so maybe you just can't do that.
On 6/6/22 12:18 PM, Matthew Wilcox wrote: > On Mon, Jun 06, 2022 at 09:39:03AM -0700, Stefan Roesch wrote: >> >> >> On 6/2/22 5:38 AM, Matthew Wilcox wrote: >>> On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote: >>>> Change the signature of iomap_write_iter() to return an error code. In >>>> case we cannot allocate a page in iomap_write_begin(), we will not retry >>>> the memory alloction in iomap_write_begin(). >>> >>> loff_t can already represent an error code. And it's already used like >>> that. >>> >>>> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) >>>> length -= status; >>>> } while (iov_iter_count(i) && length); >>>> >>>> - return written ? written : status; >>>> + *processed = written ? written : error; >>>> + return error; >>> >>> I think the change you really want is: >>> >>> if (status == -EAGAIN) >>> return -EAGAIN; >>> if (written) >>> return written; >>> return status; >>> >> >> I think the change needs to be: >> >> - return written ? written : status; >> + if (status == -EAGAIN) { >> + iov_iter_revert(i, written); >> + return -EAGAIN; >> + } >> + if (written) >> + return written; >> + return status; > > Ah yes, I think you're right. > > Does it work to leave everything the way it is, call back into the > iomap_write_iter() after having done a short write, get the -EAGAIN at > that point and pass the already-advanced iterator to the worker thread? > I haven't looked into the details of how that works, so maybe you just > can't do that. With the above change, short writes are handled correctly.
On Mon, Jun 06, 2022 at 12:21:28PM -0700, Stefan Roesch wrote: > > > On 6/6/22 12:18 PM, Matthew Wilcox wrote: > > On Mon, Jun 06, 2022 at 09:39:03AM -0700, Stefan Roesch wrote: > >> > >> > >> On 6/2/22 5:38 AM, Matthew Wilcox wrote: > >>> On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote: > >>>> Change the signature of iomap_write_iter() to return an error code. In > >>>> case we cannot allocate a page in iomap_write_begin(), we will not retry > >>>> the memory alloction in iomap_write_begin(). > >>> > >>> loff_t can already represent an error code. And it's already used like > >>> that. > >>> > >>>> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) > >>>> length -= status; > >>>> } while (iov_iter_count(i) && length); > >>>> > >>>> - return written ? written : status; > >>>> + *processed = written ? written : error; > >>>> + return error; > >>> > >>> I think the change you really want is: > >>> > >>> if (status == -EAGAIN) > >>> return -EAGAIN; > >>> if (written) > >>> return written; > >>> return status; > >>> > >> > >> I think the change needs to be: > >> > >> - return written ? written : status; > >> + if (status == -EAGAIN) { > >> + iov_iter_revert(i, written); > >> + return -EAGAIN; > >> + } > >> + if (written) > >> + return written; > >> + return status; > > > > Ah yes, I think you're right. > > > > Does it work to leave everything the way it is, call back into the > > iomap_write_iter() after having done a short write, get the -EAGAIN at > > that point and pass the already-advanced iterator to the worker thread? > > I haven't looked into the details of how that works, so maybe you just > > can't do that. > > With the above change, short writes are handled correctly. Are they though? What about a write that crosses an extent boundary? iomap_write_iter() gets called once per extent and I have a feeling that you really need to revert the entire write, rather than just the part of the write that was to that extent. Anyway, my question was whether we need to revert or whether the worker thread can continue from where we left off.
On 6/6/22 12:25 PM, Matthew Wilcox wrote: > On Mon, Jun 06, 2022 at 12:21:28PM -0700, Stefan Roesch wrote: >> >> >> On 6/6/22 12:18 PM, Matthew Wilcox wrote: >>> On Mon, Jun 06, 2022 at 09:39:03AM -0700, Stefan Roesch wrote: >>>> >>>> >>>> On 6/2/22 5:38 AM, Matthew Wilcox wrote: >>>>> On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote: >>>>>> Change the signature of iomap_write_iter() to return an error code. In >>>>>> case we cannot allocate a page in iomap_write_begin(), we will not retry >>>>>> the memory alloction in iomap_write_begin(). >>>>> >>>>> loff_t can already represent an error code. And it's already used like >>>>> that. >>>>> >>>>>> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) >>>>>> length -= status; >>>>>> } while (iov_iter_count(i) && length); >>>>>> >>>>>> - return written ? written : status; >>>>>> + *processed = written ? written : error; >>>>>> + return error; >>>>> >>>>> I think the change you really want is: >>>>> >>>>> if (status == -EAGAIN) >>>>> return -EAGAIN; >>>>> if (written) >>>>> return written; >>>>> return status; >>>>> >>>> >>>> I think the change needs to be: >>>> >>>> - return written ? written : status; >>>> + if (status == -EAGAIN) { >>>> + iov_iter_revert(i, written); >>>> + return -EAGAIN; >>>> + } >>>> + if (written) >>>> + return written; >>>> + return status; >>> >>> Ah yes, I think you're right. >>> >>> Does it work to leave everything the way it is, call back into the >>> iomap_write_iter() after having done a short write, get the -EAGAIN at >>> that point and pass the already-advanced iterator to the worker thread? >>> I haven't looked into the details of how that works, so maybe you just >>> can't do that. >> >> With the above change, short writes are handled correctly. > > Are they though? What about a write that crosses an extent boundary? > iomap_write_iter() gets called once per extent and I have a feeling that > you really need to revert the entire write, rather than just the part > of the write that was to that extent. > > Anyway, my question was whether we need to revert or whether the worker > thread can continue from where we left off. Without iov_iter_revert() fsx fails with errors in short writes and also my test program which issues short writes fails.
On Mon, Jun 06, 2022 at 12:28:04PM -0700, Stefan Roesch wrote: > On 6/6/22 12:25 PM, Matthew Wilcox wrote: > > On Mon, Jun 06, 2022 at 12:21:28PM -0700, Stefan Roesch wrote: > >> On 6/6/22 12:18 PM, Matthew Wilcox wrote: > >>> On Mon, Jun 06, 2022 at 09:39:03AM -0700, Stefan Roesch wrote: > >>>> On 6/2/22 5:38 AM, Matthew Wilcox wrote: > >>>>> On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote: > >>>>>> Change the signature of iomap_write_iter() to return an error code. In > >>>>>> case we cannot allocate a page in iomap_write_begin(), we will not retry > >>>>>> the memory alloction in iomap_write_begin(). > >>>>> > >>>>> loff_t can already represent an error code. And it's already used like > >>>>> that. > >>>>> > >>>>>> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) > >>>>>> length -= status; > >>>>>> } while (iov_iter_count(i) && length); > >>>>>> > >>>>>> - return written ? written : status; > >>>>>> + *processed = written ? written : error; > >>>>>> + return error; > >>>>> > >>>>> I think the change you really want is: > >>>>> > >>>>> if (status == -EAGAIN) > >>>>> return -EAGAIN; > >>>>> if (written) > >>>>> return written; > >>>>> return status; > >>>>> > >>>> > >>>> I think the change needs to be: > >>>> > >>>> - return written ? written : status; > >>>> + if (status == -EAGAIN) { > >>>> + iov_iter_revert(i, written); > >>>> + return -EAGAIN; > >>>> + } > >>>> + if (written) > >>>> + return written; > >>>> + return status; > >>> > >>> Ah yes, I think you're right. > >>> > >>> Does it work to leave everything the way it is, call back into the > >>> iomap_write_iter() after having done a short write, get the -EAGAIN at > >>> that point and pass the already-advanced iterator to the worker thread? > >>> I haven't looked into the details of how that works, so maybe you just > >>> can't do that. > >> > >> With the above change, short writes are handled correctly. > > > > Are they though? What about a write that crosses an extent boundary? > > iomap_write_iter() gets called once per extent and I have a feeling that > > you really need to revert the entire write, rather than just the part > > of the write that was to that extent. > > > > Anyway, my question was whether we need to revert or whether the worker > > thread can continue from where we left off. > > Without iov_iter_revert() fsx fails with errors in short writes and also my test > program which issues short writes fails. Does your test program include starting in one extent, completing the portion of the write which is in that extent successfully, and having the portion of the write which is in the second extent be short?
On 6/6/22 1:30 PM, Matthew Wilcox wrote: > On Mon, Jun 06, 2022 at 12:28:04PM -0700, Stefan Roesch wrote: >> On 6/6/22 12:25 PM, Matthew Wilcox wrote: >>> On Mon, Jun 06, 2022 at 12:21:28PM -0700, Stefan Roesch wrote: >>>> On 6/6/22 12:18 PM, Matthew Wilcox wrote: >>>>> On Mon, Jun 06, 2022 at 09:39:03AM -0700, Stefan Roesch wrote: >>>>>> On 6/2/22 5:38 AM, Matthew Wilcox wrote: >>>>>>> On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote: >>>>>>>> Change the signature of iomap_write_iter() to return an error code. In >>>>>>>> case we cannot allocate a page in iomap_write_begin(), we will not retry >>>>>>>> the memory alloction in iomap_write_begin(). >>>>>>> >>>>>>> loff_t can already represent an error code. And it's already used like >>>>>>> that. >>>>>>> >>>>>>>> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) >>>>>>>> length -= status; >>>>>>>> } while (iov_iter_count(i) && length); >>>>>>>> >>>>>>>> - return written ? written : status; >>>>>>>> + *processed = written ? written : error; >>>>>>>> + return error; >>>>>>> >>>>>>> I think the change you really want is: >>>>>>> >>>>>>> if (status == -EAGAIN) >>>>>>> return -EAGAIN; >>>>>>> if (written) >>>>>>> return written; >>>>>>> return status; >>>>>>> >>>>>> >>>>>> I think the change needs to be: >>>>>> >>>>>> - return written ? written : status; >>>>>> + if (status == -EAGAIN) { >>>>>> + iov_iter_revert(i, written); >>>>>> + return -EAGAIN; >>>>>> + } >>>>>> + if (written) >>>>>> + return written; >>>>>> + return status; >>>>> >>>>> Ah yes, I think you're right. >>>>> >>>>> Does it work to leave everything the way it is, call back into the >>>>> iomap_write_iter() after having done a short write, get the -EAGAIN at >>>>> that point and pass the already-advanced iterator to the worker thread? >>>>> I haven't looked into the details of how that works, so maybe you just >>>>> can't do that. >>>> >>>> With the above change, short writes are handled correctly. >>> >>> Are they though? What about a write that crosses an extent boundary? >>> iomap_write_iter() gets called once per extent and I have a feeling that >>> you really need to revert the entire write, rather than just the part >>> of the write that was to that extent. >>> >>> Anyway, my question was whether we need to revert or whether the worker >>> thread can continue from where we left off. >> >> Without iov_iter_revert() fsx fails with errors in short writes and also my test >> program which issues short writes fails. > > Does your test program include starting in one extent, completing the > portion of the write which is in that extent successfully, and having > the portion of the write which is in the second extent be short? I do a 3k write, where the first 2k write is serviced from one extent and the last 1k is served from another extent. Does that answer the question?
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index b06a5c24a4db..e96ab9a3072c 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -754,12 +754,13 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, return ret; } -static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) +static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i, loff_t *processed) { loff_t length = iomap_length(iter); loff_t pos = iter->pos; ssize_t written = 0; long status = 0; + int error = 0; struct address_space *mapping = iter->inode->i_mapping; unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0; @@ -774,9 +775,9 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) bytes = min_t(unsigned long, PAGE_SIZE - offset, iov_iter_count(i)); again: - status = balance_dirty_pages_ratelimited_flags(mapping, + error = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags); - if (unlikely(status)) + if (unlikely(error)) break; if (bytes > length) @@ -793,12 +794,12 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) * faulting the user page. */ if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) { - status = -EFAULT; + error = -EFAULT; break; } - status = iomap_write_begin(iter, pos, bytes, &folio); - if (unlikely(status)) + error = iomap_write_begin(iter, pos, bytes, &folio); + if (unlikely(error)) break; page = folio_file_page(folio, pos >> PAGE_SHIFT); @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) length -= status; } while (iov_iter_count(i) && length); - return written ? written : status; + *processed = written ? written : error; + return error; } ssize_t @@ -843,12 +845,15 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, .flags = IOMAP_WRITE, }; int ret; + int error = 0; if (iocb->ki_flags & IOCB_NOWAIT) iter.flags |= IOMAP_NOWAIT; - while ((ret = iomap_iter(&iter, ops)) > 0) - iter.processed = iomap_write_iter(&iter, i); + while ((ret = iomap_iter(&iter, ops)) > 0) { + if (error != -EAGAIN) + error = iomap_write_iter(&iter, i, &iter.processed); + } if (iter.pos == iocb->ki_pos) return ret; return iter.pos - iocb->ki_pos;
Change the signature of iomap_write_iter() to return an error code. In case we cannot allocate a page in iomap_write_begin(), we will not retry the memory alloction in iomap_write_begin(). Signed-off-by: Stefan Roesch <shr@fb.com> --- fs/iomap/buffered-io.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-)