diff mbox series

[v7,06/15] iomap: Return error code from iomap_write_iter()

Message ID 20220601210141.3773402-7-shr@fb.com (mailing list archive)
State New
Headers show
Series io-uring/xfs: support async buffered writes | expand

Commit Message

Stefan Roesch June 1, 2022, 9:01 p.m. UTC
Change the signature of iomap_write_iter() to return an error code. In
case we cannot allocate a page in iomap_write_begin(), we will not retry
the memory alloction in iomap_write_begin().

Signed-off-by: Stefan Roesch <shr@fb.com>
---
 fs/iomap/buffered-io.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

Comments

Matthew Wilcox June 2, 2022, 12:38 p.m. UTC | #1
On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote:
> Change the signature of iomap_write_iter() to return an error code. In
> case we cannot allocate a page in iomap_write_begin(), we will not retry
> the memory alloction in iomap_write_begin().

loff_t can already represent an error code.  And it's already used like
that.

> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
>  		length -= status;
>  	} while (iov_iter_count(i) && length);
>  
> -	return written ? written : status;
> +	*processed = written ? written : error;
> +	return error;

I think the change you really want is:

	if (status == -EAGAIN)
		return -EAGAIN;
	if (written)
		return written;
	return status;

> @@ -843,12 +845,15 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
>  		.flags		= IOMAP_WRITE,
>  	};
>  	int ret;
> +	int error = 0;
>  
>  	if (iocb->ki_flags & IOCB_NOWAIT)
>  		iter.flags |= IOMAP_NOWAIT;
>  
> -	while ((ret = iomap_iter(&iter, ops)) > 0)
> -		iter.processed = iomap_write_iter(&iter, i);
> +	while ((ret = iomap_iter(&iter, ops)) > 0) {
> +		if (error != -EAGAIN)
> +			error = iomap_write_iter(&iter, i, &iter.processed);
> +	}

You don't need to change any of this.  Look at how iomap_iter_advance()
works.
Stefan Roesch June 2, 2022, 5:08 p.m. UTC | #2
On 6/2/22 5:38 AM, Matthew Wilcox wrote:
> On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote:
>> Change the signature of iomap_write_iter() to return an error code. In
>> case we cannot allocate a page in iomap_write_begin(), we will not retry
>> the memory alloction in iomap_write_begin().
> 
> loff_t can already represent an error code.  And it's already used like
> that.
> 
>> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
>>  		length -= status;
>>  	} while (iov_iter_count(i) && length);
>>  
>> -	return written ? written : status;
>> +	*processed = written ? written : error;
>> +	return error;
> 
> I think the change you really want is:
> 
> 	if (status == -EAGAIN)
> 		return -EAGAIN;
> 	if (written)
> 		return written;
> 	return status;
> 

Correct, I made the above change.

>> @@ -843,12 +845,15 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
>>  		.flags		= IOMAP_WRITE,
>>  	};
>>  	int ret;
>> +	int error = 0;
>>  
>>  	if (iocb->ki_flags & IOCB_NOWAIT)
>>  		iter.flags |= IOMAP_NOWAIT;
>>  
>> -	while ((ret = iomap_iter(&iter, ops)) > 0)
>> -		iter.processed = iomap_write_iter(&iter, i);
>> +	while ((ret = iomap_iter(&iter, ops)) > 0) {
>> +		if (error != -EAGAIN)
>> +			error = iomap_write_iter(&iter, i, &iter.processed);
>> +	}
> 
> You don't need to change any of this.  Look at how iomap_iter_advance()
> works.
>
Stefan Roesch June 6, 2022, 4:39 p.m. UTC | #3
On 6/2/22 5:38 AM, Matthew Wilcox wrote:
> On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote:
>> Change the signature of iomap_write_iter() to return an error code. In
>> case we cannot allocate a page in iomap_write_begin(), we will not retry
>> the memory alloction in iomap_write_begin().
> 
> loff_t can already represent an error code.  And it's already used like
> that.
> 
>> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
>>  		length -= status;
>>  	} while (iov_iter_count(i) && length);
>>  
>> -	return written ? written : status;
>> +	*processed = written ? written : error;
>> +	return error;
> 
> I think the change you really want is:
> 
> 	if (status == -EAGAIN)
> 		return -EAGAIN;
> 	if (written)
> 		return written;
> 	return status;
> 

I think the change needs to be:

-    return written ? written : status;
+    if (status == -EAGAIN) {
+        iov_iter_revert(i, written);
+        return -EAGAIN;
+    }
+    if (written)
+        return written;
+    return status;

>> @@ -843,12 +845,15 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
>>  		.flags		= IOMAP_WRITE,
>>  	};
>>  	int ret;
>> +	int error = 0;
>>  
>>  	if (iocb->ki_flags & IOCB_NOWAIT)
>>  		iter.flags |= IOMAP_NOWAIT;
>>  
>> -	while ((ret = iomap_iter(&iter, ops)) > 0)
>> -		iter.processed = iomap_write_iter(&iter, i);
>> +	while ((ret = iomap_iter(&iter, ops)) > 0) {
>> +		if (error != -EAGAIN)
>> +			error = iomap_write_iter(&iter, i, &iter.processed);
>> +	}
> 
> You don't need to change any of this.  Look at how iomap_iter_advance()
> works.
>
Matthew Wilcox June 6, 2022, 7:18 p.m. UTC | #4
On Mon, Jun 06, 2022 at 09:39:03AM -0700, Stefan Roesch wrote:
> 
> 
> On 6/2/22 5:38 AM, Matthew Wilcox wrote:
> > On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote:
> >> Change the signature of iomap_write_iter() to return an error code. In
> >> case we cannot allocate a page in iomap_write_begin(), we will not retry
> >> the memory alloction in iomap_write_begin().
> > 
> > loff_t can already represent an error code.  And it's already used like
> > that.
> > 
> >> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
> >>  		length -= status;
> >>  	} while (iov_iter_count(i) && length);
> >>  
> >> -	return written ? written : status;
> >> +	*processed = written ? written : error;
> >> +	return error;
> > 
> > I think the change you really want is:
> > 
> > 	if (status == -EAGAIN)
> > 		return -EAGAIN;
> > 	if (written)
> > 		return written;
> > 	return status;
> > 
> 
> I think the change needs to be:
> 
> -    return written ? written : status;
> +    if (status == -EAGAIN) {
> +        iov_iter_revert(i, written);
> +        return -EAGAIN;
> +    }
> +    if (written)
> +        return written;
> +    return status;

Ah yes, I think you're right.

Does it work to leave everything the way it is, call back into the
iomap_write_iter() after having done a short write, get the -EAGAIN at
that point and pass the already-advanced iterator to the worker thread?
I haven't looked into the details of how that works, so maybe you just
can't do that.
Stefan Roesch June 6, 2022, 7:21 p.m. UTC | #5
On 6/6/22 12:18 PM, Matthew Wilcox wrote:
> On Mon, Jun 06, 2022 at 09:39:03AM -0700, Stefan Roesch wrote:
>>
>>
>> On 6/2/22 5:38 AM, Matthew Wilcox wrote:
>>> On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote:
>>>> Change the signature of iomap_write_iter() to return an error code. In
>>>> case we cannot allocate a page in iomap_write_begin(), we will not retry
>>>> the memory alloction in iomap_write_begin().
>>>
>>> loff_t can already represent an error code.  And it's already used like
>>> that.
>>>
>>>> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
>>>>  		length -= status;
>>>>  	} while (iov_iter_count(i) && length);
>>>>  
>>>> -	return written ? written : status;
>>>> +	*processed = written ? written : error;
>>>> +	return error;
>>>
>>> I think the change you really want is:
>>>
>>> 	if (status == -EAGAIN)
>>> 		return -EAGAIN;
>>> 	if (written)
>>> 		return written;
>>> 	return status;
>>>
>>
>> I think the change needs to be:
>>
>> -    return written ? written : status;
>> +    if (status == -EAGAIN) {
>> +        iov_iter_revert(i, written);
>> +        return -EAGAIN;
>> +    }
>> +    if (written)
>> +        return written;
>> +    return status;
> 
> Ah yes, I think you're right.
> 
> Does it work to leave everything the way it is, call back into the
> iomap_write_iter() after having done a short write, get the -EAGAIN at
> that point and pass the already-advanced iterator to the worker thread?
> I haven't looked into the details of how that works, so maybe you just
> can't do that.

With the above change, short writes are handled correctly.
Matthew Wilcox June 6, 2022, 7:25 p.m. UTC | #6
On Mon, Jun 06, 2022 at 12:21:28PM -0700, Stefan Roesch wrote:
> 
> 
> On 6/6/22 12:18 PM, Matthew Wilcox wrote:
> > On Mon, Jun 06, 2022 at 09:39:03AM -0700, Stefan Roesch wrote:
> >>
> >>
> >> On 6/2/22 5:38 AM, Matthew Wilcox wrote:
> >>> On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote:
> >>>> Change the signature of iomap_write_iter() to return an error code. In
> >>>> case we cannot allocate a page in iomap_write_begin(), we will not retry
> >>>> the memory alloction in iomap_write_begin().
> >>>
> >>> loff_t can already represent an error code.  And it's already used like
> >>> that.
> >>>
> >>>> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
> >>>>  		length -= status;
> >>>>  	} while (iov_iter_count(i) && length);
> >>>>  
> >>>> -	return written ? written : status;
> >>>> +	*processed = written ? written : error;
> >>>> +	return error;
> >>>
> >>> I think the change you really want is:
> >>>
> >>> 	if (status == -EAGAIN)
> >>> 		return -EAGAIN;
> >>> 	if (written)
> >>> 		return written;
> >>> 	return status;
> >>>
> >>
> >> I think the change needs to be:
> >>
> >> -    return written ? written : status;
> >> +    if (status == -EAGAIN) {
> >> +        iov_iter_revert(i, written);
> >> +        return -EAGAIN;
> >> +    }
> >> +    if (written)
> >> +        return written;
> >> +    return status;
> > 
> > Ah yes, I think you're right.
> > 
> > Does it work to leave everything the way it is, call back into the
> > iomap_write_iter() after having done a short write, get the -EAGAIN at
> > that point and pass the already-advanced iterator to the worker thread?
> > I haven't looked into the details of how that works, so maybe you just
> > can't do that.
> 
> With the above change, short writes are handled correctly.

Are they though?  What about a write that crosses an extent boundary?
iomap_write_iter() gets called once per extent and I have a feeling that
you really need to revert the entire write, rather than just the part
of the write that was to that extent.

Anyway, my question was whether we need to revert or whether the worker
thread can continue from where we left off.
Stefan Roesch June 6, 2022, 7:28 p.m. UTC | #7
On 6/6/22 12:25 PM, Matthew Wilcox wrote:
> On Mon, Jun 06, 2022 at 12:21:28PM -0700, Stefan Roesch wrote:
>>
>>
>> On 6/6/22 12:18 PM, Matthew Wilcox wrote:
>>> On Mon, Jun 06, 2022 at 09:39:03AM -0700, Stefan Roesch wrote:
>>>>
>>>>
>>>> On 6/2/22 5:38 AM, Matthew Wilcox wrote:
>>>>> On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote:
>>>>>> Change the signature of iomap_write_iter() to return an error code. In
>>>>>> case we cannot allocate a page in iomap_write_begin(), we will not retry
>>>>>> the memory alloction in iomap_write_begin().
>>>>>
>>>>> loff_t can already represent an error code.  And it's already used like
>>>>> that.
>>>>>
>>>>>> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
>>>>>>  		length -= status;
>>>>>>  	} while (iov_iter_count(i) && length);
>>>>>>  
>>>>>> -	return written ? written : status;
>>>>>> +	*processed = written ? written : error;
>>>>>> +	return error;
>>>>>
>>>>> I think the change you really want is:
>>>>>
>>>>> 	if (status == -EAGAIN)
>>>>> 		return -EAGAIN;
>>>>> 	if (written)
>>>>> 		return written;
>>>>> 	return status;
>>>>>
>>>>
>>>> I think the change needs to be:
>>>>
>>>> -    return written ? written : status;
>>>> +    if (status == -EAGAIN) {
>>>> +        iov_iter_revert(i, written);
>>>> +        return -EAGAIN;
>>>> +    }
>>>> +    if (written)
>>>> +        return written;
>>>> +    return status;
>>>
>>> Ah yes, I think you're right.
>>>
>>> Does it work to leave everything the way it is, call back into the
>>> iomap_write_iter() after having done a short write, get the -EAGAIN at
>>> that point and pass the already-advanced iterator to the worker thread?
>>> I haven't looked into the details of how that works, so maybe you just
>>> can't do that.
>>
>> With the above change, short writes are handled correctly.
> 
> Are they though?  What about a write that crosses an extent boundary?
> iomap_write_iter() gets called once per extent and I have a feeling that
> you really need to revert the entire write, rather than just the part
> of the write that was to that extent.
> 
> Anyway, my question was whether we need to revert or whether the worker
> thread can continue from where we left off.

Without iov_iter_revert() fsx fails with errors in short writes and also my test
program which issues short writes fails.
Matthew Wilcox June 6, 2022, 8:30 p.m. UTC | #8
On Mon, Jun 06, 2022 at 12:28:04PM -0700, Stefan Roesch wrote:
> On 6/6/22 12:25 PM, Matthew Wilcox wrote:
> > On Mon, Jun 06, 2022 at 12:21:28PM -0700, Stefan Roesch wrote:
> >> On 6/6/22 12:18 PM, Matthew Wilcox wrote:
> >>> On Mon, Jun 06, 2022 at 09:39:03AM -0700, Stefan Roesch wrote:
> >>>> On 6/2/22 5:38 AM, Matthew Wilcox wrote:
> >>>>> On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote:
> >>>>>> Change the signature of iomap_write_iter() to return an error code. In
> >>>>>> case we cannot allocate a page in iomap_write_begin(), we will not retry
> >>>>>> the memory alloction in iomap_write_begin().
> >>>>>
> >>>>> loff_t can already represent an error code.  And it's already used like
> >>>>> that.
> >>>>>
> >>>>>> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
> >>>>>>  		length -= status;
> >>>>>>  	} while (iov_iter_count(i) && length);
> >>>>>>  
> >>>>>> -	return written ? written : status;
> >>>>>> +	*processed = written ? written : error;
> >>>>>> +	return error;
> >>>>>
> >>>>> I think the change you really want is:
> >>>>>
> >>>>> 	if (status == -EAGAIN)
> >>>>> 		return -EAGAIN;
> >>>>> 	if (written)
> >>>>> 		return written;
> >>>>> 	return status;
> >>>>>
> >>>>
> >>>> I think the change needs to be:
> >>>>
> >>>> -    return written ? written : status;
> >>>> +    if (status == -EAGAIN) {
> >>>> +        iov_iter_revert(i, written);
> >>>> +        return -EAGAIN;
> >>>> +    }
> >>>> +    if (written)
> >>>> +        return written;
> >>>> +    return status;
> >>>
> >>> Ah yes, I think you're right.
> >>>
> >>> Does it work to leave everything the way it is, call back into the
> >>> iomap_write_iter() after having done a short write, get the -EAGAIN at
> >>> that point and pass the already-advanced iterator to the worker thread?
> >>> I haven't looked into the details of how that works, so maybe you just
> >>> can't do that.
> >>
> >> With the above change, short writes are handled correctly.
> > 
> > Are they though?  What about a write that crosses an extent boundary?
> > iomap_write_iter() gets called once per extent and I have a feeling that
> > you really need to revert the entire write, rather than just the part
> > of the write that was to that extent.
> > 
> > Anyway, my question was whether we need to revert or whether the worker
> > thread can continue from where we left off.
> 
> Without iov_iter_revert() fsx fails with errors in short writes and also my test
> program which issues short writes fails.

Does your test program include starting in one extent, completing the
portion of the write which is in that extent successfully, and having
the portion of the write which is in the second extent be short?
Stefan Roesch June 6, 2022, 8:34 p.m. UTC | #9
On 6/6/22 1:30 PM, Matthew Wilcox wrote:
> On Mon, Jun 06, 2022 at 12:28:04PM -0700, Stefan Roesch wrote:
>> On 6/6/22 12:25 PM, Matthew Wilcox wrote:
>>> On Mon, Jun 06, 2022 at 12:21:28PM -0700, Stefan Roesch wrote:
>>>> On 6/6/22 12:18 PM, Matthew Wilcox wrote:
>>>>> On Mon, Jun 06, 2022 at 09:39:03AM -0700, Stefan Roesch wrote:
>>>>>> On 6/2/22 5:38 AM, Matthew Wilcox wrote:
>>>>>>> On Wed, Jun 01, 2022 at 02:01:32PM -0700, Stefan Roesch wrote:
>>>>>>>> Change the signature of iomap_write_iter() to return an error code. In
>>>>>>>> case we cannot allocate a page in iomap_write_begin(), we will not retry
>>>>>>>> the memory alloction in iomap_write_begin().
>>>>>>>
>>>>>>> loff_t can already represent an error code.  And it's already used like
>>>>>>> that.
>>>>>>>
>>>>>>>> @@ -829,7 +830,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
>>>>>>>>  		length -= status;
>>>>>>>>  	} while (iov_iter_count(i) && length);
>>>>>>>>  
>>>>>>>> -	return written ? written : status;
>>>>>>>> +	*processed = written ? written : error;
>>>>>>>> +	return error;
>>>>>>>
>>>>>>> I think the change you really want is:
>>>>>>>
>>>>>>> 	if (status == -EAGAIN)
>>>>>>> 		return -EAGAIN;
>>>>>>> 	if (written)
>>>>>>> 		return written;
>>>>>>> 	return status;
>>>>>>>
>>>>>>
>>>>>> I think the change needs to be:
>>>>>>
>>>>>> -    return written ? written : status;
>>>>>> +    if (status == -EAGAIN) {
>>>>>> +        iov_iter_revert(i, written);
>>>>>> +        return -EAGAIN;
>>>>>> +    }
>>>>>> +    if (written)
>>>>>> +        return written;
>>>>>> +    return status;
>>>>>
>>>>> Ah yes, I think you're right.
>>>>>
>>>>> Does it work to leave everything the way it is, call back into the
>>>>> iomap_write_iter() after having done a short write, get the -EAGAIN at
>>>>> that point and pass the already-advanced iterator to the worker thread?
>>>>> I haven't looked into the details of how that works, so maybe you just
>>>>> can't do that.
>>>>
>>>> With the above change, short writes are handled correctly.
>>>
>>> Are they though?  What about a write that crosses an extent boundary?
>>> iomap_write_iter() gets called once per extent and I have a feeling that
>>> you really need to revert the entire write, rather than just the part
>>> of the write that was to that extent.
>>>
>>> Anyway, my question was whether we need to revert or whether the worker
>>> thread can continue from where we left off.
>>
>> Without iov_iter_revert() fsx fails with errors in short writes and also my test
>> program which issues short writes fails.
> 
> Does your test program include starting in one extent, completing the
> portion of the write which is in that extent successfully, and having
> the portion of the write which is in the second extent be short?

I do a 3k write, where the first 2k write is serviced from one extent and
the last 1k is served from another extent.

Does that answer the question?
diff mbox series

Patch

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index b06a5c24a4db..e96ab9a3072c 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -754,12 +754,13 @@  static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
 	return ret;
 }
 
-static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
+static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i, loff_t *processed)
 {
 	loff_t length = iomap_length(iter);
 	loff_t pos = iter->pos;
 	ssize_t written = 0;
 	long status = 0;
+	int error = 0;
 	struct address_space *mapping = iter->inode->i_mapping;
 	unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
 
@@ -774,9 +775,9 @@  static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 		bytes = min_t(unsigned long, PAGE_SIZE - offset,
 						iov_iter_count(i));
 again:
-		status = balance_dirty_pages_ratelimited_flags(mapping,
+		error = balance_dirty_pages_ratelimited_flags(mapping,
 							       bdp_flags);
-		if (unlikely(status))
+		if (unlikely(error))
 			break;
 
 		if (bytes > length)
@@ -793,12 +794,12 @@  static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 		 * faulting the user page.
 		 */
 		if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) {
-			status = -EFAULT;
+			error = -EFAULT;
 			break;
 		}
 
-		status = iomap_write_begin(iter, pos, bytes, &folio);
-		if (unlikely(status))
+		error = iomap_write_begin(iter, pos, bytes, &folio);
+		if (unlikely(error))
 			break;
 
 		page = folio_file_page(folio, pos >> PAGE_SHIFT);
@@ -829,7 +830,8 @@  static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 		length -= status;
 	} while (iov_iter_count(i) && length);
 
-	return written ? written : status;
+	*processed = written ? written : error;
+	return error;
 }
 
 ssize_t
@@ -843,12 +845,15 @@  iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
 		.flags		= IOMAP_WRITE,
 	};
 	int ret;
+	int error = 0;
 
 	if (iocb->ki_flags & IOCB_NOWAIT)
 		iter.flags |= IOMAP_NOWAIT;
 
-	while ((ret = iomap_iter(&iter, ops)) > 0)
-		iter.processed = iomap_write_iter(&iter, i);
+	while ((ret = iomap_iter(&iter, ops)) > 0) {
+		if (error != -EAGAIN)
+			error = iomap_write_iter(&iter, i, &iter.processed);
+	}
 	if (iter.pos == iocb->ki_pos)
 		return ret;
 	return iter.pos - iocb->ki_pos;