diff mbox series

[v2] io_uring/net: ensure async prep handlers always initialize ->done_io

Message ID 472ec1d4-f928-4a52-8a93-7ccc1af4f362@kernel.dk (mailing list archive)
State New
Headers show
Series [v2] io_uring/net: ensure async prep handlers always initialize ->done_io | expand

Commit Message

Jens Axboe March 15, 2024, 10:48 p.m. UTC
If we get a request with IOSQE_ASYNC set, then we first run the prep
async handlers. But if we then fail setting it up and want to post
a CQE with -EINVAL, we use ->done_io. This was previously guarded with
REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
potential errors, but we need to cover the async setup too.

Fixes: 9817ad85899f ("io_uring/net: remove dependency on REQ_F_PARTIAL_IO for sr->done_io")
Reported-by: syzbot+f8e9a371388aa62ecab4@syzkaller.appspotmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

V2: missed a refresh, and hence v1 had io_send_prep_async() overwriting
    sr->done_io if we had already set it up.

Comments

Pavel Begunkov March 15, 2024, 11:09 p.m. UTC | #1
On 3/15/24 22:48, Jens Axboe wrote:
> If we get a request with IOSQE_ASYNC set, then we first run the prep
> async handlers. But if we then fail setting it up and want to post
> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
> potential errors, but we need to cover the async setup too.

You can hit io_req_defer_failed() { opdef->fail(); }
off of an early submission failure path where def->prep has
not yet been called, so I don't think the patch will fix the
problem.

->fail() handlers are fragile, maybe we should skip them
if def->prep() wasn't called. Not even compile tested:


diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 846d67a9c72e..56eed1490571 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -993,7 +993,7 @@ void io_req_defer_failed(struct io_kiocb *req, s32 res)
  
  	req_set_fail(req);
  	io_req_set_res(req, res, io_put_kbuf(req, IO_URING_F_UNLOCKED));
-	if (def->fail)
+	if ((req->flags & REQ_F_EARLY_FAIL) && def->fail)
  		def->fail(req);
  	io_req_complete_defer(req);
  }
@@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
  		}
  		req->flags |= REQ_F_CREDS;
  	}
-
-	return def->prep(req, sqe);
+	return 0;
  }
  
  static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
@@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
  	int ret;
  
  	ret = io_init_req(ctx, req, sqe);
-	if (unlikely(ret))
+	if (unlikely(ret)) {
+fail:
+		req->flags |= REQ_F_EARLY_FAIL;
  		return io_submit_fail_init(sqe, req, ret);
+	}
+
+	ret = def->prep(req, sqe);
+	if (unlikely(ret))
+		goto fail;
  
  	trace_io_uring_submit_req(req);
Pavel Begunkov March 15, 2024, 11:13 p.m. UTC | #2
On 3/15/24 23:09, Pavel Begunkov wrote:
> On 3/15/24 22:48, Jens Axboe wrote:
>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>> async handlers. But if we then fail setting it up and want to post
>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>> potential errors, but we need to cover the async setup too.
> 
> You can hit io_req_defer_failed() { opdef->fail(); }
> off of an early submission failure path where def->prep has
> not yet been called, I don't think the patch will fix the
> problem.
> 
> ->fail() handlers are fragile, maybe we should skip them
> if def->prep() wasn't called. Not even compile tested:
> 
> 
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 846d67a9c72e..56eed1490571 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -993,7 +993,7 @@ void io_req_defer_failed(struct io_kiocb *req, s32 res)
> 
>       req_set_fail(req);
>       io_req_set_res(req, res, io_put_kbuf(req, IO_URING_F_UNLOCKED));
> -    if (def->fail)
> +    if ((req->flags & REQ_F_EARLY_FAIL) && def->fail)

it rather should've been

!(req->flags & REQ_F_EARLY_FAIL)


>           def->fail(req);
>       io_req_complete_defer(req);
>   }
> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>           }
>           req->flags |= REQ_F_CREDS;
>       }
> -
> -    return def->prep(req, sqe);
> +    return 0;
>   }
> 
>   static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>       int ret;
> 
>       ret = io_init_req(ctx, req, sqe);
> -    if (unlikely(ret))
> +    if (unlikely(ret)) {
> +fail:
> +        req->flags |= REQ_F_EARLY_FAIL;
>           return io_submit_fail_init(sqe, req, ret);
> +    }
> +
> +    ret = def->prep(req, sqe);
> +    if (unlikely(ret))
> +        goto fail;
> 
>       trace_io_uring_submit_req(req);
>
Jens Axboe March 15, 2024, 11:13 p.m. UTC | #3
On 3/15/24 5:09 PM, Pavel Begunkov wrote:
> On 3/15/24 22:48, Jens Axboe wrote:
>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>> async handlers. But if we then fail setting it up and want to post
>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>> potential errors, but we need to cover the async setup too.
> 
> You can hit io_req_defer_failed() { opdef->fail(); }
> off of an early submission failure path where def->prep has
> not yet been called, I don't think the patch will fix the
> problem.
> 
> ->fail() handlers are fragile, maybe we should skip them
> if def->prep() wasn't called. Not even compile tested:

Yeah they are a mess honestly. Maybe we're better off just flagging it
like in your below patch, and avoid needing opcode handling for this.
Was going to suggest having a PREP_DONE flag, but it's better to have a
FAIL_EARLY and avoid needing to fiddle with it in the normal path.
Pavel Begunkov March 15, 2024, 11:19 p.m. UTC | #4
On 3/15/24 23:13, Pavel Begunkov wrote:
> On 3/15/24 23:09, Pavel Begunkov wrote:
>> On 3/15/24 22:48, Jens Axboe wrote:
>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>> async handlers. But if we then fail setting it up and want to post
>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>> potential errors, but we need to cover the async setup too.
>>
>> You can hit io_req_defer_failed() { opdef->fail(); }
>> off of an early submission failure path where def->prep has
>> not yet been called, I don't think the patch will fix the
>> problem.
>>
>> ->fail() handlers are fragile, maybe we should skip them
>> if def->prep() wasn't called. Not even compile tested:
>>
>>
>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>> index 846d67a9c72e..56eed1490571 100644
>> --- a/io_uring/io_uring.c
>> +++ b/io_uring/io_uring.c
[...]
>>           def->fail(req);
>>       io_req_complete_defer(req);
>>   }
>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>           }
>>           req->flags |= REQ_F_CREDS;
>>       }
>> -
>> -    return def->prep(req, sqe);
>> +    return 0;
>>   }
>>
>>   static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>       int ret;
>>
>>       ret = io_init_req(ctx, req, sqe);
>> -    if (unlikely(ret))
>> +    if (unlikely(ret)) {
>> +fail:

Obviously the diff is crap, but it's still bugging me enough to write
that the label should've been one line below, otherwise we'd
flag after ->prep as well.


>> +        req->flags |= REQ_F_EARLY_FAIL;
>>           return io_submit_fail_init(sqe, req, ret);
>> +    }
>> +
>> +    ret = def->prep(req, sqe);
>> +    if (unlikely(ret))
>> +        goto fail;
>>
>>       trace_io_uring_submit_req(req);
>>
>
Jens Axboe March 15, 2024, 11:25 p.m. UTC | #5
On 3/15/24 5:19 PM, Pavel Begunkov wrote:
> On 3/15/24 23:13, Pavel Begunkov wrote:
>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>> async handlers. But if we then fail setting it up and want to post
>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>> potential errors, but we need to cover the async setup too.
>>>
>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>> off of an early submission failure path where def->prep has
>>> not yet been called, I don't think the patch will fix the
>>> problem.
>>>
>>> ->fail() handlers are fragile, maybe we should skip them
>>> if def->prep() wasn't called. Not even compile tested:
>>>
>>>
>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>> index 846d67a9c72e..56eed1490571 100644
>>> --- a/io_uring/io_uring.c
>>> +++ b/io_uring/io_uring.c
> [...]
>>>           def->fail(req);
>>>       io_req_complete_defer(req);
>>>   }
>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>           }
>>>           req->flags |= REQ_F_CREDS;
>>>       }
>>> -
>>> -    return def->prep(req, sqe);
>>> +    return 0;
>>>   }
>>>
>>>   static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>       int ret;
>>>
>>>       ret = io_init_req(ctx, req, sqe);
>>> -    if (unlikely(ret))
>>> +    if (unlikely(ret)) {
>>> +fail:
> 
> Obvious the diff is crap, but still bugging me enough to write
> that the label should've been one line below, otherwise we'd
> flag after ->prep as well.

It certainly needs testing :-)

We can go either way - patch up the net thing, or do a proper EARLY_FAIL
and hopefully not have to worry about it again. Do you want to clean it
up, test it, and send it out?
Pavel Begunkov March 15, 2024, 11:28 p.m. UTC | #6
On 3/15/24 23:25, Jens Axboe wrote:
> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>> async handlers. But if we then fail setting it up and want to post
>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>> potential errors, but we need to cover the async setup too.
>>>>
>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>> off of an early submission failure path where def->prep has
>>>> not yet been called, I don't think the patch will fix the
>>>> problem.
>>>>
>>>> ->fail() handlers are fragile, maybe we should skip them
>>>> if def->prep() wasn't called. Not even compile tested:
>>>>
>>>>
>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>> index 846d67a9c72e..56eed1490571 100644
>>>> --- a/io_uring/io_uring.c
>>>> +++ b/io_uring/io_uring.c
>> [...]
>>>>            def->fail(req);
>>>>        io_req_complete_defer(req);
>>>>    }
>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>            }
>>>>            req->flags |= REQ_F_CREDS;
>>>>        }
>>>> -
>>>> -    return def->prep(req, sqe);
>>>> +    return 0;
>>>>    }
>>>>
>>>>    static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>        int ret;
>>>>
>>>>        ret = io_init_req(ctx, req, sqe);
>>>> -    if (unlikely(ret))
>>>> +    if (unlikely(ret)) {
>>>> +fail:
>>
>> Obvious the diff is crap, but still bugging me enough to write
>> that the label should've been one line below, otherwise we'd
>> flag after ->prep as well.
> 
> It certainly needs testing :-)
> 
> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
> and hopefully not have to worry about it again. Do you want to clean it
> up, test it, and send it out?

I'd rather leave it to you, I suspect it wouldn't fix the syzbot
report w/o fiddling with done_io as in your patch.
Jens Axboe March 15, 2024, 11:53 p.m. UTC | #7
On 3/15/24 5:28 PM, Pavel Begunkov wrote:
> On 3/15/24 23:25, Jens Axboe wrote:
>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>> potential errors, but we need to cover the async setup too.
>>>>>
>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>> off of an early submission failure path where def->prep has
>>>>> not yet been called, I don't think the patch will fix the
>>>>> problem.
>>>>>
>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>
>>>>>
>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>> --- a/io_uring/io_uring.c
>>>>> +++ b/io_uring/io_uring.c
>>> [...]
>>>>>            def->fail(req);
>>>>>        io_req_complete_defer(req);
>>>>>    }
>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>            }
>>>>>            req->flags |= REQ_F_CREDS;
>>>>>        }
>>>>> -
>>>>> -    return def->prep(req, sqe);
>>>>> +    return 0;
>>>>>    }
>>>>>
>>>>>    static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>        int ret;
>>>>>
>>>>>        ret = io_init_req(ctx, req, sqe);
>>>>> -    if (unlikely(ret))
>>>>> +    if (unlikely(ret)) {
>>>>> +fail:
>>>
>>> Obvious the diff is crap, but still bugging me enough to write
>>> that the label should've been one line below, otherwise we'd
>>> flag after ->prep as well.
>>
>> It certainly needs testing :-)
>>
>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>> and hopefully not have to worry about it again. Do you want to clean it
>> up, test it, and send it out?
> 
> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
> report w/o fiddling with done_io as in your patch.

I'd probably be in favor of just doing the net one for now, ensuring
it's OK. Then we can do a generic version for 6.10.
Jens Axboe March 16, 2024, 4:14 p.m. UTC | #8
On 3/15/24 5:28 PM, Pavel Begunkov wrote:
> On 3/15/24 23:25, Jens Axboe wrote:
>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>> potential errors, but we need to cover the async setup too.
>>>>>
>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>> off of an early submission failure path where def->prep has
>>>>> not yet been called, I don't think the patch will fix the
>>>>> problem.
>>>>>
>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>
>>>>>
>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>> --- a/io_uring/io_uring.c
>>>>> +++ b/io_uring/io_uring.c
>>> [...]
>>>>>            def->fail(req);
>>>>>        io_req_complete_defer(req);
>>>>>    }
>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>            }
>>>>>            req->flags |= REQ_F_CREDS;
>>>>>        }
>>>>> -
>>>>> -    return def->prep(req, sqe);
>>>>> +    return 0;
>>>>>    }
>>>>>
>>>>>    static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>        int ret;
>>>>>
>>>>>        ret = io_init_req(ctx, req, sqe);
>>>>> -    if (unlikely(ret))
>>>>> +    if (unlikely(ret)) {
>>>>> +fail:
>>>
>>> Obvious the diff is crap, but still bugging me enough to write
>>> that the label should've been one line below, otherwise we'd
>>> flag after ->prep as well.
>>
>> It certainly needs testing :-)
>>
>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>> and hopefully not have to worry about it again. Do you want to clean it
>> up, test it, and send it out?
> 
> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
> report w/o fiddling with done_io as in your patch.

I gave this a shot, but some fail handlers do want to get called. But
they can't use sr->done_io at that point. I'll ponder this a bit and see
what the best generic solution is.
Pavel Begunkov March 16, 2024, 4:28 p.m. UTC | #9
On 3/16/24 16:14, Jens Axboe wrote:
> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>> On 3/15/24 23:25, Jens Axboe wrote:
>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>
>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>> off of an early submission failure path where def->prep has
>>>>>> not yet been called, I don't think the patch will fix the
>>>>>> problem.
>>>>>>
>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>
>>>>>>
>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>> --- a/io_uring/io_uring.c
>>>>>> +++ b/io_uring/io_uring.c
>>>> [...]
>>>>>>             def->fail(req);
>>>>>>         io_req_complete_defer(req);
>>>>>>     }
>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>             }
>>>>>>             req->flags |= REQ_F_CREDS;
>>>>>>         }
>>>>>> -
>>>>>> -    return def->prep(req, sqe);
>>>>>> +    return 0;
>>>>>>     }
>>>>>>
>>>>>>     static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>         int ret;
>>>>>>
>>>>>>         ret = io_init_req(ctx, req, sqe);
>>>>>> -    if (unlikely(ret))
>>>>>> +    if (unlikely(ret)) {
>>>>>> +fail:
>>>>
>>>> Obvious the diff is crap, but still bugging me enough to write
>>>> that the label should've been one line below, otherwise we'd
>>>> flag after ->prep as well.
>>>
>>> It certainly needs testing :-)
>>>
>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>> and hopefully not have to worry about it again. Do you want to clean it
>>> up, test it, and send it out?
>>
>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>> report w/o fiddling with done_io as in your patch.
> 
> I gave this a shot, but some fail handlers do want to get called. But

Which one and/or which part of it?

> they can't use sr->done_io at that point. I'll ponder this a bit and see
> what the best generic solution is.
Jens Axboe March 16, 2024, 4:31 p.m. UTC | #10
On 3/16/24 10:28 AM, Pavel Begunkov wrote:
> On 3/16/24 16:14, Jens Axboe wrote:
>> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>>> On 3/15/24 23:25, Jens Axboe wrote:
>>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>>
>>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>>> off of an early submission failure path where def->prep has
>>>>>>> not yet been called, I don't think the patch will fix the
>>>>>>> problem.
>>>>>>>
>>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>>
>>>>>>>
>>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>>> --- a/io_uring/io_uring.c
>>>>>>> +++ b/io_uring/io_uring.c
>>>>> [...]
>>>>>>>             def->fail(req);
>>>>>>>         io_req_complete_defer(req);
>>>>>>>     }
>>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>             }
>>>>>>>             req->flags |= REQ_F_CREDS;
>>>>>>>         }
>>>>>>> -
>>>>>>> -    return def->prep(req, sqe);
>>>>>>> +    return 0;
>>>>>>>     }
>>>>>>>
>>>>>>>     static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>         int ret;
>>>>>>>
>>>>>>>         ret = io_init_req(ctx, req, sqe);
>>>>>>> -    if (unlikely(ret))
>>>>>>> +    if (unlikely(ret)) {
>>>>>>> +fail:
>>>>>
>>>>> Obvious the diff is crap, but still bugging me enough to write
>>>>> that the label should've been one line below, otherwise we'd
>>>>> flag after ->prep as well.
>>>>
>>>> It certainly needs testing :-)
>>>>
>>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>>> and hopefully not have to worry about it again. Do you want to clean it
>>>> up, test it, and send it out?
>>>
>>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>>> report w/o fiddling with done_io as in your patch.
>>
>> I gave this a shot, but some fail handlers do want to get called. But
> 
> Which one and/or which part of it?

send zc

I think the sanest is:

1) Opcode handlers should always initialize whatever they need before
   failure
2) If we fail before ->prep, don't call ->fail

Yes that doesn't cover the case where opcode handlers do stupid things
like use opcode members in fail if they fail the prep, but that should
be the smallest part.
Pavel Begunkov March 16, 2024, 4:32 p.m. UTC | #11
On 3/16/24 16:31, Jens Axboe wrote:
> On 3/16/24 10:28 AM, Pavel Begunkov wrote:
>> On 3/16/24 16:14, Jens Axboe wrote:
>>> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>>>> On 3/15/24 23:25, Jens Axboe wrote:
>>>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>>>
>>>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>>>> off of an early submission failure path where def->prep has
>>>>>>>> not yet been called, I don't think the patch will fix the
>>>>>>>> problem.
>>>>>>>>
>>>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>>>
>>>>>>>>
>>>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>>>> --- a/io_uring/io_uring.c
>>>>>>>> +++ b/io_uring/io_uring.c
>>>>>> [...]
>>>>>>>>              def->fail(req);
>>>>>>>>          io_req_complete_defer(req);
>>>>>>>>      }
>>>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>              }
>>>>>>>>              req->flags |= REQ_F_CREDS;
>>>>>>>>          }
>>>>>>>> -
>>>>>>>> -    return def->prep(req, sqe);
>>>>>>>> +    return 0;
>>>>>>>>      }
>>>>>>>>
>>>>>>>>      static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>          int ret;
>>>>>>>>
>>>>>>>>          ret = io_init_req(ctx, req, sqe);
>>>>>>>> -    if (unlikely(ret))
>>>>>>>> +    if (unlikely(ret)) {
>>>>>>>> +fail:
>>>>>>
>>>>>> Obvious the diff is crap, but still bugging me enough to write
>>>>>> that the label should've been one line below, otherwise we'd
>>>>>> flag after ->prep as well.
>>>>>
>>>>> It certainly needs testing :-)
>>>>>
>>>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>>>> and hopefully not have to worry about it again. Do you want to clean it
>>>>> up, test it, and send it out?
>>>>
>>>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>>>> report w/o fiddling with done_io as in your patch.
>>>
>>> I gave this a shot, but some fail handlers do want to get called. But
>>
>> Which one and/or which part of it?
> 
> send zc

I don't think so. If prep wasn't called there wouldn't be
a notif allocated, and so no F_MORE required. If you look
at the code path it's under REQ_F_NEED_CLEANUP, which is only
set by opcode handlers.


> 
> I think the sanest is:
> 
> 1) Opcode handlers should always initialize whatever they need before
>     failure
> 2) If we fail before ->prep, don't call ->fail
> 
> Yes that doesn't cover the case where opcode handlers do stupid things
> like use opcode members in fail if they fail the prep, but that should
> be the smallest part.
>
Pavel Begunkov March 16, 2024, 4:34 p.m. UTC | #12
On 3/16/24 16:32, Pavel Begunkov wrote:
> On 3/16/24 16:31, Jens Axboe wrote:
>> On 3/16/24 10:28 AM, Pavel Begunkov wrote:
>>> On 3/16/24 16:14, Jens Axboe wrote:
>>>> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>>>>> On 3/15/24 23:25, Jens Axboe wrote:
>>>>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>>>>
>>>>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>>>>> off of an early submission failure path where def->prep has
>>>>>>>>> not yet been called, I don't think the patch will fix the
>>>>>>>>> problem.
>>>>>>>>>
>>>>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>>>>> --- a/io_uring/io_uring.c
>>>>>>>>> +++ b/io_uring/io_uring.c
>>>>>>> [...]
>>>>>>>>>              def->fail(req);
>>>>>>>>>          io_req_complete_defer(req);
>>>>>>>>>      }
>>>>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>              }
>>>>>>>>>              req->flags |= REQ_F_CREDS;
>>>>>>>>>          }
>>>>>>>>> -
>>>>>>>>> -    return def->prep(req, sqe);
>>>>>>>>> +    return 0;
>>>>>>>>>      }
>>>>>>>>>
>>>>>>>>>      static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>          int ret;
>>>>>>>>>
>>>>>>>>>          ret = io_init_req(ctx, req, sqe);
>>>>>>>>> -    if (unlikely(ret))
>>>>>>>>> +    if (unlikely(ret)) {
>>>>>>>>> +fail:
>>>>>>>
>>>>>>> Obvious the diff is crap, but still bugging me enough to write
>>>>>>> that the label should've been one line below, otherwise we'd
>>>>>>> flag after ->prep as well.
>>>>>>
>>>>>> It certainly needs testing :-)
>>>>>>
>>>>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>>>>> and hopefully not have to worry about it again. Do you want to clean it
>>>>>> up, test it, and send it out?
>>>>>
>>>>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>>>>> report w/o fiddling with done_io as in your patch.
>>>>
>>>> I gave this a shot, but some fail handlers do want to get called. But

Maybe I didn't get you right. I assumed you were saying "the zc's ->fail
wants to get called even if prep didn't happen"?


>>> Which one and/or which part of it?
>>
>> send zc
> 
> I don't think so. If prep wasn't called there wouldn't be
> a notif allocated, and so no F_MORE required. If you take
> at the code path it's under REQ_F_NEED_CLEANUP, which is only
> set by opcode handlers
> 
> 
>>
>> I think the sanest is:
>>
>> 1) Opcode handlers should always initialize whatever they need before
>>     failure

Yes

>> 2) If we fail before ->prep, don't call ->fail

That's what I suggested

>> Yes that doesn't cover the case where opcode handlers do stupid things
>> like use opcode members in fail if they fail the prep, but that should
>> be the smallest part.
>>
>
Jens Axboe March 16, 2024, 4:36 p.m. UTC | #13
On 3/16/24 10:32 AM, Pavel Begunkov wrote:
> On 3/16/24 16:31, Jens Axboe wrote:
>> On 3/16/24 10:28 AM, Pavel Begunkov wrote:
>>> On 3/16/24 16:14, Jens Axboe wrote:
>>>> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>>>>> On 3/15/24 23:25, Jens Axboe wrote:
>>>>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>>>>
>>>>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>>>>> off of an early submission failure path where def->prep has
>>>>>>>>> not yet been called, I don't think the patch will fix the
>>>>>>>>> problem.
>>>>>>>>>
>>>>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>>>>> --- a/io_uring/io_uring.c
>>>>>>>>> +++ b/io_uring/io_uring.c
>>>>>>> [...]
>>>>>>>>>              def->fail(req);
>>>>>>>>>          io_req_complete_defer(req);
>>>>>>>>>      }
>>>>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>              }
>>>>>>>>>              req->flags |= REQ_F_CREDS;
>>>>>>>>>          }
>>>>>>>>> -
>>>>>>>>> -    return def->prep(req, sqe);
>>>>>>>>> +    return 0;
>>>>>>>>>      }
>>>>>>>>>
>>>>>>>>>      static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>          int ret;
>>>>>>>>>
>>>>>>>>>          ret = io_init_req(ctx, req, sqe);
>>>>>>>>> -    if (unlikely(ret))
>>>>>>>>> +    if (unlikely(ret)) {
>>>>>>>>> +fail:
>>>>>>>
>>>>>>> Obvious the diff is crap, but still bugging me enough to write
>>>>>>> that the label should've been one line below, otherwise we'd
>>>>>>> flag after ->prep as well.
>>>>>>
>>>>>> It certainly needs testing :-)
>>>>>>
>>>>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>>>>> and hopefully not have to worry about it again. Do you want to clean it
>>>>>> up, test it, and send it out?
>>>>>
>>>>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>>>>> report w/o fiddling with done_io as in your patch.
>>>>
>>>> I gave this a shot, but some fail handlers do want to get called. But
>>>
>>> Which one and/or which part of it?
>>
>> send zc
> 
> I don't think so. If prep wasn't called there wouldn't be
> a notif allocated, and so no F_MORE required. If you take
> at the code path it's under REQ_F_NEED_CLEANUP, which is only
> set by opcode handlers

I'm not making this up, your test case will literally fail as it doesn't
get to flag MORE for that case. FWIW, this was done with EARLY_FAIL
being flagged, and failing if we fail during or before prep.
Pavel Begunkov March 16, 2024, 4:36 p.m. UTC | #14
On 3/16/24 16:36, Jens Axboe wrote:
> On 3/16/24 10:32 AM, Pavel Begunkov wrote:
>> On 3/16/24 16:31, Jens Axboe wrote:
>>> On 3/16/24 10:28 AM, Pavel Begunkov wrote:
>>>> On 3/16/24 16:14, Jens Axboe wrote:
>>>>> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>>>>>> On 3/15/24 23:25, Jens Axboe wrote:
>>>>>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>>>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>>>>>
>>>>>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>>>>>> off of an early submission failure path where def->prep has
>>>>>>>>>> not yet been called, I don't think the patch will fix the
>>>>>>>>>> problem.
>>>>>>>>>>
>>>>>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>>>>>> --- a/io_uring/io_uring.c
>>>>>>>>>> +++ b/io_uring/io_uring.c
>>>>>>>> [...]
>>>>>>>>>>               def->fail(req);
>>>>>>>>>>           io_req_complete_defer(req);
>>>>>>>>>>       }
>>>>>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>               }
>>>>>>>>>>               req->flags |= REQ_F_CREDS;
>>>>>>>>>>           }
>>>>>>>>>> -
>>>>>>>>>> -    return def->prep(req, sqe);
>>>>>>>>>> +    return 0;
>>>>>>>>>>       }
>>>>>>>>>>
>>>>>>>>>>       static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>           int ret;
>>>>>>>>>>
>>>>>>>>>>           ret = io_init_req(ctx, req, sqe);
>>>>>>>>>> -    if (unlikely(ret))
>>>>>>>>>> +    if (unlikely(ret)) {
>>>>>>>>>> +fail:
>>>>>>>>
>>>>>>>> Obvious the diff is crap, but still bugging me enough to write
>>>>>>>> that the label should've been one line below, otherwise we'd
>>>>>>>> flag after ->prep as well.
>>>>>>>
>>>>>>> It certainly needs testing :-)
>>>>>>>
>>>>>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>>>>>> and hopefully not have to worry about it again. Do you want to clean it
>>>>>>> up, test it, and send it out?
>>>>>>
>>>>>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>>>>>> report w/o fiddling with done_io as in your patch.
>>>>>
>>>>> I gave this a shot, but some fail handlers do want to get called. But
>>>>
>>>> Which one and/or which part of it?
>>>
>>> send zc
>>
>> I don't think so. If prep wasn't called there wouldn't be
>> a notif allocated, and so no F_MORE required. If you take
>> at the code path it's under REQ_F_NEED_CLEANUP, which is only
>> set by opcode handlers
> 
> I'm not making this up, your test case will literally fail as it doesn't
> get to flag MORE for that case. FWIW, this was done with EARLY_FAIL
> being flagged, and failing if we fail during or before prep.

Maybe the test is too strict, but your approach is different
from what I mentioned yesterday

-	return def->prep(req, sqe);
+	ret = def->prep(req, sqe);
+	if (unlikely(ret)) {
+		req->flags |= REQ_F_EARLY_FAIL;
+		return ret;
+	}
+
+	return 0;

It should only set REQ_F_EARLY_FAIL if we fail
_before_ prep is called
Pavel Begunkov March 16, 2024, 4:40 p.m. UTC | #15
On 3/16/24 16:36, Pavel Begunkov wrote:
> On 3/16/24 16:36, Jens Axboe wrote:
>> On 3/16/24 10:32 AM, Pavel Begunkov wrote:
>>> On 3/16/24 16:31, Jens Axboe wrote:
>>>> On 3/16/24 10:28 AM, Pavel Begunkov wrote:
>>>>> On 3/16/24 16:14, Jens Axboe wrote:
>>>>>> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>>>>>>> On 3/15/24 23:25, Jens Axboe wrote:
>>>>>>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>>>>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>>>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>>>>>>
>>>>>>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>>>>>>> off of an early submission failure path where def->prep has
>>>>>>>>>>> not yet been called, I don't think the patch will fix the
>>>>>>>>>>> problem.
>>>>>>>>>>>
>>>>>>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>>>>>>> --- a/io_uring/io_uring.c
>>>>>>>>>>> +++ b/io_uring/io_uring.c
>>>>>>>>> [...]
>>>>>>>>>>>               def->fail(req);
>>>>>>>>>>>           io_req_complete_defer(req);
>>>>>>>>>>>       }
>>>>>>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>               }
>>>>>>>>>>>               req->flags |= REQ_F_CREDS;
>>>>>>>>>>>           }
>>>>>>>>>>> -
>>>>>>>>>>> -    return def->prep(req, sqe);
>>>>>>>>>>> +    return 0;
>>>>>>>>>>>       }
>>>>>>>>>>>
>>>>>>>>>>>       static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>           int ret;
>>>>>>>>>>>
>>>>>>>>>>>           ret = io_init_req(ctx, req, sqe);
>>>>>>>>>>> -    if (unlikely(ret))
>>>>>>>>>>> +    if (unlikely(ret)) {
>>>>>>>>>>> +fail:
>>>>>>>>>
>>>>>>>>> Obvious the diff is crap, but still bugging me enough to write
>>>>>>>>> that the label should've been one line below, otherwise we'd
>>>>>>>>> flag after ->prep as well.
>>>>>>>>
>>>>>>>> It certainly needs testing :-)
>>>>>>>>
>>>>>>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>>>>>>> and hopefully not have to worry about it again. Do you want to clean it
>>>>>>>> up, test it, and send it out?
>>>>>>>
>>>>>>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>>>>>>> report w/o fiddling with done_io as in your patch.
>>>>>>
>>>>>> I gave this a shot, but some fail handlers do want to get called. But
>>>>>
>>>>> Which one and/or which part of it?
>>>>
>>>> send zc
>>>
>>> I don't think so. If prep wasn't called there wouldn't be
>>> a notif allocated, and so no F_MORE required. If you take
>>> at the code path it's under REQ_F_NEED_CLEANUP, which is only
>>> set by opcode handlers
>>
>> I'm not making this up, your test case will literally fail as it doesn't
>> get to flag MORE for that case. FWIW, this was done with EARLY_FAIL
>> being flagged, and failing if we fail during or before prep.
> 
> Maybe the test is too strict, but your approach is different
> from what I mentioned yesterday
> 
> -    return def->prep(req, sqe);
> +    ret = def->prep(req, sqe);
> +    if (unlikely(ret)) {
> +        req->flags |= REQ_F_EARLY_FAIL;
> +        return ret;
> +    }
> +
> +    return 0;
> 
> It should only set REQ_F_EARLY_FAIL if we fail
> _before_ prep is called

We can't post a notif unless we allocated it, which couldn't
possibly happen without ->prep being called first.

Let's rather call it UNPREPPED_FAIL or something more meaningful;
I expect a lot of confusion around "EARLY_FAIL".
Jens Axboe March 16, 2024, 4:42 p.m. UTC | #16
On 3/16/24 10:36 AM, Pavel Begunkov wrote:
> On 3/16/24 16:36, Jens Axboe wrote:
>> On 3/16/24 10:32 AM, Pavel Begunkov wrote:
>>> On 3/16/24 16:31, Jens Axboe wrote:
>>>> On 3/16/24 10:28 AM, Pavel Begunkov wrote:
>>>>> On 3/16/24 16:14, Jens Axboe wrote:
>>>>>> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>>>>>>> On 3/15/24 23:25, Jens Axboe wrote:
>>>>>>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>>>>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>>>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>>>>>>
>>>>>>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>>>>>>> off of an early submission failure path where def->prep has
>>>>>>>>>>> not yet been called, I don't think the patch will fix the
>>>>>>>>>>> problem.
>>>>>>>>>>>
>>>>>>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>>>>>>> --- a/io_uring/io_uring.c
>>>>>>>>>>> +++ b/io_uring/io_uring.c
>>>>>>>>> [...]
>>>>>>>>>>>               def->fail(req);
>>>>>>>>>>>           io_req_complete_defer(req);
>>>>>>>>>>>       }
>>>>>>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>               }
>>>>>>>>>>>               req->flags |= REQ_F_CREDS;
>>>>>>>>>>>           }
>>>>>>>>>>> -
>>>>>>>>>>> -    return def->prep(req, sqe);
>>>>>>>>>>> +    return 0;
>>>>>>>>>>>       }
>>>>>>>>>>>
>>>>>>>>>>>       static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>           int ret;
>>>>>>>>>>>
>>>>>>>>>>>           ret = io_init_req(ctx, req, sqe);
>>>>>>>>>>> -    if (unlikely(ret))
>>>>>>>>>>> +    if (unlikely(ret)) {
>>>>>>>>>>> +fail:
>>>>>>>>>
>>>>>>>>> Obvious the diff is crap, but still bugging me enough to write
>>>>>>>>> that the label should've been one line below, otherwise we'd
>>>>>>>>> flag after ->prep as well.
>>>>>>>>
>>>>>>>> It certainly needs testing :-)
>>>>>>>>
>>>>>>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>>>>>>> and hopefully not have to worry about it again. Do you want to clean it
>>>>>>>> up, test it, and send it out?
>>>>>>>
>>>>>>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>>>>>>> report w/o fiddling with done_io as in your patch.
>>>>>>
>>>>>> I gave this a shot, but some fail handlers do want to get called. But
>>>>>
>>>>> Which one and/or which part of it?
>>>>
>>>> send zc
>>>
>>> I don't think so. If prep wasn't called there wouldn't be
>>> a notif allocated, and so no F_MORE required. If you take
>>> at the code path it's under REQ_F_NEED_CLEANUP, which is only
>>> set by opcode handlers
>>
>> I'm not making this up, your test case will literally fail as it doesn't
>> get to flag MORE for that case. FWIW, this was done with EARLY_FAIL
>> being flagged, and failing if we fail during or before prep.
> 
> Maybe the test is too strict, but your approach is different
> from what I mentioned yesterday
> 
> -    return def->prep(req, sqe);
> +    ret = def->prep(req, sqe);
> +    if (unlikely(ret)) {
> +        req->flags |= REQ_F_EARLY_FAIL;
> +        return ret;
> +    }
> +
> +    return 0;
> 
> It should only set REQ_F_EARLY_FAIL if we fail
> _before_ prep is called

I did try both ways, fails if we just have:

	return def->prep(req, sqe);
fail:
	req->flags |= REQ_F_EARLY_FAIL;
	...

as well.
Pavel Begunkov March 16, 2024, 4:46 p.m. UTC | #17
On 3/16/24 16:42, Jens Axboe wrote:
> On 3/16/24 10:36 AM, Pavel Begunkov wrote:
>> On 3/16/24 16:36, Jens Axboe wrote:
>>> On 3/16/24 10:32 AM, Pavel Begunkov wrote:
>>>> On 3/16/24 16:31, Jens Axboe wrote:
>>>>> On 3/16/24 10:28 AM, Pavel Begunkov wrote:
>>>>>> On 3/16/24 16:14, Jens Axboe wrote:
>>>>>>> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>>>>>>>> On 3/15/24 23:25, Jens Axboe wrote:
>>>>>>>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>>>>>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>>>>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>>>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>>>>>>>
>>>>>>>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>>>>>>>> off of an early submission failure path where def->prep has
>>>>>>>>>>>> not yet been called, I don't think the patch will fix the
>>>>>>>>>>>> problem.
>>>>>>>>>>>>
>>>>>>>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>>>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>>>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>>>>>>>> --- a/io_uring/io_uring.c
>>>>>>>>>>>> +++ b/io_uring/io_uring.c
>>>>>>>>>> [...]
>>>>>>>>>>>>                def->fail(req);
>>>>>>>>>>>>            io_req_complete_defer(req);
>>>>>>>>>>>>        }
>>>>>>>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>>                }
>>>>>>>>>>>>                req->flags |= REQ_F_CREDS;
>>>>>>>>>>>>            }
>>>>>>>>>>>> -
>>>>>>>>>>>> -    return def->prep(req, sqe);
>>>>>>>>>>>> +    return 0;
>>>>>>>>>>>>        }
>>>>>>>>>>>>
>>>>>>>>>>>>        static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>>>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>>            int ret;
>>>>>>>>>>>>
>>>>>>>>>>>>            ret = io_init_req(ctx, req, sqe);
>>>>>>>>>>>> -    if (unlikely(ret))
>>>>>>>>>>>> +    if (unlikely(ret)) {
>>>>>>>>>>>> +fail:
>>>>>>>>>>
>>>>>>>>>> Obvious the diff is crap, but still bugging me enough to write
>>>>>>>>>> that the label should've been one line below, otherwise we'd
>>>>>>>>>> flag after ->prep as well.
>>>>>>>>>
>>>>>>>>> It certainly needs testing :-)
>>>>>>>>>
>>>>>>>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>>>>>>>> and hopefully not have to worry about it again. Do you want to clean it
>>>>>>>>> up, test it, and send it out?
>>>>>>>>
>>>>>>>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>>>>>>>> report w/o fiddling with done_io as in your patch.
>>>>>>>
>>>>>>> I gave this a shot, but some fail handlers do want to get called. But
>>>>>>
>>>>>> Which one and/or which part of it?
>>>>>
>>>>> send zc
>>>>
>>>> I don't think so. If prep wasn't called there wouldn't be
>>>> a notif allocated, and so no F_MORE required. If you take
>>>> at the code path it's under REQ_F_NEED_CLEANUP, which is only
>>>> set by opcode handlers
>>>
>>> I'm not making this up, your test case will literally fail as it doesn't
>>> get to flag MORE for that case. FWIW, this was done with EARLY_FAIL
>>> being flagged, and failing if we fail during or before prep.
>>
>> Maybe the test is too strict, but your approach is different
>> from what I mentioned yesterday
>>
>> -    return def->prep(req, sqe);
>> +    ret = def->prep(req, sqe);
>> +    if (unlikely(ret)) {
>> +        req->flags |= REQ_F_EARLY_FAIL;
>> +        return ret;
>> +    }
>> +
>> +    return 0;
>>
>> It should only set REQ_F_EARLY_FAIL if we fail
>> _before_ prep is called
> 
> I did try both ways, fails if we just have:

Ok, but the point is that the sendzc's ->fail doesn't
need to be called unless you've done ->prep first.


> 	return def->prep(req, sqe);
> fail:
> 	req->flags |= REQ_F_EARLY_FAIL;
> 	...
> 
> as well.
>
Jens Axboe March 16, 2024, 4:51 p.m. UTC | #18
On 3/16/24 10:46 AM, Pavel Begunkov wrote:
> On 3/16/24 16:42, Jens Axboe wrote:
>> On 3/16/24 10:36 AM, Pavel Begunkov wrote:
>>> On 3/16/24 16:36, Jens Axboe wrote:
>>>> On 3/16/24 10:32 AM, Pavel Begunkov wrote:
>>>>> On 3/16/24 16:31, Jens Axboe wrote:
>>>>>> On 3/16/24 10:28 AM, Pavel Begunkov wrote:
>>>>>>> On 3/16/24 16:14, Jens Axboe wrote:
>>>>>>>> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>>>>>>>>> On 3/15/24 23:25, Jens Axboe wrote:
>>>>>>>>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>>>>>>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>>>>>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>>>>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>>>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>>>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>>>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>>>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>>>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>>>>>>>>
>>>>>>>>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>>>>>>>>> off of an early submission failure path where def->prep has
>>>>>>>>>>>>> not yet been called, I don't think the patch will fix the
>>>>>>>>>>>>> problem.
>>>>>>>>>>>>>
>>>>>>>>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>>>>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>>>>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>>>>>>>>> --- a/io_uring/io_uring.c
>>>>>>>>>>>>> +++ b/io_uring/io_uring.c
>>>>>>>>>>> [...]
>>>>>>>>>>>>>                def->fail(req);
>>>>>>>>>>>>>            io_req_complete_defer(req);
>>>>>>>>>>>>>        }
>>>>>>>>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>>>                }
>>>>>>>>>>>>>                req->flags |= REQ_F_CREDS;
>>>>>>>>>>>>>            }
>>>>>>>>>>>>> -
>>>>>>>>>>>>> -    return def->prep(req, sqe);
>>>>>>>>>>>>> +    return 0;
>>>>>>>>>>>>>        }
>>>>>>>>>>>>>
>>>>>>>>>>>>>        static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>>>>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>>>            int ret;
>>>>>>>>>>>>>
>>>>>>>>>>>>>            ret = io_init_req(ctx, req, sqe);
>>>>>>>>>>>>> -    if (unlikely(ret))
>>>>>>>>>>>>> +    if (unlikely(ret)) {
>>>>>>>>>>>>> +fail:
>>>>>>>>>>>
>>>>>>>>>>> Obvious the diff is crap, but still bugging me enough to write
>>>>>>>>>>> that the label should've been one line below, otherwise we'd
>>>>>>>>>>> flag after ->prep as well.
>>>>>>>>>>
>>>>>>>>>> It certainly needs testing :-)
>>>>>>>>>>
>>>>>>>>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>>>>>>>>> and hopefully not have to worry about it again. Do you want to clean it
>>>>>>>>>> up, test it, and send it out?
>>>>>>>>>
>>>>>>>>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>>>>>>>>> report w/o fiddling with done_io as in your patch.
>>>>>>>>
>>>>>>>> I gave this a shot, but some fail handlers do want to get called. But
>>>>>>>
>>>>>>> Which one and/or which part of it?
>>>>>>
>>>>>> send zc
>>>>>
>>>>> I don't think so. If prep wasn't called there wouldn't be
>>>>> a notif allocated, and so no F_MORE required. If you take
>>>>> at the code path it's under REQ_F_NEED_CLEANUP, which is only
>>>>> set by opcode handlers
>>>>
>>>> I'm not making this up, your test case will literally fail as it doesn't
>>>> get to flag MORE for that case. FWIW, this was done with EARLY_FAIL
>>>> being flagged, and failing if we fail during or before prep.
>>>
>>> Maybe the test is too strict, but your approach is different
>>> from what I mentioned yesterday
>>>
>>> -    return def->prep(req, sqe);
>>> +    ret = def->prep(req, sqe);
>>> +    if (unlikely(ret)) {
>>> +        req->flags |= REQ_F_EARLY_FAIL;
>>> +        return ret;
>>> +    }
>>> +
>>> +    return 0;
>>>
>>> It should only set REQ_F_EARLY_FAIL if we fail
>>> _before_ prep is called
>>
>> I did try both ways, fails if we just have:
> 
> Ok, but the point is that the sendzc's ->fail doesn't
> need to be called unless you've done ->prep first.

But it fails, not sure how else to say it.

FWIW, the current io_uring-6.9 branch has two patches on top, looks fine
to me so far. We'll see if syzbot agrees. I'll send them out later
today, unless I change my mind and try a different approach.
Pavel Begunkov March 16, 2024, 4:57 p.m. UTC | #19
On 3/16/24 16:51, Jens Axboe wrote:
> On 3/16/24 10:46 AM, Pavel Begunkov wrote:
>> On 3/16/24 16:42, Jens Axboe wrote:
>>> On 3/16/24 10:36 AM, Pavel Begunkov wrote:
>>>> On 3/16/24 16:36, Jens Axboe wrote:
>>>>> On 3/16/24 10:32 AM, Pavel Begunkov wrote:
>>>>>> On 3/16/24 16:31, Jens Axboe wrote:
>>>>>>> On 3/16/24 10:28 AM, Pavel Begunkov wrote:
>>>>>>>> On 3/16/24 16:14, Jens Axboe wrote:
>>>>>>>>> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>>>>>>>>>> On 3/15/24 23:25, Jens Axboe wrote:
>>>>>>>>>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>>>>>>>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>>>>>>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>>>>>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>>>>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>>>>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>>>>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>>>>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>>>>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>>>>>>>>>> off of an early submission failure path where def->prep has
>>>>>>>>>>>>>> not yet been called, I don't think the patch will fix the
>>>>>>>>>>>>>> problem.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>>>>>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>>>>>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>>>>>>>>>> --- a/io_uring/io_uring.c
>>>>>>>>>>>>>> +++ b/io_uring/io_uring.c
>>>>>>>>>>>> [...]
>>>>>>>>>>>>>>                 def->fail(req);
>>>>>>>>>>>>>>             io_req_complete_defer(req);
>>>>>>>>>>>>>>         }
>>>>>>>>>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>>>>                 }
>>>>>>>>>>>>>>                 req->flags |= REQ_F_CREDS;
>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>> -
>>>>>>>>>>>>>> -    return def->prep(req, sqe);
>>>>>>>>>>>>>> +    return 0;
>>>>>>>>>>>>>>         }
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>         static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>>>>>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>>>>             int ret;
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>             ret = io_init_req(ctx, req, sqe);
>>>>>>>>>>>>>> -    if (unlikely(ret))
>>>>>>>>>>>>>> +    if (unlikely(ret)) {
>>>>>>>>>>>>>> +fail:
>>>>>>>>>>>>
>>>>>>>>>>>> Obvious the diff is crap, but still bugging me enough to write
>>>>>>>>>>>> that the label should've been one line below, otherwise we'd
>>>>>>>>>>>> flag after ->prep as well.
>>>>>>>>>>>
>>>>>>>>>>> It certainly needs testing :-)
>>>>>>>>>>>
>>>>>>>>>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>>>>>>>>>> and hopefully not have to worry about it again. Do you want to clean it
>>>>>>>>>>> up, test it, and send it out?
>>>>>>>>>>
>>>>>>>>>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>>>>>>>>>> report w/o fiddling with done_io as in your patch.
>>>>>>>>>
>>>>>>>>> I gave this a shot, but some fail handlers do want to get called. But
>>>>>>>>
>>>>>>>> Which one and/or which part of it?
>>>>>>>
>>>>>>> send zc
>>>>>>
>>>>>> I don't think so. If prep wasn't called there wouldn't be
>>>>>> a notif allocated, and so no F_MORE required. If you take
>>>>>> at the code path it's under REQ_F_NEED_CLEANUP, which is only
>>>>>> set by opcode handlers
>>>>>
>>>>> I'm not making this up, your test case will literally fail as it doesn't
>>>>> get to flag MORE for that case. FWIW, this was done with EARLY_FAIL
>>>>> being flagged, and failing if we fail during or before prep.
>>>>
>>>> Maybe the test is too strict, but your approach is different
>>>> from what I mentioned yesterday
>>>>
>>>> -    return def->prep(req, sqe);
>>>> +    ret = def->prep(req, sqe);
>>>> +    if (unlikely(ret)) {
>>>> +        req->flags |= REQ_F_EARLY_FAIL;
>>>> +        return ret;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>>
>>>> It should only set REQ_F_EARLY_FAIL if we fail
>>>> _before_ prep is called
>>>
>>> I did try both ways, fails if we just have:
>>
>> Ok, but the point is that the sendzc's ->fail doesn't
>> need to be called unless you've done ->prep first.
> 
> But it fails, not sure how else to say it.

liburing tests? Which test case? If so, it should be another
bug. REQ_F_NEED_CLEANUP is only set by opcodes. If a request is
terminated before ->prep is called, it means it never entered
any of the opdef callbacks and has never seen any of the net.c
code, so there should be no REQ_F_NEED_CLEANUP, and so
io_sendrecv_fail() wouldn't try to set F_MORE. I don't know
what's wrong.


> FWIW, the current io_uring-6.9 branch has two patches on top, looks fine
> for me so far. We'll see if syzbot agrees. I'll send them out later
> today, unless I change my mind and try a different approach.
>
Jens Axboe March 16, 2024, 5:01 p.m. UTC | #20
On 3/16/24 10:57 AM, Pavel Begunkov wrote:
> On 3/16/24 16:51, Jens Axboe wrote:
>> On 3/16/24 10:46 AM, Pavel Begunkov wrote:
>>> On 3/16/24 16:42, Jens Axboe wrote:
>>>> On 3/16/24 10:36 AM, Pavel Begunkov wrote:
>>>>> On 3/16/24 16:36, Jens Axboe wrote:
>>>>>> On 3/16/24 10:32 AM, Pavel Begunkov wrote:
>>>>>>> On 3/16/24 16:31, Jens Axboe wrote:
>>>>>>>> On 3/16/24 10:28 AM, Pavel Begunkov wrote:
>>>>>>>>> On 3/16/24 16:14, Jens Axboe wrote:
>>>>>>>>>> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>>>>>>>>>>> On 3/15/24 23:25, Jens Axboe wrote:
>>>>>>>>>>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>>>>>>>>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>>>>>>>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>>>>>>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>>>>>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>>>>>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>>>>>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>>>>>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>>>>>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>>>>>>>>>>> off of an early submission failure path where def->prep has
>>>>>>>>>>>>>>> not yet been called, I don't think the patch will fix the
>>>>>>>>>>>>>>> problem.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>>>>>>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>>>>>>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>>>>>>>>>>> --- a/io_uring/io_uring.c
>>>>>>>>>>>>>>> +++ b/io_uring/io_uring.c
>>>>>>>>>>>>> [...]
>>>>>>>>>>>>>>>                 def->fail(req);
>>>>>>>>>>>>>>>             io_req_complete_defer(req);
>>>>>>>>>>>>>>>         }
>>>>>>>>>>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>>>>>                 }
>>>>>>>>>>>>>>>                 req->flags |= REQ_F_CREDS;
>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>> -    return def->prep(req, sqe);
>>>>>>>>>>>>>>> +    return 0;
>>>>>>>>>>>>>>>         }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>         static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>>>>>>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>>>>>             int ret;
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>             ret = io_init_req(ctx, req, sqe);
>>>>>>>>>>>>>>> -    if (unlikely(ret))
>>>>>>>>>>>>>>> +    if (unlikely(ret)) {
>>>>>>>>>>>>>>> +fail:
>>>>>>>>>>>>>
>>>>>>>>>>>>> Obvious the diff is crap, but still bugging me enough to write
>>>>>>>>>>>>> that the label should've been one line below, otherwise we'd
>>>>>>>>>>>>> flag after ->prep as well.
>>>>>>>>>>>>
>>>>>>>>>>>> It certainly needs testing :-)
>>>>>>>>>>>>
>>>>>>>>>>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>>>>>>>>>>> and hopefully not have to worry about it again. Do you want to clean it
>>>>>>>>>>>> up, test it, and send it out?
>>>>>>>>>>>
>>>>>>>>>>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>>>>>>>>>>> report w/o fiddling with done_io as in your patch.
>>>>>>>>>>
>>>>>>>>>> I gave this a shot, but some fail handlers do want to get called. But
>>>>>>>>>
>>>>>>>>> Which one and/or which part of it?
>>>>>>>>
>>>>>>>> send zc
>>>>>>>
>>>>>>> I don't think so. If prep wasn't called there wouldn't be
>>>>>>> a notif allocated, and so no F_MORE required. If you take
>>>>>>> at the code path it's under REQ_F_NEED_CLEANUP, which is only
>>>>>>> set by opcode handlers
>>>>>>
>>>>>> I'm not making this up, your test case will literally fail as it doesn't
>>>>>> get to flag MORE for that case. FWIW, this was done with EARLY_FAIL
>>>>>> being flagged, and failing if we fail during or before prep.
>>>>>
>>>>> Maybe the test is too strict, but your approach is different
>>>>> from what I mentioned yesterday
>>>>>
>>>>> -    return def->prep(req, sqe);
>>>>> +    ret = def->prep(req, sqe);
>>>>> +    if (unlikely(ret)) {
>>>>> +        req->flags |= REQ_F_EARLY_FAIL;
>>>>> +        return ret;
>>>>> +    }
>>>>> +
>>>>> +    return 0;
>>>>>
>>>>> It should only set REQ_F_EARLY_FAIL if we fail
>>>>> _before_ prep is called
>>>>
>>>> I did try both ways, fails if we just have:
>>>
>>> Ok, but the point is that the sendzc's ->fail doesn't
>>> need to be called unless you've done ->prep first.
>>
>> But it fails, not sure how else to say it.
> 
> liburing tests? Which test case? If so, it should be another

Like I mentioned earlier, it's send zc and it's failing the test case
for that. test/send-zerocopy.t.

> bug. REQ_F_NEED_CLEANUP is only set by opcodes, if a request is
> terminated before ->prep is called, it means it never entered
> any of the opdef callbacks and have never seen any of net.c
> code, so there should be no REQ_F_NEED_CLEANUP, and so
> io_sendrecv_fail() wouldn't try to set F_MORE. I don't know
> what's wrong.

Feel free to take a look! I do like the simplicity of the early error
flag.
Pavel Begunkov March 16, 2024, 5:42 p.m. UTC | #21
On 3/16/24 17:01, Jens Axboe wrote:
> On 3/16/24 10:57 AM, Pavel Begunkov wrote:
>> On 3/16/24 16:51, Jens Axboe wrote:
>>> On 3/16/24 10:46 AM, Pavel Begunkov wrote:
>>>> On 3/16/24 16:42, Jens Axboe wrote:
>>>>> On 3/16/24 10:36 AM, Pavel Begunkov wrote:
>>>>>> On 3/16/24 16:36, Jens Axboe wrote:
>>>>>>> On 3/16/24 10:32 AM, Pavel Begunkov wrote:
>>>>>>>> On 3/16/24 16:31, Jens Axboe wrote:
>>>>>>>>> On 3/16/24 10:28 AM, Pavel Begunkov wrote:
>>>>>>>>>> On 3/16/24 16:14, Jens Axboe wrote:
>>>>>>>>>>> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>>>>>>>>>>>> On 3/15/24 23:25, Jens Axboe wrote:
>>>>>>>>>>>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>>>>>>>>>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>>>>>>>>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>>>>>>>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>>>>>>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>>>>>>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>>>>>>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>>>>>>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>>>>>>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>>>>>>>>>>>> off of an early submission failure path where def->prep has
>>>>>>>>>>>>>>>> not yet been called, I don't think the patch will fix the
>>>>>>>>>>>>>>>> problem.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>>>>>>>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>>>>>>>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>>>>>>>>>>>> --- a/io_uring/io_uring.c
>>>>>>>>>>>>>>>> +++ b/io_uring/io_uring.c
>>>>>>>>>>>>>> [...]
>>>>>>>>>>>>>>>>                  def->fail(req);
>>>>>>>>>>>>>>>>              io_req_complete_defer(req);
>>>>>>>>>>>>>>>>          }
>>>>>>>>>>>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>>>>>>                  }
>>>>>>>>>>>>>>>>                  req->flags |= REQ_F_CREDS;
>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>> -    return def->prep(req, sqe);
>>>>>>>>>>>>>>>> +    return 0;
>>>>>>>>>>>>>>>>          }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>          static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>>>>>>>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>>>>>>              int ret;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>              ret = io_init_req(ctx, req, sqe);
>>>>>>>>>>>>>>>> -    if (unlikely(ret))
>>>>>>>>>>>>>>>> +    if (unlikely(ret)) {
>>>>>>>>>>>>>>>> +fail:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Obvious the diff is crap, but still bugging me enough to write
>>>>>>>>>>>>>> that the label should've been one line below, otherwise we'd
>>>>>>>>>>>>>> flag after ->prep as well.
>>>>>>>>>>>>>
>>>>>>>>>>>>> It certainly needs testing :-)
>>>>>>>>>>>>>
>>>>>>>>>>>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>>>>>>>>>>>> and hopefully not have to worry about it again. Do you want to clean it
>>>>>>>>>>>>> up, test it, and send it out?
>>>>>>>>>>>>
>>>>>>>>>>>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>>>>>>>>>>>> report w/o fiddling with done_io as in your patch.
>>>>>>>>>>>
>>>>>>>>>>> I gave this a shot, but some fail handlers do want to get called. But
>>>>>>>>>>
>>>>>>>>>> Which one and/or which part of it?
>>>>>>>>>
>>>>>>>>> send zc
>>>>>>>>
>>>>>>>> I don't think so. If prep wasn't called there wouldn't be
>>>>>>>> a notif allocated, and so no F_MORE required. If you take
>>>>>>>> at the code path it's under REQ_F_NEED_CLEANUP, which is only
>>>>>>>> set by opcode handlers
>>>>>>>
>>>>>>> I'm not making this up, your test case will literally fail as it doesn't
>>>>>>> get to flag MORE for that case. FWIW, this was done with EARLY_FAIL
>>>>>>> being flagged, and failing if we fail during or before prep.
>>>>>>
>>>>>> Maybe the test is too strict, but your approach is different
>>>>>> from what I mentioned yesterday
>>>>>>
>>>>>> -    return def->prep(req, sqe);
>>>>>> +    ret = def->prep(req, sqe);
>>>>>> +    if (unlikely(ret)) {
>>>>>> +        req->flags |= REQ_F_EARLY_FAIL;
>>>>>> +        return ret;
>>>>>> +    }
>>>>>> +
>>>>>> +    return 0;
>>>>>>
>>>>>> It should only set REQ_F_EARLY_FAIL if we fail
>>>>>> _before_ prep is called
>>>>>
>>>>> I did try both ways, fails if we just have:
>>>>
>>>> Ok, but the point is that the sendzc's ->fail doesn't
>>>> need to be called unless you've done ->prep first.
>>>
>>> But it fails, not sure how else to say it.
>>
>> liburing tests? Which test case? If so, it should be another
> 
> Like I mentioned earlier, it's send zc and it's failing the test case
> for that. test/send-zerocopy.t.
> 
>> bug. REQ_F_NEED_CLEANUP is only set by opcodes, if a request is
>> terminated before ->prep is called, it means it never entered
>> any of the opdef callbacks and have never seen any of net.c
>> code, so there should be no REQ_F_NEED_CLEANUP, and so
>> io_sendrecv_fail() wouldn't try to set F_MORE. I don't know
>> what's wrong.
> 
> Feel free to take a look! I do like the simplicity of the early error
> flag.

./send-zerocopy.t works fine


diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index ea7e5488b3be..de3a2c67c4a7 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -478,6 +478,7 @@ enum {
  	REQ_F_CAN_POLL_BIT,
  	REQ_F_BL_EMPTY_BIT,
  	REQ_F_BL_NO_RECYCLE_BIT,
+	REQ_F_UNPREPPED_FAIL_BIT,
  
  	/* not a real bit, just to check we're not overflowing the space */
  	__REQ_F_LAST_BIT,
@@ -556,6 +557,8 @@ enum {
  	REQ_F_BL_EMPTY		= IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT),
  	/* don't recycle provided buffers for this request */
  	REQ_F_BL_NO_RECYCLE	= IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT),
+
+	REQ_F_UNPREPPED_FAIL	= IO_REQ_FLAG(REQ_F_UNPREPPED_FAIL_BIT),
  };
  
  typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 846d67a9c72e..6523fa4c5630 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -993,7 +993,7 @@ void io_req_defer_failed(struct io_kiocb *req, s32 res)
  
  	req_set_fail(req);
  	io_req_set_res(req, res, io_put_kbuf(req, IO_URING_F_UNLOCKED));
-	if (def->fail)
+	if (!(req->flags & REQ_F_UNPREPPED_FAIL) && def->fail)
  		def->fail(req);
  	io_req_complete_defer(req);
  }
@@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
  		}
  		req->flags |= REQ_F_CREDS;
  	}
-
-	return def->prep(req, sqe);
+	return 0;
  }
  
  static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
@@ -2250,7 +2249,13 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
  	int ret;
  
  	ret = io_init_req(ctx, req, sqe);
-	if (unlikely(ret))
+	if (unlikely(ret)) {
+		req->flags |= REQ_F_UNPREPPED_FAIL;
+		return io_submit_fail_init(sqe, req, ret);
+	}
+
+	ret = def->prep(req, sqe);
+	if (ret)
  		return io_submit_fail_init(sqe, req, ret);
  
  	trace_io_uring_submit_req(req);
Jens Axboe March 16, 2024, 11:58 p.m. UTC | #22
On 3/16/24 11:42 AM, Pavel Begunkov wrote:
> On 3/16/24 17:01, Jens Axboe wrote:
>> On 3/16/24 10:57 AM, Pavel Begunkov wrote:
>>> On 3/16/24 16:51, Jens Axboe wrote:
>>>> On 3/16/24 10:46 AM, Pavel Begunkov wrote:
>>>>> On 3/16/24 16:42, Jens Axboe wrote:
>>>>>> On 3/16/24 10:36 AM, Pavel Begunkov wrote:
>>>>>>> On 3/16/24 16:36, Jens Axboe wrote:
>>>>>>>> On 3/16/24 10:32 AM, Pavel Begunkov wrote:
>>>>>>>>> On 3/16/24 16:31, Jens Axboe wrote:
>>>>>>>>>> On 3/16/24 10:28 AM, Pavel Begunkov wrote:
>>>>>>>>>>> On 3/16/24 16:14, Jens Axboe wrote:
>>>>>>>>>>>> On 3/15/24 5:28 PM, Pavel Begunkov wrote:
>>>>>>>>>>>>> On 3/15/24 23:25, Jens Axboe wrote:
>>>>>>>>>>>>>> On 3/15/24 5:19 PM, Pavel Begunkov wrote:
>>>>>>>>>>>>>>> On 3/15/24 23:13, Pavel Begunkov wrote:
>>>>>>>>>>>>>>>> On 3/15/24 23:09, Pavel Begunkov wrote:
>>>>>>>>>>>>>>>>> On 3/15/24 22:48, Jens Axboe wrote:
>>>>>>>>>>>>>>>>>> If we get a request with IOSQE_ASYNC set, then we first run the prep
>>>>>>>>>>>>>>>>>> async handlers. But if we then fail setting it up and want to post
>>>>>>>>>>>>>>>>>> a CQE with -EINVAL, we use ->done_io. This was previously guarded with
>>>>>>>>>>>>>>>>>> REQ_F_PARTIAL_IO, and the normal setup handlers do set it up before any
>>>>>>>>>>>>>>>>>> potential errors, but we need to cover the async setup too.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> You can hit io_req_defer_failed() { opdef->fail(); }
>>>>>>>>>>>>>>>>> off of an early submission failure path where def->prep has
>>>>>>>>>>>>>>>>> not yet been called, I don't think the patch will fix the
>>>>>>>>>>>>>>>>> problem.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> ->fail() handlers are fragile, maybe we should skip them
>>>>>>>>>>>>>>>>> if def->prep() wasn't called. Not even compile tested:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>>>>>>>>>>>>>>>>> index 846d67a9c72e..56eed1490571 100644
>>>>>>>>>>>>>>>>> --- a/io_uring/io_uring.c
>>>>>>>>>>>>>>>>> +++ b/io_uring/io_uring.c
>>>>>>>>>>>>>>> [...]
>>>>>>>>>>>>>>>>>                  def->fail(req);
>>>>>>>>>>>>>>>>>              io_req_complete_defer(req);
>>>>>>>>>>>>>>>>>          }
>>>>>>>>>>>>>>>>> @@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>>>>>>>                  }
>>>>>>>>>>>>>>>>>                  req->flags |= REQ_F_CREDS;
>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>> -
>>>>>>>>>>>>>>>>> -    return def->prep(req, sqe);
>>>>>>>>>>>>>>>>> +    return 0;
>>>>>>>>>>>>>>>>>          }
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>          static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>>>>>>>>>>>>>>>>> @@ -2250,8 +2249,15 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>>>>>>>>>>>>>>>>              int ret;
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>              ret = io_init_req(ctx, req, sqe);
>>>>>>>>>>>>>>>>> -    if (unlikely(ret))
>>>>>>>>>>>>>>>>> +    if (unlikely(ret)) {
>>>>>>>>>>>>>>>>> +fail:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Obvious the diff is crap, but still bugging me enough to write
>>>>>>>>>>>>>>> that the label should've been one line below, otherwise we'd
>>>>>>>>>>>>>>> flag after ->prep as well.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> It certainly needs testing :-)
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> We can go either way - patch up the net thing, or do a proper EARLY_FAIL
>>>>>>>>>>>>>> and hopefully not have to worry about it again. Do you want to clean it
>>>>>>>>>>>>>> up, test it, and send it out?
>>>>>>>>>>>>>
>>>>>>>>>>>>> I'd rather leave it to you, I suspect it wouldn't fix the syzbot
>>>>>>>>>>>>> report w/o fiddling with done_io as in your patch.
>>>>>>>>>>>>
>>>>>>>>>>>> I gave this a shot, but some fail handlers do want to get called. But
>>>>>>>>>>>
>>>>>>>>>>> Which one and/or which part of it?
>>>>>>>>>>
>>>>>>>>>> send zc
>>>>>>>>>
>>>>>>>>> I don't think so. If prep wasn't called there wouldn't be
>>>>>>>>> a notif allocated, and so no F_MORE required. If you take
>>>>>>>>> at the code path it's under REQ_F_NEED_CLEANUP, which is only
>>>>>>>>> set by opcode handlers
>>>>>>>>
>>>>>>>> I'm not making this up, your test case will literally fail as it doesn't
>>>>>>>> get to flag MORE for that case. FWIW, this was done with EARLY_FAIL
>>>>>>>> being flagged, and failing if we fail during or before prep.
>>>>>>>
>>>>>>> Maybe the test is too strict, but your approach is different
>>>>>>> from what I mentioned yesterday
>>>>>>>
>>>>>>> -    return def->prep(req, sqe);
>>>>>>> +    ret = def->prep(req, sqe);
>>>>>>> +    if (unlikely(ret)) {
>>>>>>> +        req->flags |= REQ_F_EARLY_FAIL;
>>>>>>> +        return ret;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>>
>>>>>>> It should only set REQ_F_EARLY_FAIL if we fail
>>>>>>> _before_ prep is called
>>>>>>
>>>>>> I did try both ways, fails if we just have:
>>>>>
>>>>> Ok, but the point is that the sendzc's ->fail doesn't
>>>>> need to be called unless you've done ->prep first.
>>>>
>>>> But it fails, not sure how else to say it.
>>>
>>> liburing tests? Which test case? If so, it should be another
>>
>> Like I mentioned earlier, it's send zc and it's failing the test case
>> for that. test/send-zerocopy.t.
>>
>>> bug. REQ_F_NEED_CLEANUP is only set by opcodes, if a request is
>>> terminated before ->prep is called, it means it never entered
>>> any of the opdef callbacks and have never seen any of net.c
>>> code, so there should be no REQ_F_NEED_CLEANUP, and so
>>> io_sendrecv_fail() wouldn't try to set F_MORE. I don't know
>>> what's wrong.
>>
>> Feel free to take a look! I do like the simplicity of the early error
>> flag.
> 
> ./send-zerocopy.t works fine

Huh, I wonder what I messed up. But:

> @@ -2250,7 +2249,13 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>      int ret;
>  
>      ret = io_init_req(ctx, req, sqe);
> -    if (unlikely(ret))
> +    if (unlikely(ret)) {
> +        req->flags |= REQ_F_UNPREPPED_FAIL;
> +        return io_submit_fail_init(sqe, req, ret);
> +    }
> +
> +    ret = def->prep(req, sqe);
> +    if (ret)
>          return io_submit_fail_init(sqe, req, ret);

this obviously won't compile, assuming this is not the one you ran.

In any case, I do like the one I sent out for review. It moves all the
slow path out of line and shrinks things nicely too. And clearing the
cmd.data area seems like a good idea for that case.
Pavel Begunkov March 17, 2024, 8:45 p.m. UTC | #23
On 3/16/24 23:58, Jens Axboe wrote:
> On 3/16/24 11:42 AM, Pavel Begunkov wrote:
>> On 3/16/24 17:01, Jens Axboe wrote:
>>> On 3/16/24 10:57 AM, Pavel Begunkov wrote:
>>>> On 3/16/24 16:51, Jens Axboe wrote:
>>>>> On 3/16/24 10:46 AM, Pavel Begunkov wrote:
>>>>>> On 3/16/24 16:42, Jens Axboe wrote:
>>>>>>> On 3/16/24 10:36 AM, Pavel Begunkov wrote:
>>>>>>>> On 3/16/24 16:36, Jens Axboe wrote:
...
>>>>>>>>
>>>>>>>> It should only set REQ_F_EARLY_FAIL if we fail
>>>>>>>> _before_ prep is called
>>>>>>>
>>>>>>> I did try both ways, fails if we just have:
>>>>>>
>>>>>> Ok, but the point is that the sendzc's ->fail doesn't
>>>>>> need to be called unless you've done ->prep first.
>>>>>
>>>>> But it fails, not sure how else to say it.
>>>>
>>>> liburing tests? Which test case? If so, it should be another
>>>
>>> Like I mentioned earlier, it's send zc and it's failing the test case
>>> for that. test/send-zerocopy.t.
>>>
>>>> bug. REQ_F_NEED_CLEANUP is only set by opcodes, if a request is
>>>> terminated before ->prep is called, it means it never entered
>>>> any of the opdef callbacks and have never seen any of net.c
>>>> code, so there should be no REQ_F_NEED_CLEANUP, and so
>>>> io_sendrecv_fail() wouldn't try to set F_MORE. I don't know
>>>> what's wrong.
>>>
>>> Feel free to take a look! I do like the simplicity of the early error
>>> flag.
>>
>> ./send-zerocopy.t works fine
> 
> Huh, I wonder what I messed up. But:

My blind guess would be that it called ->prep(), which presumably
failed, but then there was no ->fail() following it. BTW, we might
want to loosen up that case: similar to mshots, sendzc might decide
not to post notifications for any reason, and the user should always
check F_MORE first.

>> @@ -2250,7 +2249,13 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>>       int ret;
>>   
>>       ret = io_init_req(ctx, req, sqe);
>> -    if (unlikely(ret))
>> +    if (unlikely(ret)) {
>> +        req->flags |= REQ_F_UNPREPPED_FAIL;
>> +        return io_submit_fail_init(sqe, req, ret);
>> +    }
>> +
>> +    ret = def->prep(req, sqe);
>> +    if (ret)
>>           return io_submit_fail_init(sqe, req, ret);
> 
> this obviously won't compile, assuming this is not the one you ran.

Urgh, yeah, some changes were left unstaged



diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index ea7e5488b3be..de3a2c67c4a7 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -478,6 +478,7 @@ enum {
  	REQ_F_CAN_POLL_BIT,
  	REQ_F_BL_EMPTY_BIT,
  	REQ_F_BL_NO_RECYCLE_BIT,
+	REQ_F_UNPREPPED_FAIL_BIT,
  
  	/* not a real bit, just to check we're not overflowing the space */
  	__REQ_F_LAST_BIT,
@@ -556,6 +557,8 @@ enum {
  	REQ_F_BL_EMPTY		= IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT),
  	/* don't recycle provided buffers for this request */
  	REQ_F_BL_NO_RECYCLE	= IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT),
+
+	REQ_F_UNPREPPED_FAIL	= IO_REQ_FLAG(REQ_F_UNPREPPED_FAIL_BIT),
  };
  
  typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 846d67a9c72e..1231f8c53014 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -993,7 +993,7 @@ void io_req_defer_failed(struct io_kiocb *req, s32 res)
  
  	req_set_fail(req);
  	io_req_set_res(req, res, io_put_kbuf(req, IO_URING_F_UNLOCKED));
-	if (def->fail)
+	if (!(req->flags & REQ_F_UNPREPPED_FAIL) && def->fail)
  		def->fail(req);
  	io_req_complete_defer(req);
  }
@@ -2201,8 +2201,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
  		}
  		req->flags |= REQ_F_CREDS;
  	}
-
-	return def->prep(req, sqe);
+	return 0;
  }
  
  static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
@@ -2250,7 +2249,13 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
  	int ret;
  
  	ret = io_init_req(ctx, req, sqe);
-	if (unlikely(ret))
+	if (unlikely(ret)) {
+		req->flags |= REQ_F_UNPREPPED_FAIL;
+		return io_submit_fail_init(sqe, req, ret);
+	}
+
+	ret = io_issue_defs[req->opcode].prep(req, sqe);
+	if (ret)
  		return io_submit_fail_init(sqe, req, ret);
  
  	trace_io_uring_submit_req(req);
diff mbox series

Patch

diff --git a/io_uring/net.c b/io_uring/net.c
index 19451f0dbf81..1e7665ff6ef7 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -326,7 +326,10 @@  int io_send_prep_async(struct io_kiocb *req)
 	struct io_async_msghdr *io;
 	int ret;
 
-	if (!zc->addr || req_has_async_data(req))
+	if (req_has_async_data(req))
+		return 0;
+	zc->done_io = 0;
+	if (!zc->addr)
 		return 0;
 	io = io_msg_alloc_async_prep(req);
 	if (!io)
@@ -353,8 +356,10 @@  static int io_setup_async_addr(struct io_kiocb *req,
 
 int io_sendmsg_prep_async(struct io_kiocb *req)
 {
+	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 	int ret;
 
+	sr->done_io = 0;
 	if (!io_msg_alloc_async_prep(req))
 		return -ENOMEM;
 	ret = io_sendmsg_copy_hdr(req, req->async_data);
@@ -608,9 +613,11 @@  static int io_recvmsg_copy_hdr(struct io_kiocb *req,
 
 int io_recvmsg_prep_async(struct io_kiocb *req)
 {
+	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 	struct io_async_msghdr *iomsg;
 	int ret;
 
+	sr->done_io = 0;
 	if (!io_msg_alloc_async_prep(req))
 		return -ENOMEM;
 	iomsg = req->async_data;