Message ID | 20240527-sched_per_packet-v1-2-09a41d405f7c@gmail.com (mailing list archive) |
---|---|
State | RFC |
Headers | show |
Series | mptcp: update scheduler API | expand |
Context | Check | Description |
---|---|---|
matttbe/build | success | Build and static analysis OK |
matttbe/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 70 lines checked |
matttbe/shellcheck | success | MPTCP selftests files have not been modified |
matttbe/KVM_Validation__normal | success | Success! ✅ |
matttbe/KVM_Validation__debug | success | Success! ✅ |
matttbe/KVM_Validation__btf__only_bpftest_all_ | success | Success! ✅ |
On Mon, 27 May 2024, Gregory Detal wrote: > This commit modifies the subflow push pending function to call for > each data that will be sent over this subflow. > > Only send the amount of data defined by the scheduler. If it sets > MPTCP_SCHED_FLAG_RESCHEDULE, the function will ignore further data to > be sent. This will cause the get_subflow function to be called again. > > The previous behavior is maintained if the push function isn't defined > by the scheduler. > > Signed-off-by: Gregory Detal <gregory.detal@gmail.com> > --- > net/mptcp/protocol.c | 14 ++++++++++---- > net/mptcp/protocol.h | 9 +++++++++ > net/mptcp/sched.c | 20 ++++++++++++++++++++ > 3 files changed, 39 insertions(+), 4 deletions(-) > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > index 2f8f32cd31d3..abfd5b6748f2 100644 > --- a/net/mptcp/protocol.c > +++ b/net/mptcp/protocol.c > @@ -1526,12 +1526,15 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk, > struct mptcp_sock *msk = mptcp_sk(sk); > struct mptcp_data_frag *dfrag; > int len, copied = 0, err = 0; > + u16 sflags = 0; > + > + while ((dfrag = mptcp_send_head(sk)) && !(sflags & MPTCP_SCHED_FLAG_RESCHEDULE)) { > + u16 limit = mptcp_sched_push(msk, ssk, dfrag, &sflags); > > - while ((dfrag = mptcp_send_head(sk))) { > info->sent = dfrag->already_sent; > - info->limit = dfrag->data_len; > len = dfrag->data_len - dfrag->already_sent; > - while (len > 0) { > + info->limit = limit ? 
info->sent + min_t(u16, limit, len) : dfrag->data_len; > + while (len > 0 && info->sent < info->limit) { > int ret = 0; > > ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info); > @@ -1546,7 +1549,10 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk, > > mptcp_update_post_push(msk, dfrag, ret); > } > - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); > + > + /* if the whole data has been sent, move to next data segment: */ > + if (len <= 0) > + WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); Hi Gregory - It's important to be careful with possible race conditions here, since this changes existing assumptions about updates to msk->first_pending. I believe __mptcp_clean_una() (called when processing acks) will not get called until the msk socket lock is released so the data in msk->rtx_queue _should_ be available for reinjection. But we should be careful to verify all locking assumptions around these msk values. For example, sometimes the msk socket lock is held here but other times it's the mptcp_data_lock. Also, it looks like there are some error paths that could exit this loop without updating msk->first_pending, which would be problematic. 
- Mat > > if (msk->snd_burst <= 0 || > !sk_stream_memory_free(ssk) || > diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h > index 19d60b6d5b45..5e4d62bed142 100644 > --- a/net/mptcp/protocol.h > +++ b/net/mptcp/protocol.h > @@ -742,6 +742,15 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk); > struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk); > int mptcp_sched_get_send(struct mptcp_sock *msk); > int mptcp_sched_get_retrans(struct mptcp_sock *msk); > +u16 __mptcp_sched_push(struct mptcp_sock *msk, struct sock *ssk, > + struct mptcp_data_frag *dfrag, u16 *flags); > +static inline u16 mptcp_sched_push(struct mptcp_sock *msk, struct sock *ssk, > + struct mptcp_data_frag *dfrag, u16 *flags) { > + if (likely(!msk->sched || !msk->sched->push)) > + return 0; > + > + return __mptcp_sched_push(msk, ssk, dfrag, flags); > +} > > static inline u64 mptcp_data_avail(const struct mptcp_sock *msk) > { > diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c > index 8def10abd60e..d6ff82bde641 100644 > --- a/net/mptcp/sched.c > +++ b/net/mptcp/sched.c > @@ -193,6 +193,26 @@ int mptcp_sched_get_send(struct mptcp_sock *msk) > return msk->sched->get_subflow(msk, &data); > } > > +/* This is called before the data has been sent on a subflow. It returns flags > + * that defines what the scheduler should do with the ongoing data. > + */ > +u16 __mptcp_sched_push(struct mptcp_sock *msk, struct sock *ssk, > + struct mptcp_data_frag *dfrag, u16 *flags) > +{ > + struct mptcp_sched_chunk chunk = { > + .data_seq = dfrag->data_seq + dfrag->already_sent, > + .limit = 0, > + .flags = 0, > + }; > + > + msk_owned_by_me(msk); > + > + msk->sched->push(msk, mptcp_subflow_ctx(ssk), &chunk); > + *flags = chunk.flags; > + > + return chunk.limit; > +} > + > int mptcp_sched_get_retrans(struct mptcp_sock *msk) > { > struct mptcp_subflow_context *subflow; > > -- > 2.43.0 > > >
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2f8f32cd31d3..abfd5b6748f2 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1526,12 +1526,15 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk, struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *dfrag; int len, copied = 0, err = 0; + u16 sflags = 0; + + while ((dfrag = mptcp_send_head(sk)) && !(sflags & MPTCP_SCHED_FLAG_RESCHEDULE)) { + u16 limit = mptcp_sched_push(msk, ssk, dfrag, &sflags); - while ((dfrag = mptcp_send_head(sk))) { info->sent = dfrag->already_sent; - info->limit = dfrag->data_len; len = dfrag->data_len - dfrag->already_sent; - while (len > 0) { + info->limit = limit ? info->sent + min_t(u16, limit, len) : dfrag->data_len; + while (len > 0 && info->sent < info->limit) { int ret = 0; ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info); @@ -1546,7 +1549,10 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk, mptcp_update_post_push(msk, dfrag, ret); } - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + + /* if the whole data has been sent, move to next data segment: */ + if (len <= 0) + WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); if (msk->snd_burst <= 0 || !sk_stream_memory_free(ssk) || diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 19d60b6d5b45..5e4d62bed142 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -742,6 +742,15 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk); struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk); int mptcp_sched_get_send(struct mptcp_sock *msk); int mptcp_sched_get_retrans(struct mptcp_sock *msk); +u16 __mptcp_sched_push(struct mptcp_sock *msk, struct sock *ssk, + struct mptcp_data_frag *dfrag, u16 *flags); +static inline u16 mptcp_sched_push(struct mptcp_sock *msk, struct sock *ssk, + struct mptcp_data_frag *dfrag, u16 *flags) { + if (likely(!msk->sched || !msk->sched->push)) + return 0; + + return __mptcp_sched_push(msk, ssk, 
dfrag, flags); +} static inline u64 mptcp_data_avail(const struct mptcp_sock *msk) { diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index 8def10abd60e..d6ff82bde641 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -193,6 +193,26 @@ int mptcp_sched_get_send(struct mptcp_sock *msk) return msk->sched->get_subflow(msk, &data); } +/* This is called before the data has been sent on a subflow. It returns flags + * that defines what the scheduler should do with the ongoing data. + */ +u16 __mptcp_sched_push(struct mptcp_sock *msk, struct sock *ssk, + struct mptcp_data_frag *dfrag, u16 *flags) +{ + struct mptcp_sched_chunk chunk = { + .data_seq = dfrag->data_seq + dfrag->already_sent, + .limit = 0, + .flags = 0, + }; + + msk_owned_by_me(msk); + + msk->sched->push(msk, mptcp_subflow_ctx(ssk), &chunk); + *flags = chunk.flags; + + return chunk.limit; +} + int mptcp_sched_get_retrans(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow;
This commit modifies the subflow push-pending function so that the scheduler is consulted for each chunk of data that will be sent over this subflow. Only the amount of data defined by the scheduler is sent. If the scheduler sets MPTCP_SCHED_FLAG_RESCHEDULE, the function will ignore further data to be sent; this causes the get_subflow function to be called again. The previous behavior is maintained if the push callback isn't defined by the scheduler. Signed-off-by: Gregory Detal <gregory.detal@gmail.com> --- net/mptcp/protocol.c | 14 ++++++++++---- net/mptcp/protocol.h | 9 +++++++++ net/mptcp/sched.c | 20 ++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-)