diff mbox series

[mptcp-next,RFC,2/4] mptcp: use new push callback to schedule chunks

Message ID 20240527-sched_per_packet-v1-2-09a41d405f7c@gmail.com (mailing list archive)
State RFC
Headers show
Series mptcp: update scheduler API | expand

Checks

Context Check Description
matttbe/build success Build and static analysis OK
matttbe/checkpatch success total: 0 errors, 0 warnings, 0 checks, 70 lines checked
matttbe/shellcheck success MPTCP selftests files have not been modified
matttbe/KVM_Validation__normal success Success! ✅
matttbe/KVM_Validation__debug success Success! ✅
matttbe/KVM_Validation__btf__only_bpftest_all_ success Success! ✅

Commit Message

Gregory Detal May 27, 2024, 1:17 p.m. UTC
This commit modifies the subflow push pending function to call the
scheduler for each data segment that will be sent over this subflow.

Only send the amount of data defined by the scheduler. If the
scheduler sets MPTCP_SCHED_FLAG_RESCHEDULE, the function will not push
any further data, causing the get_subflow function to be called again.

The previous behavior is maintained if the push function isn't defined
by the scheduler.

Signed-off-by: Gregory Detal <gregory.detal@gmail.com>
---
 net/mptcp/protocol.c | 14 ++++++++++----
 net/mptcp/protocol.h |  9 +++++++++
 net/mptcp/sched.c    | 20 ++++++++++++++++++++
 3 files changed, 39 insertions(+), 4 deletions(-)

Comments

Mat Martineau June 19, 2024, 1:05 a.m. UTC | #1
On Mon, 27 May 2024, Gregory Detal wrote:

> This commit modifies the subflow push pending function to call the
> scheduler for each data segment that will be sent over this subflow.
>
> Only send the amount of data defined by the scheduler. If the
> scheduler sets MPTCP_SCHED_FLAG_RESCHEDULE, the function will not push
> any further data, causing the get_subflow function to be called again.
>
> The previous behavior is maintained if the push function isn't defined
> by the scheduler.
>
> Signed-off-by: Gregory Detal <gregory.detal@gmail.com>
> ---
> net/mptcp/protocol.c | 14 ++++++++++----
> net/mptcp/protocol.h |  9 +++++++++
> net/mptcp/sched.c    | 20 ++++++++++++++++++++
> 3 files changed, 39 insertions(+), 4 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 2f8f32cd31d3..abfd5b6748f2 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1526,12 +1526,15 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
> 	struct mptcp_sock *msk = mptcp_sk(sk);
> 	struct mptcp_data_frag *dfrag;
> 	int len, copied = 0, err = 0;
> +	u16 sflags = 0;
> +
> +	while ((dfrag = mptcp_send_head(sk)) && !(sflags & MPTCP_SCHED_FLAG_RESCHEDULE)) {
> +		u16 limit = mptcp_sched_push(msk, ssk, dfrag, &sflags);
>
> -	while ((dfrag = mptcp_send_head(sk))) {
> 		info->sent = dfrag->already_sent;
> -		info->limit = dfrag->data_len;
> 		len = dfrag->data_len - dfrag->already_sent;
> -		while (len > 0) {
> +		info->limit = limit ? info->sent + min_t(u16, limit, len) : dfrag->data_len;
> +		while (len > 0 && info->sent < info->limit) {
> 			int ret = 0;
>
> 			ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info);
> @@ -1546,7 +1549,10 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
>
> 			mptcp_update_post_push(msk, dfrag, ret);
> 		}
> -		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
> +
> +		/* if all of the data has been sent, move to the next data segment */
> +		if (len <= 0)
> +			WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));

Hi Gregory -

It's important to be careful with possible race conditions here, since 
this changes existing assumptions about updates to msk->first_pending. I 
believe __mptcp_clean_una() (called when processing acks) will not get 
called until the msk socket lock is released so the data in msk->rtx_queue 
_should_ be available for reinjection. But we should be careful to verify 
all locking assumptions around these msk values. For example, sometimes 
the msk socket lock is held here but other times it's the mptcp_data_lock.

Also, it looks like there are some error paths that could exit this loop 
without updating msk->first_pending, which would be problematic.


- Mat


>
> 		if (msk->snd_burst <= 0 ||
> 		    !sk_stream_memory_free(ssk) ||
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index 19d60b6d5b45..5e4d62bed142 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -742,6 +742,15 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
> struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
> int mptcp_sched_get_send(struct mptcp_sock *msk);
> int mptcp_sched_get_retrans(struct mptcp_sock *msk);
> +u16 __mptcp_sched_push(struct mptcp_sock *msk, struct sock *ssk,
> +		       struct mptcp_data_frag *dfrag, u16 *flags);
> +static inline u16 mptcp_sched_push(struct mptcp_sock *msk, struct sock *ssk,
> +				   struct mptcp_data_frag *dfrag, u16 *flags) {
> +	if (likely(!msk->sched || !msk->sched->push))
> +		return 0;
> +
> +	return __mptcp_sched_push(msk, ssk, dfrag, flags);
> +}
>
> static inline u64 mptcp_data_avail(const struct mptcp_sock *msk)
> {
> diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
> index 8def10abd60e..d6ff82bde641 100644
> --- a/net/mptcp/sched.c
> +++ b/net/mptcp/sched.c
> @@ -193,6 +193,26 @@ int mptcp_sched_get_send(struct mptcp_sock *msk)
> 	return msk->sched->get_subflow(msk, &data);
> }
>
> +/* This is called before the data has been sent on a subflow. It returns flags
> + * that define what the scheduler should do with the ongoing data.
> + */
> +u16 __mptcp_sched_push(struct mptcp_sock *msk, struct sock *ssk,
> +		       struct mptcp_data_frag *dfrag, u16 *flags)
> +{
> +	struct mptcp_sched_chunk chunk = {
> +		.data_seq = dfrag->data_seq + dfrag->already_sent,
> +		.limit = 0,
> +		.flags = 0,
> +	};
> +
> +	msk_owned_by_me(msk);
> +
> +	msk->sched->push(msk, mptcp_subflow_ctx(ssk), &chunk);
> +	*flags = chunk.flags;
> +
> +	return chunk.limit;
> +}
> +
> int mptcp_sched_get_retrans(struct mptcp_sock *msk)
> {
> 	struct mptcp_subflow_context *subflow;
>
> -- 
> 2.43.0
>
>
>
diff mbox series

Patch

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2f8f32cd31d3..abfd5b6748f2 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1526,12 +1526,15 @@  static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct mptcp_data_frag *dfrag;
 	int len, copied = 0, err = 0;
+	u16 sflags = 0;
+
+	while ((dfrag = mptcp_send_head(sk)) && !(sflags & MPTCP_SCHED_FLAG_RESCHEDULE)) {
+		u16 limit = mptcp_sched_push(msk, ssk, dfrag, &sflags);
 
-	while ((dfrag = mptcp_send_head(sk))) {
 		info->sent = dfrag->already_sent;
-		info->limit = dfrag->data_len;
 		len = dfrag->data_len - dfrag->already_sent;
-		while (len > 0) {
+		info->limit = limit ? info->sent + min_t(u16, limit, len) : dfrag->data_len;
+		while (len > 0 && info->sent < info->limit) {
 			int ret = 0;
 
 			ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info);
@@ -1546,7 +1549,10 @@  static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
 
 			mptcp_update_post_push(msk, dfrag, ret);
 		}
-		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+
+		/* if all of the data has been sent, move to the next data segment */
+		if (len <= 0)
+			WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
 
 		if (msk->snd_burst <= 0 ||
 		    !sk_stream_memory_free(ssk) ||
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 19d60b6d5b45..5e4d62bed142 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -742,6 +742,15 @@  struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk);
 struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk);
 int mptcp_sched_get_send(struct mptcp_sock *msk);
 int mptcp_sched_get_retrans(struct mptcp_sock *msk);
+u16 __mptcp_sched_push(struct mptcp_sock *msk, struct sock *ssk,
+		       struct mptcp_data_frag *dfrag, u16 *flags);
+static inline u16 mptcp_sched_push(struct mptcp_sock *msk, struct sock *ssk,
+				   struct mptcp_data_frag *dfrag, u16 *flags) {
+	if (likely(!msk->sched || !msk->sched->push))
+		return 0;
+
+	return __mptcp_sched_push(msk, ssk, dfrag, flags);
+}
 
 static inline u64 mptcp_data_avail(const struct mptcp_sock *msk)
 {
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 8def10abd60e..d6ff82bde641 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -193,6 +193,26 @@  int mptcp_sched_get_send(struct mptcp_sock *msk)
 	return msk->sched->get_subflow(msk, &data);
 }
 
+/* This is called before the data has been sent on a subflow. It returns flags
+ * that define what the scheduler should do with the ongoing data.
+ */
+u16 __mptcp_sched_push(struct mptcp_sock *msk, struct sock *ssk,
+		       struct mptcp_data_frag *dfrag, u16 *flags)
+{
+	struct mptcp_sched_chunk chunk = {
+		.data_seq = dfrag->data_seq + dfrag->already_sent,
+		.limit = 0,
+		.flags = 0,
+	};
+
+	msk_owned_by_me(msk);
+
+	msk->sched->push(msk, mptcp_subflow_ctx(ssk), &chunk);
+	*flags = chunk.flags;
+
+	return chunk.limit;
+}
+
 int mptcp_sched_get_retrans(struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow;