From patchwork Mon Aug 21 22:25:12 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mat Martineau X-Patchwork-Id: 13359878 X-Patchwork-Delegate: kuba@kernel.org Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0BE5517755; Mon, 21 Aug 2023 22:25:23 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id B6CDCC433CB; Mon, 21 Aug 2023 22:25:22 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1692656722; bh=ZegXcvFdUUUbQrQhYHQhTMtH3PwvZ1GQ2Ih8qmeynjw=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=TrW6DIpLvGwjbWQZYqE/4N10iaYht1a2I04DSKCrM0jJ1sfKQDBa4lVkyRlfULrEb XfPLcfrnmAhFyjVLMZt/VUGHqSCMpOK9Ucj2QtE0e7tdle87txEIKsveTLCSmOsRZH xRajB0du2UG+uBpJ7jpCiP+oeA5o8dpj+TO2ush/GFiEB4b4lHGOINSkLThSm9VYMd GwOt8AEm85cMyugDYTvbdCBC3YsHPDZ07dRNHbHBUsRI9rduw2NV2MDExIsQeaKu33 Ij8LVSaV7MNqkgiDy0slWoi9diEdM1ZN0Y3rOo8jFtgjI/eqz47+eVMfY7Alu1bxwP CvI2fAGbVBI7g== From: Mat Martineau Date: Mon, 21 Aug 2023 15:25:12 -0700 Subject: [PATCH net-next 01/10] mptcp: refactor push_pending logic Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Message-Id: <20230821-upstream-net-next-20230818-v1-1-0c860fb256a8@kernel.org> References: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> In-Reply-To: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> To: Matthieu Baerts , "David S. 
Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Geliang Tang , Mat Martineau X-Mailer: b4 0.12.3 X-Patchwork-Delegate: kuba@kernel.org From: Geliang Tang To support redundant packet schedulers more easily, this patch refactors __mptcp_push_pending() logic from: For each dfrag: While sends succeed: Call the scheduler (selects subflow and msk->snd_burst) Update subflow locks (push/release/acquire as needed) Send the dfrag data with mptcp_sendmsg_frag() Update already_sent, snd_nxt, snd_burst Update msk->first_pending Push/release on final subflow -> While first_pending isn't empty: Call the scheduler (selects subflow and msk->snd_burst) Update subflow locks (push/release/acquire as needed) For each pending dfrag: While sends succeed: Send the dfrag data with mptcp_sendmsg_frag() Update already_sent, snd_nxt, snd_burst Update msk->first_pending Break if required by msk->snd_burst / etc Push/release on final subflow Refactor __mptcp_subflow_push_pending() logic from: For each dfrag: While sends succeed: Call the scheduler (selects subflow and msk->snd_burst) Send the dfrag data with mptcp_subflow_delegate(), break Send the dfrag data with mptcp_sendmsg_frag() Update dfrag->already_sent, msk->snd_nxt, msk->snd_burst Update msk->first_pending -> While first_pending isn't empty: Call the scheduler (selects subflow and msk->snd_burst) Send the dfrag data with mptcp_subflow_delegate(), break Send the dfrag data with mptcp_sendmsg_frag() For each pending dfrag: While sends succeed: Send the dfrag data with mptcp_sendmsg_frag() Update already_sent, snd_nxt, snd_burst Update msk->first_pending Break if required by msk->snd_burst / etc Move the duplicate code from __mptcp_push_pending() and __mptcp_subflow_push_pending() into a new helper function, named __subflow_push_pending(). Simplify __mptcp_push_pending() and __mptcp_subflow_push_pending() by invoking this helper. 
Also move the burst check conditions out of the function mptcp_subflow_get_send(), check them in __subflow_push_pending() in the inner "for each pending dfrag" loop. Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau --- net/mptcp/protocol.c | 153 +++++++++++++++++++++++++++------------------------ 1 file changed, 81 insertions(+), 72 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 6019a3cf1625..29c662ffcd05 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1386,14 +1386,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) sk_stream_memory_free(msk->first) ? msk->first : NULL; } - /* re-use last subflow, if the burst allow that */ - if (msk->last_snd && msk->snd_burst > 0 && - sk_stream_memory_free(msk->last_snd) && - mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) { - mptcp_set_timeout(sk); - return msk->last_snd; - } - /* pick the subflow with the lower wmem/wspace ratio */ for (i = 0; i < SSK_MODE_MAX; ++i) { send_info[i].ssk = NULL; @@ -1499,57 +1491,86 @@ void mptcp_check_and_set_pending(struct sock *sk) mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING); } -void __mptcp_push_pending(struct sock *sk, unsigned int flags) +static int __subflow_push_pending(struct sock *sk, struct sock *ssk, + struct mptcp_sendmsg_info *info) { - struct sock *prev_ssk = NULL, *ssk = NULL; struct mptcp_sock *msk = mptcp_sk(sk); - struct mptcp_sendmsg_info info = { - .flags = flags, - }; - bool do_check_data_fin = false; struct mptcp_data_frag *dfrag; - int len; + int len, copied = 0, err = 0; while ((dfrag = mptcp_send_head(sk))) { - info.sent = dfrag->already_sent; - info.limit = dfrag->data_len; + info->sent = dfrag->already_sent; + info->limit = dfrag->data_len; len = dfrag->data_len - dfrag->already_sent; while (len > 0) { int ret = 0; - prev_ssk = ssk; - ssk = mptcp_subflow_get_send(msk); - - /* First check. 
If the ssk has changed since - * the last round, release prev_ssk - */ - if (ssk != prev_ssk && prev_ssk) - mptcp_push_release(prev_ssk, &info); - if (!ssk) - goto out; - - /* Need to lock the new subflow only if different - * from the previous one, otherwise we are still - * helding the relevant lock - */ - if (ssk != prev_ssk) - lock_sock(ssk); - - ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); + ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info); if (ret <= 0) { - if (ret == -EAGAIN) - continue; - mptcp_push_release(ssk, &info); + err = copied ? : ret; goto out; } - do_check_data_fin = true; - info.sent += ret; + info->sent += ret; + copied += ret; len -= ret; mptcp_update_post_push(msk, dfrag, ret); } WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + + if (msk->snd_burst <= 0 || + !sk_stream_memory_free(ssk) || + !mptcp_subflow_active(mptcp_subflow_ctx(ssk))) { + err = copied; + goto out; + } + mptcp_set_timeout(sk); + } + err = copied; + +out: + return err; +} + +void __mptcp_push_pending(struct sock *sk, unsigned int flags) +{ + struct sock *prev_ssk = NULL, *ssk = NULL; + struct mptcp_sock *msk = mptcp_sk(sk); + struct mptcp_sendmsg_info info = { + .flags = flags, + }; + bool do_check_data_fin = false; + + while (mptcp_send_head(sk)) { + int ret = 0; + + prev_ssk = ssk; + ssk = mptcp_subflow_get_send(msk); + + /* First check. 
If the ssk has changed since + * the last round, release prev_ssk + */ + if (ssk != prev_ssk && prev_ssk) + mptcp_push_release(prev_ssk, &info); + if (!ssk) + goto out; + + /* Need to lock the new subflow only if different + * from the previous one, otherwise we are still + * helding the relevant lock + */ + if (ssk != prev_ssk) + lock_sock(ssk); + + ret = __subflow_push_pending(sk, ssk, &info); + if (ret <= 0) { + if (ret == -EAGAIN) + continue; + mptcp_push_release(ssk, &info); + goto out; + } + do_check_data_fin = true; } /* at this point we held the socket lock for the last subflow we used */ @@ -1570,42 +1591,30 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool struct mptcp_sendmsg_info info = { .data_lock_held = true, }; - struct mptcp_data_frag *dfrag; struct sock *xmit_ssk; - int len, copied = 0; + int copied = 0; info.flags = 0; - while ((dfrag = mptcp_send_head(sk))) { - info.sent = dfrag->already_sent; - info.limit = dfrag->data_len; - len = dfrag->data_len - dfrag->already_sent; - while (len > 0) { - int ret = 0; - - /* check for a different subflow usage only after - * spooling the first chunk of data - */ - xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk); - if (!xmit_ssk) - goto out; - if (xmit_ssk != ssk) { - mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), - MPTCP_DELEGATE_SEND); - goto out; - } - - ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); - if (ret <= 0) - goto out; + while (mptcp_send_head(sk)) { + int ret = 0; - info.sent += ret; - copied += ret; - len -= ret; - first = false; - - mptcp_update_post_push(msk, dfrag, ret); + /* check for a different subflow usage only after + * spooling the first chunk of data + */ + xmit_ssk = first ? 
ssk : mptcp_subflow_get_send(msk); + if (!xmit_ssk) + goto out; + if (xmit_ssk != ssk) { + mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), + MPTCP_DELEGATE_SEND); + goto out; } - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + + ret = __subflow_push_pending(sk, ssk, &info); + first = false; + if (ret <= 0) + break; + copied += ret; } out: From patchwork Mon Aug 21 22:25:13 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mat Martineau X-Patchwork-Id: 13359881 X-Patchwork-Delegate: kuba@kernel.org Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 93C1318AE5; Mon, 21 Aug 2023 22:25:23 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id EAAF2C433CC; Mon, 21 Aug 2023 22:25:22 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1692656723; bh=EtjkMo5Utfqyb2HvZTJ81wkGvDCfsqeXl+lqQcBmj0I=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=vK/16cRIZTOf1gUU0TMhI0AXu3D9oHzRBO3GIJkMsbmoA7pNJ8S02nvP4j0Q6mJEZ YSdrnUrkJYvWlMSSONZ0osbaSgI7zOgwSohZdJiMkOGg3gBB6baNzED9dBRwyBROjJ WZOYIk0DaPQJuI1QuFuJtMMU/UdCUJnqe01kIANieTA6Dj2WvXbzF2BreJFu+hf/SP X2oXKDbknliN3GaZLEhLh3MHEYx+cvvv0W1mxD45dnrvcRvYm8L8hlkRkKKsfs76f1 BLABacmaKBxw+u/DrUbQExVhuH75QuF1A7HuxJ7xoC0GdQd+TuEx8yZ/ew61H4spKe KPhkrkukT21gQ== From: Mat Martineau Date: Mon, 21 Aug 2023 15:25:13 -0700 Subject: [PATCH net-next 02/10] mptcp: drop last_snd and MPTCP_RESET_SCHEDULER Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Message-Id: <20230821-upstream-net-next-20230818-v1-2-0c860fb256a8@kernel.org> References: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> In-Reply-To: 
<20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Geliang Tang , Mat Martineau X-Mailer: b4 0.12.3 X-Patchwork-Delegate: kuba@kernel.org From: Geliang Tang Since the burst check conditions have moved out of the function mptcp_subflow_get_send(), it makes all msk->last_snd useless. This patch drops them as well as the macro MPTCP_RESET_SCHEDULER. Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau --- net/mptcp/pm.c | 9 +-------- net/mptcp/pm_netlink.c | 3 --- net/mptcp/protocol.c | 11 +---------- net/mptcp/protocol.h | 2 -- 4 files changed, 2 insertions(+), 23 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 7dbbad1e4f55..d8da5374d9e1 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -299,15 +299,8 @@ void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup) pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup); msk = mptcp_sk(sk); - if (subflow->backup != bkup) { + if (subflow->backup != bkup) subflow->backup = bkup; - mptcp_data_lock(sk); - if (!sock_owned_by_user(sk)) - msk->last_snd = NULL; - else - __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags); - mptcp_data_unlock(sk); - } mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC); } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index c75d9d88a053..9661f3812682 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -472,9 +472,6 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con slow = lock_sock_fast(ssk); if (prio) { - if (subflow->backup != backup) - msk->last_snd = NULL; - subflow->send_mp_prio = 1; subflow->backup = backup; subflow->request_bkup = backup; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 29c662ffcd05..f15ff80be30f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1438,16 +1438,13 @@ 
static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt); wmem = READ_ONCE(ssk->sk_wmem_queued); - if (!burst) { - msk->last_snd = NULL; + if (!burst) return ssk; - } subflow = mptcp_subflow_ctx(ssk); subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem + READ_ONCE(ssk->sk_pacing_rate) * burst, burst + wmem); - msk->last_snd = ssk; msk->snd_burst = burst; return ssk; } @@ -2379,9 +2376,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, WRITE_ONCE(msk->first, NULL); out: - if (ssk == msk->last_snd) - msk->last_snd = NULL; - if (need_push) __mptcp_push_pending(sk, 0); } @@ -3046,7 +3040,6 @@ static int mptcp_disconnect(struct sock *sk, int flags) * subflow */ mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE); - msk->last_snd = NULL; WRITE_ONCE(msk->flags, 0); msk->cb_flags = 0; msk->push_pending = 0; @@ -3316,8 +3309,6 @@ static void mptcp_release_cb(struct sock *sk) __mptcp_set_connected(sk); if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) __mptcp_error_report(sk); - if (__test_and_clear_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags)) - msk->last_snd = NULL; } __mptcp_update_rmem(sk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 38c7ea013361..cbf9a9e176b2 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -123,7 +123,6 @@ #define MPTCP_RETRANSMIT 4 #define MPTCP_FLUSH_JOIN_LIST 5 #define MPTCP_CONNECTED 6 -#define MPTCP_RESET_SCHEDULER 7 struct mptcp_skb_cb { u64 map_seq; @@ -269,7 +268,6 @@ struct mptcp_sock { u64 rcv_data_fin_seq; u64 bytes_retrans; int rmem_fwd_alloc; - struct sock *last_snd; int snd_burst; int old_wspace; u64 recovery_snd_nxt; /* in recovery mode accept up to this seq; From patchwork Mon Aug 21 22:25:14 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mat Martineau X-Patchwork-Id: 13359879 X-Patchwork-Delegate: 
kuba@kernel.org Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CB751198B0; Mon, 21 Aug 2023 22:25:23 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2B6FEC43397; Mon, 21 Aug 2023 22:25:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1692656723; bh=KL6HAUlN74y/BKjFAEiuc9d9I/kw0+Ps2Yd9sgSgLik=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=OYfDdc+Z6RVmNE3x8h1PGcMSPI79LGfVcW2wiltI+R/36z8gsxtsurYwakpei3VXG akN7zeSg7LwlpXmVTD1cdHrg5ac7h48uChKLBgV/kJnGUcoTYnurLAsi3/Imj0L6XX 1T8k7AvsmHds+qNp2K0y9zKpE2s63D0KBa4PnEt9pK19D7su5bBe3cqe4hqIDAii34 kfgyFdreFHh0n9PuKyNkqofCHYfTYEoBNStoBSUvSV7QGHoWKfOLWCHK2hscTfL67a 4dxBEpU2QOCBYAEWB1+V/4VxZZw+8LgS0BZlOdn4CIxZbWuuc7HJKukYZmOLACKtmg xsHryAJzrP50w== From: Mat Martineau Date: Mon, 21 Aug 2023 15:25:14 -0700 Subject: [PATCH net-next 03/10] mptcp: add struct mptcp_sched_ops Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Message-Id: <20230821-upstream-net-next-20230818-v1-3-0c860fb256a8@kernel.org> References: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> In-Reply-To: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Geliang Tang , Mat Martineau X-Mailer: b4 0.12.3 X-Patchwork-Delegate: kuba@kernel.org From: Geliang Tang This patch defines struct mptcp_sched_ops, which has three struct members, name, owner and list, and three function pointers: init(), release() and get_subflow(). 
The scheduler function get_subflow() has a struct mptcp_sched_data parameter, which contains a reinject flag indicating whether the call is for a retransmission, the number of subflows, and an array of mptcp_subflow_context pointers. Add the scheduler registering, unregistering and finding functions to add, delete and find a packet scheduler on the global list mptcp_sched_list. Acked-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau --- include/net/mptcp.h | 21 ++++++++++++++++++++ net/mptcp/Makefile | 2 +- net/mptcp/protocol.h | 3 +++ net/mptcp/sched.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 3c5c68618fcc..fb996124b3d5 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -96,6 +96,27 @@ struct mptcp_out_options { #endif }; +#define MPTCP_SCHED_NAME_MAX 16 +#define MPTCP_SUBFLOWS_MAX 8 + +struct mptcp_sched_data { + bool reinject; + u8 subflows; + struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX]; +}; + +struct mptcp_sched_ops { + int (*get_subflow)(struct mptcp_sock *msk, + struct mptcp_sched_data *data); + + char name[MPTCP_SCHED_NAME_MAX]; + struct module *owner; + struct list_head list; + + void (*init)(struct mptcp_sock *msk); + void (*release)(struct mptcp_sock *msk); +} ____cacheline_aligned_in_smp; + #ifdef CONFIG_MPTCP void mptcp_init(void); diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index a3829ce548f9..84e531f86b82 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ - mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o + mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index cbf9a9e176b2..985e8f86668d 100644 --- 
a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -655,6 +655,9 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, unsigned short family); +struct mptcp_sched_ops *mptcp_sched_find(const char *name); +int mptcp_register_scheduler(struct mptcp_sched_ops *sched); +void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched); static inline bool __tcp_can_send(const struct sock *ssk) { diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c new file mode 100644 index 000000000000..c5d3bbafba71 --- /dev/null +++ b/net/mptcp/sched.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Multipath TCP + * + * Copyright (c) 2022, SUSE. + */ + +#define pr_fmt(fmt) "MPTCP: " fmt + +#include +#include +#include +#include +#include +#include "protocol.h" + +static DEFINE_SPINLOCK(mptcp_sched_list_lock); +static LIST_HEAD(mptcp_sched_list); + +/* Must be called with rcu read lock held */ +struct mptcp_sched_ops *mptcp_sched_find(const char *name) +{ + struct mptcp_sched_ops *sched, *ret = NULL; + + list_for_each_entry_rcu(sched, &mptcp_sched_list, list) { + if (!strcmp(sched->name, name)) { + ret = sched; + break; + } + } + + return ret; +} + +int mptcp_register_scheduler(struct mptcp_sched_ops *sched) +{ + if (!sched->get_subflow) + return -EINVAL; + + spin_lock(&mptcp_sched_list_lock); + if (mptcp_sched_find(sched->name)) { + spin_unlock(&mptcp_sched_list_lock); + return -EEXIST; + } + list_add_tail_rcu(&sched->list, &mptcp_sched_list); + spin_unlock(&mptcp_sched_list_lock); + + pr_debug("%s registered", sched->name); + return 0; +} + +void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched) +{ + spin_lock(&mptcp_sched_list_lock); + list_del_rcu(&sched->list); + spin_unlock(&mptcp_sched_list_lock); +} From patchwork Mon Aug 21 22:25:15 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: 
Mat Martineau X-Patchwork-Id: 13359880 X-Patchwork-Delegate: kuba@kernel.org Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id DDD0919BC1; Mon, 21 Aug 2023 22:25:23 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5EE73C4339A; Mon, 21 Aug 2023 22:25:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1692656723; bh=l/E+nyy0Y8DtnrpBmW/CTFj63vuiRbKjHHBK8R6u1BA=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=Crbx4IeWZYFleHluk0uyz/xqbbrWFGMJV94GxjI0S1HGpgaYxVoBkrGyjdB5fhHUJ QweCIF2THzfQxIsYe8Glh4fqv15YDjo9MDqnOfR3Is+AIN5vQ2QF8Vwd2zHi15xC1i iDsMcT7Dn3VsN/RblSVuTGPYQ0e6gghihsVe6CfrGlfO77k8AvFQQ7FldJZra2ON/K Tmd/pvq50KW+uiOmXmrHetpEKwwU7MqEqtyeRYHM19HFcvc7BJ7/IvkQmToxmPL4Ev quF5eTWgW6xxyoonXbCXLoByTWSOBAkA+B625DPSTmepNLdw+Z4TSkYj2/8+kVZuRg PKnFVQusLY++g== From: Mat Martineau Date: Mon, 21 Aug 2023 15:25:15 -0700 Subject: [PATCH net-next 04/10] mptcp: add a new sysctl scheduler Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Message-Id: <20230821-upstream-net-next-20230818-v1-4-0c860fb256a8@kernel.org> References: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> In-Reply-To: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Geliang Tang , Mat Martineau X-Mailer: b4 0.12.3 X-Patchwork-Delegate: kuba@kernel.org From: Geliang Tang This patch adds a new sysctl, named scheduler, to support selection of different schedulers. Export the mptcp_get_scheduler() helper to read this sysctl. 
Acked-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau --- Documentation/networking/mptcp-sysctl.rst | 8 ++++++++ net/mptcp/ctrl.c | 14 ++++++++++++++ net/mptcp/protocol.h | 1 + 3 files changed, 23 insertions(+) diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index 213510698014..15f1919d640c 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -74,3 +74,11 @@ stale_loss_cnt - INTEGER This is a per-namespace sysctl. Default: 4 + +scheduler - STRING + Select the scheduler of your choice. + + Support for selection of different schedulers. This is a per-namespace + sysctl. + + Default: "default" diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index ae20b7d92e28..c46c22a84d23 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -32,6 +32,7 @@ struct mptcp_pernet { u8 checksum_enabled; u8 allow_join_initial_addr_port; u8 pm_type; + char scheduler[MPTCP_SCHED_NAME_MAX]; }; static struct mptcp_pernet *mptcp_get_pernet(const struct net *net) @@ -69,6 +70,11 @@ int mptcp_get_pm_type(const struct net *net) return mptcp_get_pernet(net)->pm_type; } +const char *mptcp_get_scheduler(const struct net *net) +{ + return mptcp_get_pernet(net)->scheduler; +} + static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; @@ -77,6 +83,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) pernet->allow_join_initial_addr_port = 1; pernet->stale_loss_cnt = 4; pernet->pm_type = MPTCP_PM_TYPE_KERNEL; + strcpy(pernet->scheduler, "default"); } #ifdef CONFIG_SYSCTL @@ -128,6 +135,12 @@ static struct ctl_table mptcp_sysctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &mptcp_pm_type_max }, + { + .procname = "scheduler", + .maxlen = MPTCP_SCHED_NAME_MAX, + .mode = 0644, + .proc_handler = proc_dostring, + }, {} }; @@ -149,6 +162,7 @@ static int mptcp_pernet_new_table(struct net *net, struct 
mptcp_pernet *pernet) table[3].data = &pernet->allow_join_initial_addr_port; table[4].data = &pernet->stale_loss_cnt; table[5].data = &pernet->pm_type; + table[6].data = &pernet->scheduler; hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); if (!hdr) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 985e8f86668d..bfa13a50f276 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -623,6 +623,7 @@ int mptcp_is_checksum_enabled(const struct net *net); int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); int mptcp_get_pm_type(const struct net *net); +const char *mptcp_get_scheduler(const struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); From patchwork Mon Aug 21 22:25:16 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mat Martineau X-Patchwork-Id: 13359884 X-Patchwork-Delegate: kuba@kernel.org Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B4C081ADFC; Mon, 21 Aug 2023 22:25:24 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 965CEC433C8; Mon, 21 Aug 2023 22:25:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1692656723; bh=mCFf41OAOMZ9vigTZ31U28QDeMg6gYJIm3A8aWPJDAo=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=Acgaf8VcVt4cmcgU8e2vvMxZ0raLlT+i3htooA+dRrcwoc0H72lpT6LAuPeSp9WNJ r6FrxvHr/eOiPaGR1UObvDkeFZELvImyq+m1+2Md0U4KMwsRgyfNh7sFtHf1BRWFxa /nXaKE34Q6365r6aqAIOGY6V+JjM6Mf/xtij0Yv7i79NV2N+AZASsuC13H/c8amRnj HvSeWSNU8hPB3TJ0+KI1BAWd5kLhACw4zh/Lq7NqDYltJHAh0Fry7aFVlqAxmptFYV 
Q9x6vcZ+UXOHiDJWvtIfEUJEL+QnlRcP4T7dHy5n4asHOxaU0szzls/oFHf1FAyJsy 1t6kSOf6bWG+w== From: Mat Martineau Date: Mon, 21 Aug 2023 15:25:16 -0700 Subject: [PATCH net-next 05/10] mptcp: add sched in mptcp_sock Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Message-Id: <20230821-upstream-net-next-20230818-v1-5-0c860fb256a8@kernel.org> References: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> In-Reply-To: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Geliang Tang , Mat Martineau X-Mailer: b4 0.12.3 X-Patchwork-Delegate: kuba@kernel.org From: Geliang Tang This patch adds a new struct member sched in struct mptcp_sock. And two helpers mptcp_init_sched() and mptcp_release_sched() to init and release it. Init it with the sysctl scheduler in mptcp_init_sock(), copy the scheduler from the parent in mptcp_sk_clone(), and release it in __mptcp_destroy_sock(). 
Acked-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau --- net/mptcp/protocol.c | 8 ++++++++ net/mptcp/protocol.h | 4 ++++ net/mptcp/sched.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f15ff80be30f..54a3eccfa731 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2697,6 +2697,7 @@ static void mptcp_ca_reset(struct sock *sk) static int mptcp_init_sock(struct sock *sk) { struct net *net = sock_net(sk); + int ret; __mptcp_init_sock(sk); @@ -2706,6 +2707,11 @@ static int mptcp_init_sock(struct sock *sk) if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net)) return -ENOMEM; + ret = mptcp_init_sched(mptcp_sk(sk), + mptcp_sched_find(mptcp_get_scheduler(net))); + if (ret) + return ret; + set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); /* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will @@ -2851,6 +2857,7 @@ static void __mptcp_destroy_sock(struct sock *sk) mptcp_stop_timer(sk); sk_stop_timer(sk, &sk->sk_timer); msk->pm.status = 0; + mptcp_release_sched(msk); sk->sk_prot->destroy(sk); @@ -3105,6 +3112,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, msk->snd_una = msk->write_seq; msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; + mptcp_init_sched(msk, mptcp_sk(sk)->sched); /* passive msk is created after the first/MPC subflow */ msk->subflow_id = 2; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index bfa13a50f276..548c302a757e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -312,6 +312,7 @@ struct mptcp_sock { * lock as such sock is freed after close(). 
*/ struct mptcp_pm_data pm; + struct mptcp_sched_ops *sched; struct { u32 space; /* bytes copied in last measurement window */ u32 copied; /* bytes copied in this measurement window */ @@ -659,6 +660,9 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct mptcp_sched_ops *mptcp_sched_find(const char *name); int mptcp_register_scheduler(struct mptcp_sched_ops *sched); void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched); +int mptcp_init_sched(struct mptcp_sock *msk, + struct mptcp_sched_ops *sched); +void mptcp_release_sched(struct mptcp_sock *msk); static inline bool __tcp_can_send(const struct sock *ssk) { diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index c5d3bbafba71..53773668b5ee 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -54,3 +54,36 @@ void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched) list_del_rcu(&sched->list); spin_unlock(&mptcp_sched_list_lock); } + +int mptcp_init_sched(struct mptcp_sock *msk, + struct mptcp_sched_ops *sched) +{ + if (!sched) + goto out; + + if (!bpf_try_module_get(sched, sched->owner)) + return -EBUSY; + + msk->sched = sched; + if (msk->sched->init) + msk->sched->init(msk); + + pr_debug("sched=%s", msk->sched->name); + +out: + return 0; +} + +void mptcp_release_sched(struct mptcp_sock *msk) +{ + struct mptcp_sched_ops *sched = msk->sched; + + if (!sched) + return; + + msk->sched = NULL; + if (sched->release) + sched->release(msk); + + bpf_module_put(sched, sched->owner); +} From patchwork Mon Aug 21 22:25:17 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mat Martineau X-Patchwork-Id: 13359883 X-Patchwork-Delegate: kuba@kernel.org Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 604FF1ADCE; Mon, 21 Aug 2023 
22:25:24 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id C9E8CC433B7; Mon, 21 Aug 2023 22:25:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1692656723; bh=bnVF7wxEKyiRFifLwfrCEM0L/IllcnM6/xPEkTFqHNk=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=oXnU/r2QQWpb0nedNOc53foOYlFrbIiMyCf85oftoTl5ahxQhEOx58D942PoXQAFk VD8smiJBQ8JbJXI2GbabNYOGWEqd/2a1HuX2cgCgQkKWbhP5iUZRgChb5qaWpixBBk 7ElIx87xVJoJvsm3rntRU85uSFbZfSKlH3NLQPRPCLawQqArVA2c7nlwT0McIav2Pl Ahr7dlSXhPMzkX8bF9YYCVWXjhbkQUJxfNfnT3ti/JnThSYKtk3s+Y0iM8BFfakhQe DgLyIUDJ71GdhJ61aLYpc45tLT2PXUEkwQJwipq39xrwgpuH+L5BEKL34BlTjlp0xH SHZMP4Xn/ViGQ== From: Mat Martineau Date: Mon, 21 Aug 2023 15:25:17 -0700 Subject: [PATCH net-next 06/10] mptcp: add scheduled in mptcp_subflow_context Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Message-Id: <20230821-upstream-net-next-20230818-v1-6-0c860fb256a8@kernel.org> References: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> In-Reply-To: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Geliang Tang , Mat Martineau X-Mailer: b4 0.12.3 X-Patchwork-Delegate: kuba@kernel.org From: Geliang Tang This patch adds a new member scheduled in struct mptcp_subflow_context, which will be set in the MPTCP scheduler context when the scheduler picks this subflow to send data. Add a new helper mptcp_subflow_set_scheduled() to set this flag using WRITE_ONCE(). 
Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau --- net/mptcp/protocol.h | 3 +++ net/mptcp/sched.c | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 548c302a757e..e7523a40132f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -491,6 +491,7 @@ struct mptcp_subflow_context { is_mptfo : 1, /* subflow is doing TFO */ __unused : 9; enum mptcp_data_avail data_avail; + bool scheduled; u32 remote_nonce; u64 thmac; u32 local_nonce; @@ -663,6 +664,8 @@ void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched); int mptcp_init_sched(struct mptcp_sock *msk, struct mptcp_sched_ops *sched); void mptcp_release_sched(struct mptcp_sock *msk); +void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, + bool scheduled); static inline bool __tcp_can_send(const struct sock *ssk) { diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index 53773668b5ee..d295b92a5789 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -87,3 +87,9 @@ void mptcp_release_sched(struct mptcp_sock *msk) bpf_module_put(sched, sched->owner); } + +void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, + bool scheduled) +{ + WRITE_ONCE(subflow->scheduled, scheduled); +} From patchwork Mon Aug 21 22:25:18 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mat Martineau X-Patchwork-Id: 13359882 X-Patchwork-Delegate: kuba@kernel.org Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E44631ADF3; Mon, 21 Aug 2023 22:25:24 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0AC71C43391; Mon, 21 Aug 2023 22:25:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; 
s=k20201202; t=1692656724; bh=UWFKrgEuKj5gvqGj0uOv7arr3/5WueKv/2GS6+ChiYQ=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=C+B+OUtEspLOOZ1YQs8pPeyBUd+WrixVByHnIBDKN85IkXWuq8bDd7q0gqbS7BHJ8 teVuVb6H5unBM9qD/mw0d3Bc4JTzqiihpqs1s8tOPLwmQzViXdMLlj5AoxLKB7BdEe yIDQZoLd78hoOQzgQrTUqMhc92ongv8qCQvYYGOke2hR5yCmOEi7eqIn6vpH7L/GyW NnjhJ7QxQhhOzBeVNOdVZ6QPWzgCq4+dKCnDnpY9hVy/cqB+2o4NiIX9N07sSNtggZ M/C+owNthaw8iu5p10QVRJkvgp48UWReTa9NBl6O9MCuxz6r4ZDV+qrTE1AyHMu3Dr qHuDRiSAEApYw== From: Mat Martineau Date: Mon, 21 Aug 2023 15:25:18 -0700 Subject: [PATCH net-next 07/10] mptcp: add scheduler wrappers Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Message-Id: <20230821-upstream-net-next-20230818-v1-7-0c860fb256a8@kernel.org> References: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> In-Reply-To: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Geliang Tang , Mat Martineau X-Mailer: b4 0.12.3 X-Patchwork-Delegate: kuba@kernel.org From: Geliang Tang This patch defines two packet scheduler wrappers, mptcp_sched_get_send() and mptcp_sched_get_retrans(), which invoke get_subflow() of msk->sched. data->reinject is set to true in mptcp_sched_get_retrans() and to false in mptcp_sched_get_send(). If msk->sched is NULL, the default functions mptcp_subflow_get_send() and mptcp_subflow_get_retrans() are used to select the subflow instead.
Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau --- net/mptcp/protocol.c | 4 ++-- net/mptcp/protocol.h | 4 ++++ net/mptcp/sched.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 54a3eccfa731..9cd172d2c8d6 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1366,7 +1366,7 @@ bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) * returns the subflow that will transmit the next DSS * additionally updates the rtx timeout */ -static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) +struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) { struct subflow_send_info send_info[SSK_MODE_MAX]; struct mptcp_subflow_context *subflow; @@ -2204,7 +2204,7 @@ static void mptcp_timeout_timer(struct timer_list *t) * * A backup subflow is returned only if that is the only kind available. */ -static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk) +struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk) { struct sock *backup = NULL, *pick = NULL; struct mptcp_subflow_context *subflow; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index e7523a40132f..78562f695c46 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -666,6 +666,10 @@ int mptcp_init_sched(struct mptcp_sock *msk, void mptcp_release_sched(struct mptcp_sock *msk); void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, bool scheduled); +struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk); +struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk); +int mptcp_sched_get_send(struct mptcp_sock *msk); +int mptcp_sched_get_retrans(struct mptcp_sock *msk); static inline bool __tcp_can_send(const struct sock *ssk) { diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index d295b92a5789..884606686cfe 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -93,3 +93,51 
@@ void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, { WRITE_ONCE(subflow->scheduled, scheduled); } + +int mptcp_sched_get_send(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sched_data data; + + mptcp_for_each_subflow(msk, subflow) { + if (READ_ONCE(subflow->scheduled)) + return 0; + } + + if (!msk->sched) { + struct sock *ssk; + + ssk = mptcp_subflow_get_send(msk); + if (!ssk) + return -EINVAL; + mptcp_subflow_set_scheduled(mptcp_subflow_ctx(ssk), true); + return 0; + } + + data.reinject = false; + return msk->sched->get_subflow(msk, &data); +} + +int mptcp_sched_get_retrans(struct mptcp_sock *msk) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sched_data data; + + mptcp_for_each_subflow(msk, subflow) { + if (READ_ONCE(subflow->scheduled)) + return 0; + } + + if (!msk->sched) { + struct sock *ssk; + + ssk = mptcp_subflow_get_retrans(msk); + if (!ssk) + return -EINVAL; + mptcp_subflow_set_scheduled(mptcp_subflow_ctx(ssk), true); + return 0; + } + + data.reinject = true; + return msk->sched->get_subflow(msk, &data); +} From patchwork Mon Aug 21 22:25:19 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mat Martineau X-Patchwork-Id: 13359887 X-Patchwork-Delegate: kuba@kernel.org Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 36D2C1B7D0; Mon, 21 Aug 2023 22:25:24 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3E192C433C9; Mon, 21 Aug 2023 22:25:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1692656724; bh=844ERc/SpZdh5idxnvmNmp7nv/VNlGd0NRgXWs3/ito=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; 
b=MTGJcu395Hz2GYiE25yo3BBQcRQTxjtw5vTTMYDZ67tOikD843v2xu8bjZDwB+NtO BQ0HCT1eFV3fwxBF6GkqWq1XOMdeZVL5DRsXvozx9SO+avQUmyQMeXpGWcJeZVb9B8 IONNolOy5HEUcygTc4vr+2EorNig3bHQy1LcwzQj1+d0EV9IGiGD97Qsma/1GDLF9T grSMYNm7ar6m6pO99q46Yy0PirdcOaoBsgDbCT0lOhCzduK2cA4++mIlrErjte/5cf 9rooT4sN5/K0bETOQDqDsOWY+WJqSluqE0HIqq+iw12JhCb/7UCvsdDqi3XSrfEQTR nwUazMGjc0YJg== From: Mat Martineau Date: Mon, 21 Aug 2023 15:25:19 -0700 Subject: [PATCH net-next 08/10] mptcp: use get_send wrapper Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Message-Id: <20230821-upstream-net-next-20230818-v1-8-0c860fb256a8@kernel.org> References: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> In-Reply-To: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Geliang Tang , Mat Martineau X-Mailer: b4 0.12.3 X-Patchwork-Delegate: kuba@kernel.org From: Geliang Tang This patch adds the multiple subflows support for __mptcp_push_pending and __mptcp_subflow_push_pending. Use get_send() wrapper instead of mptcp_subflow_get_send() in them. Check the subflow scheduled flags to test which subflow or subflows are picked by the scheduler, use them to send data. Move msk_owned_by_me() and fallback checks into get_send() wrapper from mptcp_subflow_get_send(). This commit allows the scheduler to set the subflow->scheduled bit in multiple subflows, but it does not allow for sending redundant data. Multiple scheduled subflows will send sequential data on each subflow. 
Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau --- net/mptcp/protocol.c | 113 +++++++++++++++++++++++++++++++-------------------- net/mptcp/sched.c | 13 ++++++ 2 files changed, 81 insertions(+), 45 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9cd172d2c8d6..77e94ee82859 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1377,15 +1377,6 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) u64 linger_time; long tout = 0; - msk_owned_by_me(msk); - - if (__mptcp_check_fallback(msk)) { - if (!msk->first) - return NULL; - return __tcp_can_send(msk->first) && - sk_stream_memory_free(msk->first) ? msk->first : NULL; - } - /* pick the subflow with the lower wmem/wspace ratio */ for (i = 0; i < SSK_MODE_MAX; ++i) { send_info[i].ssk = NULL; @@ -1538,43 +1529,56 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) .flags = flags, }; bool do_check_data_fin = false; + int push_count = 1; - while (mptcp_send_head(sk)) { + while (mptcp_send_head(sk) && (push_count > 0)) { + struct mptcp_subflow_context *subflow; int ret = 0; - prev_ssk = ssk; - ssk = mptcp_subflow_get_send(msk); + if (mptcp_sched_get_send(msk)) + break; - /* First check. If the ssk has changed since - * the last round, release prev_ssk - */ - if (ssk != prev_ssk && prev_ssk) - mptcp_push_release(prev_ssk, &info); - if (!ssk) - goto out; + push_count = 0; - /* Need to lock the new subflow only if different - * from the previous one, otherwise we are still - * helding the relevant lock - */ - if (ssk != prev_ssk) - lock_sock(ssk); + mptcp_for_each_subflow(msk, subflow) { + if (READ_ONCE(subflow->scheduled)) { + mptcp_subflow_set_scheduled(subflow, false); - ret = __subflow_push_pending(sk, ssk, &info); - if (ret <= 0) { - if (ret == -EAGAIN) - continue; - mptcp_push_release(ssk, &info); - goto out; + prev_ssk = ssk; + ssk = mptcp_subflow_tcp_sock(subflow); + if (ssk != prev_ssk) { + /* First check. 
If the ssk has changed since + * the last round, release prev_ssk + */ + if (prev_ssk) + mptcp_push_release(prev_ssk, &info); + + /* Need to lock the new subflow only if different + * from the previous one, otherwise we are still + * holding the relevant lock + */ + lock_sock(ssk); + } + + push_count++; + + ret = __subflow_push_pending(sk, ssk, &info); + if (ret <= 0) { + if (ret != -EAGAIN || + (1 << ssk->sk_state) & + (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSE)) + push_count--; + continue; + } + do_check_data_fin = true; + } + } - do_check_data_fin = true; } /* at this point we held the socket lock for the last subflow we used */ if (ssk) mptcp_push_release(ssk, &info); -out: /* ensure the rtx timer is running */ if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); @@ -1588,30 +1592,49 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool struct mptcp_sendmsg_info info = { .data_lock_held = true, }; + bool keep_pushing = true; struct sock *xmit_ssk; int copied = 0; info.flags = 0; - while (mptcp_send_head(sk)) { + while (mptcp_send_head(sk) && keep_pushing) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); int ret = 0; /* check for a different subflow usage only after * spooling the first chunk of data */ - xmit_ssk = first ?
ssk : mptcp_subflow_get_send(msk); - if (!xmit_ssk) - goto out; - if (xmit_ssk != ssk) { - mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), - MPTCP_DELEGATE_SEND); + if (first) { + mptcp_subflow_set_scheduled(subflow, false); + ret = __subflow_push_pending(sk, ssk, &info); + first = false; + if (ret <= 0) + break; + copied += ret; + continue; + } + + if (mptcp_sched_get_send(msk)) goto out; + + if (READ_ONCE(subflow->scheduled)) { + mptcp_subflow_set_scheduled(subflow, false); + ret = __subflow_push_pending(sk, ssk, &info); + if (ret <= 0) + keep_pushing = false; + copied += ret; } - ret = __subflow_push_pending(sk, ssk, &info); - first = false; - if (ret <= 0) - break; - copied += ret; + mptcp_for_each_subflow(msk, subflow) { + if (READ_ONCE(subflow->scheduled)) { + xmit_ssk = mptcp_subflow_tcp_sock(subflow); + if (xmit_ssk != ssk) { + mptcp_subflow_delegate(subflow, + MPTCP_DELEGATE_SEND); + keep_pushing = false; + } + } + } } out: diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index 884606686cfe..078b5d44978d 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -99,6 +99,19 @@ int mptcp_sched_get_send(struct mptcp_sock *msk) struct mptcp_subflow_context *subflow; struct mptcp_sched_data data; + msk_owned_by_me(msk); + + /* the following check is moved out of mptcp_subflow_get_send */ + if (__mptcp_check_fallback(msk)) { + if (msk->first && + __tcp_can_send(msk->first) && + sk_stream_memory_free(msk->first)) { + mptcp_subflow_set_scheduled(mptcp_subflow_ctx(msk->first), true); + return 0; + } + return -EINVAL; + } + mptcp_for_each_subflow(msk, subflow) { if (READ_ONCE(subflow->scheduled)) return 0; From patchwork Mon Aug 21 22:25:20 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mat Martineau X-Patchwork-Id: 13359886 X-Patchwork-Delegate: kuba@kernel.org Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher 
ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 6CA4C1BB32; Mon, 21 Aug 2023 22:25:26 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 72BDDC43395; Mon, 21 Aug 2023 22:25:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1692656724; bh=Miq3TLRI/pTEoF1BRBHAq+glR+w1Dr/v8PRF9aArIpk=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=bMj7gzI37ZuRwKhK9iobu3o5mAacB7hXDB/EIVIC3Y2xpDx8M8yp8KKmPnl7PbVyw bZrY3tk8cRfzYKVfTZquRhQ/trkLXGkyHDeY34AYOQy3xnDPA9QP6Pebc1wvmP9tfn Q7mEmAKqdbKDtk2vwPav8ChniRM55RyKj9cI4tsKeHDp/9FkzSCjKKCuQQHcqDFndI foFB5FpebIU6feZfbo1JgZE2Cm0xLnNGBzPN2vfB7F5pUmiYFI+SyQ9WqOXEdyBdGJ JFlyZnCKwbw/z9a/SHFOO4X+XSe/cZ3qtV0l+0tRvE+MxFmO5SSkS9LBdRUxOZV9OE JS9e6GK501uyg== From: Mat Martineau Date: Mon, 21 Aug 2023 15:25:20 -0700 Subject: [PATCH net-next 09/10] mptcp: use get_retrans wrapper Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Message-Id: <20230821-upstream-net-next-20230818-v1-9-0c860fb256a8@kernel.org> References: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> In-Reply-To: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Geliang Tang , Mat Martineau X-Mailer: b4 0.12.3 X-Patchwork-Delegate: kuba@kernel.org From: Geliang Tang This patch adds the multiple subflows support for __mptcp_retrans(). Use get_retrans() wrapper instead of mptcp_subflow_get_retrans() in it. Check the subflow scheduled flags to test which subflow or subflows are picked by the scheduler, use them to send data. Move msk_owned_by_me() and fallback checks into get_retrans() wrapper from mptcp_subflow_get_retrans(). 
Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau --- net/mptcp/protocol.c | 65 ++++++++++++++++++++++++++++++---------------------- net/mptcp/sched.c | 6 +++++ 2 files changed, 43 insertions(+), 28 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 77e94ee82859..61590ff2b9ee 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2233,11 +2233,6 @@ struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk) struct mptcp_subflow_context *subflow; int min_stale_count = INT_MAX; - msk_owned_by_me(msk); - - if (__mptcp_check_fallback(msk)) - return NULL; - mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -2515,16 +2510,17 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) static void __mptcp_retrans(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); + struct mptcp_subflow_context *subflow; struct mptcp_sendmsg_info info = {}; struct mptcp_data_frag *dfrag; - size_t copied = 0; struct sock *ssk; - int ret; + int ret, err; + u16 len = 0; mptcp_clean_una_wakeup(sk); /* first check ssk: need to kick "stale" logic */ - ssk = mptcp_subflow_get_retrans(msk); + err = mptcp_sched_get_retrans(msk); dfrag = mptcp_rtx_head(sk); if (!dfrag) { if (mptcp_data_fin_enabled(msk)) { @@ -2543,32 +2539,45 @@ static void __mptcp_retrans(struct sock *sk) goto reset_timer; } - if (!ssk) + if (err) goto reset_timer; - lock_sock(ssk); + mptcp_for_each_subflow(msk, subflow) { + if (READ_ONCE(subflow->scheduled)) { + u16 copied = 0; - /* limit retransmission to the bytes already sent on some subflows */ - info.sent = 0; - info.limit = READ_ONCE(msk->csum_enabled) ? 
dfrag->data_len : dfrag->already_sent; - while (info.sent < info.limit) { - ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); - if (ret <= 0) - break; + mptcp_subflow_set_scheduled(subflow, false); - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS); - copied += ret; - info.sent += ret; - } - if (copied) { - dfrag->already_sent = max(dfrag->already_sent, info.sent); - msk->bytes_retrans += copied; - tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, - info.size_goal); - WRITE_ONCE(msk->allow_infinite_fallback, false); + ssk = mptcp_subflow_tcp_sock(subflow); + + lock_sock(ssk); + + /* limit retransmission to the bytes already sent on some subflows */ + info.sent = 0; + info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : + dfrag->already_sent; + while (info.sent < info.limit) { + ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); + if (ret <= 0) + break; + + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS); + copied += ret; + info.sent += ret; + } + if (copied) { + len = max(copied, len); + tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, + info.size_goal); + WRITE_ONCE(msk->allow_infinite_fallback, false); + } + + release_sock(ssk); + } } - release_sock(ssk); + msk->bytes_retrans += len; + dfrag->already_sent = max(dfrag->already_sent, len); reset_timer: mptcp_check_and_set_pending(sk); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index 078b5d44978d..cac1cc1fa3b0 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -136,6 +136,12 @@ int mptcp_sched_get_retrans(struct mptcp_sock *msk) struct mptcp_subflow_context *subflow; struct mptcp_sched_data data; + msk_owned_by_me(msk); + + /* the following check is moved out of mptcp_subflow_get_retrans */ + if (__mptcp_check_fallback(msk)) + return -EINVAL; + mptcp_for_each_subflow(msk, subflow) { if (READ_ONCE(subflow->scheduled)) return 0; From patchwork Mon Aug 21 22:25:21 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: 
Mat Martineau X-Patchwork-Id: 13359885 X-Patchwork-Delegate: kuba@kernel.org Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 696051BB2B; Mon, 21 Aug 2023 22:25:26 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id B17B0C433AB; Mon, 21 Aug 2023 22:25:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1692656724; bh=tc2xDya1T5Y4oXutUgVLxid1Ewv0x3sfXACYqZv2uGk=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=T1yM9POoWNazPxOCyvR0R8ILF78NsIPwSn/2pBNUVzFVlK0TbejK8//W1+OTnl5Xc jlpT2/B6+a+3spoyJI91+VDwjmvZfFAfUN/p5xMR9xioyuo8uhZhi44gWm6sDCu6ru ciCe+cPXX7IB3bnIMGxZAh776JDiiCGaWUCusDLgM3O8U3Xdan5+8zo+zoQGDoygWd 1Thb68Ojgasdi8Y7MC1BOLisZrKvNuJ8bnZbwrUXtmVownnPNpiABjRzJilsu8t/Dk war49dxeLDYvZFedwwczN7nhKrLos6W3bGeYxxobiDWxJ3TN61ZD9pgD5W+aaMV5Pm z985/OjVFB3Kw== From: Mat Martineau Date: Mon, 21 Aug 2023 15:25:21 -0700 Subject: [PATCH net-next 10/10] mptcp: register default scheduler Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Message-Id: <20230821-upstream-net-next-20230818-v1-10-0c860fb256a8@kernel.org> References: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> In-Reply-To: <20230821-upstream-net-next-20230818-v1-0-0c860fb256a8@kernel.org> To: Matthieu Baerts , "David S. Miller" , Eric Dumazet , Jakub Kicinski , Paolo Abeni Cc: netdev@vger.kernel.org, mptcp@lists.linux.dev, Geliang Tang , Mat Martineau X-Mailer: b4 0.12.3 X-Patchwork-Delegate: kuba@kernel.org From: Geliang Tang This patch defines the default packet scheduler mptcp_sched_default. Register it in mptcp_sched_init(), which is invoked in mptcp_proto_init(). Skip deleting this default scheduler in mptcp_unregister_scheduler(). 
Set msk->sched to the default scheduler when the input parameter of mptcp_init_sched() is NULL. Invoke mptcp_sched_default_get_subflow in get_send() and get_retrans() if the default scheduler is set or msk->sched is NULL. Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau --- net/mptcp/protocol.c | 1 + net/mptcp/protocol.h | 1 + net/mptcp/sched.c | 55 +++++++++++++++++++++++++++++++--------------------- 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 61590ff2b9ee..933b257eee02 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3965,6 +3965,7 @@ void __init mptcp_proto_init(void) mptcp_subflow_init(); mptcp_pm_init(); + mptcp_sched_init(); mptcp_token_init(); if (proto_register(&mptcp_prot, 1) != 0) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 78562f695c46..7254b3562575 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -661,6 +661,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct mptcp_sched_ops *mptcp_sched_find(const char *name); int mptcp_register_scheduler(struct mptcp_sched_ops *sched); void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched); +void mptcp_sched_init(void); int mptcp_init_sched(struct mptcp_sock *msk, struct mptcp_sched_ops *sched); void mptcp_release_sched(struct mptcp_sock *msk); diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index cac1cc1fa3b0..4ab0693c069c 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -16,6 +16,26 @@ static DEFINE_SPINLOCK(mptcp_sched_list_lock); static LIST_HEAD(mptcp_sched_list); +static int mptcp_sched_default_get_subflow(struct mptcp_sock *msk, + struct mptcp_sched_data *data) +{ + struct sock *ssk; + + ssk = data->reinject ?
mptcp_subflow_get_retrans(msk) : + mptcp_subflow_get_send(msk); + if (!ssk) + return -EINVAL; + + mptcp_subflow_set_scheduled(mptcp_subflow_ctx(ssk), true); + return 0; +} + +static struct mptcp_sched_ops mptcp_sched_default = { + .get_subflow = mptcp_sched_default_get_subflow, + .name = "default", + .owner = THIS_MODULE, +}; + /* Must be called with rcu read lock held */ struct mptcp_sched_ops *mptcp_sched_find(const char *name) { @@ -50,16 +70,24 @@ int mptcp_register_scheduler(struct mptcp_sched_ops *sched) void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched) { + if (sched == &mptcp_sched_default) + return; + spin_lock(&mptcp_sched_list_lock); list_del_rcu(&sched->list); spin_unlock(&mptcp_sched_list_lock); } +void mptcp_sched_init(void) +{ + mptcp_register_scheduler(&mptcp_sched_default); +} + int mptcp_init_sched(struct mptcp_sock *msk, struct mptcp_sched_ops *sched) { if (!sched) - goto out; + sched = &mptcp_sched_default; if (!bpf_try_module_get(sched, sched->owner)) return -EBUSY; @@ -70,7 +98,6 @@ int mptcp_init_sched(struct mptcp_sock *msk, pr_debug("sched=%s", msk->sched->name); -out: return 0; } @@ -117,17 +144,9 @@ int mptcp_sched_get_send(struct mptcp_sock *msk) return 0; } - if (!msk->sched) { - struct sock *ssk; - - ssk = mptcp_subflow_get_send(msk); - if (!ssk) - return -EINVAL; - mptcp_subflow_set_scheduled(mptcp_subflow_ctx(ssk), true); - return 0; - } - data.reinject = false; + if (msk->sched == &mptcp_sched_default || !msk->sched) + return mptcp_sched_default_get_subflow(msk, &data); return msk->sched->get_subflow(msk, &data); } @@ -147,16 +166,8 @@ int mptcp_sched_get_retrans(struct mptcp_sock *msk) return 0; } - if (!msk->sched) { - struct sock *ssk; - - ssk = mptcp_subflow_get_retrans(msk); - if (!ssk) - return -EINVAL; - mptcp_subflow_set_scheduled(mptcp_subflow_ctx(ssk), true); - return 0; - } - data.reinject = true; + if (msk->sched == &mptcp_sched_default || !msk->sched) + return mptcp_sched_default_get_subflow(msk, &data); 
return msk->sched->get_subflow(msk, &data); }