diff mbox series

[net-next,1/2] sctp: add fair capacity stream scheduler

Message ID 3e977ca635d6b8ef8440d5eed2617a4f3a04b15b.1678224012.git.lucien.xin@gmail.com (mailing list archive)
State Accepted
Commit 4821a076eb602a6238528e9ebafeac853c833415
Delegated to: Netdev Maintainers
Headers show
Series sctp: add another two stream schedulers | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 75 this patch: 76
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 23 this patch: 23
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 75 this patch: 76
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 86 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Xin Long March 7, 2023, 9:23 p.m. UTC
As it says in rfc8260#section-3.5 about the fair capacity scheduler:

   A fair capacity distribution between the streams is used.  This
   scheduler considers the lengths of the messages of each stream and
   schedules them in a specific way to maintain an equal capacity for
   all streams.  The details are implementation dependent.  interleaving
   user messages allows for a better realization of the fair capacity
   usage.

This patch adds Fair Capacity Scheduler based on the foundations added
by commit 5bbbbe32a431 ("sctp: introduce stream scheduler foundations"):

A fc_list and a fc_length are added into struct sctp_stream_out_ext and
a fc_list is added into struct sctp_stream. In .enqueue, when there are
chunks enqueued into a stream, this stream will be linked into stream->
fc_list by its fc_list ordered by its fc_length. In .dequeue, it always
picks up the 1st skb from stream->fc_list. In .dequeue_done, fc_length
is increased by chunk's len and update its location in stream->fc_list
according to the its new fc_length.

Note that when the new fc_length overflows in .dequeue_done, instead of
resetting all fc_lengths to 0, we only reduced them by U32_MAX / 4 to
avoid a moment of imbalance in the scheduling, as Marcelo suggested.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 include/net/sctp/stream_sched.h |   1 +
 include/net/sctp/structs.h      |   7 ++
 include/uapi/linux/sctp.h       |   3 +-
 net/sctp/Makefile               |   3 +-
 net/sctp/stream_sched.c         |   1 +
 net/sctp/stream_sched_fc.c      | 183 ++++++++++++++++++++++++++++++++
 6 files changed, 196 insertions(+), 2 deletions(-)
 create mode 100644 net/sctp/stream_sched_fc.c

Comments

Marcelo Ricardo Leitner March 7, 2023, 9:48 p.m. UTC | #1
On Tue, Mar 07, 2023 at 04:23:26PM -0500, Xin Long wrote:
> As it says in rfc8260#section-3.5 about the fair capacity scheduler:
> 
>    A fair capacity distribution between the streams is used.  This
>    scheduler considers the lengths of the messages of each stream and
>    schedules them in a specific way to maintain an equal capacity for
>    all streams.  The details are implementation dependent.  interleaving
>    user messages allows for a better realization of the fair capacity
>    usage.
> 
> This patch adds Fair Capacity Scheduler based on the foundations added
> by commit 5bbbbe32a431 ("sctp: introduce stream scheduler foundations"):
> 
> A fc_list and a fc_length are added into struct sctp_stream_out_ext and
> a fc_list is added into struct sctp_stream. In .enqueue, when there are
> chunks enqueued into a stream, this stream will be linked into stream->
> fc_list by its fc_list ordered by its fc_length. In .dequeue, it always
> picks up the 1st skb from stream->fc_list. In .dequeue_done, fc_length
> is increased by chunk's len and update its location in stream->fc_list
> according to the its new fc_length.
> 
> Note that when the new fc_length overflows in .dequeue_done, instead of
> resetting all fc_lengths to 0, we only reduced them by U32_MAX / 4 to
> avoid a moment of imbalance in the scheduling, as Marcelo suggested.
> 
> Signed-off-by: Xin Long <lucien.xin@gmail.com>

Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Paolo Abeni March 9, 2023, 10:31 a.m. UTC | #2
On Tue, 2023-03-07 at 16:23 -0500, Xin Long wrote:
> diff --git a/net/sctp/stream_sched_fc.c b/net/sctp/stream_sched_fc.c
> new file mode 100644
> index 000000000000..b336c2f5486b
> --- /dev/null
> +++ b/net/sctp/stream_sched_fc.c
> @@ -0,0 +1,183 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/* SCTP kernel implementation
> + * (C) Copyright Red Hat Inc. 2022
> + *
> + * This file is part of the SCTP kernel implementation
> + *
> + * These functions manipulate sctp stream queue/scheduling.
> + *
> + * Please send any bug reports or fixes you make to the
> + * email addresched(es):
> + *    lksctp developers <linux-sctp@vger.kernel.org>
> + *
> + * Written or modified by:
> + *    Xin Long <lucien.xin@gmail.com>
> + */
> +
> +#include <linux/list.h>
> +#include <net/sctp/sctp.h>

Note that the above introduces a new compile warning:

../net/sctp/stream_sched_fc.c: note: in included file (through ../include/net/sctp/sctp.h):
../include/net/sctp/structs.h:335:41: warning: array of flexible structures

that is not really a fault of the new code, it's due to:

	struct sctp_init_chunk peer_init[];

struct sctp_init_chunk {
        struct sctp_chunkhdr chunk_hdr;
        struct sctp_inithdr init_hdr;
};

struct sctp_inithdr {
        __be32 init_tag;
        __be32 a_rwnd;
        __be16 num_outbound_streams;
        __be16 num_inbound_streams;
        __be32 initial_tsn;
        __u8  params[]; // <- this!
};

Any chance a future patch could remove the 'params' field from the
struct included by sctp_init_chunk?

e.g. 

struct sctp_inithdr_base {
        __be32 init_tag;
        __be32 a_rwnd;
        __be16 num_outbound_streams;
        __be16 num_inbound_streams;
        __be32 initial_tsn;
};

struct sctp_init_chunk {
        struct sctp_chunkhdr chunk_hdr;
        struct sctp_inithdr_base init_hdr;
};

and then cast 'init_hdr' to 'struct sctp_inithdr' if/where needed.

In any case, the above is not blocking this series.

Cheers,

Paolo
Xin Long March 10, 2023, 12:38 a.m. UTC | #3
On Thu, Mar 9, 2023 at 5:31 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On Tue, 2023-03-07 at 16:23 -0500, Xin Long wrote:
> > diff --git a/net/sctp/stream_sched_fc.c b/net/sctp/stream_sched_fc.c
> > new file mode 100644
> > index 000000000000..b336c2f5486b
> > --- /dev/null
> > +++ b/net/sctp/stream_sched_fc.c
> > @@ -0,0 +1,183 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/* SCTP kernel implementation
> > + * (C) Copyright Red Hat Inc. 2022
> > + *
> > + * This file is part of the SCTP kernel implementation
> > + *
> > + * These functions manipulate sctp stream queue/scheduling.
> > + *
> > + * Please send any bug reports or fixes you make to the
> > + * email addresched(es):
> > + *    lksctp developers <linux-sctp@vger.kernel.org>
> > + *
> > + * Written or modified by:
> > + *    Xin Long <lucien.xin@gmail.com>
> > + */
> > +
> > +#include <linux/list.h>
> > +#include <net/sctp/sctp.h>
>
> Note that the above introduces a new compile warning:
>
> ../net/sctp/stream_sched_fc.c: note: in included file (through ../include/net/sctp/sctp.h):
> ../include/net/sctp/structs.h:335:41: warning: array of flexible structures
>
> that is not really a fault of the new code, it's due to:
>
>         struct sctp_init_chunk peer_init[];
>
> struct sctp_init_chunk {
>         struct sctp_chunkhdr chunk_hdr;
>         struct sctp_inithdr init_hdr;
> };
>
> struct sctp_inithdr {
>         __be32 init_tag;
>         __be32 a_rwnd;
>         __be16 num_outbound_streams;
>         __be16 num_inbound_streams;
>         __be32 initial_tsn;
>         __u8  params[]; // <- this!
> };
>
> Any chance a future patch could remove the 'params' field from the
> struct included by sctp_init_chunk?
>
> e.g.
>
> struct sctp_inithdr_base {
>         __be32 init_tag;
>         __be32 a_rwnd;
>         __be16 num_outbound_streams;
>         __be16 num_inbound_streams;
>         __be32 initial_tsn;
> };
>
> struct sctp_init_chunk {
>         struct sctp_chunkhdr chunk_hdr;
>         struct sctp_inithdr_base init_hdr;
> };
>
> and then cast 'init_hdr' to 'struct sctp_inithdr' if/where needed.
>
> In any case, the above is not blocking this series.
>
Hi, Paolo,

There are actually quite some warnings like this in SCTP:

# make C=1 CF="-Wflexible-array-nested" M=./net/sctp/ 2> /tmp/warnings
# grep "nested flexible array" /tmp/warnings |grep sctp |sort |uniq

./include/linux/sctp.h:230:29: warning: nested flexible array
./include/linux/sctp.h:278:29: warning: nested flexible array
./include/linux/sctp.h:348:29: warning: nested flexible array
./include/linux/sctp.h:393:29: warning: nested flexible array
./include/linux/sctp.h:451:28: warning: nested flexible array
./include/linux/sctp.h:611:32: warning: nested flexible array
./include/linux/sctp.h:628:33: warning: nested flexible array
./include/linux/sctp.h:675:30: warning: nested flexible array
./include/linux/sctp.h:735:29: warning: nested flexible array
./include/net/sctp/structs.h:1145:41: warning: nested flexible array
./include/net/sctp/structs.h:1588:28: warning: nested flexible array
./include/net/sctp/structs.h:343:28: warning: nested flexible array
./include/uapi/linux/sctp.h:641:34: warning: nested flexible array
./include/uapi/linux/sctp.h:643:34: warning: nested flexible array
./include/uapi/linux/sctp.h:644:33: warning: nested flexible array
./include/uapi/linux/sctp.h:650:40: warning: nested flexible array
./include/uapi/linux/sctp.h:653:39: warning: nested flexible array

Other than the uapis, I can try to give others a cleanup by deleting
these flexible array members and casting it by (struct xxx *) (hdr + 1)
when accessing it.

This warning is reported if a structure having a flexible array member is
included by other structures, and I also noticed there is the same problem
on some core structures like:

struct ip_options
struct nft_set_ext

which are included in many other structures, and can also cause such warnings.
I guess we'll just leave it as it is, right?

Thanks.
diff mbox series

Patch

diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h
index fa00dc20a0d7..913170710adb 100644
--- a/include/net/sctp/stream_sched.h
+++ b/include/net/sctp/stream_sched.h
@@ -58,5 +58,6 @@  void sctp_sched_ops_register(enum sctp_sched_type sched,
 			     struct sctp_sched_ops *sched_ops);
 void sctp_sched_ops_prio_init(void);
 void sctp_sched_ops_rr_init(void);
+void sctp_sched_ops_fc_init(void);
 
 #endif /* __sctp_stream_sched_h__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e1f6e7fc2b11..2f1c9f50b352 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1429,6 +1429,10 @@  struct sctp_stream_out_ext {
 		struct {
 			struct list_head rr_list;
 		};
+		struct {
+			struct list_head fc_list;
+			__u32 fc_length;
+		};
 	};
 };
 
@@ -1475,6 +1479,9 @@  struct sctp_stream {
 			/* The next stream in line */
 			struct sctp_stream_out_ext *rr_next;
 		};
+		struct {
+			struct list_head fc_list;
+		};
 	};
 	struct sctp_stream_interleave *si;
 };
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index ed7d4ecbf53d..6814c5a1c4bc 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -1211,7 +1211,8 @@  enum sctp_sched_type {
 	SCTP_SS_DEFAULT = SCTP_SS_FCFS,
 	SCTP_SS_PRIO,
 	SCTP_SS_RR,
-	SCTP_SS_MAX = SCTP_SS_RR
+	SCTP_SS_FC,
+	SCTP_SS_MAX = SCTP_SS_FC
 };
 
 /* Probe Interval socket option */
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index e845e4588535..0448398408d8 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -13,7 +13,8 @@  sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
 	  tsnmap.o bind_addr.o socket.o primitive.o \
 	  output.o input.o debug.o stream.o auth.o \
 	  offload.o stream_sched.o stream_sched_prio.o \
-	  stream_sched_rr.o stream_interleave.o
+	  stream_sched_rr.o stream_sched_fc.o \
+	  stream_interleave.o
 
 sctp_diag-y := diag.o
 
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 330067002deb..1ebd14ef8daa 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -124,6 +124,7 @@  void sctp_sched_ops_init(void)
 	sctp_sched_ops_fcfs_init();
 	sctp_sched_ops_prio_init();
 	sctp_sched_ops_rr_init();
+	sctp_sched_ops_fc_init();
 }
 
 static void sctp_sched_free_sched(struct sctp_stream *stream)
diff --git a/net/sctp/stream_sched_fc.c b/net/sctp/stream_sched_fc.c
new file mode 100644
index 000000000000..b336c2f5486b
--- /dev/null
+++ b/net/sctp/stream_sched_fc.c
@@ -0,0 +1,183 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2022
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresched(es):
+ *    lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ *    Xin Long <lucien.xin@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Fair Capacity handling
+ * RFC 8260 section 3.5
+ */
+static void sctp_sched_fc_unsched_all(struct sctp_stream *stream);
+
+static int sctp_sched_fc_set(struct sctp_stream *stream, __u16 sid,
+			     __u16 weight, gfp_t gfp)
+{
+	return 0;
+}
+
+static int sctp_sched_fc_get(struct sctp_stream *stream, __u16 sid,
+			     __u16 *value)
+{
+	return 0;
+}
+
+static int sctp_sched_fc_init(struct sctp_stream *stream)
+{
+	INIT_LIST_HEAD(&stream->fc_list);
+
+	return 0;
+}
+
+static int sctp_sched_fc_init_sid(struct sctp_stream *stream, __u16 sid,
+				  gfp_t gfp)
+{
+	struct sctp_stream_out_ext *soute = SCTP_SO(stream, sid)->ext;
+
+	INIT_LIST_HEAD(&soute->fc_list);
+	soute->fc_length = 0;
+
+	return 0;
+}
+
+static void sctp_sched_fc_free_sid(struct sctp_stream *stream, __u16 sid)
+{
+}
+
+static void sctp_sched_fc_sched(struct sctp_stream *stream,
+				struct sctp_stream_out_ext *soute)
+{
+	struct sctp_stream_out_ext *pos;
+
+	if (!list_empty(&soute->fc_list))
+		return;
+
+	list_for_each_entry(pos, &stream->fc_list, fc_list)
+		if (pos->fc_length >= soute->fc_length)
+			break;
+	list_add_tail(&soute->fc_list, &pos->fc_list);
+}
+
+static void sctp_sched_fc_enqueue(struct sctp_outq *q,
+				  struct sctp_datamsg *msg)
+{
+	struct sctp_stream *stream;
+	struct sctp_chunk *ch;
+	__u16 sid;
+
+	ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
+	sid = sctp_chunk_stream_no(ch);
+	stream = &q->asoc->stream;
+	sctp_sched_fc_sched(stream, SCTP_SO(stream, sid)->ext);
+}
+
+static struct sctp_chunk *sctp_sched_fc_dequeue(struct sctp_outq *q)
+{
+	struct sctp_stream *stream = &q->asoc->stream;
+	struct sctp_stream_out_ext *soute;
+	struct sctp_chunk *ch;
+
+	/* Bail out quickly if queue is empty */
+	if (list_empty(&q->out_chunk_list))
+		return NULL;
+
+	/* Find which chunk is next */
+	if (stream->out_curr)
+		soute = stream->out_curr->ext;
+	else
+		soute = list_entry(stream->fc_list.next, struct sctp_stream_out_ext, fc_list);
+	ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
+
+	sctp_sched_dequeue_common(q, ch);
+	return ch;
+}
+
+static void sctp_sched_fc_dequeue_done(struct sctp_outq *q,
+				       struct sctp_chunk *ch)
+{
+	struct sctp_stream *stream = &q->asoc->stream;
+	struct sctp_stream_out_ext *soute, *pos;
+	__u16 sid, i;
+
+	sid = sctp_chunk_stream_no(ch);
+	soute = SCTP_SO(stream, sid)->ext;
+	/* reduce all fc_lengths by U32_MAX / 4 if the current fc_length overflows. */
+	if (soute->fc_length > U32_MAX - ch->skb->len) {
+		for (i = 0; i < stream->outcnt; i++) {
+			pos = SCTP_SO(stream, i)->ext;
+			if (!pos)
+				continue;
+			if (pos->fc_length <= (U32_MAX >> 2)) {
+				pos->fc_length = 0;
+				continue;
+			}
+			pos->fc_length -= (U32_MAX >> 2);
+		}
+	}
+	soute->fc_length += ch->skb->len;
+
+	if (list_empty(&soute->outq)) {
+		list_del_init(&soute->fc_list);
+		return;
+	}
+
+	pos = soute;
+	list_for_each_entry_continue(pos, &stream->fc_list, fc_list)
+		if (pos->fc_length >= soute->fc_length)
+			break;
+	list_move_tail(&soute->fc_list, &pos->fc_list);
+}
+
+static void sctp_sched_fc_sched_all(struct sctp_stream *stream)
+{
+	struct sctp_association *asoc;
+	struct sctp_chunk *ch;
+
+	asoc = container_of(stream, struct sctp_association, stream);
+	list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+		__u16 sid = sctp_chunk_stream_no(ch);
+
+		if (SCTP_SO(stream, sid)->ext)
+			sctp_sched_fc_sched(stream, SCTP_SO(stream, sid)->ext);
+	}
+}
+
+static void sctp_sched_fc_unsched_all(struct sctp_stream *stream)
+{
+	struct sctp_stream_out_ext *soute, *tmp;
+
+	list_for_each_entry_safe(soute, tmp, &stream->fc_list, fc_list)
+		list_del_init(&soute->fc_list);
+}
+
+static struct sctp_sched_ops sctp_sched_fc = {
+	.set = sctp_sched_fc_set,
+	.get = sctp_sched_fc_get,
+	.init = sctp_sched_fc_init,
+	.init_sid = sctp_sched_fc_init_sid,
+	.free_sid = sctp_sched_fc_free_sid,
+	.enqueue = sctp_sched_fc_enqueue,
+	.dequeue = sctp_sched_fc_dequeue,
+	.dequeue_done = sctp_sched_fc_dequeue_done,
+	.sched_all = sctp_sched_fc_sched_all,
+	.unsched_all = sctp_sched_fc_unsched_all,
+};
+
+void sctp_sched_ops_fc_init(void)
+{
+	sctp_sched_ops_register(SCTP_SS_FC, &sctp_sched_fc);
+}