diff mbox series

[net-next,6/9] net-timestamp: add tx OPT_ID_TCP support for bpf case

Message ID 20241008095109.99918-7-kerneljasonxing@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net-timestamp: bpf extension to equip applications transparently | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 6 this patch: 6
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 6 this patch: 6
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 5 this patch: 5
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 55 this patch: 55
netdev/source_inline success Was 0 now: 0

Commit Message

Jason Xing Oct. 8, 2024, 9:51 a.m. UTC
From: Jason Xing <kernelxing@tencent.com>

We can set OPT_ID|OPT_ID_TCP before we initialize the last skb
from each sendmsg. We only set the socket once like how we use
setsockopt() with OPT_ID|OPT_ID_TCP flags.

Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
 net/core/skbuff.c | 16 +++++++++++++---
 net/ipv4/tcp.c    | 19 +++++++++++++++----
 2 files changed, 28 insertions(+), 7 deletions(-)

Comments

Willem de Bruijn Oct. 8, 2024, 6:56 p.m. UTC | #1
Jason Xing wrote:
> From: Jason Xing <kernelxing@tencent.com>
> 
> We can set OPT_ID|OPT_ID_TCP before we initialize the last skb
> from each sendmsg. We only set the socket once like how we use
> setsockopt() with OPT_ID|OPT_ID_TCP flags.
> 
> Signed-off-by: Jason Xing <kernelxing@tencent.com>
> ---
>  net/core/skbuff.c | 16 +++++++++++++---
>  net/ipv4/tcp.c    | 19 +++++++++++++++----
>  2 files changed, 28 insertions(+), 7 deletions(-)
> 

> @@ -491,10 +491,21 @@ static u32 bpf_tcp_tx_timestamp(struct sock *sk)
>  	if (!(flags & SOF_TIMESTAMPING_TX_RECORD_MASK))
>  		return 0;
>  
> +	/* We require users to set both OPT_ID and OPT_ID_TCP flags
> +	 * together here, or else the key might be inaccurate.
> +	 */
> +	if (flags & SOF_TIMESTAMPING_OPT_ID &&
> +	    flags & SOF_TIMESTAMPING_OPT_ID_TCP &&
> +	    !(sk->sk_tsflags & (SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_OPT_ID_TCP))) {
> +		atomic_set(&sk->sk_tskey, (tcp_sk(sk)->write_seq - copied));
> +		sk->sk_tsflags |= (SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_OPT_ID_TCP);

So user and BPF admin conflict on both sk_tsflags and sktskey?

I think BPF resetting this key, or incrementing it, may break user
expectations.
Jason Xing Oct. 8, 2024, 11:18 p.m. UTC | #2
On Wed, Oct 9, 2024 at 2:56 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Jason Xing wrote:
> > From: Jason Xing <kernelxing@tencent.com>
> >
> > We can set OPT_ID|OPT_ID_TCP before we initialize the last skb
> > from each sendmsg. We only set the socket once like how we use
> > setsockopt() with OPT_ID|OPT_ID_TCP flags.
> >
> > Signed-off-by: Jason Xing <kernelxing@tencent.com>
> > ---
> >  net/core/skbuff.c | 16 +++++++++++++---
> >  net/ipv4/tcp.c    | 19 +++++++++++++++----
> >  2 files changed, 28 insertions(+), 7 deletions(-)
> >
>
> > @@ -491,10 +491,21 @@ static u32 bpf_tcp_tx_timestamp(struct sock *sk)
> >       if (!(flags & SOF_TIMESTAMPING_TX_RECORD_MASK))
> >               return 0;
> >
> > +     /* We require users to set both OPT_ID and OPT_ID_TCP flags
> > +      * together here, or else the key might be inaccurate.
> > +      */
> > +     if (flags & SOF_TIMESTAMPING_OPT_ID &&
> > +         flags & SOF_TIMESTAMPING_OPT_ID_TCP &&
> > +         !(sk->sk_tsflags & (SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_OPT_ID_TCP))) {
> > +             atomic_set(&sk->sk_tskey, (tcp_sk(sk)->write_seq - copied));
> > +             sk->sk_tsflags |= (SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_OPT_ID_TCP);
>
> So user and BPF admin conflict on both sk_tsflags and sktskey?
>
> I think BPF resetting this key, or incrementing it, may break user
> expectations.

Yes, when it comes to OPT_ID and OPT_ID_TCP, conflict could happen.
The reason why I don't use it like BPF_SOCK_OPS_TS_SCHED_OPT_CB flags
(which is set along with each last skb) is that OPT_ID logic is a
little bit complex. If we want to avoid touching sk_tsflags field in
struct sock, we have to re-implement a similiar logic as you've
already done in these years.

Now, this patch is easier but as you said it may "break" users... But
I wonder if we can give the bpf program the first priority like what
TCP_BPF_RTO_MIN does. TCP_BPF_RTO_MIN can override icsk_rto_min field
in struct inet_connection_sock.

Thanks,
Jason
Willem de Bruijn Oct. 9, 2024, 1:19 p.m. UTC | #3
Jason Xing wrote:
> On Wed, Oct 9, 2024 at 2:56 AM Willem de Bruijn
> <willemdebruijn.kernel@gmail.com> wrote:
> >
> > Jason Xing wrote:
> > > From: Jason Xing <kernelxing@tencent.com>
> > >
> > > We can set OPT_ID|OPT_ID_TCP before we initialize the last skb
> > > from each sendmsg. We only set the socket once like how we use
> > > setsockopt() with OPT_ID|OPT_ID_TCP flags.
> > >
> > > Signed-off-by: Jason Xing <kernelxing@tencent.com>
> > > ---
> > >  net/core/skbuff.c | 16 +++++++++++++---
> > >  net/ipv4/tcp.c    | 19 +++++++++++++++----
> > >  2 files changed, 28 insertions(+), 7 deletions(-)
> > >
> >
> > > @@ -491,10 +491,21 @@ static u32 bpf_tcp_tx_timestamp(struct sock *sk)
> > >       if (!(flags & SOF_TIMESTAMPING_TX_RECORD_MASK))
> > >               return 0;
> > >
> > > +     /* We require users to set both OPT_ID and OPT_ID_TCP flags
> > > +      * together here, or else the key might be inaccurate.
> > > +      */
> > > +     if (flags & SOF_TIMESTAMPING_OPT_ID &&
> > > +         flags & SOF_TIMESTAMPING_OPT_ID_TCP &&
> > > +         !(sk->sk_tsflags & (SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_OPT_ID_TCP))) {
> > > +             atomic_set(&sk->sk_tskey, (tcp_sk(sk)->write_seq - copied));
> > > +             sk->sk_tsflags |= (SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_OPT_ID_TCP);
> >
> > So user and BPF admin conflict on both sk_tsflags and sktskey?
> >
> > I think BPF resetting this key, or incrementing it, may break user
> > expectations.
> 
> Yes, when it comes to OPT_ID and OPT_ID_TCP, conflict could happen.
> The reason why I don't use it like BPF_SOCK_OPS_TS_SCHED_OPT_CB flags
> (which is set along with each last skb) is that OPT_ID logic is a
> little bit complex. If we want to avoid touching sk_tsflags field in
> struct sock, we have to re-implement a similiar logic as you've
> already done in these years.

One option may be to only allow BPF to use sk_tsflags and sk_tskey if
sk_tsflags is not set by the user, and to fail user access to these
fields later.

That enforces mutual exclusion between either user or admin
timestamping.

Of course, it may still break users if BPF is first, but the user
socket tries to enable it later. So an imperfect solution.

Ideally the two would use separate per socket state. I don't know
all the options the various BPF hooks may have for this.
Jason Xing Oct. 9, 2024, 1:52 p.m. UTC | #4
On Wed, Oct 9, 2024 at 9:19 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Jason Xing wrote:
> > On Wed, Oct 9, 2024 at 2:56 AM Willem de Bruijn
> > <willemdebruijn.kernel@gmail.com> wrote:
> > >
> > > Jason Xing wrote:
> > > > From: Jason Xing <kernelxing@tencent.com>
> > > >
> > > > We can set OPT_ID|OPT_ID_TCP before we initialize the last skb
> > > > from each sendmsg. We only set the socket once like how we use
> > > > setsockopt() with OPT_ID|OPT_ID_TCP flags.
> > > >
> > > > Signed-off-by: Jason Xing <kernelxing@tencent.com>
> > > > ---
> > > >  net/core/skbuff.c | 16 +++++++++++++---
> > > >  net/ipv4/tcp.c    | 19 +++++++++++++++----
> > > >  2 files changed, 28 insertions(+), 7 deletions(-)
> > > >
> > >
> > > > @@ -491,10 +491,21 @@ static u32 bpf_tcp_tx_timestamp(struct sock *sk)
> > > >       if (!(flags & SOF_TIMESTAMPING_TX_RECORD_MASK))
> > > >               return 0;
> > > >
> > > > +     /* We require users to set both OPT_ID and OPT_ID_TCP flags
> > > > +      * together here, or else the key might be inaccurate.
> > > > +      */
> > > > +     if (flags & SOF_TIMESTAMPING_OPT_ID &&
> > > > +         flags & SOF_TIMESTAMPING_OPT_ID_TCP &&
> > > > +         !(sk->sk_tsflags & (SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_OPT_ID_TCP))) {
> > > > +             atomic_set(&sk->sk_tskey, (tcp_sk(sk)->write_seq - copied));
> > > > +             sk->sk_tsflags |= (SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_OPT_ID_TCP);
> > >
> > > So user and BPF admin conflict on both sk_tsflags and sktskey?
> > >
> > > I think BPF resetting this key, or incrementing it, may break user
> > > expectations.
> >
> > Yes, when it comes to OPT_ID and OPT_ID_TCP, conflict could happen.
> > The reason why I don't use it like BPF_SOCK_OPS_TS_SCHED_OPT_CB flags
> > (which is set along with each last skb) is that OPT_ID logic is a
> > little bit complex. If we want to avoid touching sk_tsflags field in
> > struct sock, we have to re-implement a similiar logic as you've
> > already done in these years.
>
> One option may be to only allow BPF to use sk_tsflags and sk_tskey if
> sk_tsflags is not set by the user, and to fail user access to these
> fields later.
>
> That enforces mutual exclusion between either user or admin
> timestamping.
>
> Of course, it may still break users if BPF is first, but the user
> socket tries to enable it later. So an imperfect solution.
>
> Ideally the two would use separate per socket state. I don't know
> all the options the various BPF hooks may have for this.

Adding a new sk state or skb state is a much clearer way. That is also
what I commented on the patch [0/9]. While waiting for BPF experts'
reply, I keep investigating if there is a good way to add a new field.

Thanks,
Jason
diff mbox series

Patch

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2b1b2f7d271a..a60aec450970 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5540,6 +5540,7 @@  void skb_complete_tx_timestamp(struct sk_buff *skb,
 EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
 
 static bool bpf_skb_tstamp_tx(struct sock *sk, u32 scm_flag,
+			      struct sk_buff *skb,
 			      struct skb_shared_hwtstamps *hwtstamps)
 {
 	struct tcp_sock *tp;
@@ -5550,7 +5551,7 @@  static bool bpf_skb_tstamp_tx(struct sock *sk, u32 scm_flag,
 	tp = tcp_sk(sk);
 	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_TX_TIMESTAMPING_OPT_CB_FLAG)) {
 		struct timespec64 tstamp;
-		u32 cb_flag;
+		u32 cb_flag, key = 0;
 
 		switch (scm_flag) {
 		case SCM_TSTAMP_SCHED:
@@ -5566,11 +5567,20 @@  static bool bpf_skb_tstamp_tx(struct sock *sk, u32 scm_flag,
 			return true;
 		}
 
+		/* We require user to set OPT_ID_TCP to generate key value
+		 * in a robust way.
+		 */
+		if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID &&
+		    READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID_TCP) {
+			key = skb_shinfo(skb)->tskey;
+			key -= atomic_read(&sk->sk_tskey);
+		}
+
 		if (hwtstamps)
 			tstamp = ktime_to_timespec64(hwtstamps->hwtstamp);
 		else
 			tstamp = ktime_to_timespec64(ktime_get_real());
-		tcp_call_bpf_2arg(sk, cb_flag, tstamp.tv_sec, tstamp.tv_nsec);
+		tcp_call_bpf_3arg(sk, cb_flag, key, tstamp.tv_sec, tstamp.tv_nsec);
 		return true;
 	}
 
@@ -5589,7 +5599,7 @@  void __skb_tstamp_tx(struct sk_buff *orig_skb,
 	if (!sk)
 		return;
 
-	if (bpf_skb_tstamp_tx(sk, tstype, hwtstamps))
+	if (bpf_skb_tstamp_tx(sk, tstype, orig_skb, hwtstamps))
 		return;
 
 	tsflags = READ_ONCE(sk->sk_tsflags);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ddf4089779b5..1d52640f9ff4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -477,7 +477,7 @@  void tcp_init_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(tcp_init_sock);
 
-static u32 bpf_tcp_tx_timestamp(struct sock *sk)
+static u32 bpf_tcp_tx_timestamp(struct sock *sk, int copied)
 {
 	u32 flags;
 
@@ -491,10 +491,21 @@  static u32 bpf_tcp_tx_timestamp(struct sock *sk)
 	if (!(flags & SOF_TIMESTAMPING_TX_RECORD_MASK))
 		return 0;
 
+	/* We require users to set both OPT_ID and OPT_ID_TCP flags
+	 * together here, or else the key might be inaccurate.
+	 */
+	if (flags & SOF_TIMESTAMPING_OPT_ID &&
+	    flags & SOF_TIMESTAMPING_OPT_ID_TCP &&
+	    !(sk->sk_tsflags & (SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_OPT_ID_TCP))) {
+		atomic_set(&sk->sk_tskey, (tcp_sk(sk)->write_seq - copied));
+		sk->sk_tsflags |= (SOF_TIMESTAMPING_OPT_ID | SOF_TIMESTAMPING_OPT_ID_TCP);
+	}
+
 	return flags;
 }
 
-static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
+static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc,
+			     int copied)
 {
 	struct sk_buff *skb = tcp_write_queue_tail(sk);
 	u32 tsflags = sockc->tsflags;
@@ -503,7 +514,7 @@  static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
 	if (!skb)
 		return;
 
-	flags = bpf_tcp_tx_timestamp(sk);
+	flags = bpf_tcp_tx_timestamp(sk, copied);
 	if (flags)
 		tsflags = flags;
 
@@ -1347,7 +1358,7 @@  int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 
 out:
 	if (copied) {
-		tcp_tx_timestamp(sk, &sockc);
+		tcp_tx_timestamp(sk, &sockc, copied);
 		tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
 	}
 out_nopush: