diff mbox series

[net-next,v2,01/12] net-timestamp: introduce socket tsflag requestors

Message ID 20241012040651.95616-2-kerneljasonxing@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series net-timestamp: bpf extension to equip applications transparently | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 18 this patch: 18
netdev/build_tools success Errors and warnings before: 109 (+0) this patch: 109 (+0)
netdev/cc_maintainers warning 10 maintainers not CCed: lucien.xin@gmail.com marcelo.leitner@gmail.com robin@protonic.nl o.rempel@pengutronix.de mkl@pengutronix.de kernel@pengutronix.de linux-can@vger.kernel.org linux-sctp@vger.kernel.org socketcan@hartkopp.net horms@kernel.org
netdev/build_clang success Errors and warnings before: 36 this patch: 36
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2609 this patch: 2609
netdev/checkpatch warning WARNING: line length of 100 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns WARNING: line length of 97 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 95 this patch: 95
netdev/source_inline success Was 0 now: 0

Commit Message

Jason Xing Oct. 12, 2024, 4:06 a.m. UTC
From: Jason Xing <kernelxing@tencent.com>

We need a separate tsflag to control bpf extension feature so that
we will not affect the behaviors of existing applications.

The idea of introducing requestors for better extension (not only
serving bpf extension) comes from Vadim Fedorenko.

Signed-off-by: Jason Xing <kernelxing@tencent.com>
---
 include/net/ip.h       |  2 +-
 include/net/sock.h     | 15 +++++++++++----
 net/can/j1939/socket.c |  2 +-
 net/core/skbuff.c      |  5 +++--
 net/core/sock.c        |  8 ++++----
 net/ipv4/ip_output.c   |  2 +-
 net/ipv4/ip_sockglue.c |  2 +-
 net/ipv4/tcp.c         |  2 +-
 net/ipv6/ip6_output.c  |  2 +-
 net/ipv6/ping.c        |  2 +-
 net/ipv6/raw.c         |  2 +-
 net/ipv6/udp.c         |  2 +-
 net/sctp/socket.c      |  2 +-
 net/socket.c           |  4 ++--
 14 files changed, 30 insertions(+), 22 deletions(-)

Comments

Willem de Bruijn Oct. 15, 2024, 1:30 a.m. UTC | #1
Jason Xing wrote:
> From: Jason Xing <kernelxing@tencent.com>
> 
> We need a separate tsflag to control bpf extension feature so that
> we will not affect the behaviors of existing applications.
> 
> The idea of introducing requestors for better extension (not only
> serving bpf extension) comes from Vadim Fedorenko.

As also said in the cover letter: I prefer sk_tstflags_bpf.

This array approach adds code churn, may have cacheline effects by
moving other fields and anticipates I don't see a third requestor
happening. And if it does, we'll deal with it then.
Jason Xing Oct. 15, 2024, 1:50 a.m. UTC | #2
On Tue, Oct 15, 2024 at 9:30 AM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> Jason Xing wrote:
> > From: Jason Xing <kernelxing@tencent.com>
> >
> > We need a separate tsflag to control bpf extension feature so that
> > we will not affect the behaviors of existing applications.
> >
> > The idea of introducing requestors for better extension (not only
> > serving bpf extension) comes from Vadim Fedorenko.
>
> As also said in the cover letter: I prefer sk_tstflags_bpf.
>
> This array approach adds code churn, may have cacheline effects by
> moving other fields and anticipates I don't see a third requestor
> happening. And if it does, we'll deal with it then.

Got it. Thanks. I will adjust it accordingly.
diff mbox series

Patch

diff --git a/include/net/ip.h b/include/net/ip.h
index bab084df1567..b0a836aebc33 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -96,7 +96,7 @@  static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
 	ipcm_init(ipcm);
 
 	ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
-	ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags);
+	ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
 	ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
 	ipcm->addr = inet->inet_saddr;
 	ipcm->protocol = inet->inet_num;
diff --git a/include/net/sock.h b/include/net/sock.h
index b32f1424ecc5..8cf278c957b3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -234,6 +234,13 @@  struct sock_common {
 struct bpf_local_storage;
 struct sk_filter;
 
+enum {
+	SOCKETOPT_TS_REQUESTOR = 0,
+	BPFPROG_TS_REQUESTOR,
+
+	__MAX_TS_REQUESTOR,
+};
+
 /**
   *	struct sock - network layer representation of sockets
   *	@__sk_common: shared layout with inet_timewait_sock
@@ -444,7 +451,7 @@  struct sock {
 	socket_lock_t		sk_lock;
 	u32			sk_reserved_mem;
 	int			sk_forward_alloc;
-	u32			sk_tsflags;
+	u32			sk_tsflags[__MAX_TS_REQUESTOR];
 	__cacheline_group_end(sock_write_rxtx);
 
 	__cacheline_group_begin(sock_write_tx);
@@ -1809,7 +1816,7 @@  static inline void sockcm_init(struct sockcm_cookie *sockc,
 			       const struct sock *sk)
 {
 	*sockc = (struct sockcm_cookie) {
-		.tsflags = READ_ONCE(sk->sk_tsflags)
+		.tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR])
 	};
 }
 
@@ -2617,7 +2624,7 @@  static inline void
 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 {
 	struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
-	u32 tsflags = READ_ONCE(sk->sk_tsflags);
+	u32 tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
 	ktime_t kt = skb->tstamp;
 	/*
 	 * generate control messages if
@@ -2652,7 +2659,7 @@  static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
 			   SOF_TIMESTAMPING_RAW_HARDWARE)
 
 	if (sk->sk_flags & FLAGS_RECV_CMSGS ||
-	    READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY)
+	    READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]) & TSFLAGS_ANY)
 		__sock_recv_cmsgs(msg, sk, skb);
 	else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
 		sock_write_timestamp(sk, skb->tstamp);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 305dd72c844c..8f5799930a93 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -996,7 +996,7 @@  static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
 	if (!(jsk->state & J1939_SOCK_ERRQUEUE))
 		return;
 
-	tsflags = READ_ONCE(sk->sk_tsflags);
+	tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
 	switch (type) {
 	case J1939_ERRQUEUE_TX_ACK:
 		if (!(tsflags & SOF_TIMESTAMPING_TX_ACK))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 00afeb90c23a..ab0a59f1e14d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5490,7 +5490,8 @@  static void __skb_complete_tx_timestamp(struct sk_buff *skb,
 	serr->ee.ee_info = tstype;
 	serr->opt_stats = opt_stats;
 	serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
-	if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+	if (READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]) &
+	    SOF_TIMESTAMPING_OPT_ID) {
 		serr->ee.ee_data = skb_shinfo(skb)->tskey;
 		if (sk_is_tcp(sk))
 			serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
@@ -5551,7 +5552,7 @@  void __skb_tstamp_tx(struct sk_buff *orig_skb,
 	if (!sk)
 		return;
 
-	tsflags = READ_ONCE(sk->sk_tsflags);
+	tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
 	if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
 	    skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
 		return;
diff --git a/net/core/sock.c b/net/core/sock.c
index 083d438d8b6f..52c8c5a5ba27 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -908,7 +908,7 @@  int sock_set_timestamping(struct sock *sk, int optname,
 		return -EINVAL;
 
 	if (val & SOF_TIMESTAMPING_OPT_ID &&
-	    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
+	    !(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR] & SOF_TIMESTAMPING_OPT_ID)) {
 		if (sk_is_tcp(sk)) {
 			if ((1 << sk->sk_state) &
 			    (TCPF_CLOSE | TCPF_LISTEN))
@@ -932,7 +932,7 @@  int sock_set_timestamping(struct sock *sk, int optname,
 			return ret;
 	}
 
-	WRITE_ONCE(sk->sk_tsflags, val);
+	WRITE_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR], val);
 	sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
 
 	if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
@@ -1797,7 +1797,7 @@  int sk_getsockopt(struct sock *sk, int level, int optname,
 		 * Don't change the beviour for the old case SO_TIMESTAMPING_OLD.
 		 */
 		if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) {
-			v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+			v.timestamping.flags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
 			v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
 		}
 		break;
@@ -2930,7 +2930,7 @@  int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
 	case SCM_TS_OPT_ID:
 		if (sk_is_tcp(sk))
 			return -EINVAL;
-		tsflags = READ_ONCE(sk->sk_tsflags);
+		tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
 		if (!(tsflags & SOF_TIMESTAMPING_OPT_ID))
 			return -EINVAL;
 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e5c55a95063d..ded504458d5d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1050,7 +1050,7 @@  static int __ip_append_data(struct sock *sk,
 	cork->length += length;
 
 	if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
-	    READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+	    READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]) & SOF_TIMESTAMPING_OPT_ID) {
 		if (cork->flags & IPCORK_TS_OPT_ID) {
 			tskey = cork->ts_opt_id;
 		} else {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index cf377377b52d..fac8f593c43a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -509,7 +509,7 @@  static bool ipv4_datagram_support_cmsg(const struct sock *sk,
 	 * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
 	 */
 	info = PKTINFO_SKB_CB(skb);
-	if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) ||
+	if (!(READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]) & SOF_TIMESTAMPING_OPT_CMSG) ||
 	    !info->ipi_ifindex)
 		return false;
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 82cc4a5633ce..6c8968eb4427 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2245,7 +2245,7 @@  void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
 			struct scm_timestamping_internal *tss)
 {
 	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
-	u32 tsflags = READ_ONCE(sk->sk_tsflags);
+	u32 tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
 	bool has_timestamping = false;
 
 	if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 205673179b3c..c983e0ca6f72 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1547,7 +1547,7 @@  static int __ip6_append_data(struct sock *sk,
 	}
 
 	if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
-	    READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+	    READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]) & SOF_TIMESTAMPING_OPT_ID) {
 		if (cork->flags & IPCORK_TS_OPT_ID) {
 			tskey = cork->ts_opt_id;
 		} else {
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 88b3fcacd4f9..0080b7c3a475 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -119,7 +119,7 @@  static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		return -EINVAL;
 
 	ipcm6_init_sk(&ipc6, sk);
-	ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
+	ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
 	ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
 
 	fl6.flowi6_oif = oif;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8476a3944a88..cd02aa02d813 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -778,7 +778,7 @@  static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	fl6.flowi6_uid = sk->sk_uid;
 
 	ipcm6_init(&ipc6);
-	ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
+	ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
 	ipc6.sockc.mark = fl6.flowi6_mark;
 
 	if (sin6) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 52dfbb2ff1a8..008cc0282338 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1349,7 +1349,7 @@  int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 	ipcm6_init(&ipc6);
 	ipc6.gso_size = READ_ONCE(up->gso_size);
-	ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
+	ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
 	ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
 
 	/* destination address check */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 078bcb3858c7..f66f21d6363e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -9463,7 +9463,7 @@  void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 	newsk->sk_type = sk->sk_type;
 	newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
 	newsk->sk_flags = sk->sk_flags;
-	newsk->sk_tsflags = sk->sk_tsflags;
+	memcpy(newsk->sk_tsflags, sk->sk_tsflags, sizeof(u32) * __MAX_TS_REQUESTOR);
 	newsk->sk_no_check_tx = sk->sk_no_check_tx;
 	newsk->sk_no_check_rx = sk->sk_no_check_rx;
 	newsk->sk_reuse = sk->sk_reuse;
diff --git a/net/socket.c b/net/socket.c
index 3b1b65b9f471..24619a27909a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -845,7 +845,7 @@  static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
 
 static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
 {
-	bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
+	bool cycles = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]) & SOF_TIMESTAMPING_BIND_PHC;
 	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
 	struct net_device *orig_dev;
 	ktime_t hwtstamp;
@@ -944,7 +944,7 @@  void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 	}
 
 	memset(&tss, 0, sizeof(tss));
-	tsflags = READ_ONCE(sk->sk_tsflags);
+	tsflags = READ_ONCE(sk->sk_tsflags[SOCKETOPT_TS_REQUESTOR]);
 	if ((tsflags & SOF_TIMESTAMPING_SOFTWARE &&
 	     (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE ||
 	      skb_is_err_queue(skb) ||