diff mbox series

[bpf-next] bpf, sockmap: Bundle psock->sk_redir and redir_ingress into a tagged pointer

Message ID 1699609302-8605-1-git-send-email-yangpc@wangsu.com (mailing list archive)
State Rejected
Delegated to: BPF
Headers show
Series [bpf-next] bpf, sockmap: Bundle psock->sk_redir and redir_ingress into a tagged pointer | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1175 this patch: 1175
netdev/cc_maintainers warning 5 maintainers not CCed: borisp@nvidia.com edumazet@google.com pabeni@redhat.com kuba@kernel.org dsahern@kernel.org
netdev/build_clang success Errors and warnings before: 1162 this patch: 1162
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1202 this patch: 1202
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-3 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-9 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-17 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-llvm-16 / build / build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-23 fail Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-llvm-16 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-16 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-16 / veristat
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-16 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-llvm-16 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc

Commit Message

Pengcheng Yang Nov. 10, 2023, 9:41 a.m. UTC
Like skb->_sk_redir, we bundle the sock redirect pointer and
the ingress bit to manage them together.

Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/87cz97cnz8.fsf@cloudflare.com
Signed-off-by: Pengcheng Yang <yangpc@wangsu.com>
---
 include/linux/skmsg.h | 30 ++++++++++++++++++++++++++++--
 net/core/skmsg.c      | 18 ++++++++++--------
 net/ipv4/tcp_bpf.c    | 13 +++++++------
 net/tls/tls_sw.c      | 11 ++++++-----
 4 files changed, 51 insertions(+), 21 deletions(-)

Comments

Alexei Starovoitov Nov. 10, 2023, 3:42 p.m. UTC | #1
On Fri, Nov 10, 2023 at 1:44 AM Pengcheng Yang <yangpc@wangsu.com> wrote:
>
> Like skb->_sk_redir, we bundle the sock redirect pointer and
> the ingress bit to manage them together.
>
> Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
> Link: https://lore.kernel.org/bpf/87cz97cnz8.fsf@cloudflare.com
> Signed-off-by: Pengcheng Yang <yangpc@wangsu.com>
> ---
>  include/linux/skmsg.h | 30 ++++++++++++++++++++++++++++--
>  net/core/skmsg.c      | 18 ++++++++++--------
>  net/ipv4/tcp_bpf.c    | 13 +++++++------
>  net/tls/tls_sw.c      | 11 ++++++-----
>  4 files changed, 51 insertions(+), 21 deletions(-)
>
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index c1637515a8a4..ae021f511f46 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h
> @@ -78,11 +78,10 @@ struct sk_psock_work_state {
>
>  struct sk_psock {
>         struct sock                     *sk;
> -       struct sock                     *sk_redir;
> +       unsigned long                   _sk_redir;

Please don't.
There is no need to bundle them together.
Jakub Sitnicki Nov. 13, 2023, 9:02 a.m. UTC | #2
On Fri, Nov 10, 2023 at 07:42 AM -08, Alexei Starovoitov wrote:
> On Fri, Nov 10, 2023 at 1:44 AM Pengcheng Yang <yangpc@wangsu.com> wrote:
>>
>> Like skb->_sk_redir, we bundle the sock redirect pointer and
>> the ingress bit to manage them together.
>>
>> Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
>> Link: https://lore.kernel.org/bpf/87cz97cnz8.fsf@cloudflare.com
>> Signed-off-by: Pengcheng Yang <yangpc@wangsu.com>
>> ---
>>  include/linux/skmsg.h | 30 ++++++++++++++++++++++++++++--
>>  net/core/skmsg.c      | 18 ++++++++++--------
>>  net/ipv4/tcp_bpf.c    | 13 +++++++------
>>  net/tls/tls_sw.c      | 11 ++++++-----
>>  4 files changed, 51 insertions(+), 21 deletions(-)
>>
>> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
>> index c1637515a8a4..ae021f511f46 100644
>> --- a/include/linux/skmsg.h
>> +++ b/include/linux/skmsg.h
>> @@ -78,11 +78,10 @@ struct sk_psock_work_state {
>>
>>  struct sk_psock {
>>         struct sock                     *sk;
>> -       struct sock                     *sk_redir;
>> +       unsigned long                   _sk_redir;
>
> Please don't.
> There is no need to bundle them together.

Seeing how the code turned out, I agree - it didn't work out.
Code is not any simpler. My gut feeling was wrong here.

I gotta ask for, for the future, though -
this is not a "no" to tagged pointers in general, right?
Alexei Starovoitov Nov. 13, 2023, 1:36 p.m. UTC | #3
On Mon, Nov 13, 2023 at 1:14 AM Jakub Sitnicki <jakub@cloudflare.com> wrote:
>
> On Fri, Nov 10, 2023 at 07:42 AM -08, Alexei Starovoitov wrote:
> > On Fri, Nov 10, 2023 at 1:44 AM Pengcheng Yang <yangpc@wangsu.com> wrote:
> >>
> >> Like skb->_sk_redir, we bundle the sock redirect pointer and
> >> the ingress bit to manage them together.
> >>
> >> Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
> >> Link: https://lore.kernel.org/bpf/87cz97cnz8.fsf@cloudflare.com
> >> Signed-off-by: Pengcheng Yang <yangpc@wangsu.com>
> >> ---
> >>  include/linux/skmsg.h | 30 ++++++++++++++++++++++++++++--
> >>  net/core/skmsg.c      | 18 ++++++++++--------
> >>  net/ipv4/tcp_bpf.c    | 13 +++++++------
> >>  net/tls/tls_sw.c      | 11 ++++++-----
> >>  4 files changed, 51 insertions(+), 21 deletions(-)
> >>
> >> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> >> index c1637515a8a4..ae021f511f46 100644
> >> --- a/include/linux/skmsg.h
> >> +++ b/include/linux/skmsg.h
> >> @@ -78,11 +78,10 @@ struct sk_psock_work_state {
> >>
> >>  struct sk_psock {
> >>         struct sock                     *sk;
> >> -       struct sock                     *sk_redir;
> >> +       unsigned long                   _sk_redir;
> >
> > Please don't.
> > There is no need to bundle them together.
>
> Seeing how the code turned out, I agree - it didn't work out.
> Code is not any simpler. My gut feeling was wrong here.
>
> I gotta ask for, for the future, though -
> this is not a "no" to tagged pointers in general, right?

In 99% cases there is no need to mix pointers with flags.
When you need to set/unset both pls use a lock.
A lot more obvious to humans and tools.
diff mbox series

Patch

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index c1637515a8a4..ae021f511f46 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -78,11 +78,10 @@  struct sk_psock_work_state {
 
 struct sk_psock {
 	struct sock			*sk;
-	struct sock			*sk_redir;
+	unsigned long			_sk_redir;
 	u32				apply_bytes;
 	u32				cork_bytes;
 	u32				eval;
-	bool				redir_ingress; /* undefined if sk_redir is null */
 	struct sk_msg			*cork;
 	struct sk_psock_progs		progs;
 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
@@ -283,6 +282,33 @@  static inline struct sk_psock *sk_psock(const struct sock *sk)
 							 SK_USER_DATA_PSOCK);
 }
 
+static inline bool sk_psock_ingress(const struct sk_psock *psock)
+{
+	unsigned long sk_redir = psock->_sk_redir;
+
+	return sk_redir & BPF_F_INGRESS;
+}
+
+static inline void sk_psock_set_redir(struct sk_psock *psock, struct sock *sk_redir,
+				      bool ingress)
+{
+	psock->_sk_redir = (unsigned long)sk_redir;
+	if (ingress)
+		psock->_sk_redir |= BPF_F_INGRESS;
+}
+
+static inline struct sock *sk_psock_get_redir(struct sk_psock *psock)
+{
+	unsigned long sk_redir = psock->_sk_redir;
+
+	return (struct sock *)(sk_redir & ~(BPF_F_INGRESS));
+}
+
+static inline void sk_psock_clear_redir(struct sk_psock *psock)
+{
+	psock->_sk_redir = 0;
+}
+
 static inline void sk_psock_set_state(struct sk_psock *psock,
 				      enum sk_psock_state_bits bit)
 {
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 6c31eefbd777..d994621f1f95 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -811,6 +811,7 @@  static void sk_psock_destroy(struct work_struct *work)
 {
 	struct sk_psock *psock = container_of(to_rcu_work(work),
 					      struct sk_psock, rwork);
+	struct sock *sk_redir = sk_psock_get_redir(psock);
 	/* No sk_callback_lock since already detached. */
 
 	sk_psock_done_strp(psock);
@@ -824,8 +825,8 @@  static void sk_psock_destroy(struct work_struct *work)
 	sk_psock_link_destroy(psock);
 	sk_psock_cork_free(psock);
 
-	if (psock->sk_redir)
-		sock_put(psock->sk_redir);
+	if (sk_redir)
+		sock_put(sk_redir);
 	sock_put(psock->sk);
 	kfree(psock);
 }
@@ -865,6 +866,7 @@  int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
 			 struct sk_msg *msg)
 {
 	struct bpf_prog *prog;
+	struct sock *sk_redir;
 	int ret;
 
 	rcu_read_lock();
@@ -880,17 +882,17 @@  int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
 	ret = sk_psock_map_verd(ret, msg->sk_redir);
 	psock->apply_bytes = msg->apply_bytes;
 	if (ret == __SK_REDIRECT) {
-		if (psock->sk_redir) {
-			sock_put(psock->sk_redir);
-			psock->sk_redir = NULL;
+		sk_redir = sk_psock_get_redir(psock);
+		if (sk_redir) {
+			sock_put(sk_redir);
+			sk_psock_clear_redir(psock);
 		}
 		if (!msg->sk_redir) {
 			ret = __SK_DROP;
 			goto out;
 		}
-		psock->redir_ingress = sk_msg_to_ingress(msg);
-		psock->sk_redir = msg->sk_redir;
-		sock_hold(psock->sk_redir);
+		sk_psock_set_redir(psock, msg->sk_redir, sk_msg_to_ingress(msg));
+		sock_hold(msg->sk_redir);
 	}
 out:
 	rcu_read_unlock();
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 53b0d62fd2c2..b3c847dc87dc 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -427,14 +427,14 @@  static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
 		sk_msg_apply_bytes(psock, tosend);
 		break;
 	case __SK_REDIRECT:
-		redir_ingress = psock->redir_ingress;
-		sk_redir = psock->sk_redir;
+		redir_ingress = sk_psock_ingress(psock);
+		sk_redir = sk_psock_get_redir(psock);
 		sk_msg_apply_bytes(psock, tosend);
 		if (!psock->apply_bytes) {
 			/* Clean up before releasing the sock lock. */
 			eval = psock->eval;
 			psock->eval = __SK_NONE;
-			psock->sk_redir = NULL;
+			sk_psock_clear_redir(psock);
 		}
 		if (psock->cork) {
 			cork = true;
@@ -476,9 +476,10 @@  static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
 	if (likely(!ret)) {
 		if (!psock->apply_bytes) {
 			psock->eval =  __SK_NONE;
-			if (psock->sk_redir) {
-				sock_put(psock->sk_redir);
-				psock->sk_redir = NULL;
+			sk_redir = sk_psock_get_redir(psock);
+			if (sk_redir) {
+				sock_put(sk_redir);
+				sk_psock_clear_redir(psock);
 			}
 		}
 		if (msg &&
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index e9d1e83a859d..c91cd07c1285 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -854,8 +854,8 @@  static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
 		}
 		break;
 	case __SK_REDIRECT:
-		redir_ingress = psock->redir_ingress;
-		sk_redir = psock->sk_redir;
+		redir_ingress = sk_psock_ingress(psock);
+		sk_redir = sk_psock_get_redir(psock);
 		memcpy(&msg_redir, msg, sizeof(*msg));
 		if (msg->apply_bytes < send)
 			msg->apply_bytes = 0;
@@ -898,9 +898,10 @@  static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
 		}
 		if (reset_eval) {
 			psock->eval = __SK_NONE;
-			if (psock->sk_redir) {
-				sock_put(psock->sk_redir);
-				psock->sk_redir = NULL;
+			sk_redir = sk_psock_get_redir(psock);
+			if (sk_redir) {
+				sock_put(sk_redir);
+				sk_psock_clear_redir(psock);
 			}
 		}
 		if (rec)