diff mbox series

[bpf,v2] af_unix: Disable MSG_OOB handling for sockets in sockmap/sockhash

Message ID 20240622223324.3337956-1-mhal@rbox.co (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series [bpf,v2] af_unix: Disable MSG_OOB handling for sockets in sockmap/sockhash | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 45 this patch: 45
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang fail Errors and warnings before: 36 this patch: 36
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn fail Errors and warnings before: 49 this patch: 49
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 59 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-VM_Test-31 fail Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-7 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-VM_Test-10 success Logs for x86_64-gcc / build-release

Commit Message

Michal Luczaj June 22, 2024, 10:25 p.m. UTC
AF_UNIX socket tracks the most recent OOB packet (in its receive queue)
with an `oob_skb` pointer. BPF redirecting does not account for that: when
an OOB packet is moved between sockets, `oob_skb` is left outdated. This
results in a single skb that may be accessed from two different sockets.

Take the easy way out: silently drop MSG_OOB data targeting any socket that
is in a sockmap or a sockhash. Note that such silent drop is akin to the
fate of redirected skb's scm_fp_list (SCM_RIGHTS, SCM_CREDENTIALS).

For symmetry, forbid MSG_OOB in unix_bpf_recvmsg().

Suggested-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Fixes: 314001f0bf92 ("af_unix: Add OOB support")
Signed-off-by: Michal Luczaj <mhal@rbox.co>
---
v2:
  - Reduce time under mutex, restructure (Kuniyuki)
  - Handle unix_release_sock() race

v1: https://lore.kernel.org/netdev/20240620203009.2610301-1-mhal@rbox.co/

 net/unix/af_unix.c  | 41 ++++++++++++++++++++++++++++++++++++++++-
 net/unix/unix_bpf.c |  3 +++
 2 files changed, 43 insertions(+), 1 deletion(-)

Comments

Jakub Sitnicki June 24, 2024, 2:15 p.m. UTC | #1
On Sun, Jun 23, 2024 at 12:25 AM +02, Michal Luczaj wrote:
> AF_UNIX socket tracks the most recent OOB packet (in its receive queue)
> with an `oob_skb` pointer. BPF redirecting does not account for that: when
> an OOB packet is moved between sockets, `oob_skb` is left outdated. This
> results in a single skb that may be accessed from two different sockets.
>
> Take the easy way out: silently drop MSG_OOB data targeting any socket that
> is in a sockmap or a sockhash. Note that such silent drop is akin to the
> fate of redirected skb's scm_fp_list (SCM_RIGHTS, SCM_CREDENTIALS).
>
> For symmetry, forbid MSG_OOB in unix_bpf_recvmsg().
>
> Suggested-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> Fixes: 314001f0bf92 ("af_unix: Add OOB support")
> Signed-off-by: Michal Luczaj <mhal@rbox.co>
> ---

[+CC Cong who authored ->read_skb]

I'm guessing you have a test program that you're developing the fix
against. Would you like to extend the test case for sockmap redirect
from unix stream [1] to incorporate it?

Sadly unix_inet_redir_to_connected needs a fix first because it
hardcodes sotype to SOCK_DGRAM.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git/tree/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c#n1884
Shoaib Rao June 24, 2024, 8:02 p.m. UTC | #2
On 6/24/24 07:15, Jakub Sitnicki wrote:
> On Sun, Jun 23, 2024 at 12:25 AM +02, Michal Luczaj wrote:
>> AF_UNIX socket tracks the most recent OOB packet (in its receive queue)
>> with an `oob_skb` pointer. BPF redirecting does not account for that: when
>> an OOB packet is moved between sockets, `oob_skb` is left outdated. This
>> results in a single skb that may be accessed from two different sockets.
>>
>> Take the easy way out: silently drop MSG_OOB data targeting any socket that
>> is in a sockmap or a sockhash. Note that such silent drop is akin to the
>> fate of redirected skb's scm_fp_list (SCM_RIGHTS, SCM_CREDENTIALS).
>>
>> For symmetry, forbid MSG_OOB in unix_bpf_recvmsg().
>>
>> Suggested-by: Kuniyuki Iwashima <kuniyu@amazon.com>
>> Fixes: 314001f0bf92 ("af_unix: Add OOB support")
>> Signed-off-by: Michal Luczaj <mhal@rbox.co>
>> ---
> [+CC Cong who authored ->read_skb]
>
> I'm guessing you have a test program that you're developing the fix
> against. Would you like to extend the test case for sockmap redirect
> from unix stream [1] to incorporate it?
>
> Sadly unix_inet_redir_to_connected needs a fix first because it
> hardcodes sotype to SOCK_DGRAM.
>
> [1] https://urldefense.com/v3/__https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git/tree/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c*n1884__;Iw!!ACWV5N9M2RV99hQ!K-FmI13Wd7NvcxnWmsgoiqJtOSe4b4ydFXIvMs4JFOWWesx2LLtlg8LG22_Fd49e67cl50SdkB4JFg4-$ 

I would like to understand why this is not an issue for TCP as we try to
mimic TCP OOB behavior for AF_UNIX sockets. However, I am out of the
office till July 8th and can only look at the issue after my return.

Shoaib
Michal Luczaj June 26, 2024, 10:19 a.m. UTC | #3
On 6/24/24 16:15, Jakub Sitnicki wrote:
> On Sun, Jun 23, 2024 at 12:25 AM +02, Michal Luczaj wrote:
>> AF_UNIX socket tracks the most recent OOB packet (in its receive queue)
>> with an `oob_skb` pointer. BPF redirecting does not account for that: when
>> an OOB packet is moved between sockets, `oob_skb` is left outdated. This
>> results in a single skb that may be accessed from two different sockets.
>>
>> Take the easy way out: silently drop MSG_OOB data targeting any socket that
>> is in a sockmap or a sockhash. Note that such silent drop is akin to the
>> fate of redirected skb's scm_fp_list (SCM_RIGHTS, SCM_CREDENTIALS).
>>
>> For symmetry, forbid MSG_OOB in unix_bpf_recvmsg().
>>
>> Suggested-by: Kuniyuki Iwashima <kuniyu@amazon.com>
>> Fixes: 314001f0bf92 ("af_unix: Add OOB support")
>> Signed-off-by: Michal Luczaj <mhal@rbox.co>
>> ---
> 
> [+CC Cong who authored ->read_skb]
> 
> I'm guessing you have a test program that you're developing the fix
> against. Would you like to extend the test case for sockmap redirect
> from unix stream [1] to incorporate it?
> 
> Sadly unix_inet_redir_to_connected needs a fix first because it
> hardcodes sotype to SOCK_DGRAM.

Ugh, my last two replies got silently dropped by vger. Is there any way to
tell what went wrong?

So, again, sure, I'll extend the sockmap redirect test.

And regarding Rao's comment, I took a look and I think sockmap'ed TCP OOB
does indeed act the same way. I'll try to add that into selftest as well.

Thanks,
Michal
Jakub Sitnicki June 27, 2024, 7:40 a.m. UTC | #4
On Wed, Jun 26, 2024 at 12:19 PM +02, Michal Luczaj wrote:
> On 6/24/24 16:15, Jakub Sitnicki wrote:
>> On Sun, Jun 23, 2024 at 12:25 AM +02, Michal Luczaj wrote:
>>> AF_UNIX socket tracks the most recent OOB packet (in its receive queue)
>>> with an `oob_skb` pointer. BPF redirecting does not account for that: when
>>> an OOB packet is moved between sockets, `oob_skb` is left outdated. This
>>> results in a single skb that may be accessed from two different sockets.
>>>
>>> Take the easy way out: silently drop MSG_OOB data targeting any socket that
>>> is in a sockmap or a sockhash. Note that such silent drop is akin to the
>>> fate of redirected skb's scm_fp_list (SCM_RIGHTS, SCM_CREDENTIALS).
>>>
>>> For symmetry, forbid MSG_OOB in unix_bpf_recvmsg().
>>>
>>> Suggested-by: Kuniyuki Iwashima <kuniyu@amazon.com>
>>> Fixes: 314001f0bf92 ("af_unix: Add OOB support")
>>> Signed-off-by: Michal Luczaj <mhal@rbox.co>
>>> ---
>> 
>> [+CC Cong who authored ->read_skb]
>> 
>> I'm guessing you have a test program that you're developing the fix
>> against. Would you like to extend the test case for sockmap redirect
>> from unix stream [1] to incorporate it?
>> 
>> Sadly unix_inet_redir_to_connected needs a fix first because it
>> hardcodes sotype to SOCK_DGRAM.
>
> Ugh, my last two replies got silently dropped by vger. Is there any way to
> tell what went wrong?

Not sure if it was vger or lore archive. Your reply hit my inbox but is
nowhere to be found in the archive:

https://lore.kernel.org/r/4bac0a8a-eeaa-48ef-aeba-2a6e73c0b982@rbox.co

I think we can reach out to Konstantin Ryabitsev at
konstantin@linuxfoundation.org. AFAIK he maintains the lore.kernel.org
archive.

> So, again, sure, I'll extend the sockmap redirect test.

Appreciate the help with adding a regression test, if time allows.
Fixes are of course very welcome even without them.

> And regarding Rao's comment, I took a look and I think sockmap'ed TCP OOB
> does indeed act the same way. I'll try to add that into selftest as well.n

Right, it does sound like we're not clearing the offset kept in
tcp_sock::urg_data when skb is redirected.
Shoaib Rao July 6, 2024, 9:42 a.m. UTC | #5
On 6/27/24 12:40, Jakub Sitnicki wrote:
> On Wed, Jun 26, 2024 at 12:19 PM +02, Michal Luczaj wrote:
>> On 6/24/24 16:15, Jakub Sitnicki wrote:
>>> On Sun, Jun 23, 2024 at 12:25 AM +02, Michal Luczaj wrote:
>>>> AF_UNIX socket tracks the most recent OOB packet (in its receive queue)
>>>> with an `oob_skb` pointer. BPF redirecting does not account for that: when
>>>> an OOB packet is moved between sockets, `oob_skb` is left outdated. This
>>>> results in a single skb that may be accessed from two different sockets.
>>>>
>>>> Take the easy way out: silently drop MSG_OOB data targeting any socket that
>>>> is in a sockmap or a sockhash. Note that such silent drop is akin to the
>>>> fate of redirected skb's scm_fp_list (SCM_RIGHTS, SCM_CREDENTIALS).
>>>>
>>>> For symmetry, forbid MSG_OOB in unix_bpf_recvmsg().
>>>>
>>>> Suggested-by: Kuniyuki Iwashima <kuniyu@amazon.com>
>>>> Fixes: 314001f0bf92 ("af_unix: Add OOB support")
>>>> Signed-off-by: Michal Luczaj <mhal@rbox.co>
>>>> ---
>>>
>>> [+CC Cong who authored ->read_skb]
>>>
>>> I'm guessing you have a test program that you're developing the fix
>>> against. Would you like to extend the test case for sockmap redirect
>>> from unix stream [1] to incorporate it?
>>>
>>> Sadly unix_inet_redir_to_connected needs a fix first because it
>>> hardcodes sotype to SOCK_DGRAM.
>>
>> Ugh, my last two replies got silently dropped by vger. Is there any way to
>> tell what went wrong?
> 
> Not sure if it was vger or lore archive. Your reply hit my inbox but is
> nowhere to be found in the archive:
> 
> https://urldefense.com/v3/__https://lore.kernel.org/r/4bac0a8a-eeaa-48ef-aeba-2a6e73c0b982@rbox.co__;!!ACWV5N9M2RV99hQ!INUzIF25cVLOogl5HVn1FemXyw-iTBF358Wi77LDaYGg2UY3mi7Q1sTnZiUhkZhEc1qGZgUEGnRhkq4C$ 
> 
> I think we can reach out to Konstantin Ryabitsev at
> konstantin@linuxfoundation.org. AFAIK he maintains the lore.kernel.org
> archive.
> 
>> So, again, sure, I'll extend the sockmap redirect test.
> 
> Appreciate the help with adding a regression test, if time allows.
> Fixes are of course very welcome even without them.
> 
>> And regarding Rao's comment, I took a look and I think sockmap'ed TCP OOB
>> does indeed act the same way. I'll try to add that into selftest as well.n
> 
> Right, it does sound like we're not clearing the offset kept in
> tcp_sock::urg_data when skb is redirected.
> 

I am fine if the behavior is same as TCP. Thanks a lot for looking into
this.

Regards,

Shoaib

> 
> 
>
Michal Luczaj July 7, 2024, 11:10 p.m. UTC | #6
On 6/27/24 09:40, Jakub Sitnicki wrote:
> On Wed, Jun 26, 2024 at 12:19 PM +02, Michal Luczaj wrote:
>> On 6/24/24 16:15, Jakub Sitnicki wrote:
>>> On Sun, Jun 23, 2024 at 12:25 AM +02, Michal Luczaj wrote:
>>>> AF_UNIX socket tracks the most recent OOB packet (in its receive queue)
>>>> with an `oob_skb` pointer. BPF redirecting does not account for that: when
>>>> an OOB packet is moved between sockets, `oob_skb` is left outdated. This
>>>> results in a single skb that may be accessed from two different sockets.
>>>>
>>>> Take the easy way out: silently drop MSG_OOB data targeting any socket that
>>>> is in a sockmap or a sockhash. Note that such silent drop is akin to the
>>>> fate of redirected skb's scm_fp_list (SCM_RIGHTS, SCM_CREDENTIALS).
>>>>
>>>> For symmetry, forbid MSG_OOB in unix_bpf_recvmsg().
>>>>
>>>> Suggested-by: Kuniyuki Iwashima <kuniyu@amazon.com>
>>>> Fixes: 314001f0bf92 ("af_unix: Add OOB support")
>>>> Signed-off-by: Michal Luczaj <mhal@rbox.co>
>>>> ---
>>>
>>> [+CC Cong who authored ->read_skb]
>>>
>>> I'm guessing you have a test program that you're developing the fix
>>> against. Would you like to extend the test case for sockmap redirect
>>> from unix stream [1] to incorporate it?
>>>
>>> Sadly unix_inet_redir_to_connected needs a fix first because it
>>> hardcodes sotype to SOCK_DGRAM.
>>
>> Ugh, my last two replies got silently dropped by vger. Is there any way to
>> tell what went wrong?
> 
> Not sure if it was vger or lore archive. Your reply hit my inbox but is
> nowhere to be found in the archive:> [...]

24h later mailer daemon revealed that my SMTP server got (temporarily) on
the Spamhaus Blocklist. Oh well.

>> So, again, sure, I'll extend the sockmap redirect test.
> 
> Appreciate the help with adding a regression test, if time allows.
> Fixes are of course very welcome even without them.

No problem, fix along with the test sent. Let me know what you think.

>> And regarding Rao's comment, I took a look and I think sockmap'ed TCP OOB
>> does indeed act the same way. I'll try to add that into selftest as well.n
> 
> Right, it does sound like we're not clearing the offset kept in
> tcp_sock::urg_data when skb is redirected.

Yeah, so I also wanted to extend the TCP's redir_to_connected(), but is
that code correct? It seems to be testing REDIR_INGRESS, yet
prog_stream_verdict() doesn't run bpf_sk_redirect_map() with the
BPF_F_INGRESS flag.

Thanks,
Michal
Jakub Sitnicki July 9, 2024, 10:21 a.m. UTC | #7
On Mon, Jul 08, 2024 at 01:10 AM +02, Michal Luczaj wrote:
> On 6/27/24 09:40, Jakub Sitnicki wrote:
>> On Wed, Jun 26, 2024 at 12:19 PM +02, Michal Luczaj wrote:
>>> On 6/24/24 16:15, Jakub Sitnicki wrote:
>>>> On Sun, Jun 23, 2024 at 12:25 AM +02, Michal Luczaj wrote:
>>>>> AF_UNIX socket tracks the most recent OOB packet (in its receive queue)
>>>>> with an `oob_skb` pointer. BPF redirecting does not account for that: when
>>>>> an OOB packet is moved between sockets, `oob_skb` is left outdated. This
>>>>> results in a single skb that may be accessed from two different sockets.
>>>>>
>>>>> Take the easy way out: silently drop MSG_OOB data targeting any socket that
>>>>> is in a sockmap or a sockhash. Note that such silent drop is akin to the
>>>>> fate of redirected skb's scm_fp_list (SCM_RIGHTS, SCM_CREDENTIALS).
>>>>>
>>>>> For symmetry, forbid MSG_OOB in unix_bpf_recvmsg().
>>>>>
>>>>> Suggested-by: Kuniyuki Iwashima <kuniyu@amazon.com>
>>>>> Fixes: 314001f0bf92 ("af_unix: Add OOB support")
>>>>> Signed-off-by: Michal Luczaj <mhal@rbox.co>
>>>>> ---

[...]

>>> And regarding Rao's comment, I took a look and I think sockmap'ed TCP OOB
>>> does indeed act the same way. I'll try to add that into selftest as well.n
>> 
>> Right, it does sound like we're not clearing the offset kept in
>> tcp_sock::urg_data when skb is redirected.
>
> Yeah, so I also wanted to extend the TCP's redir_to_connected(), but is
> that code correct? It seems to be testing REDIR_INGRESS, yet
> prog_stream_verdict() doesn't run bpf_sk_redirect_map() with the
> BPF_F_INGRESS flag.

Right, we don't have the ingress-to-local [1] setup covered there.

This test case has outgrown its initial coverage purpose - using sockmap
with listening / unconnected sockets - and needs to be split up, IMO.

There are definitely gaps in the coverage of redirect cases, and I'd
like to extend it to cover all combinations (supported and unsupported
ones) [2].

[1] It's a term I coined to make it easier to communiate
    https://github.com/jsitnicki/kubecon-2024-sockmap/blob/main/cheatsheet-sockmap-redirect.png
[2] https://gist.github.com/jsitnicki/578fdd614d181bed2b02922b17972b4e
diff mbox series

Patch

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5e695a9a609c..b7b7ee84bec0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2653,10 +2653,49 @@  static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
 
 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 {
+	struct unix_sock *u = unix_sk(sk);
+	struct sk_buff *skb;
+	int err;
+
 	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
 		return -ENOTCONN;
 
-	return unix_read_skb(sk, recv_actor);
+	mutex_lock(&u->iolock);
+	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
+	mutex_unlock(&u->iolock);
+	if (!skb)
+		return err;
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+	if (unlikely(skb == READ_ONCE(u->oob_skb))) {
+		bool drop = false;
+
+		unix_state_lock(sk);
+
+		if (sock_flag(sk, SOCK_DEAD)) {
+			unix_state_unlock(sk);
+			kfree_skb(skb);
+			return -ECONNRESET;
+		}
+
+		spin_lock(&sk->sk_receive_queue.lock);
+		if (likely(skb == u->oob_skb)) {
+			WRITE_ONCE(u->oob_skb, NULL);
+			drop = true;
+		}
+		spin_unlock(&sk->sk_receive_queue.lock);
+
+		unix_state_unlock(sk);
+
+		if (drop) {
+			WARN_ON_ONCE(skb_unref(skb));
+			kfree_skb(skb);
+			return -EAGAIN;
+		}
+	}
+#endif
+
+	return recv_actor(sk, skb);
 }
 
 static int unix_stream_read_generic(struct unix_stream_read_state *state,
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index bd84785bf8d6..bca2d86ba97d 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -54,6 +54,9 @@  static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
 	struct sk_psock *psock;
 	int copied;
 
+	if (flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
 	if (!len)
 		return 0;