diff mbox series

[v3,bpf-next,2/2] selftest/bpf: Implement sample UNIX domain socket iterator program.

Message ID 20210804070851.97834-3-kuniyu@amazon.co.jp (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series BPF iterator for UNIX domain socket. | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 10 maintainers not CCed: linux-kselftest@vger.kernel.org clang-built-linux@googlegroups.com nathan@kernel.org edumazet@google.com revest@chromium.org lmb@cloudflare.com toke@redhat.com ndesaulniers@google.com shuah@kernel.org alan.maguire@oracle.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning CHECK: Macro argument 'unix_sk' may be better as '(unix_sk)' to avoid precedence issues CHECK: Prefer using the BIT macro WARNING: Missing a blank line after declarations WARNING: Prefer __aligned(8) over __attribute__((aligned(8))) WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: quoted string split across lines
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success Link

Commit Message

Iwashima, Kuniyuki Aug. 4, 2021, 7:08 a.m. UTC
If there are no abstract sockets, this prog produces the same output
as /proc/net/unix.

  # cat /sys/fs/bpf/unix | head -n 2
  Num       RefCount Protocol Flags    Type St Inode Path
  ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer

  # cat /proc/net/unix | head -n 2
  Num       RefCount Protocol Flags    Type St Inode Path
  ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer

According to the analysis by Yonghong Song (See the link), the BPF verifier
cannot load the code in the comment to print the name of the abstract UNIX
domain socket due to LLVM optimisation.  It can be uncommented once the
LLVM code gen is improved.

Link: https://lore.kernel.org/netdev/1994df05-8f01-371f-3c3b-d33d7836878c@fb.com/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
---
 .../selftests/bpf/prog_tests/bpf_iter.c       | 16 ++++
 tools/testing/selftests/bpf/progs/bpf_iter.h  |  8 ++
 .../selftests/bpf/progs/bpf_iter_unix.c       | 86 +++++++++++++++++++
 .../selftests/bpf/progs/bpf_tracing_net.h     |  4 +
 4 files changed, 114 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c

Comments

Yonghong Song Aug. 5, 2021, 4:59 p.m. UTC | #1
On 8/4/21 12:08 AM, Kuniyuki Iwashima wrote:
> If there are no abstract sockets, this prog can output the same result
> compared to /proc/net/unix.
> 
>    # cat /sys/fs/bpf/unix | head -n 2
>    Num       RefCount Protocol Flags    Type St Inode Path
>    ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> 
>    # cat /proc/net/unix | head -n 2
>    Num       RefCount Protocol Flags    Type St Inode Path
>    ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> 
> According to the analysis by Yonghong Song (See the link), the BPF verifier
> cannot load the code in the comment to print the name of the abstract UNIX
> domain socket due to LLVM optimisation.  It can be uncommented once the
> LLVM code gen is improved.

I have pushed the llvm fix to llvm14 trunk
(https://reviews.llvm.org/D107483), and filed a request to backport it to
llvm13 (https://bugs.llvm.org/show_bug.cgi?id=51363). Could you, in the
next revision, uncomment the "for" loop code and test it with the latest
llvm trunk compiler? Please also add an entry in selftests/bpf/README.rst
to mention that the llvm commit https://reviews.llvm.org/D107483 is needed
for the bpf_iter unix_socket selftest; otherwise, users will see an error
like ...

> 
> Link: https://lore.kernel.org/netdev/1994df05-8f01-371f-3c3b-d33d7836878c@fb.com/
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> ---
>   .../selftests/bpf/prog_tests/bpf_iter.c       | 16 ++++
>   tools/testing/selftests/bpf/progs/bpf_iter.h  |  8 ++
>   .../selftests/bpf/progs/bpf_iter_unix.c       | 86 +++++++++++++++++++
>   .../selftests/bpf/progs/bpf_tracing_net.h     |  4 +
>   4 files changed, 114 insertions(+)
>   create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> index 1f1aade56504..77ac24b191d4 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> @@ -13,6 +13,7 @@
>   #include "bpf_iter_tcp6.skel.h"
>   #include "bpf_iter_udp4.skel.h"
>   #include "bpf_iter_udp6.skel.h"
> +#include "bpf_iter_unix.skel.h"
>   #include "bpf_iter_test_kern1.skel.h"
>   #include "bpf_iter_test_kern2.skel.h"
>   #include "bpf_iter_test_kern3.skel.h"
> @@ -313,6 +314,19 @@ static void test_udp6(void)
>   	bpf_iter_udp6__destroy(skel);
>   }
>   
> +static void test_unix(void)
> +{
> +	struct bpf_iter_unix *skel;
> +
> +	skel = bpf_iter_unix__open_and_load();
> +	if (!ASSERT_OK_PTR(skel, "bpf_iter_unix__open_and_load"))
> +		return;
> +
> +	do_dummy_read(skel->progs.dump_unix);
> +
> +	bpf_iter_unix__destroy(skel);
> +}
> +
[...]
> +	if (unix_sk->addr) {
> +		if (!UNIX_ABSTRACT(unix_sk)) {
> +			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> +		} else {
> +			BPF_SEQ_PRINTF(seq, " @");
> +
> +			/* The name of the abstract UNIX domain socket starts
> +			 * with '\0' and can contain '\0'.  The null bytes
> +			 * should be escaped as done in unix_seq_show().
> +			 * However, the BPF verifier cannot load the code below
> +			 * because of the optimisation by LLVM.  So, print only
> +			 * the first escaped byte here for now.  Once LLVM code
> +			 * gen is improved, remove the BPF_SEQ_PRINTF() above
> +			 * and uncomment the code below.
> +			 *
> +			 * int i, len;
> +			 *
> +			 * len = unix_sk->addr->len - sizeof(short);
> +			 *
> +			 * BPF_SEQ_PRINTF(seq, " @");
> +			 *
> +			 * // unix_mkname() tests this upper bound.
> +			 * if (len < sizeof(struct sockaddr_un))
> +			 *	for (i = 1 ; i < len; i++)
> +			 *		BPF_SEQ_PRINTF(seq, "%c",
> +			 *			       unix_sk->addr->name->sun_path[i] ?:
> +			 *			       '@');
> +			 */
> +		}
> +	}
> +
[...]
Iwashima, Kuniyuki Aug. 6, 2021, 12:24 a.m. UTC | #2
From:   Yonghong Song <yhs@fb.com>
Date:   Thu, 5 Aug 2021 09:59:40 -0700
> On 8/4/21 12:08 AM, Kuniyuki Iwashima wrote:
> > If there are no abstract sockets, this prog can output the same result
> > compared to /proc/net/unix.
> > 
> >    # cat /sys/fs/bpf/unix | head -n 2
> >    Num       RefCount Protocol Flags    Type St Inode Path
> >    ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> > 
> >    # cat /proc/net/unix | head -n 2
> >    Num       RefCount Protocol Flags    Type St Inode Path
> >    ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> > 
> > According to the analysis by Yonghong Song (See the link), the BPF verifier
> > cannot load the code in the comment to print the name of the abstract UNIX
> > domain socket due to LLVM optimisation.  It can be uncommented once the
> > LLVM code gen is improved.
> 
> I have pushed the llvm fix to llvm14 trunk 
> (https://reviews.llvm.org/D107483), and filed a request to backport to 
> llvm13 (https://bugs.llvm.org/show_bug.cgi?id=51363), could you in the 
> next revision uncomment the "for" loop code and tested it with latest 
> llvm trunk compiler? Please also add an entry in selftests/bpf/README.rst
> to mention the llvm commit https://reviews.llvm.org/D107483 is needed
> for bpf_iter unix_socket selftest, otherwise, they will see an error
> like ...

Thank you for fixing this so quickly!

I confirmed that the uncommented code can be loaded properly with the
latest LLVM master tree. :)

---8<---
$ sudo ./test_progs -t iter
...
#7/14 unix:OK
...
$ clang --version
clang version 14.0.0 (https://github.com/llvm/llvm-project.git 8a557d8311593627efd08d03178889971d5ae02b)
...
$ llvm-objdump -S bpf_iter_unix.o
...
; 				 for (i = 1 ; i < len; i++)
     110:	07 09 00 00 01 00 00 00	r9 += 1
     111:	ad 89 09 00 00 00 00 00	if r9 < r8 goto +9 <LBB0_18>
---8<---

In the next revision, I'll uncomment the code and add a note in README.rst
about your fix.
Andrii Nakryiko Aug. 6, 2021, 11:33 p.m. UTC | #3
On Wed, Aug 4, 2021 at 12:09 AM Kuniyuki Iwashima <kuniyu@amazon.co.jp> wrote:
>
> If there are no abstract sockets, this prog can output the same result
> compared to /proc/net/unix.
>
>   # cat /sys/fs/bpf/unix | head -n 2
>   Num       RefCount Protocol Flags    Type St Inode Path
>   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
>
>   # cat /proc/net/unix | head -n 2
>   Num       RefCount Protocol Flags    Type St Inode Path
>   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
>
> According to the analysis by Yonghong Song (See the link), the BPF verifier
> cannot load the code in the comment to print the name of the abstract UNIX
> domain socket due to LLVM optimisation.  It can be uncommented once the
> LLVM code gen is improved.
>
> Link: https://lore.kernel.org/netdev/1994df05-8f01-371f-3c3b-d33d7836878c@fb.com/

Our patchwork tooling, used to apply patches, uses the Link: tag to
record the original discussion, so it will be quite confusing if you use
the same "Link: " tag for referencing related discussions. Please use
the standard link reference syntax:

According to the analysis by Yonghong Song ([0]), ...

...

  [0] https://lore.kernel.org/netdev/1994df05-8f01-371f-3c3b-d33d7836878c@fb.com/


> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> ---
>  .../selftests/bpf/prog_tests/bpf_iter.c       | 16 ++++
>  tools/testing/selftests/bpf/progs/bpf_iter.h  |  8 ++
>  .../selftests/bpf/progs/bpf_iter_unix.c       | 86 +++++++++++++++++++
>  .../selftests/bpf/progs/bpf_tracing_net.h     |  4 +
>  4 files changed, 114 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
>

[...]

> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
> index 3d83b185c4bc..d92648621bcb 100644
> --- a/tools/testing/selftests/bpf/progs/bpf_iter.h
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
> @@ -12,6 +12,7 @@
>  #define tcp6_sock tcp6_sock___not_used
>  #define bpf_iter__udp bpf_iter__udp___not_used
>  #define udp6_sock udp6_sock___not_used
> +#define bpf_iter__unix bpf_iter__unix___not_used
>  #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
>  #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
>  #define bpf_iter__sockmap bpf_iter__sockmap___not_used
> @@ -32,6 +33,7 @@
>  #undef tcp6_sock
>  #undef bpf_iter__udp
>  #undef udp6_sock
> +#undef bpf_iter__unix
>  #undef bpf_iter__bpf_map_elem
>  #undef bpf_iter__bpf_sk_storage_map
>  #undef bpf_iter__sockmap
> @@ -103,6 +105,12 @@ struct udp6_sock {
>         struct ipv6_pinfo inet6;
>  } __attribute__((preserve_access_index));
>
> +struct bpf_iter__unix {
> +       struct bpf_iter_meta *meta;
> +       struct unix_sock *unix_sk;
> +       uid_t uid __attribute__((aligned(8)));

just fyi, aligned doesn't matter here, CO-RE will relocate offsets
appropriately anyways

> +} __attribute__((preserve_access_index));
> +
>  struct bpf_iter__bpf_map_elem {
>         struct bpf_iter_meta *meta;
>         struct bpf_map *map;

[...]

> +SEC("iter/unix")
> +int dump_unix(struct bpf_iter__unix *ctx)
> +{
> +       struct unix_sock *unix_sk = ctx->unix_sk;
> +       struct sock *sk = (struct sock *)unix_sk;
> +       struct seq_file *seq;
> +       __u32 seq_num;
> +
> +       if (!unix_sk)
> +               return 0;
> +
> +       seq = ctx->meta->seq;
> +       seq_num = ctx->meta->seq_num;
> +       if (seq_num == 0)
> +               BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
> +                              "Type St Inode Path\n");

nit: please keep format strings on a single line

> +
> +       BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
> +                      unix_sk,
> +                      sk->sk_refcnt.refs.counter,
> +                      0,
> +                      sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
> +                      sk->sk_type,
> +                      sk->sk_socket ?
> +                      (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
> +                      (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
> +                      sock_i_ino(sk));
> +

[...]
Iwashima, Kuniyuki Aug. 7, 2021, 12:09 a.m. UTC | #4
From:   Andrii Nakryiko <andrii.nakryiko@gmail.com>
Date:   Fri, 6 Aug 2021 16:33:22 -0700
> On Wed, Aug 4, 2021 at 12:09 AM Kuniyuki Iwashima <kuniyu@amazon.co.jp> wrote:
> >
> > If there are no abstract sockets, this prog can output the same result
> > compared to /proc/net/unix.
> >
> >   # cat /sys/fs/bpf/unix | head -n 2
> >   Num       RefCount Protocol Flags    Type St Inode Path
> >   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> >
> >   # cat /proc/net/unix | head -n 2
> >   Num       RefCount Protocol Flags    Type St Inode Path
> >   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> >
> > According to the analysis by Yonghong Song (See the link), the BPF verifier
> > cannot load the code in the comment to print the name of the abstract UNIX
> > domain socket due to LLVM optimisation.  It can be uncommented once the
> > LLVM code gen is improved.
> >
> > Link: https://lore.kernel.org/netdev/1994df05-8f01-371f-3c3b-d33d7836878c@fb.com/
> 
> Our patchworks tooling, used to apply patches, is using Link: tag to
> record original discussion, so this will be quite confusing if you use
> the same "Link: " for referencing relevant discussions. Please use
> standard link reference syntax:
> 
> According to the analysis by Yonghong Song ([0]), ...
> 
> ...
> 
>   [0] https://lore.kernel.org/netdev/1994df05-8f01-371f-3c3b-d33d7836878c@fb.com/

I'll use this format.


> 
> 
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> > ---
> >  .../selftests/bpf/prog_tests/bpf_iter.c       | 16 ++++
> >  tools/testing/selftests/bpf/progs/bpf_iter.h  |  8 ++
> >  .../selftests/bpf/progs/bpf_iter_unix.c       | 86 +++++++++++++++++++
> >  .../selftests/bpf/progs/bpf_tracing_net.h     |  4 +
> >  4 files changed, 114 insertions(+)
> >  create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> >
> 
> [...]
> 
> > diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
> > index 3d83b185c4bc..d92648621bcb 100644
> > --- a/tools/testing/selftests/bpf/progs/bpf_iter.h
> > +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
> > @@ -12,6 +12,7 @@
> >  #define tcp6_sock tcp6_sock___not_used
> >  #define bpf_iter__udp bpf_iter__udp___not_used
> >  #define udp6_sock udp6_sock___not_used
> > +#define bpf_iter__unix bpf_iter__unix___not_used
> >  #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
> >  #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
> >  #define bpf_iter__sockmap bpf_iter__sockmap___not_used
> > @@ -32,6 +33,7 @@
> >  #undef tcp6_sock
> >  #undef bpf_iter__udp
> >  #undef udp6_sock
> > +#undef bpf_iter__unix
> >  #undef bpf_iter__bpf_map_elem
> >  #undef bpf_iter__bpf_sk_storage_map
> >  #undef bpf_iter__sockmap
> > @@ -103,6 +105,12 @@ struct udp6_sock {
> >         struct ipv6_pinfo inet6;
> >  } __attribute__((preserve_access_index));
> >
> > +struct bpf_iter__unix {
> > +       struct bpf_iter_meta *meta;
> > +       struct unix_sock *unix_sk;
> > +       uid_t uid __attribute__((aligned(8)));
> 
> just fyi, aligned doesn't matter here, CO-RE will relocate offsets
> appropriately anyways

Thank you, I'll remove it.


> 
> > +} __attribute__((preserve_access_index));
> > +
> >  struct bpf_iter__bpf_map_elem {
> >         struct bpf_iter_meta *meta;
> >         struct bpf_map *map;
> 
> [...]
> 
> > +SEC("iter/unix")
> > +int dump_unix(struct bpf_iter__unix *ctx)
> > +{
> > +       struct unix_sock *unix_sk = ctx->unix_sk;
> > +       struct sock *sk = (struct sock *)unix_sk;
> > +       struct seq_file *seq;
> > +       __u32 seq_num;
> > +
> > +       if (!unix_sk)
> > +               return 0;
> > +
> > +       seq = ctx->meta->seq;
> > +       seq_num = ctx->meta->seq_num;
> > +       if (seq_num == 0)
> > +               BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
> > +                              "Type St Inode Path\n");
> 
> nit: please keep format strings on a single line

I'll fix it.

Thanks for the review.


> 
> > +
> > +       BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
> > +                      unix_sk,
> > +                      sk->sk_refcnt.refs.counter,
> > +                      0,
> > +                      sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
> > +                      sk->sk_type,
> > +                      sk->sk_socket ?
> > +                      (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
> > +                      (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
> > +                      sock_i_ino(sk));
> > +
> 
> [...]
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 1f1aade56504..77ac24b191d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -13,6 +13,7 @@ 
 #include "bpf_iter_tcp6.skel.h"
 #include "bpf_iter_udp4.skel.h"
 #include "bpf_iter_udp6.skel.h"
+#include "bpf_iter_unix.skel.h"
 #include "bpf_iter_test_kern1.skel.h"
 #include "bpf_iter_test_kern2.skel.h"
 #include "bpf_iter_test_kern3.skel.h"
@@ -313,6 +314,19 @@  static void test_udp6(void)
 	bpf_iter_udp6__destroy(skel);
 }
 
+static void test_unix(void)
+{
+	struct bpf_iter_unix *skel;
+
+	skel = bpf_iter_unix__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_unix__open_and_load"))
+		return;
+
+	do_dummy_read(skel->progs.dump_unix);
+
+	bpf_iter_unix__destroy(skel);
+}
+
 /* The expected string is less than 16 bytes */
 static int do_read_with_fd(int iter_fd, const char *expected,
 			   bool read_one_char)
@@ -1255,6 +1269,8 @@  void test_bpf_iter(void)
 		test_udp4();
 	if (test__start_subtest("udp6"))
 		test_udp6();
+	if (test__start_subtest("unix"))
+		test_unix();
 	if (test__start_subtest("anon"))
 		test_anon_iter(false);
 	if (test__start_subtest("anon-read-one-char"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index 3d83b185c4bc..d92648621bcb 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -12,6 +12,7 @@ 
 #define tcp6_sock tcp6_sock___not_used
 #define bpf_iter__udp bpf_iter__udp___not_used
 #define udp6_sock udp6_sock___not_used
+#define bpf_iter__unix bpf_iter__unix___not_used
 #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
 #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
 #define bpf_iter__sockmap bpf_iter__sockmap___not_used
@@ -32,6 +33,7 @@ 
 #undef tcp6_sock
 #undef bpf_iter__udp
 #undef udp6_sock
+#undef bpf_iter__unix
 #undef bpf_iter__bpf_map_elem
 #undef bpf_iter__bpf_sk_storage_map
 #undef bpf_iter__sockmap
@@ -103,6 +105,12 @@  struct udp6_sock {
 	struct ipv6_pinfo inet6;
 } __attribute__((preserve_access_index));
 
+struct bpf_iter__unix {
+	struct bpf_iter_meta *meta;
+	struct unix_sock *unix_sk;
+	uid_t uid __attribute__((aligned(8)));
+} __attribute__((preserve_access_index));
+
 struct bpf_iter__bpf_map_elem {
 	struct bpf_iter_meta *meta;
 	struct bpf_map *map;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
new file mode 100644
index 000000000000..048844ee4f32
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
@@ -0,0 +1,86 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+SEC("iter/unix")
+int dump_unix(struct bpf_iter__unix *ctx)
+{
+	struct unix_sock *unix_sk = ctx->unix_sk;
+	struct sock *sk = (struct sock *)unix_sk;
+	struct seq_file *seq;
+	__u32 seq_num;
+
+	if (!unix_sk)
+		return 0;
+
+	seq = ctx->meta->seq;
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
+			       "Type St Inode Path\n");
+
+	BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
+		       unix_sk,
+		       sk->sk_refcnt.refs.counter,
+		       0,
+		       sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
+		       sk->sk_type,
+		       sk->sk_socket ?
+		       (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
+		       (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
+		       sock_i_ino(sk));
+
+	if (unix_sk->addr) {
+		if (!UNIX_ABSTRACT(unix_sk)) {
+			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
+		} else {
+			BPF_SEQ_PRINTF(seq, " @");
+
+			/* The name of the abstract UNIX domain socket starts
+			 * with '\0' and can contain '\0'.  The null bytes
+			 * should be escaped as done in unix_seq_show().
+			 * However, the BPF verifier cannot load the code below
+			 * because of the optimisation by LLVM.  So, print only
+			 * the first escaped byte here for now.  Once LLVM code
+			 * gen is improved, remove the BPF_SEQ_PRINTF() above
+			 * and uncomment the code below.
+			 *
+			 * int i, len;
+			 *
+			 * len = unix_sk->addr->len - sizeof(short);
+			 *
+			 * BPF_SEQ_PRINTF(seq, " @");
+			 *
+			 * // unix_mkname() tests this upper bound.
+			 * if (len < sizeof(struct sockaddr_un))
+			 *	for (i = 1 ; i < len; i++)
+			 *		BPF_SEQ_PRINTF(seq, "%c",
+			 *			       unix_sk->addr->name->sun_path[i] ?:
+			 *			       '@');
+			 */
+		}
+	}
+
+	BPF_SEQ_PRINTF(seq, "\n");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 3af0998a0623..eef5646ddb19 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -5,6 +5,10 @@ 
 #define AF_INET			2
 #define AF_INET6		10
 
+#define __SO_ACCEPTCON		(1 << 16)
+#define UNIX_HASH_SIZE		256
+#define UNIX_ABSTRACT(unix_sk)	(unix_sk->addr->hash < UNIX_HASH_SIZE)
+
 #define SOL_TCP			6
 #define TCP_CONGESTION		13
 #define TCP_CA_NAME_MAX		16