diff mbox series

[bpf-next,2/2] selftest/bpf: Implement sample UNIX domain socket iterator program.

Message ID 20210729233645.4869-3-kuniyu@amazon.co.jp (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series BPF iterator for UNIX domain socket. | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 6 maintainers not CCed: linux-kselftest@vger.kernel.org davemarchevsky@fb.com revest@chromium.org toke@redhat.com shuah@kernel.org alan.maguire@oracle.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit fail Errors and warnings before: 9 this patch: 9
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning CHECK: Macro argument 'unix_sk' may be better as '(unix_sk)' to avoid precedence issues CHECK: Prefer using the BIT macro WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: quoted string split across lines
netdev/build_allmodconfig_warn fail Errors and warnings before: 9 this patch: 9
netdev/header_inline success Link

Commit Message

Iwashima, Kuniyuki July 29, 2021, 11:36 p.m. UTC
If there are no abstract sockets, this prog can output the same result
compared to /proc/net/unix.

  # cat /sys/fs/bpf/unix | head -n 2
  Num       RefCount Protocol Flags    Type St Inode Path
  ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer

  # cat /proc/net/unix | head -n 2
  Num       RefCount Protocol Flags    Type St Inode Path
  ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
---
 .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
 .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c

Comments

Yonghong Song July 30, 2021, 6:54 a.m. UTC | #1
On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
> If there are no abstract sockets, this prog can output the same result
> compared to /proc/net/unix.
> 
>    # cat /sys/fs/bpf/unix | head -n 2
>    Num       RefCount Protocol Flags    Type St Inode Path
>    ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> 
>    # cat /proc/net/unix | head -n 2
>    Num       RefCount Protocol Flags    Type St Inode Path
>    ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> 
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> ---
>   .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
>   .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
>   2 files changed, 92 insertions(+)
>   create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> index 1f1aade56504..4746bac68d36 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> @@ -13,6 +13,7 @@
>   #include "bpf_iter_tcp6.skel.h"
>   #include "bpf_iter_udp4.skel.h"
>   #include "bpf_iter_udp6.skel.h"
> +#include "bpf_iter_unix.skel.h"
>   #include "bpf_iter_test_kern1.skel.h"
>   #include "bpf_iter_test_kern2.skel.h"
>   #include "bpf_iter_test_kern3.skel.h"
> @@ -313,6 +314,20 @@ static void test_udp6(void)
>   	bpf_iter_udp6__destroy(skel);
>   }
>   
> +static void test_unix(void)
> +{
> +	struct bpf_iter_unix *skel;
> +
> +	skel = bpf_iter_unix__open_and_load();
> +	if (CHECK(!skel, "bpf_iter_unix__open_and_load",
> +		  "skeleton open_and_load failed\n"))
> +		return;
> +
> +	do_dummy_read(skel->progs.dump_unix);
> +
> +	bpf_iter_unix__destroy(skel);
> +}
> +
>   /* The expected string is less than 16 bytes */
>   static int do_read_with_fd(int iter_fd, const char *expected,
>   			   bool read_one_char)
> @@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
>   		test_udp4();
>   	if (test__start_subtest("udp6"))
>   		test_udp6();
> +	if (test__start_subtest("unix"))
> +		test_unix();
>   	if (test__start_subtest("anon"))
>   		test_anon_iter(false);
>   	if (test__start_subtest("anon-read-one-char"))
> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> new file mode 100644
> index 000000000000..285ec2f7944d
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> @@ -0,0 +1,75 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright Amazon.com Inc. or its affiliates. */
> +#include "bpf_iter.h"

Could you add bpf_iter__unix to bpf_iter.h similar to bpf_iter__sockmap?
The main purpose is to make test tolerating with old vmlinux.h.

> +#include "bpf_tracing_net.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_endian.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +#define __SO_ACCEPTCON		(1 << 16)
> +#define UNIX_HASH_SIZE		256
> +#define UNIX_ABSTRACT(unix_sk)	(unix_sk->addr->hash < UNIX_HASH_SIZE)

Could you add the above three define's in bpf_tracing_net.h?
We try to keep all these common defines in a common header for
potential reusability.

> +
> +static long sock_i_ino(const struct sock *sk)
> +{
> +	const struct socket *sk_socket = sk->sk_socket;
> +	const struct inode *inode;
> +	unsigned long ino;
> +
> +	if (!sk_socket)
> +		return 0;
> +
> +	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
> +	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
> +	return ino;
> +}
> +
> +SEC("iter/unix")
> +int dump_unix(struct bpf_iter__unix *ctx)
> +{
> +	struct unix_sock *unix_sk = ctx->unix_sk;
> +	struct sock *sk = (struct sock *)unix_sk;
> +	struct seq_file *seq;
> +	__u32 seq_num;
> +
> +	if (!unix_sk)
> +		return 0;
> +
> +	seq = ctx->meta->seq;
> +	seq_num = ctx->meta->seq_num;
> +	if (seq_num == 0)
> +		BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
> +			       "Type St Inode Path\n");
> +
> +	BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
> +		       unix_sk,
> +		       sk->sk_refcnt.refs.counter,
> +		       0,
> +		       sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
> +		       sk->sk_type,
> +		       sk->sk_socket ?
> +		       (sk->sk_state == TCP_ESTABLISHED ?
> +			SS_CONNECTED : SS_UNCONNECTED) :
> +		       (sk->sk_state == TCP_ESTABLISHED ?
> +			SS_CONNECTING : SS_DISCONNECTING),
> +		       sock_i_ino(sk));
> +
> +	if (unix_sk->addr) {
> +		if (UNIX_ABSTRACT(unix_sk))
> +			/* Abstract UNIX domain socket can contain '\0' in
> +			 * the path, and it should be escaped.  However, it
> +			 * requires loops and the BPF verifier rejects it.
> +			 * So here, print only the escaped first byte to
> +			 * indicate it is an abstract UNIX domain socket.
> +			 * (See: unix_seq_show() and commit e7947ea770d0d)
> +			 */
> +			BPF_SEQ_PRINTF(seq, " @");
> +		else
> +			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> +	}

I looked at af_unix.c, for the above "if (unix_sk->addr) { ... }" code,
the following is the kernel source code,

                 if (u->addr) {  // under unix_table_lock here
                         int i, len;
                         seq_putc(seq, ' ');

                         i = 0;
                         len = u->addr->len - sizeof(short);
                         if (!UNIX_ABSTRACT(s))
                                 len--;
                         else {
                                 seq_putc(seq, '@');
                                 i++;
                         }
                         for ( ; i < len; i++)
                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
                                          '@');
                 }

It does not seem to match bpf program non UNIX_ABSTRACT case.
I am not familiar with unix socket so it would be good if you can 
explain a little more.

For verifier issue with loops, do we have a maximum upper bound for 
u->addr->len? If yes, does bounded loop work?

> +
> +	BPF_SEQ_PRINTF(seq, "\n");
> +
> +	return 0;
> +}
>
Iwashima, Kuniyuki July 30, 2021, 7:58 a.m. UTC | #2
From:   Yonghong Song <yhs@fb.com>
Date:   Thu, 29 Jul 2021 23:54:26 -0700
> On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
> > If there are no abstract sockets, this prog can output the same result
> > compared to /proc/net/unix.
> > 
> >    # cat /sys/fs/bpf/unix | head -n 2
> >    Num       RefCount Protocol Flags    Type St Inode Path
> >    ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> > 
> >    # cat /proc/net/unix | head -n 2
> >    Num       RefCount Protocol Flags    Type St Inode Path
> >    ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> > 
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> > ---
> >   .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
> >   .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
> >   2 files changed, 92 insertions(+)
> >   create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> > 
> > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> > index 1f1aade56504..4746bac68d36 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> > @@ -13,6 +13,7 @@
> >   #include "bpf_iter_tcp6.skel.h"
> >   #include "bpf_iter_udp4.skel.h"
> >   #include "bpf_iter_udp6.skel.h"
> > +#include "bpf_iter_unix.skel.h"
> >   #include "bpf_iter_test_kern1.skel.h"
> >   #include "bpf_iter_test_kern2.skel.h"
> >   #include "bpf_iter_test_kern3.skel.h"
> > @@ -313,6 +314,20 @@ static void test_udp6(void)
> >   	bpf_iter_udp6__destroy(skel);
> >   }
> >   
> > +static void test_unix(void)
> > +{
> > +	struct bpf_iter_unix *skel;
> > +
> > +	skel = bpf_iter_unix__open_and_load();
> > +	if (CHECK(!skel, "bpf_iter_unix__open_and_load",
> > +		  "skeleton open_and_load failed\n"))
> > +		return;
> > +
> > +	do_dummy_read(skel->progs.dump_unix);
> > +
> > +	bpf_iter_unix__destroy(skel);
> > +}
> > +
> >   /* The expected string is less than 16 bytes */
> >   static int do_read_with_fd(int iter_fd, const char *expected,
> >   			   bool read_one_char)
> > @@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
> >   		test_udp4();
> >   	if (test__start_subtest("udp6"))
> >   		test_udp6();
> > +	if (test__start_subtest("unix"))
> > +		test_unix();
> >   	if (test__start_subtest("anon"))
> >   		test_anon_iter(false);
> >   	if (test__start_subtest("anon-read-one-char"))
> > diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> > new file mode 100644
> > index 000000000000..285ec2f7944d
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> > @@ -0,0 +1,75 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* Copyright Amazon.com Inc. or its affiliates. */
> > +#include "bpf_iter.h"
> 
> Could you add bpf_iter__unix to bpf_iter.h similar to bpf_iter__sockmap?
> The main purpose is to make test tolerating with old vmlinux.h.

Thank you for explanation!
I've understood why it is needed even when the same struct is defined.
I'll add it in the next spin. 


> 
> > +#include "bpf_tracing_net.h"
> > +#include <bpf/bpf_helpers.h>
> > +#include <bpf/bpf_endian.h>
> > +
> > +char _license[] SEC("license") = "GPL";
> > +
> > +#define __SO_ACCEPTCON		(1 << 16)
> > +#define UNIX_HASH_SIZE		256
> > +#define UNIX_ABSTRACT(unix_sk)	(unix_sk->addr->hash < UNIX_HASH_SIZE)
> 
> Could you add the above three define's in bpf_tracing_net.h?
> We try to keep all these common defines in a common header for
> potential reusability.

Will do.


> 
> > +
> > +static long sock_i_ino(const struct sock *sk)
> > +{
> > +	const struct socket *sk_socket = sk->sk_socket;
> > +	const struct inode *inode;
> > +	unsigned long ino;
> > +
> > +	if (!sk_socket)
> > +		return 0;
> > +
> > +	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
> > +	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
> > +	return ino;
> > +}
> > +
> > +SEC("iter/unix")
> > +int dump_unix(struct bpf_iter__unix *ctx)
> > +{
> > +	struct unix_sock *unix_sk = ctx->unix_sk;
> > +	struct sock *sk = (struct sock *)unix_sk;
> > +	struct seq_file *seq;
> > +	__u32 seq_num;
> > +
> > +	if (!unix_sk)
> > +		return 0;
> > +
> > +	seq = ctx->meta->seq;
> > +	seq_num = ctx->meta->seq_num;
> > +	if (seq_num == 0)
> > +		BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
> > +			       "Type St Inode Path\n");
> > +
> > +	BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
> > +		       unix_sk,
> > +		       sk->sk_refcnt.refs.counter,
> > +		       0,
> > +		       sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
> > +		       sk->sk_type,
> > +		       sk->sk_socket ?
> > +		       (sk->sk_state == TCP_ESTABLISHED ?
> > +			SS_CONNECTED : SS_UNCONNECTED) :
> > +		       (sk->sk_state == TCP_ESTABLISHED ?
> > +			SS_CONNECTING : SS_DISCONNECTING),
> > +		       sock_i_ino(sk));
> > +
> > +	if (unix_sk->addr) {
> > +		if (UNIX_ABSTRACT(unix_sk))
> > +			/* Abstract UNIX domain socket can contain '\0' in
> > +			 * the path, and it should be escaped.  However, it
> > +			 * requires loops and the BPF verifier rejects it.
> > +			 * So here, print only the escaped first byte to
> > +			 * indicate it is an abstract UNIX domain socket.
> > +			 * (See: unix_seq_show() and commit e7947ea770d0d)
> > +			 */
> > +			BPF_SEQ_PRINTF(seq, " @");
> > +		else
> > +			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> > +	}
> 
> I looked at af_unix.c, for the above "if (unix_sk->addr) { ... }" code,
> the following is the kernel source code,
> 
>                  if (u->addr) {  // under unix_table_lock here
>                          int i, len;
>                          seq_putc(seq, ' ');
> 
>                          i = 0;
>                          len = u->addr->len - sizeof(short);
>                          if (!UNIX_ABSTRACT(s))
>                                  len--;
>                          else {
>                                  seq_putc(seq, '@');
>                                  i++;
>                          }
>                          for ( ; i < len; i++)
>                                  seq_putc(seq, u->addr->name->sun_path[i] ?:
>                                           '@');
>                  }
> 
> It does not seem to match bpf program non UNIX_ABSTRACT case.
> I am not familiar with unix socket so it would be good if you can 
> explain a little more.

There is three kinds of unix sockets: pathname, unnamed, abstract.  The
first two terminate the addr with `\0`, but abstract must start with `\0`
and can contain `\0` anywhere in addr.  The `\0` in addr of abstract socket
does not have special meaning. [1]

They are inserted into the same hash table in unix_bind(), so the bpf prog
matches all of them.

``` net/unix/af_unix.c
  1114		if (sun_path[0])
  1115			err = unix_bind_bsd(sk, addr);
  1116		else
  1117			err = unix_bind_abstract(sk, addr);
```

[1]: https://man7.org/linux/man-pages/man7/unix.7.html


> 
> For verifier issue with loops, do we have a maximum upper bound for 
> u->addr->len? If yes, does bounded loop work?

That has a maximum length in unix_mkname(): sizeof(struct sockaddr_un).

``` net/unix/af_unix.c
   223	/*
   224	 *	Check unix socket name:
   225	 *		- should be not zero length.
   226	 *	        - if started by not zero, should be NULL terminated (FS object)
   227	 *		- if started by zero, it is abstract name.
   228	 */
   229	
   230	static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
   231	{
...
   234		if (len <= sizeof(short) || len > sizeof(*sunaddr))
   235			return -EINVAL;
...
   253	}
```

So, I rewrote the test like this, but it still causes an error.

```
	if (unix_sk->addr) {
		int i, len;

		len = unix_sk->addr->len - sizeof(short);

		if (!UNIX_ABSTRACT(unix_sk)) {
			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
		} else {
			BPF_SEQ_PRINTF(seq, " @");
			i++;

			if (len < sizeof(struct sockaddr_un)) {
				for (i = 1 ; i < len; i++)
					BPF_SEQ_PRINTF(seq, "%c",
						       unix_sk->addr->name->sun_path[i] ?:
						       '@');
			}
		}
	}
```

```
processed 196505 insns (limit 1000000) max_states_per_insn 4 total_states 1830 peak_states 1830 mark_read 3
```


> 
> > +
> > +	BPF_SEQ_PRINTF(seq, "\n");
> > +
> > +	return 0;
> > +}
Yonghong Song July 30, 2021, 4:22 p.m. UTC | #3
On 7/30/21 12:58 AM, Kuniyuki Iwashima wrote:
> From:   Yonghong Song <yhs@fb.com>
> Date:   Thu, 29 Jul 2021 23:54:26 -0700
>> On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
>>> If there are no abstract sockets, this prog can output the same result
>>> compared to /proc/net/unix.
>>>
>>>     # cat /sys/fs/bpf/unix | head -n 2
>>>     Num       RefCount Protocol Flags    Type St Inode Path
>>>     ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
>>>
>>>     # cat /proc/net/unix | head -n 2
>>>     Num       RefCount Protocol Flags    Type St Inode Path
>>>     ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
>>>
>>> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
>>> ---
>>>    .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
>>>    .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
>>>    2 files changed, 92 insertions(+)
>>>    create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
>>>
>>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>>> index 1f1aade56504..4746bac68d36 100644
>>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
>>> @@ -13,6 +13,7 @@
>>>    #include "bpf_iter_tcp6.skel.h"
>>>    #include "bpf_iter_udp4.skel.h"
>>>    #include "bpf_iter_udp6.skel.h"
>>> +#include "bpf_iter_unix.skel.h"
>>>    #include "bpf_iter_test_kern1.skel.h"
>>>    #include "bpf_iter_test_kern2.skel.h"
>>>    #include "bpf_iter_test_kern3.skel.h"
>>> @@ -313,6 +314,20 @@ static void test_udp6(void)
>>>    	bpf_iter_udp6__destroy(skel);
>>>    }
>>>    
>>> +static void test_unix(void)
>>> +{
>>> +	struct bpf_iter_unix *skel;
>>> +
>>> +	skel = bpf_iter_unix__open_and_load();
>>> +	if (CHECK(!skel, "bpf_iter_unix__open_and_load",
>>> +		  "skeleton open_and_load failed\n"))
>>> +		return;
>>> +
>>> +	do_dummy_read(skel->progs.dump_unix);
>>> +
>>> +	bpf_iter_unix__destroy(skel);
>>> +}
>>> +
>>>    /* The expected string is less than 16 bytes */
>>>    static int do_read_with_fd(int iter_fd, const char *expected,
>>>    			   bool read_one_char)
>>> @@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
>>>    		test_udp4();
>>>    	if (test__start_subtest("udp6"))
>>>    		test_udp6();
>>> +	if (test__start_subtest("unix"))
>>> +		test_unix();
>>>    	if (test__start_subtest("anon"))
>>>    		test_anon_iter(false);
>>>    	if (test__start_subtest("anon-read-one-char"))
>>> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
>>> new file mode 100644
>>> index 000000000000..285ec2f7944d
>>> --- /dev/null
>>> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
>>> @@ -0,0 +1,75 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/* Copyright Amazon.com Inc. or its affiliates. */
>>> +#include "bpf_iter.h"
>>
>> Could you add bpf_iter__unix to bpf_iter.h similar to bpf_iter__sockmap?
>> The main purpose is to make test tolerating with old vmlinux.h.
> 
> Thank you for explanation!
> I've understood why it is needed even when the same struct is defined.
> I'll add it in the next spin.
> 
> 
>>
>>> +#include "bpf_tracing_net.h"
>>> +#include <bpf/bpf_helpers.h>
>>> +#include <bpf/bpf_endian.h>
>>> +
>>> +char _license[] SEC("license") = "GPL";
>>> +
>>> +#define __SO_ACCEPTCON		(1 << 16)
>>> +#define UNIX_HASH_SIZE		256
>>> +#define UNIX_ABSTRACT(unix_sk)	(unix_sk->addr->hash < UNIX_HASH_SIZE)
>>
>> Could you add the above three define's in bpf_tracing_net.h?
>> We try to keep all these common defines in a common header for
>> potential reusability.
> 
> Will do.
> 
> 
>>
>>> +
>>> +static long sock_i_ino(const struct sock *sk)
>>> +{
>>> +	const struct socket *sk_socket = sk->sk_socket;
>>> +	const struct inode *inode;
>>> +	unsigned long ino;
>>> +
>>> +	if (!sk_socket)
>>> +		return 0;
>>> +
>>> +	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
>>> +	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
>>> +	return ino;
>>> +}
>>> +
>>> +SEC("iter/unix")
>>> +int dump_unix(struct bpf_iter__unix *ctx)
>>> +{
>>> +	struct unix_sock *unix_sk = ctx->unix_sk;
>>> +	struct sock *sk = (struct sock *)unix_sk;
>>> +	struct seq_file *seq;
>>> +	__u32 seq_num;
>>> +
>>> +	if (!unix_sk)
>>> +		return 0;
>>> +
>>> +	seq = ctx->meta->seq;
>>> +	seq_num = ctx->meta->seq_num;
>>> +	if (seq_num == 0)
>>> +		BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
>>> +			       "Type St Inode Path\n");
>>> +
>>> +	BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
>>> +		       unix_sk,
>>> +		       sk->sk_refcnt.refs.counter,
>>> +		       0,
>>> +		       sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
>>> +		       sk->sk_type,
>>> +		       sk->sk_socket ?
>>> +		       (sk->sk_state == TCP_ESTABLISHED ?
>>> +			SS_CONNECTED : SS_UNCONNECTED) :
>>> +		       (sk->sk_state == TCP_ESTABLISHED ?
>>> +			SS_CONNECTING : SS_DISCONNECTING),
>>> +		       sock_i_ino(sk));
>>> +
>>> +	if (unix_sk->addr) {
>>> +		if (UNIX_ABSTRACT(unix_sk))
>>> +			/* Abstract UNIX domain socket can contain '\0' in
>>> +			 * the path, and it should be escaped.  However, it
>>> +			 * requires loops and the BPF verifier rejects it.
>>> +			 * So here, print only the escaped first byte to
>>> +			 * indicate it is an abstract UNIX domain socket.
>>> +			 * (See: unix_seq_show() and commit e7947ea770d0d)
>>> +			 */
>>> +			BPF_SEQ_PRINTF(seq, " @");
>>> +		else
>>> +			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
>>> +	}
>>
>> I looked at af_unix.c, for the above "if (unix_sk->addr) { ... }" code,
>> the following is the kernel source code,
>>
>>                   if (u->addr) {  // under unix_table_lock here
>>                           int i, len;
>>                           seq_putc(seq, ' ');
>>
>>                           i = 0;
>>                           len = u->addr->len - sizeof(short);
>>                           if (!UNIX_ABSTRACT(s))
>>                                   len--;
>>                           else {
>>                                   seq_putc(seq, '@');
>>                                   i++;
>>                           }
>>                           for ( ; i < len; i++)
>>                                   seq_putc(seq, u->addr->name->sun_path[i] ?:
>>                                            '@');
>>                   }
>>
>> It does not seem to match bpf program non UNIX_ABSTRACT case.
>> I am not familiar with unix socket so it would be good if you can
>> explain a little more.
> 
> There is three kinds of unix sockets: pathname, unnamed, abstract.  The
> first two terminate the addr with `\0`, but abstract must start with `\0`
> and can contain `\0` anywhere in addr.  The `\0` in addr of abstract socket
> does not have special meaning. [1]
> 
> They are inserted into the same hash table in unix_bind(), so the bpf prog
> matches all of them.
> 
> ``` net/unix/af_unix.c
>    1114		if (sun_path[0])
>    1115			err = unix_bind_bsd(sk, addr);
>    1116		else
>    1117			err = unix_bind_abstract(sk, addr);
> ```
> 
> [1]: https://man7.org/linux/man-pages/man7/unix.7.html
> 
> 
>>
>> For verifier issue with loops, do we have a maximum upper bound for
>> u->addr->len? If yes, does bounded loop work?
> 
> That has a maximum length in unix_mkname(): sizeof(struct sockaddr_un).
> 
> ``` net/unix/af_unix.c
>     223	/*
>     224	 *	Check unix socket name:
>     225	 *		- should be not zero length.
>     226	 *	        - if started by not zero, should be NULL terminated (FS object)
>     227	 *		- if started by zero, it is abstract name.
>     228	 */
>     229	
>     230	static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
>     231	{
> ...
>     234		if (len <= sizeof(short) || len > sizeof(*sunaddr))
>     235			return -EINVAL;
> ...
>     253	}
> ```
> 
> So, I rewrote the test like this, but it still causes an error.
> 
> ```
> 	if (unix_sk->addr) {
> 		int i, len;
> 
> 		len = unix_sk->addr->len - sizeof(short);
> 
> 		if (!UNIX_ABSTRACT(unix_sk)) {
> 			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> 		} else {
> 			BPF_SEQ_PRINTF(seq, " @");
> 			i++;

i++ is not useful here and "i" is not initialized earlier.

> 
> 			if (len < sizeof(struct sockaddr_un)) {
> 				for (i = 1 ; i < len; i++)
> 					BPF_SEQ_PRINTF(seq, "%c",
> 						       unix_sk->addr->name->sun_path[i] ?:
> 						       '@');
> 			}
> 		}
> 	}
> ```
> 
> ```
> processed 196505 insns (limit 1000000) max_states_per_insn 4 total_states 1830 peak_states 1830 mark_read 3
> ```

I did some debugging, the main reason is that llvm compiler used "!=" 
instead of "<" for "i < len" comparison.

      107:       b4 05 00 00 08 00 00 00 w5 = 8
      108:       85 00 00 00 7e 00 00 00 call 126
;                               for (i = 1 ; i < len; i++)
      109:       07 09 00 00 01 00 00 00 r9 += 1
      110:       5d 98 09 00 00 00 00 00 if r8 != r9 goto +9 <LBB0_18>


Considering "len" is not a constant, for verifier, r8 will never be 
equal to r9 in the above.

Digging into llvm compilation, it is llvm pass Induction Variable 
simplication pass made this code change. I will try to dig more and
find a solution. In your next revision, could you add the above code
as a comment so once llvm code gen is improved, we can have proper
implementation to match /proc/net/unix?

> 
>>
>>> +
>>> +	BPF_SEQ_PRINTF(seq, "\n");
>>> +
>>> +	return 0;
>>> +}
Andrii Nakryiko July 30, 2021, 7:34 p.m. UTC | #4
On Thu, Jul 29, 2021 at 4:37 PM Kuniyuki Iwashima <kuniyu@amazon.co.jp> wrote:
>
> If there are no abstract sockets, this prog can output the same result
> compared to /proc/net/unix.
>
>   # cat /sys/fs/bpf/unix | head -n 2
>   Num       RefCount Protocol Flags    Type St Inode Path
>   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
>
>   # cat /proc/net/unix | head -n 2
>   Num       RefCount Protocol Flags    Type St Inode Path
>   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> ---
>  .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
>  .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
>  2 files changed, 92 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> index 1f1aade56504..4746bac68d36 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> @@ -13,6 +13,7 @@
>  #include "bpf_iter_tcp6.skel.h"
>  #include "bpf_iter_udp4.skel.h"
>  #include "bpf_iter_udp6.skel.h"
> +#include "bpf_iter_unix.skel.h"
>  #include "bpf_iter_test_kern1.skel.h"
>  #include "bpf_iter_test_kern2.skel.h"
>  #include "bpf_iter_test_kern3.skel.h"
> @@ -313,6 +314,20 @@ static void test_udp6(void)
>         bpf_iter_udp6__destroy(skel);
>  }
>
> +static void test_unix(void)
> +{
> +       struct bpf_iter_unix *skel;
> +
> +       skel = bpf_iter_unix__open_and_load();
> +       if (CHECK(!skel, "bpf_iter_unix__open_and_load",

please use new ASSERT_PTR_OK() macro instead

> +                 "skeleton open_and_load failed\n"))
> +               return;
> +
> +       do_dummy_read(skel->progs.dump_unix);
> +
> +       bpf_iter_unix__destroy(skel);
> +}
> +
>  /* The expected string is less than 16 bytes */
>  static int do_read_with_fd(int iter_fd, const char *expected,
>                            bool read_one_char)
> @@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
>                 test_udp4();
>         if (test__start_subtest("udp6"))
>                 test_udp6();
> +       if (test__start_subtest("unix"))
> +               test_unix();
>         if (test__start_subtest("anon"))
>                 test_anon_iter(false);
>         if (test__start_subtest("anon-read-one-char"))
> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> new file mode 100644
> index 000000000000..285ec2f7944d
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> @@ -0,0 +1,75 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright Amazon.com Inc. or its affiliates. */
> +#include "bpf_iter.h"
> +#include "bpf_tracing_net.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_endian.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +#define __SO_ACCEPTCON         (1 << 16)
> +#define UNIX_HASH_SIZE         256
> +#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE)
> +
> +static long sock_i_ino(const struct sock *sk)
> +{
> +       const struct socket *sk_socket = sk->sk_socket;
> +       const struct inode *inode;
> +       unsigned long ino;
> +
> +       if (!sk_socket)
> +               return 0;
> +
> +       inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
> +       bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
> +       return ino;
> +}
> +
> +SEC("iter/unix")
> +int dump_unix(struct bpf_iter__unix *ctx)
> +{
> +       struct unix_sock *unix_sk = ctx->unix_sk;
> +       struct sock *sk = (struct sock *)unix_sk;
> +       struct seq_file *seq;
> +       __u32 seq_num;
> +
> +       if (!unix_sk)
> +               return 0;
> +
> +       seq = ctx->meta->seq;
> +       seq_num = ctx->meta->seq_num;
> +       if (seq_num == 0)
> +               BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
> +                              "Type St Inode Path\n");
> +
> +       BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
> +                      unix_sk,
> +                      sk->sk_refcnt.refs.counter,
> +                      0,
> +                      sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
> +                      sk->sk_type,
> +                      sk->sk_socket ?
> +                      (sk->sk_state == TCP_ESTABLISHED ?
> +                       SS_CONNECTED : SS_UNCONNECTED) :
> +                      (sk->sk_state == TCP_ESTABLISHED ?
> +                       SS_CONNECTING : SS_DISCONNECTING),

nit: I'd keep these ternary operators on a single line for
readability. Same for header PRINTF above.

> +                      sock_i_ino(sk));
> +
> +       if (unix_sk->addr) {
> +               if (UNIX_ABSTRACT(unix_sk))
> +                       /* Abstract UNIX domain socket can contain '\0' in
> +                        * the path, and it should be escaped.  However, it
> +                        * requires loops and the BPF verifier rejects it.
> +                        * So here, print only the escaped first byte to
> +                        * indicate it is an abstract UNIX domain socket.
> +                        * (See: unix_seq_show() and commit e7947ea770d0d)
> +                        */
> +                       BPF_SEQ_PRINTF(seq, " @");
> +               else
> +                       BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> +       }
> +
> +       BPF_SEQ_PRINTF(seq, "\n");
> +
> +       return 0;
> +}
> --
> 2.30.2
>
Iwashima, Kuniyuki July 30, 2021, 10:55 p.m. UTC | #5
From:   Yonghong Song <yhs@fb.com>
Date:   Fri, 30 Jul 2021 09:22:32 -0700
> On 7/30/21 12:58 AM, Kuniyuki Iwashima wrote:
> > From:   Yonghong Song <yhs@fb.com>
> > Date:   Thu, 29 Jul 2021 23:54:26 -0700
> >> On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
> >>> If there are no abstract sockets, this prog can output the same result
> >>> compared to /proc/net/unix.
> >>>
> >>>     # cat /sys/fs/bpf/unix | head -n 2
> >>>     Num       RefCount Protocol Flags    Type St Inode Path
> >>>     ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> >>>
> >>>     # cat /proc/net/unix | head -n 2
> >>>     Num       RefCount Protocol Flags    Type St Inode Path
> >>>     ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> >>>
> >>> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> >>> ---
> >>>    .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
> >>>    .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
> >>>    2 files changed, 92 insertions(+)
> >>>    create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> >>>
> >>> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> >>> index 1f1aade56504..4746bac68d36 100644
> >>> --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> >>> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> >>> @@ -13,6 +13,7 @@
> >>>    #include "bpf_iter_tcp6.skel.h"
> >>>    #include "bpf_iter_udp4.skel.h"
> >>>    #include "bpf_iter_udp6.skel.h"
> >>> +#include "bpf_iter_unix.skel.h"
> >>>    #include "bpf_iter_test_kern1.skel.h"
> >>>    #include "bpf_iter_test_kern2.skel.h"
> >>>    #include "bpf_iter_test_kern3.skel.h"
> >>> @@ -313,6 +314,20 @@ static void test_udp6(void)
> >>>    	bpf_iter_udp6__destroy(skel);
> >>>    }
> >>>    
> >>> +static void test_unix(void)
> >>> +{
> >>> +	struct bpf_iter_unix *skel;
> >>> +
> >>> +	skel = bpf_iter_unix__open_and_load();
> >>> +	if (CHECK(!skel, "bpf_iter_unix__open_and_load",
> >>> +		  "skeleton open_and_load failed\n"))
> >>> +		return;
> >>> +
> >>> +	do_dummy_read(skel->progs.dump_unix);
> >>> +
> >>> +	bpf_iter_unix__destroy(skel);
> >>> +}
> >>> +
> >>>    /* The expected string is less than 16 bytes */
> >>>    static int do_read_with_fd(int iter_fd, const char *expected,
> >>>    			   bool read_one_char)
> >>> @@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
> >>>    		test_udp4();
> >>>    	if (test__start_subtest("udp6"))
> >>>    		test_udp6();
> >>> +	if (test__start_subtest("unix"))
> >>> +		test_unix();
> >>>    	if (test__start_subtest("anon"))
> >>>    		test_anon_iter(false);
> >>>    	if (test__start_subtest("anon-read-one-char"))
> >>> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> >>> new file mode 100644
> >>> index 000000000000..285ec2f7944d
> >>> --- /dev/null
> >>> +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> >>> @@ -0,0 +1,75 @@
> >>> +// SPDX-License-Identifier: GPL-2.0
> >>> +/* Copyright Amazon.com Inc. or its affiliates. */
> >>> +#include "bpf_iter.h"
> >>
> >> Could you add bpf_iter__unix to bpf_iter.h similar to bpf_iter__sockmap?
> >> The main purpose is to make test tolerating with old vmlinux.h.
> > 
> > Thank you for explanation!
> > I've understood why it is needed even when the same struct is defined.
> > I'll add it in the next spin.
> > 
> > 
> >>
> >>> +#include "bpf_tracing_net.h"
> >>> +#include <bpf/bpf_helpers.h>
> >>> +#include <bpf/bpf_endian.h>
> >>> +
> >>> +char _license[] SEC("license") = "GPL";
> >>> +
> >>> +#define __SO_ACCEPTCON		(1 << 16)
> >>> +#define UNIX_HASH_SIZE		256
> >>> +#define UNIX_ABSTRACT(unix_sk)	(unix_sk->addr->hash < UNIX_HASH_SIZE)
> >>
> >> Could you add the above three define's in bpf_tracing_net.h?
> >> We try to keep all these common defines in a common header for
> >> potential reusability.
> > 
> > Will do.
> > 
> > 
> >>
> >>> +
> >>> +static long sock_i_ino(const struct sock *sk)
> >>> +{
> >>> +	const struct socket *sk_socket = sk->sk_socket;
> >>> +	const struct inode *inode;
> >>> +	unsigned long ino;
> >>> +
> >>> +	if (!sk_socket)
> >>> +		return 0;
> >>> +
> >>> +	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
> >>> +	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
> >>> +	return ino;
> >>> +}
> >>> +
> >>> +SEC("iter/unix")
> >>> +int dump_unix(struct bpf_iter__unix *ctx)
> >>> +{
> >>> +	struct unix_sock *unix_sk = ctx->unix_sk;
> >>> +	struct sock *sk = (struct sock *)unix_sk;
> >>> +	struct seq_file *seq;
> >>> +	__u32 seq_num;
> >>> +
> >>> +	if (!unix_sk)
> >>> +		return 0;
> >>> +
> >>> +	seq = ctx->meta->seq;
> >>> +	seq_num = ctx->meta->seq_num;
> >>> +	if (seq_num == 0)
> >>> +		BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
> >>> +			       "Type St Inode Path\n");
> >>> +
> >>> +	BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
> >>> +		       unix_sk,
> >>> +		       sk->sk_refcnt.refs.counter,
> >>> +		       0,
> >>> +		       sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
> >>> +		       sk->sk_type,
> >>> +		       sk->sk_socket ?
> >>> +		       (sk->sk_state == TCP_ESTABLISHED ?
> >>> +			SS_CONNECTED : SS_UNCONNECTED) :
> >>> +		       (sk->sk_state == TCP_ESTABLISHED ?
> >>> +			SS_CONNECTING : SS_DISCONNECTING),
> >>> +		       sock_i_ino(sk));
> >>> +
> >>> +	if (unix_sk->addr) {
> >>> +		if (UNIX_ABSTRACT(unix_sk))
> >>> +			/* Abstract UNIX domain socket can contain '\0' in
> >>> +			 * the path, and it should be escaped.  However, it
> >>> +			 * requires loops and the BPF verifier rejects it.
> >>> +			 * So here, print only the escaped first byte to
> >>> +			 * indicate it is an abstract UNIX domain socket.
> >>> +			 * (See: unix_seq_show() and commit e7947ea770d0d)
> >>> +			 */
> >>> +			BPF_SEQ_PRINTF(seq, " @");
> >>> +		else
> >>> +			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> >>> +	}
> >>
> >> I looked at af_unix.c, for the above "if (unix_sk->addr) { ... }" code,
> >> the following is the kernel source code,
> >>
> >>                   if (u->addr) {  // under unix_table_lock here
> >>                           int i, len;
> >>                           seq_putc(seq, ' ');
> >>
> >>                           i = 0;
> >>                           len = u->addr->len - sizeof(short);
> >>                           if (!UNIX_ABSTRACT(s))
> >>                                   len--;
> >>                           else {
> >>                                   seq_putc(seq, '@');
> >>                                   i++;
> >>                           }
> >>                           for ( ; i < len; i++)
> >>                                   seq_putc(seq, u->addr->name->sun_path[i] ?:
> >>                                            '@');
> >>                   }
> >>
> >> It does not seem to match bpf program non UNIX_ABSTRACT case.
> >> I am not familiar with unix socket so it would be good if you can
> >> explain a little more.
> > 
> > There is three kinds of unix sockets: pathname, unnamed, abstract.  The
> > first two terminate the addr with `\0`, but abstract must start with `\0`
> > and can contain `\0` anywhere in addr.  The `\0` in addr of abstract socket
> > does not have special meaning. [1]
> > 
> > They are inserted into the same hash table in unix_bind(), so the bpf prog
> > matches all of them.
> > 
> > ``` net/unix/af_unix.c
> >    1114		if (sun_path[0])
> >    1115			err = unix_bind_bsd(sk, addr);
> >    1116		else
> >    1117			err = unix_bind_abstract(sk, addr);
> > ```
> > 
> > [1]: https://man7.org/linux/man-pages/man7/unix.7.html
> > 
> > 
> >>
> >> For verifier issue with loops, do we have a maximum upper bound for
> >> u->addr->len? If yes, does bounded loop work?
> > 
> > That has a maximum length in unix_mkname(): sizeof(struct sockaddr_un).
> > 
> > ``` net/unix/af_unix.c
> >     223	/*
> >     224	 *	Check unix socket name:
> >     225	 *		- should be not zero length.
> >     226	 *	        - if started by not zero, should be NULL terminated (FS object)
> >     227	 *		- if started by zero, it is abstract name.
> >     228	 */
> >     229	
> >     230	static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
> >     231	{
> > ...
> >     234		if (len <= sizeof(short) || len > sizeof(*sunaddr))
> >     235			return -EINVAL;
> > ...
> >     253	}
> > ```
> > 
> > So, I rewrote the test like this, but it still causes an error.
> > 
> > ```
> > 	if (unix_sk->addr) {
> > 		int i, len;
> > 
> > 		len = unix_sk->addr->len - sizeof(short);
> > 
> > 		if (!UNIX_ABSTRACT(unix_sk)) {
> > 			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> > 		} else {
> > 			BPF_SEQ_PRINTF(seq, " @");
> > 			i++;
> 
> i++ is not useful here and "i" is not initialized earlier.

Good catch.
I'll remove this.


> 
> > 
> > 			if (len < sizeof(struct sockaddr_un)) {
> > 				for (i = 1 ; i < len; i++)
> > 					BPF_SEQ_PRINTF(seq, "%c",
> > 						       unix_sk->addr->name->sun_path[i] ?:
> > 						       '@');
> > 			}
> > 		}
> > 	}
> > ```
> > 
> > ```
> > processed 196505 insns (limit 1000000) max_states_per_insn 4 total_states 1830 peak_states 1830 mark_read 3
> > ```
> 
> I did some debugging, the main reason is that llvm compiler used "!=" 
> instead of "<" for "i < len" comparison.
> 
>       107:       b4 05 00 00 08 00 00 00 w5 = 8
>       108:       85 00 00 00 7e 00 00 00 call 126
> ;                               for (i = 1 ; i < len; i++)
>       109:       07 09 00 00 01 00 00 00 r9 += 1
>       110:       5d 98 09 00 00 00 00 00 if r8 != r9 goto +9 <LBB0_18>
> 
> 
> Considering "len" is not a constant, for verifier, r8 will never be 
> equal to r9 in the above.
> 
> Digging into llvm compilation, it is llvm pass Induction Variable 
> simplication pass made this code change. I will try to dig more and
> find a solution. In your next revision, could you add the above code
> as a comment so once llvm code gen is improved, we can have proper
> implementation to match /proc/net/unix?

Thanks for debugging!
Ok, I'll include the code and add some note and this thread link in the
commit log.


> 
> > 
> >>
> >>> +
> >>> +	BPF_SEQ_PRINTF(seq, "\n");
> >>> +
> >>> +	return 0;
> >>> +}
Iwashima, Kuniyuki July 30, 2021, 11:03 p.m. UTC | #6
From:   Andrii Nakryiko <andrii.nakryiko@gmail.com>
Date:   Fri, 30 Jul 2021 12:34:43 -0700
> On Thu, Jul 29, 2021 at 4:37 PM Kuniyuki Iwashima <kuniyu@amazon.co.jp> wrote:
> >
> > If there are no abstract sockets, this prog can output the same result
> > compared to /proc/net/unix.
> >
> >   # cat /sys/fs/bpf/unix | head -n 2
> >   Num       RefCount Protocol Flags    Type St Inode Path
> >   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> >
> >   # cat /proc/net/unix | head -n 2
> >   Num       RefCount Protocol Flags    Type St Inode Path
> >   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer
> >
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
> > ---
> >  .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
> >  .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
> >  2 files changed, 92 insertions(+)
> >  create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> > index 1f1aade56504..4746bac68d36 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
> > @@ -13,6 +13,7 @@
> >  #include "bpf_iter_tcp6.skel.h"
> >  #include "bpf_iter_udp4.skel.h"
> >  #include "bpf_iter_udp6.skel.h"
> > +#include "bpf_iter_unix.skel.h"
> >  #include "bpf_iter_test_kern1.skel.h"
> >  #include "bpf_iter_test_kern2.skel.h"
> >  #include "bpf_iter_test_kern3.skel.h"
> > @@ -313,6 +314,20 @@ static void test_udp6(void)
> >         bpf_iter_udp6__destroy(skel);
> >  }
> >
> > +static void test_unix(void)
> > +{
> > +       struct bpf_iter_unix *skel;
> > +
> > +       skel = bpf_iter_unix__open_and_load();
> > +       if (CHECK(!skel, "bpf_iter_unix__open_and_load",
> 
> please use new ASSERT_PTR_OK() macro instead

I'll use ASSERT_PTR_OK().


> 
> > +                 "skeleton open_and_load failed\n"))
> > +               return;
> > +
> > +       do_dummy_read(skel->progs.dump_unix);
> > +
> > +       bpf_iter_unix__destroy(skel);
> > +}
> > +
> >  /* The expected string is less than 16 bytes */
> >  static int do_read_with_fd(int iter_fd, const char *expected,
> >                            bool read_one_char)
> > @@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
> >                 test_udp4();
> >         if (test__start_subtest("udp6"))
> >                 test_udp6();
> > +       if (test__start_subtest("unix"))
> > +               test_unix();
> >         if (test__start_subtest("anon"))
> >                 test_anon_iter(false);
> >         if (test__start_subtest("anon-read-one-char"))
> > diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> > new file mode 100644
> > index 000000000000..285ec2f7944d
> > --- /dev/null
> > +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
> > @@ -0,0 +1,75 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* Copyright Amazon.com Inc. or its affiliates. */
> > +#include "bpf_iter.h"
> > +#include "bpf_tracing_net.h"
> > +#include <bpf/bpf_helpers.h>
> > +#include <bpf/bpf_endian.h>
> > +
> > +char _license[] SEC("license") = "GPL";
> > +
> > +#define __SO_ACCEPTCON         (1 << 16)
> > +#define UNIX_HASH_SIZE         256
> > +#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE)
> > +
> > +static long sock_i_ino(const struct sock *sk)
> > +{
> > +       const struct socket *sk_socket = sk->sk_socket;
> > +       const struct inode *inode;
> > +       unsigned long ino;
> > +
> > +       if (!sk_socket)
> > +               return 0;
> > +
> > +       inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
> > +       bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
> > +       return ino;
> > +}
> > +
> > +SEC("iter/unix")
> > +int dump_unix(struct bpf_iter__unix *ctx)
> > +{
> > +       struct unix_sock *unix_sk = ctx->unix_sk;
> > +       struct sock *sk = (struct sock *)unix_sk;
> > +       struct seq_file *seq;
> > +       __u32 seq_num;
> > +
> > +       if (!unix_sk)
> > +               return 0;
> > +
> > +       seq = ctx->meta->seq;
> > +       seq_num = ctx->meta->seq_num;
> > +       if (seq_num == 0)
> > +               BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
> > +                              "Type St Inode Path\n");
> > +
> > +       BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
> > +                      unix_sk,
> > +                      sk->sk_refcnt.refs.counter,
> > +                      0,
> > +                      sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
> > +                      sk->sk_type,
> > +                      sk->sk_socket ?
> > +                      (sk->sk_state == TCP_ESTABLISHED ?
> > +                       SS_CONNECTED : SS_UNCONNECTED) :
> > +                      (sk->sk_state == TCP_ESTABLISHED ?
> > +                       SS_CONNECTING : SS_DISCONNECTING),
> 
> nit: I'd keep these ternary operators on a single line for
> readability. Same for header PRINTF above.

I'll make the two ternary operations of `sk->sk_socket ?` on single lines.

Thanks for your review!


> 
> > +                      sock_i_ino(sk));
> > +
> > +       if (unix_sk->addr) {
> > +               if (UNIX_ABSTRACT(unix_sk))
> > +                       /* Abstract UNIX domain socket can contain '\0' in
> > +                        * the path, and it should be escaped.  However, it
> > +                        * requires loops and the BPF verifier rejects it.
> > +                        * So here, print only the escaped first byte to
> > +                        * indicate it is an abstract UNIX domain socket.
> > +                        * (See: unix_seq_show() and commit e7947ea770d0d)
> > +                        */
> > +                       BPF_SEQ_PRINTF(seq, " @");
> > +               else
> > +                       BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
> > +       }
> > +
> > +       BPF_SEQ_PRINTF(seq, "\n");
> > +
> > +       return 0;
> > +}
> > --
> > 2.30.2
diff mbox series

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 1f1aade56504..4746bac68d36 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -13,6 +13,7 @@ 
 #include "bpf_iter_tcp6.skel.h"
 #include "bpf_iter_udp4.skel.h"
 #include "bpf_iter_udp6.skel.h"
+#include "bpf_iter_unix.skel.h"
 #include "bpf_iter_test_kern1.skel.h"
 #include "bpf_iter_test_kern2.skel.h"
 #include "bpf_iter_test_kern3.skel.h"
@@ -313,6 +314,20 @@  static void test_udp6(void)
 	bpf_iter_udp6__destroy(skel);
 }
 
+static void test_unix(void)
+{
+	struct bpf_iter_unix *skel;
+
+	skel = bpf_iter_unix__open_and_load();
+	if (CHECK(!skel, "bpf_iter_unix__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_unix);
+
+	bpf_iter_unix__destroy(skel);
+}
+
 /* The expected string is less than 16 bytes */
 static int do_read_with_fd(int iter_fd, const char *expected,
 			   bool read_one_char)
@@ -1255,6 +1270,8 @@  void test_bpf_iter(void)
 		test_udp4();
 	if (test__start_subtest("udp6"))
 		test_udp6();
+	if (test__start_subtest("unix"))
+		test_unix();
 	if (test__start_subtest("anon"))
 		test_anon_iter(false);
 	if (test__start_subtest("anon-read-one-char"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
new file mode 100644
index 000000000000..285ec2f7944d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
@@ -0,0 +1,75 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define __SO_ACCEPTCON		(1 << 16)
+#define UNIX_HASH_SIZE		256
+#define UNIX_ABSTRACT(unix_sk)	(unix_sk->addr->hash < UNIX_HASH_SIZE)
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+SEC("iter/unix")
+int dump_unix(struct bpf_iter__unix *ctx)
+{
+	struct unix_sock *unix_sk = ctx->unix_sk;
+	struct sock *sk = (struct sock *)unix_sk;
+	struct seq_file *seq;
+	__u32 seq_num;
+
+	if (!unix_sk)
+		return 0;
+
+	seq = ctx->meta->seq;
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
+			       "Type St Inode Path\n");
+
+	BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
+		       unix_sk,
+		       sk->sk_refcnt.refs.counter,
+		       0,
+		       sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
+		       sk->sk_type,
+		       sk->sk_socket ?
+		       (sk->sk_state == TCP_ESTABLISHED ?
+			SS_CONNECTED : SS_UNCONNECTED) :
+		       (sk->sk_state == TCP_ESTABLISHED ?
+			SS_CONNECTING : SS_DISCONNECTING),
+		       sock_i_ino(sk));
+
+	if (unix_sk->addr) {
+		if (UNIX_ABSTRACT(unix_sk))
+			/* Abstract UNIX domain socket can contain '\0' in
+			 * the path, and it should be escaped.  However, it
+			 * requires loops and the BPF verifier rejects it.
+			 * So here, print only the escaped first byte to
+			 * indicate it is an abstract UNIX domain socket.
+			 * (See: unix_seq_show() and commit e7947ea770d0d)
+			 */
+			BPF_SEQ_PRINTF(seq, " @");
+		else
+			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
+	}
+
+	BPF_SEQ_PRINTF(seq, "\n");
+
+	return 0;
+}