
[bpf-next,07/15] selftests/xsk: get rid of asm store/release implementations

Message ID 20221206090826.2957-8-magnus.karlsson@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Series selftests/xsk: speed-ups, fixes, and new XDP programs

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 10 maintainers not CCed: linux-kselftest@vger.kernel.org paul.walmsley@sifive.com davem@davemloft.net kuba@kernel.org aou@eecs.berkeley.edu linux-riscv@lists.infradead.org palmer@dabbelt.com shuah@kernel.org hawk@kernel.org mykolal@fb.com
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain }}
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-8 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-9 success Logs for set-matrix

Commit Message

Magnus Karlsson Dec. 6, 2022, 9:08 a.m. UTC
From: Magnus Karlsson <magnus.karlsson@intel.com>

Get rid of our own homegrown assembly store/release and load/acquire
implementations. Use the HW-agnostic APIs the compiler offers
instead.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
---
 tools/testing/selftests/bpf/xsk.h | 80 ++-----------------------------
 1 file changed, 4 insertions(+), 76 deletions(-)
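
In practice the change replaces the library-internal helpers with the
GCC/Clang __atomic builtins at the four call sites in xsk.h. A minimal
sketch of the pattern, using the ring accesses from the patch below:

	/* Before: hand-rolled, per-architecture helpers */
	r->cached_cons = libbpf_smp_load_acquire(r->consumer);
	libbpf_smp_store_release(prod->producer, *prod->producer + nb);

	/* After: architecture-agnostic compiler builtins */
	r->cached_cons = __atomic_load_n(r->consumer, __ATOMIC_ACQUIRE);
	__atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE);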

Comments

Daniel Borkmann Dec. 6, 2022, 11:48 p.m. UTC | #1
On 12/6/22 10:08 AM, Magnus Karlsson wrote:
> From: Magnus Karlsson <magnus.karlsson@intel.com>
> 
> Get rid of our own homegrown assembly store/release and load/acquire
> implementations. Use the HW-agnostic APIs the compiler offers
> instead.

The description is a bit terse. Could you add a bit more context, discussion
or reference on why it's safe to replace them with C11 atomics?

> [...]
Magnus Karlsson Dec. 7, 2022, 8:23 a.m. UTC | #2
On Wed, Dec 7, 2022 at 12:48 AM Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 12/6/22 10:08 AM, Magnus Karlsson wrote:
> > From: Magnus Karlsson <magnus.karlsson@intel.com>
> >
> > Get rid of our own homegrown assembly store/release and load/acquire
> > implementations. Use the HW-agnostic APIs the compiler offers
> > instead.
>
> The description is a bit terse. Could you add a bit more context, discussion
> or reference on why it's safe to replace them with C11 atomics?

Will do, though I will hold off on a v2 in case there are further comments.

> > [...]
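
On the safety question raised in the thread: on every architecture the
removed asm covered, the acquire/release builtins are expected to lower
to equivalent or identical code, which is presumably the kind of context
a v2 commit message would spell out. A hedged sketch for illustration
only (the helper names here are hypothetical, not from the patch),
assuming a __u32 ring pointer as in xsk.h:

	#include <stdint.h>

	/* What the builtins lower to on the architectures the removed
	 * asm handled explicitly:
	 *   x86-64:  plain mov; the ordering is compiler-only, just like
	 *            the removed asm volatile("" ::: "memory") barriers
	 *   aarch64: ldar / stlr, the same instructions as the removed asm
	 *   riscv:   load/store bracketed by the equivalent fence
	 * Other targets get whatever the compiler knows is correct,
	 * replacing the old __sync_synchronize() fallback path.
	 */
	static inline uint32_t ring_load_acquire(const uint32_t *p)
	{
		return __atomic_load_n(p, __ATOMIC_ACQUIRE);
	}

	static inline void ring_store_release(uint32_t *p, uint32_t v)
	{
		__atomic_store_n(p, v, __ATOMIC_RELEASE);
	}

Inspecting the generated assembly on each target (e.g. with objdump -d)
is the authoritative way to confirm the mapping.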

Patch

diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index 997723b0bfb2..24ee765aded3 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -23,77 +23,6 @@ 
 extern "C" {
 #endif
 
-/* This whole API has been deprecated and moved to libxdp that can be found at
- * https://github.com/xdp-project/xdp-tools. The APIs are exactly the same so
- * it should just be linking with libxdp instead of libbpf for this set of
- * functionality. If not, please submit a bug report on the aforementioned page.
- */
-
-/* Load-Acquire Store-Release barriers used by the XDP socket
- * library. The following macros should *NOT* be considered part of
- * the xsk.h API, and is subject to change anytime.
- *
- * LIBRARY INTERNAL
- */
-
-#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x)
-#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v)
-
-#if defined(__i386__) || defined(__x86_64__)
-# define libbpf_smp_store_release(p, v)					\
-	do {								\
-		asm volatile("" : : : "memory");			\
-		__XSK_WRITE_ONCE(*p, v);				\
-	} while (0)
-# define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
-		asm volatile("" : : : "memory");			\
-		___p1;							\
-	})
-#elif defined(__aarch64__)
-# define libbpf_smp_store_release(p, v)					\
-		asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
-# define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1;					\
-		asm volatile ("ldar %w0, %1"				\
-			      : "=r" (___p1) : "Q" (*p) : "memory");	\
-		___p1;							\
-	})
-#elif defined(__riscv)
-# define libbpf_smp_store_release(p, v)					\
-	do {								\
-		asm volatile ("fence rw,w" : : : "memory");		\
-		__XSK_WRITE_ONCE(*p, v);				\
-	} while (0)
-# define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
-		asm volatile ("fence r,rw" : : : "memory");		\
-		___p1;							\
-	})
-#endif
-
-#ifndef libbpf_smp_store_release
-#define libbpf_smp_store_release(p, v)					\
-	do {								\
-		__sync_synchronize();					\
-		__XSK_WRITE_ONCE(*p, v);				\
-	} while (0)
-#endif
-
-#ifndef libbpf_smp_load_acquire
-#define libbpf_smp_load_acquire(p)					\
-	({								\
-		typeof(*p) ___p1 = __XSK_READ_ONCE(*p);			\
-		__sync_synchronize();					\
-		___p1;							\
-	})
-#endif
-
-/* LIBRARY INTERNAL -- END */
-
 /* Do not access these members directly. Use the functions below. */
 #define DEFINE_XSK_RING(name) \
 struct name { \
@@ -168,7 +97,7 @@  static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
 	 * this function. Without this optimization it whould have been
 	 * free_entries = r->cached_prod - r->cached_cons + r->size.
 	 */
-	r->cached_cons = libbpf_smp_load_acquire(r->consumer);
+	r->cached_cons = __atomic_load_n(r->consumer, __ATOMIC_ACQUIRE);
 	r->cached_cons += r->size;
 
 	return r->cached_cons - r->cached_prod;
@@ -179,7 +108,7 @@  static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
 	__u32 entries = r->cached_prod - r->cached_cons;
 
 	if (entries == 0) {
-		r->cached_prod = libbpf_smp_load_acquire(r->producer);
+		r->cached_prod = __atomic_load_n(r->producer, __ATOMIC_ACQUIRE);
 		entries = r->cached_prod - r->cached_cons;
 	}
 
@@ -202,7 +131,7 @@  static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
 	/* Make sure everything has been written to the ring before indicating
 	 * this to the kernel by writing the producer pointer.
 	 */
-	libbpf_smp_store_release(prod->producer, *prod->producer + nb);
+	__atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE);
 }
 
 static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
@@ -227,8 +156,7 @@  static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
 	/* Make sure data has been read before indicating we are done
 	 * with the entries by updating the consumer pointer.
 	 */
-	libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
-
+	__atomic_store_n(cons->consumer, *cons->consumer + nb, __ATOMIC_RELEASE);
 }
 
 static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)