diff mbox series

[bpf,v2,1/2] compiler_types.h: Define __retain for __attribute__((__retain__))

Message ID b31bca5a5e6765a0f32cc8c19b1d9cdbfaa822b5.1717477560.git.Tony.Ambardar@gmail.com (mailing list archive)
State Accepted
Commit 0a5d3258d7c97295a89d22e54733b54aacb62562
Delegated to: BPF
Headers show
Series bpf: Fix linker optimization removing kfuncs | expand

Checks

Context Check Description
bpf/vmtest-bpf-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-46 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-VM_Test-43 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-44 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 14539 this patch: 14539
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers fail 8 maintainers not CCed: przemyslaw.kitszel@intel.com llvm@lists.linux.dev justinstitt@google.com keescook@chromium.org morbo@google.com elver@google.com ndesaulniers@google.com nathan@kernel.org
netdev/build_clang fail Errors and warnings before: 1220 this patch: 1220
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 15569 this patch: 15569
netdev/checkpatch warning CHECK: From:/Signed-off-by: email comments mismatch: 'From: Tony Ambardar <tony.ambardar@gmail.com>' != 'Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>'
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-PR success PR summary
bpf/vmtest-bpf-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc

Commit Message

Tony Ambardar June 4, 2024, 5:23 a.m. UTC
Some code includes the __used macro to prevent functions and data from
being optimized out. This macro implements __attribute__((__used__)), which
operates at the compiler and IR-level, and so still allows a linker to
remove objects intended to be kept.

Compilers supporting __attribute__((__retain__)) can address this gap by
setting the flag SHF_GNU_RETAIN on the section of a function/variable,
indicating to the linker the object should be retained. This attribute is
available since gcc 11, clang 13, and binutils 2.36.

Provide a __retain macro implementing __attribute__((__retain__)), whose
first user will be the '__bpf_kfunc' tag.

Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/
Cc: stable@vger.kernel.org # v6.6+
Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
---
 include/linux/compiler_types.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

Comments

Yonghong Song June 5, 2024, 5:55 a.m. UTC | #1
On 6/3/24 10:23 PM, Tony Ambardar wrote:
> Some code includes the __used macro to prevent functions and data from
> being optimized out. This macro implements __attribute__((__used__)), which
> operates at the compiler and IR-level, and so still allows a linker to
> remove objects intended to be kept.
>
> Compilers supporting __attribute__((__retain__)) can address this gap by
> setting the flag SHF_GNU_RETAIN on the section of a function/variable,
> indicating to the linker the object should be retained. This attribute is
> available since gcc 11, clang 13, and binutils 2.36.
>
> Provide a __retain macro implementing __attribute__((__retain__)), whose
> first user will be the '__bpf_kfunc' tag.
>
> Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/
> Cc: stable@vger.kernel.org # v6.6+
> Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
> ---
>   include/linux/compiler_types.h | 23 +++++++++++++++++++++++
>   1 file changed, 23 insertions(+)
>
> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> index 93600de3800b..f14c275950b5 100644
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -143,6 +143,29 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
>   # define __preserve_most
>   #endif
>   
> +/*
> + * Annotating a function/variable with __retain tells the compiler to place
> + * the object in its own section and set the flag SHF_GNU_RETAIN. This flag
> + * instructs the linker to retain the object during garbage-cleanup or LTO
> + * phases.
> + *
> + * Note that the __used macro is also used to prevent functions or data
> + * being optimized out, but operates at the compiler/IR-level and may still
> + * allow unintended removal of objects during linking.
> + *
> + * Optional: only supported since gcc >= 11, clang >= 13
> + *
> + *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-retain-function-attribute
> + * clang: https://clang.llvm.org/docs/AttributeReference.html#retain
> + */
> +#if __has_attribute(__retain__) && \
> +	(defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || \
> +	 defined(CONFIG_LTO_CLANG))

Could you explain why CONFIG_LTO_CLANG is added here?
IIUC, the __used macro permits garbage collection at section
level, so CLANG_LTO_CLANG without
CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
shuold not change final section dynamics, right?

> +# define __retain			__attribute__((__retain__))
> +#else
> +# define __retain
> +#endif
> +
>   /* Compiler specific macros. */
>   #ifdef __clang__
>   #include <linux/compiler-clang.h>
Tony Ambardar June 10, 2024, 10:56 p.m. UTC | #2
On Tue, Jun 04, 2024 at 10:55:39PM -0700, Yonghong Song wrote:
> 
> On 6/3/24 10:23 PM, Tony Ambardar wrote:
> > Some code includes the __used macro to prevent functions and data from
> > being optimized out. This macro implements __attribute__((__used__)), which
> > operates at the compiler and IR-level, and so still allows a linker to
> > remove objects intended to be kept.
> > 
> > Compilers supporting __attribute__((__retain__)) can address this gap by
> > setting the flag SHF_GNU_RETAIN on the section of a function/variable,
> > indicating to the linker the object should be retained. This attribute is
> > available since gcc 11, clang 13, and binutils 2.36.
> > 
> > Provide a __retain macro implementing __attribute__((__retain__)), whose
> > first user will be the '__bpf_kfunc' tag.
> > 
> > Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/
> > Cc: stable@vger.kernel.org # v6.6+
> > Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
> > ---
> >   include/linux/compiler_types.h | 23 +++++++++++++++++++++++
> >   1 file changed, 23 insertions(+)
> > 
> > diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> > index 93600de3800b..f14c275950b5 100644
> > --- a/include/linux/compiler_types.h
> > +++ b/include/linux/compiler_types.h
> > @@ -143,6 +143,29 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
> >   # define __preserve_most
> >   #endif
> > +/*
> > + * Annotating a function/variable with __retain tells the compiler to place
> > + * the object in its own section and set the flag SHF_GNU_RETAIN. This flag
> > + * instructs the linker to retain the object during garbage-cleanup or LTO
> > + * phases.
> > + *
> > + * Note that the __used macro is also used to prevent functions or data
> > + * being optimized out, but operates at the compiler/IR-level and may still
> > + * allow unintended removal of objects during linking.
> > + *
> > + * Optional: only supported since gcc >= 11, clang >= 13
> > + *
> > + *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-retain-function-attribute
> > + * clang: https://clang.llvm.org/docs/AttributeReference.html#retain
> > + */
> > +#if __has_attribute(__retain__) && \
> > +	(defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || \
> > +	 defined(CONFIG_LTO_CLANG))
> 
> Could you explain why CONFIG_LTO_CLANG is added here?
> IIUC, the __used macro permits garbage collection at section
> level, so CLANG_LTO_CLANG without
> CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
> shuold not change final section dynamics, right?

Hi Yonghong,

I included the conditional guard to ensure consistent behaviour between
__retain and other features forcing split sections. In particular, the same
guard is used in vmlinux.lds.h to merge split sections where needed. For
example, using __retain in llvm builds without CONFIG_LTO was failing CI
tests on kernel-patches/bpf because the kernel didn't boot properly. And in
further testing, the kernel had no issues loading BPF kfunc modules with
such split sections, so I left the module (partial) linking scripts alone.

Maybe I misunderstand you question re: __used?

Thanks,
Tony
> 
> > +# define __retain			__attribute__((__retain__))
> > +#else
> > +# define __retain
> > +#endif
> > +
> >   /* Compiler specific macros. */
> >   #ifdef __clang__
> >   #include <linux/compiler-clang.h>
Yonghong Song June 14, 2024, 6:47 p.m. UTC | #3
On 6/10/24 3:56 PM, Tony Ambardar wrote:
> On Tue, Jun 04, 2024 at 10:55:39PM -0700, Yonghong Song wrote:
>> On 6/3/24 10:23 PM, Tony Ambardar wrote:
>>> Some code includes the __used macro to prevent functions and data from
>>> being optimized out. This macro implements __attribute__((__used__)), which
>>> operates at the compiler and IR-level, and so still allows a linker to
>>> remove objects intended to be kept.
>>>
>>> Compilers supporting __attribute__((__retain__)) can address this gap by
>>> setting the flag SHF_GNU_RETAIN on the section of a function/variable,
>>> indicating to the linker the object should be retained. This attribute is
>>> available since gcc 11, clang 13, and binutils 2.36.
>>>
>>> Provide a __retain macro implementing __attribute__((__retain__)), whose
>>> first user will be the '__bpf_kfunc' tag.
>>>
>>> Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/
>>> Cc: stable@vger.kernel.org # v6.6+
>>> Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
>>> ---
>>>    include/linux/compiler_types.h | 23 +++++++++++++++++++++++
>>>    1 file changed, 23 insertions(+)
>>>
>>> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
>>> index 93600de3800b..f14c275950b5 100644
>>> --- a/include/linux/compiler_types.h
>>> +++ b/include/linux/compiler_types.h
>>> @@ -143,6 +143,29 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
>>>    # define __preserve_most
>>>    #endif
>>> +/*
>>> + * Annotating a function/variable with __retain tells the compiler to place
>>> + * the object in its own section and set the flag SHF_GNU_RETAIN. This flag
>>> + * instructs the linker to retain the object during garbage-cleanup or LTO
>>> + * phases.
>>> + *
>>> + * Note that the __used macro is also used to prevent functions or data
>>> + * being optimized out, but operates at the compiler/IR-level and may still
>>> + * allow unintended removal of objects during linking.
>>> + *
>>> + * Optional: only supported since gcc >= 11, clang >= 13
>>> + *
>>> + *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-retain-function-attribute
>>> + * clang: https://clang.llvm.org/docs/AttributeReference.html#retain
>>> + */
>>> +#if __has_attribute(__retain__) && \
>>> +	(defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || \
>>> +	 defined(CONFIG_LTO_CLANG))
>> Could you explain why CONFIG_LTO_CLANG is added here?
>> IIUC, the __used macro permits garbage collection at section
>> level, so CLANG_LTO_CLANG without
>> CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
>> shuold not change final section dynamics, right?
> Hi Yonghong,
>
> I included the conditional guard to ensure consistent behaviour between
> __retain and other features forcing split sections. In particular, the same
> guard is used in vmlinux.lds.h to merge split sections where needed. For
> example, using __retain in llvm builds without CONFIG_LTO was failing CI
> tests on kernel-patches/bpf because the kernel didn't boot properly. And in
> further testing, the kernel had no issues loading BPF kfunc modules with
> such split sections, so I left the module (partial) linking scripts alone.

I tried with both bpf and bpf-next tree and I cannot make CONFIG_HAVE_LD_DEAD_CODE_DATA_ELIMINATION=y
in .config file. The following are all occurances in Kconfig:

$ egrep -r HAVE_LD_DEAD_CODE_DATA_ELIMINATION
arch/mips/Kconfig:      select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
arch/powerpc/Kconfig:   select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if HAVE_OBJTOOL_MCOUNT && (!ARCH_USING_PATCHABLE_FUNCTION_ENTRY || (!CC_IS_GCC || GCC_VERSION >= 110100))
arch/riscv/Kconfig:     select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
init/Kconfig:config HAVE_LD_DEAD_CODE_DATA_ELIMINATION
init/Kconfig:   depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION

Are there some pending patches to enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
for x86?

I could foce CONFIG_HAVE_LD_DEAD_CODE_DATA_ELIMINATION=y with the following hack:
diff --git a/init/Kconfig b/init/Kconfig
index 72404c1f2157..adf8718e2f5b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1402,7 +1402,7 @@ config CC_OPTIMIZE_FOR_SIZE
  endchoice
  
  config HAVE_LD_DEAD_CODE_DATA_ELIMINATION
-       bool
+       def_bool y
         help
           This requires that the arch annotates or otherwise protects
           its external entry points from being discarded. Linker scripts

But with the above, I cannot boot the kernel.


Did I miss anything?

>
> Maybe I misunderstand you question re: __used?
>
> Thanks,
> Tony
>>> +# define __retain			__attribute__((__retain__))
>>> +#else
>>> +# define __retain
>>> +#endif
>>> +
>>>    /* Compiler specific macros. */
>>>    #ifdef __clang__
>>>    #include <linux/compiler-clang.h>
Tony Ambardar June 15, 2024, 6:57 a.m. UTC | #4
On Fri, Jun 14, 2024 at 11:47:19AM -0700, Yonghong Song wrote:
> 
> On 6/10/24 3:56 PM, Tony Ambardar wrote:
> > On Tue, Jun 04, 2024 at 10:55:39PM -0700, Yonghong Song wrote:
> > > On 6/3/24 10:23 PM, Tony Ambardar wrote:
> > > > Some code includes the __used macro to prevent functions and data from
> > > > being optimized out. This macro implements __attribute__((__used__)), which
> > > > operates at the compiler and IR-level, and so still allows a linker to
> > > > remove objects intended to be kept.
> > > > 
> > > > Compilers supporting __attribute__((__retain__)) can address this gap by
> > > > setting the flag SHF_GNU_RETAIN on the section of a function/variable,
> > > > indicating to the linker the object should be retained. This attribute is
> > > > available since gcc 11, clang 13, and binutils 2.36.
> > > > 
> > > > Provide a __retain macro implementing __attribute__((__retain__)), whose
> > > > first user will be the '__bpf_kfunc' tag.
> > > > 
> > > > Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/
> > > > Cc: stable@vger.kernel.org # v6.6+
> > > > Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
> > > > ---
> > > >    include/linux/compiler_types.h | 23 +++++++++++++++++++++++
> > > >    1 file changed, 23 insertions(+)
> > > > 
> > > > diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> > > > index 93600de3800b..f14c275950b5 100644
> > > > --- a/include/linux/compiler_types.h
> > > > +++ b/include/linux/compiler_types.h
> > > > @@ -143,6 +143,29 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
> > > >    # define __preserve_most
> > > >    #endif
> > > > +/*
> > > > + * Annotating a function/variable with __retain tells the compiler to place
> > > > + * the object in its own section and set the flag SHF_GNU_RETAIN. This flag
> > > > + * instructs the linker to retain the object during garbage-cleanup or LTO
> > > > + * phases.
> > > > + *
> > > > + * Note that the __used macro is also used to prevent functions or data
> > > > + * being optimized out, but operates at the compiler/IR-level and may still
> > > > + * allow unintended removal of objects during linking.
> > > > + *
> > > > + * Optional: only supported since gcc >= 11, clang >= 13
> > > > + *
> > > > + *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-retain-function-attribute
> > > > + * clang: https://clang.llvm.org/docs/AttributeReference.html#retain
> > > > + */
> > > > +#if __has_attribute(__retain__) && \
> > > > +	(defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || \
> > > > +	 defined(CONFIG_LTO_CLANG))
> > > Could you explain why CONFIG_LTO_CLANG is added here?
> > > IIUC, the __used macro permits garbage collection at section
> > > level, so CLANG_LTO_CLANG without
> > > CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
> > > shuold not change final section dynamics, right?
> > Hi Yonghong,
> > 
> > I included the conditional guard to ensure consistent behaviour between
> > __retain and other features forcing split sections. In particular, the same
> > guard is used in vmlinux.lds.h to merge split sections where needed. For
> > example, using __retain in llvm builds without CONFIG_LTO was failing CI
> > tests on kernel-patches/bpf because the kernel didn't boot properly. And in
> > further testing, the kernel had no issues loading BPF kfunc modules with
> > such split sections, so I left the module (partial) linking scripts alone.
> 
> I tried with both bpf and bpf-next tree and I cannot make CONFIG_HAVE_LD_DEAD_CODE_DATA_ELIMINATION=y
> in .config file. The following are all occurances in Kconfig:

My understanding is one doesn't directly set HAVE_LD_DEAD_CODE_...; it's a
per-arch capability flag which guards setting LD_DEAD_CODE_DATA_ELIMINATION
but only targets "small systems" (i.e. embedded), so no surprise x86 isn't
in the arch list below.

> 
> $ egrep -r HAVE_LD_DEAD_CODE_DATA_ELIMINATION
> arch/mips/Kconfig:      select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
> arch/powerpc/Kconfig:   select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if HAVE_OBJTOOL_MCOUNT && (!ARCH_USING_PATCHABLE_FUNCTION_ENTRY || (!CC_IS_GCC || GCC_VERSION >= 110100))
> arch/riscv/Kconfig:     select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
> init/Kconfig:config HAVE_LD_DEAD_CODE_DATA_ELIMINATION
> init/Kconfig:   depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION
> 
> Are there some pending patches to enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
> for x86?

I doubt it given the target arches above, but curious what's the need for
x86 support? Only x86_32? My patches were motivated seeing resolve_btfids
and pahole errors for a couple years on MIPS routers. I don't recall seeing
the same for x86 builds, so my testing focussed more on preserving x86
builds rather than adding/testing the arch flag for x86.

> 
> I could foce CONFIG_HAVE_LD_DEAD_CODE_DATA_ELIMINATION=y with the following hack:
> diff --git a/init/Kconfig b/init/Kconfig
> index 72404c1f2157..adf8718e2f5b 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1402,7 +1402,7 @@ config CC_OPTIMIZE_FOR_SIZE
>  endchoice
>  config HAVE_LD_DEAD_CODE_DATA_ELIMINATION
> -       bool
> +       def_bool y
>         help
>           This requires that the arch annotates or otherwise protects
>           its external entry points from being discarded. Linker scripts
> 
> But with the above, I cannot boot the kernel.

OK, interesting exercise. Setting HAVE_LD_DEAD_CODE_DATA_ELIMINATION
shouldn't change anything itself so I suppose you are also setting
LD_DEAD_CODE_DATA_ELIMINATION? From previous testing on kernel-patches/CI,
first guess would be vmlinux linker script doing section merges unaware of
some x86 quirk. Or x86-specific linker script unhappy with split sections.

> 
> 
> Did I miss anything?
> 
> > 
> > Maybe I misunderstand you question re: __used?
> > 
> > Thanks,
> > Tony
> > > > +# define __retain			__attribute__((__retain__))
> > > > +#else
> > > > +# define __retain
> > > > +#endif
> > > > +
> > > >    /* Compiler specific macros. */
> > > >    #ifdef __clang__
> > > >    #include <linux/compiler-clang.h>
Yonghong Song June 17, 2024, 3:26 a.m. UTC | #5
On 6/14/24 11:57 PM, Tony Ambardar wrote:
> On Fri, Jun 14, 2024 at 11:47:19AM -0700, Yonghong Song wrote:
>> On 6/10/24 3:56 PM, Tony Ambardar wrote:
>>> On Tue, Jun 04, 2024 at 10:55:39PM -0700, Yonghong Song wrote:
>>>> On 6/3/24 10:23 PM, Tony Ambardar wrote:
>>>>> Some code includes the __used macro to prevent functions and data from
>>>>> being optimized out. This macro implements __attribute__((__used__)), which
>>>>> operates at the compiler and IR-level, and so still allows a linker to
>>>>> remove objects intended to be kept.
>>>>>
>>>>> Compilers supporting __attribute__((__retain__)) can address this gap by
>>>>> setting the flag SHF_GNU_RETAIN on the section of a function/variable,
>>>>> indicating to the linker the object should be retained. This attribute is
>>>>> available since gcc 11, clang 13, and binutils 2.36.
>>>>>
>>>>> Provide a __retain macro implementing __attribute__((__retain__)), whose
>>>>> first user will be the '__bpf_kfunc' tag.
>>>>>
>>>>> Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/
>>>>> Cc: stable@vger.kernel.org # v6.6+
>>>>> Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
>>>>> ---
>>>>>     include/linux/compiler_types.h | 23 +++++++++++++++++++++++
>>>>>     1 file changed, 23 insertions(+)
>>>>>
>>>>> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
>>>>> index 93600de3800b..f14c275950b5 100644
>>>>> --- a/include/linux/compiler_types.h
>>>>> +++ b/include/linux/compiler_types.h
>>>>> @@ -143,6 +143,29 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
>>>>>     # define __preserve_most
>>>>>     #endif
>>>>> +/*
>>>>> + * Annotating a function/variable with __retain tells the compiler to place
>>>>> + * the object in its own section and set the flag SHF_GNU_RETAIN. This flag
>>>>> + * instructs the linker to retain the object during garbage-cleanup or LTO
>>>>> + * phases.
>>>>> + *
>>>>> + * Note that the __used macro is also used to prevent functions or data
>>>>> + * being optimized out, but operates at the compiler/IR-level and may still
>>>>> + * allow unintended removal of objects during linking.
>>>>> + *
>>>>> + * Optional: only supported since gcc >= 11, clang >= 13
>>>>> + *
>>>>> + *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-retain-function-attribute
>>>>> + * clang: https://clang.llvm.org/docs/AttributeReference.html#retain
>>>>> + */
>>>>> +#if __has_attribute(__retain__) && \
>>>>> +	(defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || \
>>>>> +	 defined(CONFIG_LTO_CLANG))
>>>> Could you explain why CONFIG_LTO_CLANG is added here?
>>>> IIUC, the __used macro permits garbage collection at section
>>>> level, so CLANG_LTO_CLANG without
>>>> CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
>>>> shuold not change final section dynamics, right?
>>> Hi Yonghong,
>>>
>>> I included the conditional guard to ensure consistent behaviour between
>>> __retain and other features forcing split sections. In particular, the same
>>> guard is used in vmlinux.lds.h to merge split sections where needed. For
>>> example, using __retain in llvm builds without CONFIG_LTO was failing CI
>>> tests on kernel-patches/bpf because the kernel didn't boot properly. And in
>>> further testing, the kernel had no issues loading BPF kfunc modules with
>>> such split sections, so I left the module (partial) linking scripts alone.
>> I tried with both bpf and bpf-next tree and I cannot make CONFIG_HAVE_LD_DEAD_CODE_DATA_ELIMINATION=y
>> in .config file. The following are all occurances in Kconfig:
> My understanding is one doesn't directly set HAVE_LD_DEAD_CODE_...; it's a
> per-arch capability flag which guards setting LD_DEAD_CODE_DATA_ELIMINATION
> but only targets "small systems" (i.e. embedded), so no surprise x86 isn't
> in the arch list below.

I see. Yes, mips should support it but not x86. No wonder why I cannot reproduce.

>
>> $ egrep -r HAVE_LD_DEAD_CODE_DATA_ELIMINATION
>> arch/mips/Kconfig:      select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
>> arch/powerpc/Kconfig:   select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if HAVE_OBJTOOL_MCOUNT && (!ARCH_USING_PATCHABLE_FUNCTION_ENTRY || (!CC_IS_GCC || GCC_VERSION >= 110100))
>> arch/riscv/Kconfig:     select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
>> init/Kconfig:config HAVE_LD_DEAD_CODE_DATA_ELIMINATION
>> init/Kconfig:   depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION
>>
>> Are there some pending patches to enable HAVE_LD_DEAD_CODE_DATA_ELIMINATION
>> for x86?
> I doubt it given the target arches above, but curious what's the need for
> x86 support? Only x86_32? My patches were motivated seeing resolve_btfids
> and pahole errors for a couple years on MIPS routers. I don't recall seeing
> the same for x86 builds, so my testing focussed more on preserving x86
> builds rather than adding/testing the arch flag for x86.
>> I could foce CONFIG_HAVE_LD_DEAD_CODE_DATA_ELIMINATION=y with the following hack:
>> diff --git a/init/Kconfig b/init/Kconfig
>> index 72404c1f2157..adf8718e2f5b 100644
>> --- a/init/Kconfig
>> +++ b/init/Kconfig
>> @@ -1402,7 +1402,7 @@ config CC_OPTIMIZE_FOR_SIZE
>>   endchoice
>>   config HAVE_LD_DEAD_CODE_DATA_ELIMINATION
>> -       bool
>> +       def_bool y
>>          help
>>            This requires that the arch annotates or otherwise protects
>>            its external entry points from being discarded. Linker scripts
>>
>> But with the above, I cannot boot the kernel.
> OK, interesting exercise. Setting HAVE_LD_DEAD_CODE_DATA_ELIMINATION
> shouldn't change anything itself so I suppose you are also setting
> LD_DEAD_CODE_DATA_ELIMINATION? From previous testing on kernel-patches/CI,
> first guess would be vmlinux linker script doing section merges unaware of
> some x86 quirk. Or x86-specific linker script unhappy with split sections.

I guess x86 needs additional change to make HAVE_LD_DEAD_CODE_DATA_ELIMINATION
work. I still curious about why CONFIG_LTO_CLANG is necessary.

In asm-generic/vmlinux.lds.h,

/*
  * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which
  * generates .data.identifier sections, which need to be pulled in with
  * .data. We don't want to pull in .data..other sections, which Linux
  * has defined. Same for text and bss.
  *
  * With LTO_CLANG, the linker also splits sections by default, so we need
  * these macros to combine the sections during the final link.
  *
  * RODATA_MAIN is not used because existing code already defines .rodata.x
  * sections to be brought in with rodata.
  */
#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
#define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L*
#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L*
#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral*
#define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]*
#else
#define TEXT_MAIN .text
#define DATA_MAIN .data
#define SDATA_MAIN .sdata
#define RODATA_MAIN .rodata
#define BSS_MAIN .bss
#define SBSS_MAIN .sbss
#endif

If CONFIG_LTO_CLANG is defined and CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is
not defined, it is not clear whether __used functions will get eliminated
or not. I tried with thinlto with a simple example on x86 with some unused
function marked with __used, and that function survived in the final binary.

But your patch won't hurt, so I am okay with it.

>
>>
>> Did I miss anything?
>>
>>> Maybe I misunderstand you question re: __used?
>>>
>>> Thanks,
>>> Tony
>>>>> +# define __retain			__attribute__((__retain__))
>>>>> +#else
>>>>> +# define __retain
>>>>> +#endif
>>>>> +
>>>>>     /* Compiler specific macros. */
>>>>>     #ifdef __clang__
>>>>>     #include <linux/compiler-clang.h>
diff mbox series

Patch

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 93600de3800b..f14c275950b5 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -143,6 +143,29 @@  static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
 # define __preserve_most
 #endif
 
+/*
+ * Annotating a function/variable with __retain tells the compiler to place
+ * the object in its own section and set the flag SHF_GNU_RETAIN. This flag
+ * instructs the linker to retain the object during garbage-cleanup or LTO
+ * phases.
+ *
+ * Note that the __used macro is also used to prevent functions or data
+ * being optimized out, but operates at the compiler/IR-level and may still
+ * allow unintended removal of objects during linking.
+ *
+ * Optional: only supported since gcc >= 11, clang >= 13
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-retain-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#retain
+ */
+#if __has_attribute(__retain__) && \
+	(defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || \
+	 defined(CONFIG_LTO_CLANG))
+# define __retain			__attribute__((__retain__))
+#else
+# define __retain
+#endif
+
 /* Compiler specific macros. */
 #ifdef __clang__
 #include <linux/compiler-clang.h>