Message ID | 20241014213342.1480681-5-xur@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add AutoFDO and Propeller support for Clang build | expand |
On Tue, Oct 15, 2024 at 6:33 AM Rong Xu <xur@google.com> wrote: > > Enable -ffunction-sections by default for the AutoFDO build. > > With -ffunction-sections, the compiler places each function in its own > section named .text.function_name instead of placing all functions in > the .text section. In the AutoFDO build, this allows the linker to > utilize profile information to reorganize functions for improved > utilization of iCache and iTLB. > > Co-developed-by: Han Shen <shenhan@google.com> > Signed-off-by: Han Shen <shenhan@google.com> > Signed-off-by: Rong Xu <xur@google.com> > Suggested-by: Sriraman Tallam <tmsriram@google.com> > --- > include/asm-generic/vmlinux.lds.h | 37 ++++++++++++++++++++++++------- > scripts/Makefile.autofdo | 2 +- > 2 files changed, 30 insertions(+), 9 deletions(-) > > diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h > index 5df589c60401..ace617d1af9b 100644 > --- a/include/asm-generic/vmlinux.lds.h > +++ b/include/asm-generic/vmlinux.lds.h > @@ -95,18 +95,25 @@ > * With LTO_CLANG, the linker also splits sections by default, so we need > * these macros to combine the sections during the final link. > * > + * With LTO_CLANG, the linker also splits sections by default, so we need > + * these macros to combine the sections during the final link. > + * > * RODATA_MAIN is not used because existing code already defines .rodata.x > * sections to be brought in with rodata. > */ > -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ > +defined(CONFIG_AUTOFDO_CLANG) > #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* > +#else > +#define TEXT_MAIN .text > +#endif > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* > #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* > #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* > #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral* > #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* > #else > -#define TEXT_MAIN .text > #define DATA_MAIN .data > #define SDATA_MAIN .sdata > #define RODATA_MAIN .rodata > @@ -549,6 +556,20 @@ > __cpuidle_text_end = .; \ > __noinstr_text_end = .; > > +#ifdef CONFIG_AUTOFDO_CLANG > +#define TEXT_HOT \ > + __hot_text_start = .; \ > + *(.text.hot .text.hot.*) \ > + __hot_text_end = .; > +#define TEXT_UNLIKELY \ > + __unlikely_text_start = .; \ > + *(.text.unlikely .text.unlikely.*) \ > + __unlikely_text_end = .; > +#else > +#define TEXT_HOT *(.text.hot .text.hot.*) > +#define TEXT_UNLIKELY *(.text.unlikely .text.unlikely.*) > +#endif Again, why is this conditional? The only difference is *_start and *_end symbols are defined when CONFIG_AUTOFDO_CLANG=y. And, where are these symbols used? > + > /* > * .text section. Map to function alignment to avoid address changes > * during second ld run in second ld pass when generating System.map > @@ -557,30 +578,30 @@ > * code elimination or function-section is enabled. Match these symbols > * first when in these builds. > */ > -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ > +defined(CONFIG_AUTOFDO_CLANG) > #define TEXT_TEXT \ > ALIGN_FUNCTION(); \ > *(.text.asan.* .text.tsan.*) \ > *(.text.unknown .text.unknown.*) \ > - *(.text.unlikely .text.unlikely.*) \ > + TEXT_UNLIKELY \ > . = ALIGN(PAGE_SIZE); \ > - *(.text.hot .text.hot.*) \ > + TEXT_HOT \ > *(TEXT_MAIN .text.fixup) \ > NOINSTR_TEXT \ > *(.ref.text) > #else > #define TEXT_TEXT \ > ALIGN_FUNCTION(); \ > - *(.text.hot .text.hot.*) \ > + TEXT_HOT \ > *(TEXT_MAIN .text.fixup) \ > - *(.text.unlikely .text.unlikely.*) \ > + TEXT_UNLIKELY \ > *(.text.unknown .text.unknown.*) \ > NOINSTR_TEXT \ > *(.ref.text) \ > *(.text.asan.* .text.tsan.*) > #endif > > - > /* sched.text is aling to function alignment to secure we have same > * address even at second ld pass when generating System.map */ > #define SCHED_TEXT \ > diff --git a/scripts/Makefile.autofdo b/scripts/Makefile.autofdo > index 1c9f224bc221..9c9a530ef090 100644 > --- a/scripts/Makefile.autofdo > +++ b/scripts/Makefile.autofdo > @@ -10,7 +10,7 @@ ifndef CONFIG_DEBUG_INFO > endif > > ifdef CLANG_AUTOFDO_PROFILE > - CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) > + CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) -ffunction-sections > endif > > ifdef CONFIG_LTO_CLANG_THIN > -- > 2.47.0.rc1.288.g06298d1525-goog > >
The answers are the same as the reply in [PATCH v4 5/6] On Sun, Oct 20, 2024 at 7:26 PM Masahiro Yamada <masahiroy@kernel.org> wrote: > > On Tue, Oct 15, 2024 at 6:33 AM Rong Xu <xur@google.com> wrote: > > > > Enable -ffunction-sections by default for the AutoFDO build. > > > > With -ffunction-sections, the compiler places each function in its own > > section named .text.function_name instead of placing all functions in > > the .text section. In the AutoFDO build, this allows the linker to > > utilize profile information to reorganize functions for improved > > utilization of iCache and iTLB. > > > > Co-developed-by: Han Shen <shenhan@google.com> > > Signed-off-by: Han Shen <shenhan@google.com> > > Signed-off-by: Rong Xu <xur@google.com> > > Suggested-by: Sriraman Tallam <tmsriram@google.com> > > --- > > include/asm-generic/vmlinux.lds.h | 37 ++++++++++++++++++++++++------- > > scripts/Makefile.autofdo | 2 +- > > 2 files changed, 30 insertions(+), 9 deletions(-) > > > > diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h > > index 5df589c60401..ace617d1af9b 100644 > > --- a/include/asm-generic/vmlinux.lds.h > > +++ b/include/asm-generic/vmlinux.lds.h > > @@ -95,18 +95,25 @@ > > * With LTO_CLANG, the linker also splits sections by default, so we need > > * these macros to combine the sections during the final link. > > * > > + * With LTO_CLANG, the linker also splits sections by default, so we need > > + * these macros to combine the sections during the final link. > > + * > > * RODATA_MAIN is not used because existing code already defines .rodata.x > > * sections to be brought in with rodata. > > */ > > -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ > > +defined(CONFIG_AUTOFDO_CLANG) > > #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* > > +#else > > +#define TEXT_MAIN .text > > +#endif > > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > > #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* > > #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* > > #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* > > #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral* > > #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* > > #else > > -#define TEXT_MAIN .text > > #define DATA_MAIN .data > > #define SDATA_MAIN .sdata > > #define RODATA_MAIN .rodata > > @@ -549,6 +556,20 @@ > > __cpuidle_text_end = .; \ > > __noinstr_text_end = .; > > > > +#ifdef CONFIG_AUTOFDO_CLANG > > +#define TEXT_HOT \ > > + __hot_text_start = .; \ > > + *(.text.hot .text.hot.*) \ > > + __hot_text_end = .; > > +#define TEXT_UNLIKELY \ > > + __unlikely_text_start = .; \ > > + *(.text.unlikely .text.unlikely.*) \ > > + __unlikely_text_end = .; > > +#else > > +#define TEXT_HOT *(.text.hot .text.hot.*) > > +#define TEXT_UNLIKELY *(.text.unlikely .text.unlikely.*) > > +#endif > > > > Again, why is this conditional? The condition is to ensure that we don't change the default kernel build by any means. The new code will introduce a few new symbols. > > > The only difference is *_start and *_end symbols are defined > when CONFIG_AUTOFDO_CLANG=y. > > And, where are these symbols used? These new symbols are currently unreferenced within the kernel source tree. However, they provide a valuable means of identifying hot and cold sections of text, and how large they are. I think they are useful information. > > > > > > > > > > > > > + > > /* > > * .text section. Map to function alignment to avoid address changes > > * during second ld run in second ld pass when generating System.map > > @@ -557,30 +578,30 @@ > > * code elimination or function-section is enabled. Match these symbols > > * first when in these builds. > > */ > > -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ > > +defined(CONFIG_AUTOFDO_CLANG) > > #define TEXT_TEXT \ > > ALIGN_FUNCTION(); \ > > *(.text.asan.* .text.tsan.*) \ > > *(.text.unknown .text.unknown.*) \ > > - *(.text.unlikely .text.unlikely.*) \ > > + TEXT_UNLIKELY \ > > . = ALIGN(PAGE_SIZE); \ > > - *(.text.hot .text.hot.*) \ > > + TEXT_HOT \ > > *(TEXT_MAIN .text.fixup) \ > > NOINSTR_TEXT \ > > *(.ref.text) > > #else > > #define TEXT_TEXT \ > > ALIGN_FUNCTION(); \ > > - *(.text.hot .text.hot.*) \ > > + TEXT_HOT \ > > *(TEXT_MAIN .text.fixup) \ > > - *(.text.unlikely .text.unlikely.*) \ > > + TEXT_UNLIKELY \ > > *(.text.unknown .text.unknown.*) \ > > NOINSTR_TEXT \ > > *(.ref.text) \ > > *(.text.asan.* .text.tsan.*) > > #endif > > > > - > > /* sched.text is aling to function alignment to secure we have same > > * address even at second ld pass when generating System.map */ > > #define SCHED_TEXT \ > > diff --git a/scripts/Makefile.autofdo b/scripts/Makefile.autofdo > > index 1c9f224bc221..9c9a530ef090 100644 > > --- a/scripts/Makefile.autofdo > > +++ b/scripts/Makefile.autofdo > > @@ -10,7 +10,7 @@ ifndef CONFIG_DEBUG_INFO > > endif > > > > ifdef CLANG_AUTOFDO_PROFILE > > - CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) > > + CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) -ffunction-sections > > endif > > > > ifdef CONFIG_LTO_CLANG_THIN > > -- > > 2.47.0.rc1.288.g06298d1525-goog > > > > > > > -- > Best Regards > Masahiro Yamada
On Tue, Oct 22, 2024 at 8:32 AM Rong Xu <xur@google.com> wrote: > > The answers are the same as the reply in [PATCH v4 5/6] > > > > > > Again, why is this conditional? > > The condition is to ensure that we don't change the default kernel > build by any means. The new code will introduce a few new symbols. > Same answer. I guess you prefer unmaintainable code because you are not a maintainer. > > > > The only difference is *_start and *_end symbols are defined > > when CONFIG_AUTOFDO_CLANG=y. > > > > And, where are these symbols used? > > These new symbols are currently unreferenced within the kernel source tree. > However, they provide a valuable means of identifying hot and cold > sections of text, and how large they are. I think they are useful information. OK, then you are doing unrelated changes to include/asm-generic/vmlinux.lds.h. This patch should touch only scripts/Makefile.autofdo If you want to insert *_start and *_end markers, you can add a separate patch, explaining your motivation. > > > > > > > > > > > > > > > > > > > > > > > > > > + > > > /* > > > * .text section. Map to function alignment to avoid address changes > > > * during second ld run in second ld pass when generating System.map > > > @@ -557,30 +578,30 @@ > > > * code elimination or function-section is enabled. Match these symbols > > > * first when in these builds. > > > */ > > > -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) > > > +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ > > > +defined(CONFIG_AUTOFDO_CLANG) > > > #define TEXT_TEXT \ > > > ALIGN_FUNCTION(); \ > > > *(.text.asan.* .text.tsan.*) \ > > > *(.text.unknown .text.unknown.*) \ > > > - *(.text.unlikely .text.unlikely.*) \ > > > + TEXT_UNLIKELY \ > > > . = ALIGN(PAGE_SIZE); \ > > > - *(.text.hot .text.hot.*) \ > > > + TEXT_HOT \ > > > *(TEXT_MAIN .text.fixup) \ > > > NOINSTR_TEXT \ > > > *(.ref.text) > > > #else > > > #define TEXT_TEXT \ > > > ALIGN_FUNCTION(); \ > > > - *(.text.hot .text.hot.*) \ > > > + TEXT_HOT \ > > > *(TEXT_MAIN .text.fixup) \ > > > - *(.text.unlikely .text.unlikely.*) \ > > > + TEXT_UNLIKELY \ > > > *(.text.unknown .text.unknown.*) \ > > > NOINSTR_TEXT \ > > > *(.ref.text) \ > > > *(.text.asan.* .text.tsan.*) > > > #endif > > > > > > - > > > /* sched.text is aling to function alignment to secure we have same > > > * address even at second ld pass when generating System.map */ > > > #define SCHED_TEXT \ > > > diff --git a/scripts/Makefile.autofdo b/scripts/Makefile.autofdo > > > index 1c9f224bc221..9c9a530ef090 100644 > > > --- a/scripts/Makefile.autofdo > > > +++ b/scripts/Makefile.autofdo > > > @@ -10,7 +10,7 @@ ifndef CONFIG_DEBUG_INFO > > > endif > > > > > > ifdef CLANG_AUTOFDO_PROFILE > > > - CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) > > > + CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) -ffunction-sections > > > endif > > > > > > ifdef CONFIG_LTO_CLANG_THIN > > > -- > > > 2.47.0.rc1.288.g06298d1525-goog > > > > > > > > > > > > -- > > Best Regards > > Masahiro Yamada > -- Best Regards Masahiro Yamada
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5df589c60401..ace617d1af9b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -95,18 +95,25 @@ * With LTO_CLANG, the linker also splits sections by default, so we need * these macros to combine the sections during the final link. * + * With LTO_CLANG, the linker also splits sections by default, so we need + * these macros to combine the sections during the final link. + * * RODATA_MAIN is not used because existing code already defines .rodata.x * sections to be brought in with rodata. */ -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ +defined(CONFIG_AUTOFDO_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* +#else +#define TEXT_MAIN .text +#endif +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral* #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* #else -#define TEXT_MAIN .text #define DATA_MAIN .data #define SDATA_MAIN .sdata #define RODATA_MAIN .rodata @@ -549,6 +556,20 @@ __cpuidle_text_end = .; \ __noinstr_text_end = .; +#ifdef CONFIG_AUTOFDO_CLANG +#define TEXT_HOT \ + __hot_text_start = .; \ + *(.text.hot .text.hot.*) \ + __hot_text_end = .; +#define TEXT_UNLIKELY \ + __unlikely_text_start = .; \ + *(.text.unlikely .text.unlikely.*) \ + __unlikely_text_end = .; +#else +#define TEXT_HOT *(.text.hot .text.hot.*) +#define TEXT_UNLIKELY *(.text.unlikely .text.unlikely.*) +#endif + /* * .text section. Map to function alignment to avoid address changes * during second ld run in second ld pass when generating System.map @@ -557,30 +578,30 @@ * code elimination or function-section is enabled. Match these symbols * first when in these builds. */ -#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) || \ +defined(CONFIG_AUTOFDO_CLANG) #define TEXT_TEXT \ ALIGN_FUNCTION(); \ *(.text.asan.* .text.tsan.*) \ *(.text.unknown .text.unknown.*) \ - *(.text.unlikely .text.unlikely.*) \ + TEXT_UNLIKELY \ . = ALIGN(PAGE_SIZE); \ - *(.text.hot .text.hot.*) \ + TEXT_HOT \ *(TEXT_MAIN .text.fixup) \ NOINSTR_TEXT \ *(.ref.text) #else #define TEXT_TEXT \ ALIGN_FUNCTION(); \ - *(.text.hot .text.hot.*) \ + TEXT_HOT \ *(TEXT_MAIN .text.fixup) \ - *(.text.unlikely .text.unlikely.*) \ + TEXT_UNLIKELY \ *(.text.unknown .text.unknown.*) \ NOINSTR_TEXT \ *(.ref.text) \ *(.text.asan.* .text.tsan.*) #endif - /* sched.text is aling to function alignment to secure we have same * address even at second ld pass when generating System.map */ #define SCHED_TEXT \ diff --git a/scripts/Makefile.autofdo b/scripts/Makefile.autofdo index 1c9f224bc221..9c9a530ef090 100644 --- a/scripts/Makefile.autofdo +++ b/scripts/Makefile.autofdo @@ -10,7 +10,7 @@ ifndef CONFIG_DEBUG_INFO endif ifdef CLANG_AUTOFDO_PROFILE - CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) + CFLAGS_AUTOFDO_CLANG += -fprofile-sample-use=$(CLANG_AUTOFDO_PROFILE) -ffunction-sections endif ifdef CONFIG_LTO_CLANG_THIN