Message ID | 20230615133239.442736-6-anshuman.khandual@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm64/perf: Enable branch stack sampling | expand |
Hi Anshuman, kernel test robot noticed the following build errors: [auto build test ERROR on arm64/for-next/core] [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352 base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/202306160706.Uei5XDoi-lkp@intel.com/config) compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a) reproduce (this is a W=1 build): mkdir -p ~/bin wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install arm cross compiling tool for clang build # apt-get install binutils-arm-linux-gnueabi git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git git fetch arm64 for-next/core git checkout arm64/for-next/core b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/ If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202306160706.Uei5XDoi-lkp@intel.com/ All errors (new ones prefixed by >>): | ^~~~~~ drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here 140 | PERF_CACHE_MAP_ALL_UNSUPPORTED, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED' 43 | [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ | ^~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED' 35 | #define CACHE_OP_UNSUPPORTED 0xFFFF | ^~~~~~ drivers/perf/arm_pmuv3.c:147:44: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides] 147 | [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:133:44: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD' 133 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E | ^~~~~~ drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here 140 | PERF_CACHE_MAP_ALL_UNSUPPORTED, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED' 43 | [0 ... 
C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ | ^~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED' 35 | #define CACHE_OP_UNSUPPORTED 0xFFFF | ^~~~~~ drivers/perf/arm_pmuv3.c:148:45: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides] 148 | [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:134:44: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR' 134 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F | ^~~~~~ drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here 140 | PERF_CACHE_MAP_ALL_UNSUPPORTED, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED' 43 | [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ | ^~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED' 35 | #define CACHE_OP_UNSUPPORTED 0xFFFF | ^~~~~~ drivers/perf/arm_pmuv3.c:149:42: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides] 149 | [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:131:50: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD' 131 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C | ^~~~~~ drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here 140 | PERF_CACHE_MAP_ALL_UNSUPPORTED, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED' 43 | [0 ... 
C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ | ^~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED' 35 | #define CACHE_OP_UNSUPPORTED 0xFFFF | ^~~~~~ drivers/perf/arm_pmuv3.c:150:43: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides] 150 | [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:132:50: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR' 132 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D | ^~~~~~ drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here 140 | PERF_CACHE_MAP_ALL_UNSUPPORTED, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED' 43 | [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ | ^~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED' 35 | #define CACHE_OP_UNSUPPORTED 0xFFFF | ^~~~~~ drivers/perf/arm_pmuv3.c:152:44: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides] 152 | [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:148:46: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD' 148 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060 | ^~~~~~ drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here 140 | PERF_CACHE_MAP_ALL_UNSUPPORTED, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED' 43 | [0 ... 
C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ | ^~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED' 35 | #define CACHE_OP_UNSUPPORTED 0xFFFF | ^~~~~~ drivers/perf/arm_pmuv3.c:153:45: warning: initializer overrides prior initialization of this subobject [-Winitializer-overrides] 153 | [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:149:46: note: expanded from macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR' 149 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061 | ^~~~~~ drivers/perf/arm_pmuv3.c:140:2: note: previous initialization is here 140 | PERF_CACHE_MAP_ALL_UNSUPPORTED, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:43:31: note: expanded from macro 'PERF_CACHE_MAP_ALL_UNSUPPORTED' 43 | [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ | ^~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmu.h:35:31: note: expanded from macro 'CACHE_OP_UNSUPPORTED' 35 | #define CACHE_OP_UNSUPPORTED 0xFFFF | ^~~~~~ >> drivers/perf/arm_pmuv3.c:714:3: error: call to undeclared function 'armv8pmu_branch_enable'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 714 | armv8pmu_branch_enable(event); | ^ >> drivers/perf/arm_pmuv3.c:720:3: error: call to undeclared function 'armv8pmu_branch_disable'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 720 | armv8pmu_branch_disable(event); | ^ >> drivers/perf/arm_pmuv3.c:801:4: error: call to undeclared function 'armv8pmu_branch_read'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 801 | armv8pmu_branch_read(cpuc, event); | ^ drivers/perf/arm_pmuv3.c:801:4: note: did you mean 'armv8pmu_pmcr_read'? 
drivers/perf/arm_pmuv3.c:430:19: note: 'armv8pmu_pmcr_read' declared here 430 | static inline u32 armv8pmu_pmcr_read(void) | ^ >> drivers/perf/arm_pmuv3.c:908:3: error: call to undeclared function 'armv8pmu_branch_reset'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 908 | armv8pmu_branch_reset(); | ^ drivers/perf/arm_pmuv3.c:983:3: error: call to undeclared function 'armv8pmu_branch_reset'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 983 | armv8pmu_branch_reset(); | ^ >> drivers/perf/arm_pmuv3.c:1021:34: error: call to undeclared function 'armv8pmu_branch_attr_valid'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 1021 | if (has_branch_stack(event) && !armv8pmu_branch_attr_valid(event)) | ^ >> drivers/perf/arm_pmuv3.c:1140:2: error: call to undeclared function 'armv8pmu_branch_probe'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 1140 | armv8pmu_branch_probe(cpu_pmu); | ^ 55 warnings and 7 errors generated. vim +/armv8pmu_branch_enable +714 drivers/perf/arm_pmuv3.c 701 702 static void armv8pmu_enable_event(struct perf_event *event) 703 { 704 /* 705 * Enable counter and interrupt, and set the counter to count 706 * the event that we're interested in. 
707 */ 708 armv8pmu_disable_event_counter(event); 709 armv8pmu_write_event_type(event); 710 armv8pmu_enable_event_irq(event); 711 armv8pmu_enable_event_counter(event); 712 713 if (has_branch_stack(event)) > 714 armv8pmu_branch_enable(event); 715 } 716 717 static void armv8pmu_disable_event(struct perf_event *event) 718 { 719 if (has_branch_stack(event)) > 720 armv8pmu_branch_disable(event); 721 722 armv8pmu_disable_event_counter(event); 723 armv8pmu_disable_event_irq(event); 724 } 725 726 static void armv8pmu_start(struct arm_pmu *cpu_pmu) 727 { 728 struct perf_event_context *ctx; 729 int nr_user = 0; 730 731 ctx = perf_cpu_task_ctx(); 732 if (ctx) 733 nr_user = ctx->nr_user; 734 735 if (sysctl_perf_user_access && nr_user) 736 armv8pmu_enable_user_access(cpu_pmu); 737 else 738 armv8pmu_disable_user_access(); 739 740 /* Enable all counters */ 741 armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); 742 } 743 744 static void armv8pmu_stop(struct arm_pmu *cpu_pmu) 745 { 746 /* Disable all counters */ 747 armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); 748 } 749 750 static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) 751 { 752 u32 pmovsr; 753 struct perf_sample_data data; 754 struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); 755 struct pt_regs *regs; 756 int idx; 757 758 /* 759 * Get and reset the IRQ flags 760 */ 761 pmovsr = armv8pmu_getreset_flags(); 762 763 /* 764 * Did an overflow occur? 765 */ 766 if (!armv8pmu_has_overflowed(pmovsr)) 767 return IRQ_NONE; 768 769 /* 770 * Handle the counter(s) overflow(s) 771 */ 772 regs = get_irq_regs(); 773 774 /* 775 * Stop the PMU while processing the counter overflows 776 * to prevent skews in group events. 777 */ 778 armv8pmu_stop(cpu_pmu); 779 for (idx = 0; idx < cpu_pmu->num_events; ++idx) { 780 struct perf_event *event = cpuc->events[idx]; 781 struct hw_perf_event *hwc; 782 783 /* Ignore if we don't have an event. 
*/ 784 if (!event) 785 continue; 786 787 /* 788 * We have a single interrupt for all counters. Check that 789 * each counter has overflowed before we process it. 790 */ 791 if (!armv8pmu_counter_has_overflowed(pmovsr, idx)) 792 continue; 793 794 hwc = &event->hw; 795 armpmu_event_update(event); 796 perf_sample_data_init(&data, 0, hwc->last_period); 797 if (!armpmu_event_set_period(event)) 798 continue; 799 800 if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) { > 801 armv8pmu_branch_read(cpuc, event); 802 perf_sample_save_brstack(&data, event, &cpuc->branches->branch_stack); 803 } 804 805 /* 806 * Perf event overflow will queue the processing of the event as 807 * an irq_work which will be taken care of in the handling of 808 * IPI_IRQ_WORK. 809 */ 810 if (perf_event_overflow(event, &data, regs)) 811 cpu_pmu->disable(event); 812 } 813 armv8pmu_start(cpu_pmu); 814 815 return IRQ_HANDLED; 816 } 817 818 static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, 819 struct arm_pmu *cpu_pmu) 820 { 821 int idx; 822 823 for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) { 824 if (!test_and_set_bit(idx, cpuc->used_mask)) 825 return idx; 826 } 827 return -EAGAIN; 828 } 829 830 static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, 831 struct arm_pmu *cpu_pmu) 832 { 833 int idx; 834 835 /* 836 * Chaining requires two consecutive event counters, where 837 * the lower idx must be even. 
838 */ 839 for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) { 840 if (!test_and_set_bit(idx, cpuc->used_mask)) { 841 /* Check if the preceding even counter is available */ 842 if (!test_and_set_bit(idx - 1, cpuc->used_mask)) 843 return idx; 844 /* Release the Odd counter */ 845 clear_bit(idx, cpuc->used_mask); 846 } 847 } 848 return -EAGAIN; 849 } 850 851 static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, 852 struct perf_event *event) 853 { 854 struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); 855 struct hw_perf_event *hwc = &event->hw; 856 unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; 857 858 /* Always prefer to place a cycle counter into the cycle counter. */ 859 if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { 860 if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) 861 return ARMV8_IDX_CYCLE_COUNTER; 862 else if (armv8pmu_event_is_64bit(event) && 863 armv8pmu_event_want_user_access(event) && 864 !armv8pmu_has_long_event(cpu_pmu)) 865 return -EAGAIN; 866 } 867 868 /* 869 * Otherwise use events counters 870 */ 871 if (armv8pmu_event_is_chained(event)) 872 return armv8pmu_get_chain_idx(cpuc, cpu_pmu); 873 else 874 return armv8pmu_get_single_idx(cpuc, cpu_pmu); 875 } 876 877 static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, 878 struct perf_event *event) 879 { 880 int idx = event->hw.idx; 881 882 clear_bit(idx, cpuc->used_mask); 883 if (armv8pmu_event_is_chained(event)) 884 clear_bit(idx - 1, cpuc->used_mask); 885 } 886 887 static int armv8pmu_user_event_idx(struct perf_event *event) 888 { 889 if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event)) 890 return 0; 891 892 /* 893 * We remap the cycle counter index to 32 to 894 * match the offset applied to the rest of 895 * the counter indices. 
896 */ 897 if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER) 898 return ARMV8_IDX_CYCLE_COUNTER_USER; 899 900 return event->hw.idx; 901 } 902 903 static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) 904 { 905 struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu); 906 907 if (sched_in && armpmu->has_branch_stack) > 908 armv8pmu_branch_reset(); 909 } 910
On 6/16/23 05:12, kernel test robot wrote: > Hi Anshuman, > > kernel test robot noticed the following build errors: > > [auto build test ERROR on arm64/for-next/core] > [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615] > [If your patch is applied to the wrong git tree, kindly drop us a note. > And when submitting patch, we suggest to use '--base' as documented in > https://git-scm.com/docs/git-format-patch#_base_tree_information] > > url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352 > base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core > patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com > patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU > config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/202306160706.Uei5XDoi-lkp@intel.com/config) > compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a) > reproduce (this is a W=1 build): > mkdir -p ~/bin > wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross > chmod +x ~/bin/make.cross > # install arm cross compiling tool for clang build > # apt-get install binutils-arm-linux-gnueabi > git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git > git fetch arm64 for-next/core > git checkout arm64/for-next/core > b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com > # save the config file > mkdir build_dir && cp config build_dir/.config > COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig > COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/ I am unable to reproduce this on mainline 
6.4-rc6 via the default cross compiler on a W=1 build. Looking at all the other problems reported on the file, it seems something is not right here. The reported build problems around these callbacks, i.e. armv8pmu_branch_XXXX(), do not make sense, as they are available via CONFIG_PERF_EVENTS, which is also enabled along with CONFIG_ARM_PMUV3 in this test config.
Hi Anshuman, kernel test robot noticed the following build errors: [auto build test ERROR on arm64/for-next/core] [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352 base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU config: arm-allmodconfig (https://download.01.org/0day-ci/archive/20230616/202306161154.PwcAiVfV-lkp@intel.com/config) compiler: arm-linux-gnueabi-gcc (GCC) 12.3.0 reproduce (this is a W=1 build): mkdir -p ~/bin wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git git fetch arm64 for-next/core git checkout arm64/for-next/core b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/ If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202306161154.PwcAiVfV-lkp@intel.com/ All errors (new ones prefixed by >>): drivers/perf/arm_pmuv3.c:143:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD' 143 | [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:122:65: warning: initialized field overwritten [-Woverride-init] 122 | #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041 | ^~~~~~ drivers/perf/arm_pmuv3.c:144:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR' 144 | [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:122:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[0][1][0]') 122 | #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041 | ^~~~~~ drivers/perf/arm_pmuv3.c:144:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR' 144 | [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:124:65: warning: initialized field overwritten [-Woverride-init] 124 | #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043 | ^~~~~~ drivers/perf/arm_pmuv3.c:145:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR' 145 | [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:124:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[0][1][1]') 124 | #define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043 | ^~~~~~ drivers/perf/arm_pmuv3.c:145:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR' 145 | [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = 
ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:133:65: warning: initialized field overwritten [-Woverride-init] 133 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E | ^~~~~~ drivers/perf/arm_pmuv3.c:147:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD' 147 | [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:133:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[3][0][0]') 133 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E | ^~~~~~ drivers/perf/arm_pmuv3.c:147:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD' 147 | [C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:134:65: warning: initialized field overwritten [-Woverride-init] 134 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F | ^~~~~~ drivers/perf/arm_pmuv3.c:148:52: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR' 148 | [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:134:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[3][1][0]') 134 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F | ^~~~~~ drivers/perf/arm_pmuv3.c:148:52: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR' 148 | [C(DTLB)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:131:65: warning: initialized field overwritten [-Woverride-init] 131 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C | ^~~~~~ drivers/perf/arm_pmuv3.c:149:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD' 149 | [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
include/linux/perf/arm_pmuv3.h:131:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[3][0][1]') 131 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C | ^~~~~~ drivers/perf/arm_pmuv3.c:149:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD' 149 | [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:132:65: warning: initialized field overwritten [-Woverride-init] 132 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D | ^~~~~~ drivers/perf/arm_pmuv3.c:150:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR' 150 | [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:132:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[3][1][1]') 132 | #define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D | ^~~~~~ drivers/perf/arm_pmuv3.c:150:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR' 150 | [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:148:65: warning: initialized field overwritten [-Woverride-init] 148 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060 | ^~~~~~ drivers/perf/arm_pmuv3.c:152:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD' 152 | [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:148:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[6][0][0]') 148 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060 | ^~~~~~ drivers/perf/arm_pmuv3.c:152:51: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD' 152 | [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
include/linux/perf/arm_pmuv3.h:149:65: warning: initialized field overwritten [-Woverride-init] 149 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061 | ^~~~~~ drivers/perf/arm_pmuv3.c:153:52: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR' 153 | [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/perf/arm_pmuv3.h:149:65: note: (near initialization for 'armv8_vulcan_perf_cache_map[6][1][0]') 149 | #define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061 | ^~~~~~ drivers/perf/arm_pmuv3.c:153:52: note: in expansion of macro 'ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR' 153 | [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/perf/arm_pmuv3.c: In function 'armv8pmu_enable_event': >> drivers/perf/arm_pmuv3.c:714:17: error: implicit declaration of function 'armv8pmu_branch_enable'; did you mean 'static_branch_enable'? [-Werror=implicit-function-declaration] 714 | armv8pmu_branch_enable(event); | ^~~~~~~~~~~~~~~~~~~~~~ | static_branch_enable drivers/perf/arm_pmuv3.c: In function 'armv8pmu_disable_event': >> drivers/perf/arm_pmuv3.c:720:17: error: implicit declaration of function 'armv8pmu_branch_disable'; did you mean 'static_branch_disable'? [-Werror=implicit-function-declaration] 720 | armv8pmu_branch_disable(event); | ^~~~~~~~~~~~~~~~~~~~~~~ | static_branch_disable drivers/perf/arm_pmuv3.c: In function 'armv8pmu_handle_irq': >> drivers/perf/arm_pmuv3.c:801:25: error: implicit declaration of function 'armv8pmu_branch_read'; did you mean 'armv8pmu_pmcr_read'? 
[-Werror=implicit-function-declaration] 801 | armv8pmu_branch_read(cpuc, event); | ^~~~~~~~~~~~~~~~~~~~ | armv8pmu_pmcr_read drivers/perf/arm_pmuv3.c: In function 'armv8pmu_sched_task': >> drivers/perf/arm_pmuv3.c:908:17: error: implicit declaration of function 'armv8pmu_branch_reset' [-Werror=implicit-function-declaration] 908 | armv8pmu_branch_reset(); | ^~~~~~~~~~~~~~~~~~~~~ drivers/perf/arm_pmuv3.c: In function '__armv8_pmuv3_map_event': >> drivers/perf/arm_pmuv3.c:1021:41: error: implicit declaration of function 'armv8pmu_branch_attr_valid' [-Werror=implicit-function-declaration] 1021 | if (has_branch_stack(event) && !armv8pmu_branch_attr_valid(event)) | ^~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/perf/arm_pmuv3.c: In function '__armv8pmu_probe_pmu': >> drivers/perf/arm_pmuv3.c:1140:9: error: implicit declaration of function 'armv8pmu_branch_probe'; did you mean 'arm_pmu_acpi_probe'? [-Werror=implicit-function-declaration] 1140 | armv8pmu_branch_probe(cpu_pmu); | ^~~~~~~~~~~~~~~~~~~~~ | arm_pmu_acpi_probe cc1: some warnings being treated as errors vim +714 drivers/perf/arm_pmuv3.c 701 702 static void armv8pmu_enable_event(struct perf_event *event) 703 { 704 /* 705 * Enable counter and interrupt, and set the counter to count 706 * the event that we're interested in. 
707 */ 708 armv8pmu_disable_event_counter(event); 709 armv8pmu_write_event_type(event); 710 armv8pmu_enable_event_irq(event); 711 armv8pmu_enable_event_counter(event); 712 713 if (has_branch_stack(event)) > 714 armv8pmu_branch_enable(event); 715 } 716 717 static void armv8pmu_disable_event(struct perf_event *event) 718 { 719 if (has_branch_stack(event)) > 720 armv8pmu_branch_disable(event); 721 722 armv8pmu_disable_event_counter(event); 723 armv8pmu_disable_event_irq(event); 724 } 725 726 static void armv8pmu_start(struct arm_pmu *cpu_pmu) 727 { 728 struct perf_event_context *ctx; 729 int nr_user = 0; 730 731 ctx = perf_cpu_task_ctx(); 732 if (ctx) 733 nr_user = ctx->nr_user; 734 735 if (sysctl_perf_user_access && nr_user) 736 armv8pmu_enable_user_access(cpu_pmu); 737 else 738 armv8pmu_disable_user_access(); 739 740 /* Enable all counters */ 741 armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E); 742 } 743 744 static void armv8pmu_stop(struct arm_pmu *cpu_pmu) 745 { 746 /* Disable all counters */ 747 armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E); 748 } 749 750 static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) 751 { 752 u32 pmovsr; 753 struct perf_sample_data data; 754 struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); 755 struct pt_regs *regs; 756 int idx; 757 758 /* 759 * Get and reset the IRQ flags 760 */ 761 pmovsr = armv8pmu_getreset_flags(); 762 763 /* 764 * Did an overflow occur? 765 */ 766 if (!armv8pmu_has_overflowed(pmovsr)) 767 return IRQ_NONE; 768 769 /* 770 * Handle the counter(s) overflow(s) 771 */ 772 regs = get_irq_regs(); 773 774 /* 775 * Stop the PMU while processing the counter overflows 776 * to prevent skews in group events. 777 */ 778 armv8pmu_stop(cpu_pmu); 779 for (idx = 0; idx < cpu_pmu->num_events; ++idx) { 780 struct perf_event *event = cpuc->events[idx]; 781 struct hw_perf_event *hwc; 782 783 /* Ignore if we don't have an event. 
*/ 784 if (!event) 785 continue; 786 787 /* 788 * We have a single interrupt for all counters. Check that 789 * each counter has overflowed before we process it. 790 */ 791 if (!armv8pmu_counter_has_overflowed(pmovsr, idx)) 792 continue; 793 794 hwc = &event->hw; 795 armpmu_event_update(event); 796 perf_sample_data_init(&data, 0, hwc->last_period); 797 if (!armpmu_event_set_period(event)) 798 continue; 799 800 if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) { > 801 armv8pmu_branch_read(cpuc, event); 802 perf_sample_save_brstack(&data, event, &cpuc->branches->branch_stack); 803 } 804 805 /* 806 * Perf event overflow will queue the processing of the event as 807 * an irq_work which will be taken care of in the handling of 808 * IPI_IRQ_WORK. 809 */ 810 if (perf_event_overflow(event, &data, regs)) 811 cpu_pmu->disable(event); 812 } 813 armv8pmu_start(cpu_pmu); 814 815 return IRQ_HANDLED; 816 } 817 818 static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, 819 struct arm_pmu *cpu_pmu) 820 { 821 int idx; 822 823 for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) { 824 if (!test_and_set_bit(idx, cpuc->used_mask)) 825 return idx; 826 } 827 return -EAGAIN; 828 } 829 830 static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc, 831 struct arm_pmu *cpu_pmu) 832 { 833 int idx; 834 835 /* 836 * Chaining requires two consecutive event counters, where 837 * the lower idx must be even. 
838 */ 839 for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) { 840 if (!test_and_set_bit(idx, cpuc->used_mask)) { 841 /* Check if the preceding even counter is available */ 842 if (!test_and_set_bit(idx - 1, cpuc->used_mask)) 843 return idx; 844 /* Release the Odd counter */ 845 clear_bit(idx, cpuc->used_mask); 846 } 847 } 848 return -EAGAIN; 849 } 850 851 static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, 852 struct perf_event *event) 853 { 854 struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); 855 struct hw_perf_event *hwc = &event->hw; 856 unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; 857 858 /* Always prefer to place a cycle counter into the cycle counter. */ 859 if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { 860 if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) 861 return ARMV8_IDX_CYCLE_COUNTER; 862 else if (armv8pmu_event_is_64bit(event) && 863 armv8pmu_event_want_user_access(event) && 864 !armv8pmu_has_long_event(cpu_pmu)) 865 return -EAGAIN; 866 } 867 868 /* 869 * Otherwise use events counters 870 */ 871 if (armv8pmu_event_is_chained(event)) 872 return armv8pmu_get_chain_idx(cpuc, cpu_pmu); 873 else 874 return armv8pmu_get_single_idx(cpuc, cpu_pmu); 875 } 876 877 static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc, 878 struct perf_event *event) 879 { 880 int idx = event->hw.idx; 881 882 clear_bit(idx, cpuc->used_mask); 883 if (armv8pmu_event_is_chained(event)) 884 clear_bit(idx - 1, cpuc->used_mask); 885 } 886 887 static int armv8pmu_user_event_idx(struct perf_event *event) 888 { 889 if (!sysctl_perf_user_access || !armv8pmu_event_has_user_read(event)) 890 return 0; 891 892 /* 893 * We remap the cycle counter index to 32 to 894 * match the offset applied to the rest of 895 * the counter indices. 
896 */ 897 if (event->hw.idx == ARMV8_IDX_CYCLE_COUNTER) 898 return ARMV8_IDX_CYCLE_COUNTER_USER; 899 900 return event->hw.idx; 901 } 902 903 static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) 904 { 905 struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu); 906 907 if (sched_in && armpmu->has_branch_stack) > 908 armv8pmu_branch_reset(); 909 } 910
On Fri, Jun 16, 2023 at 06:57:52AM +0530, Anshuman Khandual wrote: > On 6/16/23 05:12, kernel test robot wrote: > > kernel test robot noticed the following build errors: > > > > [auto build test ERROR on arm64/for-next/core] > > [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615] > > [If your patch is applied to the wrong git tree, kindly drop us a note. > > And when submitting patch, we suggest to use '--base' as documented in > > https://git-scm.com/docs/git-format-patch#_base_tree_information] > > > > url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352 > > base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core > > patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com > > patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU > > config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/202306160706.Uei5XDoi-lkp@intel.com/config) > > compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a) > > reproduce (this is a W=1 build): > > mkdir -p ~/bin > > wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross > > chmod +x ~/bin/make.cross > > # install arm cross compiling tool for clang build > > # apt-get install binutils-arm-linux-gnueabi > > git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git > > git fetch arm64 for-next/core > > git checkout arm64/for-next/core > > b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com > > # save the config file > > mkdir build_dir && cp config build_dir/.config > > COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig > > COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang 
~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/ > > I am unable to reproduce this on mainline 6.4-rc6 via default cross compiler > on a W=1 build. Looking at all other problems reported on the file, it seems > something is not right here. Reported build problems around these callbacks, > i.e armv8pmu_branch_XXXX() do not make sense as they are available via config > CONFIG_PERF_EVENTS which is also enabled along with CONFIG_ARM_PMUV3 in this > test config. Have you tried applying this series on top of the arm64 for-next/core branch? That's what the robot is testing (in the absence of a --base option when generating the patches).
On 6/16/23 14:51, Catalin Marinas wrote: > On Fri, Jun 16, 2023 at 06:57:52AM +0530, Anshuman Khandual wrote: >> On 6/16/23 05:12, kernel test robot wrote: >>> kernel test robot noticed the following build errors: >>> >>> [auto build test ERROR on arm64/for-next/core] >>> [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615] >>> [If your patch is applied to the wrong git tree, kindly drop us a note. >>> And when submitting patch, we suggest to use '--base' as documented in >>> https://git-scm.com/docs/git-format-patch#_base_tree_information] >>> >>> url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352 >>> base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core >>> patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com >>> patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU >>> config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/202306160706.Uei5XDoi-lkp@intel.com/config) >>> compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a) >>> reproduce (this is a W=1 build): >>> mkdir -p ~/bin >>> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross >>> chmod +x ~/bin/make.cross >>> # install arm cross compiling tool for clang build >>> # apt-get install binutils-arm-linux-gnueabi >>> git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git >>> git fetch arm64 for-next/core >>> git checkout arm64/for-next/core >>> b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com >>> # save the config file >>> mkdir build_dir && cp config build_dir/.config >>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig >>> 
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/ >> >> I am unable to reproduce this on mainline 6.4-rc6 via default cross compiler >> on a W=1 build. Looking at all other problems reported on the file, it seems >> something is not right here. Reported build problems around these callbacks, >> i.e armv8pmu_branch_XXXX() do not make sense as they are available via config >> CONFIG_PERF_EVENTS which is also enabled along with CONFIG_ARM_PMUV3 in this >> test config. > > Have you tried applying this series on top of the arm64 for-next/core > branch? That's what the robot it testing (in the absence of a --base > option when generating the patches). Right, it turned out to be a build problem on the arm (32-bit) platform instead. After arm_pmuv3.c moved into common ./drivers/perf from ./arch/arm64/kernel/, it can no longer access the functions defined in arch/arm64/include/asm/perf_event.h without breaking the arm (32-bit) build. The following code block needs to be moved out from arch/arm64/include/asm/perf_event.h into include/linux/perf/arm_pmuv3.h (which is preferred as all call sites are inside drivers/perf/arm_pmuv3.c) or maybe arm_pmu.h (which is one step higher in the abstraction).
struct pmu_hw_events; struct arm_pmu; struct perf_event; #ifdef CONFIG_PERF_EVENTS static inline bool has_branch_stack(struct perf_event *event); #ifdef CONFIG_ARM64_BRBE void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event); bool armv8pmu_branch_attr_valid(struct perf_event *event); void armv8pmu_branch_enable(struct perf_event *event); void armv8pmu_branch_disable(struct perf_event *event); void armv8pmu_branch_probe(struct arm_pmu *arm_pmu); void armv8pmu_branch_reset(void); int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu); void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu); void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx); #else static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event) { WARN_ON_ONCE(!has_branch_stack(event)); } static inline bool armv8pmu_branch_attr_valid(struct perf_event *event) { WARN_ON_ONCE(!has_branch_stack(event)); return false; } static inline void armv8pmu_branch_enable(struct perf_event *event) { WARN_ON_ONCE(!has_branch_stack(event)); } static inline void armv8pmu_branch_disable(struct perf_event *event) { WARN_ON_ONCE(!has_branch_stack(event)); } static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) { } static inline void armv8pmu_branch_reset(void) { } static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *arm_pmu) { return 0; } static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *arm_pmu) { } static inline void armv8pmu_branch_save(struct arm_pmu *arm_pmu, void *ctx) { } #endif #endif
On Mon, 19 Jun 2023 06:45:07 +0100, Anshuman Khandual <anshuman.khandual@arm.com> wrote: > > > > On 6/16/23 14:51, Catalin Marinas wrote: > > On Fri, Jun 16, 2023 at 06:57:52AM +0530, Anshuman Khandual wrote: > >> On 6/16/23 05:12, kernel test robot wrote: > >>> kernel test robot noticed the following build errors: > >>> > >>> [auto build test ERROR on arm64/for-next/core] > >>> [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615] > >>> [If your patch is applied to the wrong git tree, kindly drop us a note. > >>> And when submitting patch, we suggest to use '--base' as documented in > >>> https://git-scm.com/docs/git-format-patch#_base_tree_information] > >>> > >>> url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352 > >>> base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core > >>> patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com > >>> patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU > >>> config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/202306160706.Uei5XDoi-lkp@intel.com/config) > >>> compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a) > >>> reproduce (this is a W=1 build): > >>> mkdir -p ~/bin > >>> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross > >>> chmod +x ~/bin/make.cross > >>> # install arm cross compiling tool for clang build > >>> # apt-get install binutils-arm-linux-gnueabi > >>> git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git > >>> git fetch arm64 for-next/core > >>> git checkout arm64/for-next/core > >>> b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com > >>> # save the config file > >>> mkdir build_dir && cp 
config build_dir/.config > >>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig > >>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/ > >> > >> I am unable to reproduce this on mainline 6.4-rc6 via default cross compiler > >> on a W=1 build. Looking at all other problems reported on the file, it seems > >> something is not right here. Reported build problems around these callbacks, > >> i.e armv8pmu_branch_XXXX() do not make sense as they are available via config > >> CONFIG_PERF_EVENTS which is also enabled along with CONFIG_ARM_PMUV3 in this > >> test config. > > > > Have you tried applying this series on top of the arm64 for-next/core > > branch? That's what the robot it testing (in the absence of a --base > > option when generating the patches). > > Right, it turned out to be a build problem on arm (32 bit) platform instead. > After arm_pmuv3.c moved into common ./drivers/perf from ./arch/arm64/kernel/, > it can no longer access arch/arm64/include/asm/perf_event.h defined functions > without breaking arm (32) bit. The following code block needs to be moved out > from arch/arm64/include/asm/perf_event.h into include/linux/perf/arm_pmuv3.h > (which is preferred as all call sites are inside drivers/perf/arm_pmuv3.c) or > may be arm_pmu.h (which is one step higher in the abstraction). No, that's the wrong approach. The 32bit backend must have its own stubs for the stuff it implements or not. Just add something like the patch below, and please *test* that a 32bit VM using PMUv3 doesn't have any regression. Thanks, M. 
From 017362ca518e6d6ac3262514d1f7f27e73232799 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Mon, 19 Jun 2023 10:05:52 +0100 Subject: [PATCH] 32bit hack Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm/include/asm/arm_pmuv3.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h index f4db3e75d75f..c4bcb7a18267 100644 --- a/arch/arm/include/asm/arm_pmuv3.h +++ b/arch/arm/include/asm/arm_pmuv3.h @@ -244,4 +244,22 @@ static inline bool is_pmuv3p5(int pmuver) return pmuver >= ARMV8_PMU_DFR_VER_V3P5; } +/* BRBE stubs */ +static inline void armv8pmu_branch_enable(struct perf_event *event) { } +static inline void armv8pmu_branch_disable(struct perf_event *event) { } +static inline void armv8pmu_branch_read(struct pmu_hw_events * cpuc, + struct perf_event *event) { } +static inline void armv8pmu_branch_save(struct arm_pmu *armpmu, void *ctx) {} +static inline void armv8pmu_branch_reset(void) {} +static inline bool armv8pmu_branch_attr_valid(struct perf_event *event) +{ + return false; +} +static inline void armv8pmu_branch_probe(struct arm_pmu *armpmu) {} +static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *armpmu) +{ + return 0; +} +static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *armpmu) {} + #endif
On 6/19/23 14:38, Marc Zyngier wrote: > On Mon, 19 Jun 2023 06:45:07 +0100, > Anshuman Khandual <anshuman.khandual@arm.com> wrote: >> >> >> >> On 6/16/23 14:51, Catalin Marinas wrote: >>> On Fri, Jun 16, 2023 at 06:57:52AM +0530, Anshuman Khandual wrote: >>>> On 6/16/23 05:12, kernel test robot wrote: >>>>> kernel test robot noticed the following build errors: >>>>> >>>>> [auto build test ERROR on arm64/for-next/core] >>>>> [also build test ERROR on tip/perf/core acme/perf/core linus/master v6.4-rc6 next-20230615] >>>>> [If your patch is applied to the wrong git tree, kindly drop us a note. >>>>> And when submitting patch, we suggest to use '--base' as documented in >>>>> https://git-scm.com/docs/git-format-patch#_base_tree_information] >>>>> >>>>> url: https://github.com/intel-lab-lkp/linux/commits/Anshuman-Khandual/drivers-perf-arm_pmu-Add-new-sched_task-callback/20230615-223352 >>>>> base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core >>>>> patch link: https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual%40arm.com >>>>> patch subject: [PATCH V12 05/10] arm64/perf: Add branch stack support in ARMV8 PMU >>>>> config: arm-randconfig-r004-20230615 (https://download.01.org/0day-ci/archive/20230616/202306160706.Uei5XDoi-lkp@intel.com/config) >>>>> compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a) >>>>> reproduce (this is a W=1 build): >>>>> mkdir -p ~/bin >>>>> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross >>>>> chmod +x ~/bin/make.cross >>>>> # install arm cross compiling tool for clang build >>>>> # apt-get install binutils-arm-linux-gnueabi >>>>> git remote add arm64 https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git >>>>> git fetch arm64 for-next/core >>>>> git checkout arm64/for-next/core >>>>> b4 shazam https://lore.kernel.org/r/20230615133239.442736-6-anshuman.khandual@arm.com >>>>> # 
save the config file >>>>> mkdir build_dir && cp config build_dir/.config >>>>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm olddefconfig >>>>> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash drivers/perf/ >>>> >>>> I am unable to reproduce this on mainline 6.4-rc6 via default cross compiler >>>> on a W=1 build. Looking at all other problems reported on the file, it seems >>>> something is not right here. Reported build problems around these callbacks, >>>> i.e armv8pmu_branch_XXXX() do not make sense as they are available via config >>>> CONFIG_PERF_EVENTS which is also enabled along with CONFIG_ARM_PMUV3 in this >>>> test config. >>> >>> Have you tried applying this series on top of the arm64 for-next/core >>> branch? That's what the robot it testing (in the absence of a --base >>> option when generating the patches). >> >> Right, it turned out to be a build problem on arm (32 bit) platform instead. >> After arm_pmuv3.c moved into common ./drivers/perf from ./arch/arm64/kernel/, >> it can no longer access arch/arm64/include/asm/perf_event.h defined functions >> without breaking arm (32) bit. The following code block needs to be moved out >> from arch/arm64/include/asm/perf_event.h into include/linux/perf/arm_pmuv3.h >> (which is preferred as all call sites are inside drivers/perf/arm_pmuv3.c) or >> may be arm_pmu.h (which is one step higher in the abstraction). > > No, that's the wrong approach. The 32bit backend must have its own > stubs for the stuff it implements or not. Okay. > > Just add something like the patch below, and please *test* that a > 32bit VM using PMUv3 doesn't have any regression. Sure. > > Thanks, > > M. 
> >>From 017362ca518e6d6ac3262514d1f7f27e73232799 Mon Sep 17 00:00:00 2001 > From: Marc Zyngier <maz@kernel.org> > Date: Mon, 19 Jun 2023 10:05:52 +0100 > Subject: [PATCH] 32bit hack > > Signed-off-by: Marc Zyngier <maz@kernel.org> > --- > arch/arm/include/asm/arm_pmuv3.h | 18 ++++++++++++++++++ > 1 file changed, 18 insertions(+) > > diff --git a/arch/arm/include/asm/arm_pmuv3.h b/arch/arm/include/asm/arm_pmuv3.h > index f4db3e75d75f..c4bcb7a18267 100644 > --- a/arch/arm/include/asm/arm_pmuv3.h > +++ b/arch/arm/include/asm/arm_pmuv3.h > @@ -244,4 +244,22 @@ static inline bool is_pmuv3p5(int pmuver) > return pmuver >= ARMV8_PMU_DFR_VER_V3P5; > } > > +/* BRBE stubs */ These stubs also need to be wrapped in #ifdef CONFIG_PERF_EVENTS > +static inline void armv8pmu_branch_enable(struct perf_event *event) { } > +static inline void armv8pmu_branch_disable(struct perf_event *event) { } > +static inline void armv8pmu_branch_read(struct pmu_hw_events * cpuc, > + struct perf_event *event) { } > +static inline void armv8pmu_branch_save(struct arm_pmu *armpmu, void *ctx) {} > +static inline void armv8pmu_branch_reset(void) {} > +static inline bool armv8pmu_branch_attr_valid(struct perf_event *event) > +{ > + return false; > +} > +static inline void armv8pmu_branch_probe(struct arm_pmu *armpmu) {} > +static inline int armv8pmu_task_ctx_cache_alloc(struct arm_pmu *armpmu) > +{ > + return 0; > +} > +static inline void armv8pmu_task_ctx_cache_free(struct arm_pmu *armpmu) {} > + > #endif Sure, will make all the necessary changes.
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index eb7071c9eb34..ebc392ba3559 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -24,4 +24,35 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); (regs)->pstate = PSR_MODE_EL1h; \ } +struct pmu_hw_events; +struct arm_pmu; +struct perf_event; + +#ifdef CONFIG_PERF_EVENTS +static inline bool has_branch_stack(struct perf_event *event); + +static inline void armv8pmu_branch_read(struct pmu_hw_events *cpuc, struct perf_event *event) +{ + WARN_ON_ONCE(!has_branch_stack(event)); +} + +static inline bool armv8pmu_branch_attr_valid(struct perf_event *event) +{ + WARN_ON_ONCE(!has_branch_stack(event)); + return false; +} + +static inline void armv8pmu_branch_enable(struct perf_event *event) +{ + WARN_ON_ONCE(!has_branch_stack(event)); +} + +static inline void armv8pmu_branch_disable(struct perf_event *event) +{ + WARN_ON_ONCE(!has_branch_stack(event)); +} + +static inline void armv8pmu_branch_probe(struct arm_pmu *arm_pmu) { } +static inline void armv8pmu_branch_reset(void) { } +#endif #endif diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c index c98e4039386d..54c80f393eb6 100644 --- a/drivers/perf/arm_pmuv3.c +++ b/drivers/perf/arm_pmuv3.c @@ -705,38 +705,21 @@ static void armv8pmu_enable_event(struct perf_event *event) * Enable counter and interrupt, and set the counter to count * the event that we're interested in. */ - - /* - * Disable counter - */ armv8pmu_disable_event_counter(event); - - /* - * Set event. 
- */ armv8pmu_write_event_type(event); - - /* - * Enable interrupt for this counter - */ armv8pmu_enable_event_irq(event); - - /* - * Enable counter - */ armv8pmu_enable_event_counter(event); + + if (has_branch_stack(event)) + armv8pmu_branch_enable(event); } static void armv8pmu_disable_event(struct perf_event *event) { - /* - * Disable counter - */ - armv8pmu_disable_event_counter(event); + if (has_branch_stack(event)) + armv8pmu_branch_disable(event); - /* - * Disable interrupt for this counter - */ + armv8pmu_disable_event_counter(event); armv8pmu_disable_event_irq(event); } @@ -814,6 +797,11 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) if (!armpmu_event_set_period(event)) continue; + if (has_branch_stack(event) && !WARN_ON(!cpuc->branches)) { + armv8pmu_branch_read(cpuc, event); + perf_sample_save_brstack(&data, event, &cpuc->branches->branch_stack); + } + /* * Perf event overflow will queue the processing of the event as * an irq_work which will be taken care of in the handling of @@ -912,6 +900,14 @@ static int armv8pmu_user_event_idx(struct perf_event *event) return event->hw.idx; } +static void armv8pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +{ + struct arm_pmu *armpmu = to_arm_pmu(pmu_ctx->pmu); + + if (sched_in && armpmu->has_branch_stack) + armv8pmu_branch_reset(); +} + /* * Add an event filter to a given event. 
*/ @@ -982,6 +978,9 @@ static void armv8pmu_reset(void *info) pmcr |= ARMV8_PMU_PMCR_LP; armv8pmu_pmcr_write(pmcr); + + if (cpu_pmu->has_branch_stack) + armv8pmu_branch_reset(); } static int __armv8_pmuv3_map_event_id(struct arm_pmu *armpmu, @@ -1019,6 +1018,9 @@ static int __armv8_pmuv3_map_event(struct perf_event *event, hw_event_id = __armv8_pmuv3_map_event_id(armpmu, event); + if (has_branch_stack(event) && !armv8pmu_branch_attr_valid(event)) + return -EOPNOTSUPP; + /* * CHAIN events only work when paired with an adjacent counter, and it * never makes sense for a user to open one in isolation, as they'll be @@ -1135,6 +1137,33 @@ static void __armv8pmu_probe_pmu(void *info) cpu_pmu->reg_pmmir = read_pmmir(); else cpu_pmu->reg_pmmir = 0; + armv8pmu_branch_probe(cpu_pmu); +} + +static int branch_records_alloc(struct arm_pmu *armpmu) +{ + struct branch_records __percpu *records; + int cpu; + + records = alloc_percpu_gfp(struct branch_records, GFP_KERNEL); + if (!records) + return -ENOMEM; + + /* + * FIXME: Memory allocated via records gets completely + * consumed here, never required to be freed up later. Hence + * losing access to on stack 'records' is acceptable. + * Otherwise this alloc handle has to be saved some where. + */ + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events_cpu; + struct branch_records *records_cpu; + + events_cpu = per_cpu_ptr(armpmu->hw_events, cpu); + records_cpu = per_cpu_ptr(records, cpu); + events_cpu->branches = records_cpu; + } + return 0; } static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) @@ -1151,7 +1180,15 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu) if (ret) return ret; - return probe.present ? 
0 : -ENODEV; + if (!probe.present) + return -ENODEV; + + if (cpu_pmu->has_branch_stack) { + ret = branch_records_alloc(cpu_pmu); + if (ret) + return ret; + } + return 0; } static void armv8pmu_disable_user_access_ipi(void *unused) @@ -1214,6 +1251,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx; + cpu_pmu->sched_task = armv8pmu_sched_task; cpu_pmu->name = name; cpu_pmu->map_event = map_event;