Message ID | 20230206201455.1790329-6-evan@rivosinc.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Palmer Dabbelt |
Headers | show |
Series | RISC-V Hardware Probing User Interface | expand |
Context | Check | Description |
---|---|---|
conchuod/tree_selection | fail | Failed to apply to next/pending-fixes or riscv/for-next |
Hi Evan, Thank you for the patch! Yet something to improve: [auto build test ERROR on shuah-kselftest/next] [also build test ERROR on shuah-kselftest/fixes robh/for-next soc/for-next linus/master v6.2-rc7] [cannot apply to next-20230207] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Evan-Green/RISC-V-Move-struct-riscv_cpuinfo-to-new-header/20230207-041746 base: https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next patch link: https://lore.kernel.org/r/20230206201455.1790329-6-evan%40rivosinc.com patch subject: [PATCH v2 5/6] RISC-V: hwprobe: Support probing of misaligned access performance config: riscv-randconfig-s052-20230206 (https://download.01.org/0day-ci/archive/20230207/202302071444.L48pajcK-lkp@intel.com/config) compiler: riscv64-linux-gcc (GCC) 12.1.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # apt-get install sparse # sparse version: v0.6.4-39-gce1a6720-dirty # https://github.com/intel-lab-lkp/linux/commit/9e77be253d64809a98f31c17abd12761dd9fad8e git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Evan-Green/RISC-V-Move-struct-riscv_cpuinfo-to-new-header/20230207-041746 git checkout 9e77be253d64809a98f31c17abd12761dd9fad8e # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=riscv olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=riscv SHELL=/bin/bash arch/riscv/kernel/ If you fix the issue, kindly add following tag where applicable | Reported-by: 
kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): arch/riscv/kernel/cpufeature.c: In function 'riscv_fill_hwcap': >> arch/riscv/kernel/cpufeature.c:258:23: error: implicit declaration of function 'hartid_to_cpuid_map' [-Werror=implicit-function-declaration] 258 | cpu = hartid_to_cpuid_map(hartid); | ^~~~~~~~~~~~~~~~~~~ cc1: some warnings being treated as errors vim +/hartid_to_cpuid_map +258 arch/riscv/kernel/cpufeature.c 93 94 void __init riscv_fill_hwcap(void) 95 { 96 struct device_node *node; 97 const char *isa, *misaligned; 98 char print_str[NUM_ALPHA_EXTS + 1]; 99 int i, j, rc; 100 unsigned long isa2hwcap[26] = {0}; 101 unsigned long hartid, cpu; 102 103 isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I; 104 isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M; 105 isa2hwcap['a' - 'a'] = COMPAT_HWCAP_ISA_A; 106 isa2hwcap['f' - 'a'] = COMPAT_HWCAP_ISA_F; 107 isa2hwcap['d' - 'a'] = COMPAT_HWCAP_ISA_D; 108 isa2hwcap['c' - 'a'] = COMPAT_HWCAP_ISA_C; 109 110 elf_hwcap = 0; 111 112 bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX); 113 114 for_each_of_cpu_node(node) { 115 unsigned long this_hwcap = 0; 116 DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX); 117 const char *temp; 118 119 rc = riscv_of_processor_hartid(node, &hartid); 120 if (rc < 0) 121 continue; 122 123 if (of_property_read_string(node, "riscv,isa", &isa)) { 124 pr_warn("Unable to find \"riscv,isa\" devicetree entry\n"); 125 continue; 126 } 127 128 temp = isa; 129 #if IS_ENABLED(CONFIG_32BIT) 130 if (!strncmp(isa, "rv32", 4)) 131 isa += 4; 132 #elif IS_ENABLED(CONFIG_64BIT) 133 if (!strncmp(isa, "rv64", 4)) 134 isa += 4; 135 #endif 136 /* The riscv,isa DT property must start with rv64 or rv32 */ 137 if (temp == isa) 138 continue; 139 bitmap_zero(this_isa, RISCV_ISA_EXT_MAX); 140 for (; *isa; ++isa) { 141 const char *ext = isa++; 142 const char *ext_end = isa; 143 bool ext_long = false, ext_err = false; 144 145 switch (*ext) { 146 case 's': 147 /** 148 * Workaround for invalid single-letter 's' & 'u'(QEMU). 
149 * No need to set the bit in riscv_isa as 's' & 'u' are 150 * not valid ISA extensions. It works until multi-letter 151 * extension starting with "Su" appears. 152 */ 153 if (ext[-1] != '_' && ext[1] == 'u') { 154 ++isa; 155 ext_err = true; 156 break; 157 } 158 fallthrough; 159 case 'x': 160 case 'z': 161 ext_long = true; 162 /* Multi-letter extension must be delimited */ 163 for (; *isa && *isa != '_'; ++isa) 164 if (unlikely(!islower(*isa) 165 && !isdigit(*isa))) 166 ext_err = true; 167 /* Parse backwards */ 168 ext_end = isa; 169 if (unlikely(ext_err)) 170 break; 171 if (!isdigit(ext_end[-1])) 172 break; 173 /* Skip the minor version */ 174 while (isdigit(*--ext_end)) 175 ; 176 if (ext_end[0] != 'p' 177 || !isdigit(ext_end[-1])) { 178 /* Advance it to offset the pre-decrement */ 179 ++ext_end; 180 break; 181 } 182 /* Skip the major version */ 183 while (isdigit(*--ext_end)) 184 ; 185 ++ext_end; 186 break; 187 default: 188 if (unlikely(!islower(*ext))) { 189 ext_err = true; 190 break; 191 } 192 /* Find next extension */ 193 if (!isdigit(*isa)) 194 break; 195 /* Skip the minor version */ 196 while (isdigit(*++isa)) 197 ; 198 if (*isa != 'p') 199 break; 200 if (!isdigit(*++isa)) { 201 --isa; 202 break; 203 } 204 /* Skip the major version */ 205 while (isdigit(*++isa)) 206 ; 207 break; 208 } 209 if (*isa != '_') 210 --isa; 211 212 #define SET_ISA_EXT_MAP(name, bit) \ 213 do { \ 214 if ((ext_end - ext == sizeof(name) - 1) && \ 215 !memcmp(ext, name, sizeof(name) - 1) && \ 216 riscv_isa_extension_check(bit)) \ 217 set_bit(bit, this_isa); \ 218 } while (false) \ 219 220 if (unlikely(ext_err)) 221 continue; 222 if (!ext_long) { 223 int nr = *ext - 'a'; 224 225 if (riscv_isa_extension_check(nr)) { 226 this_hwcap |= isa2hwcap[nr]; 227 set_bit(nr, this_isa); 228 } 229 } else { 230 SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); 231 SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT); 232 SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM); 233 
SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE); 234 SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC); 235 SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL); 236 } 237 #undef SET_ISA_EXT_MAP 238 } 239 240 /* 241 * All "okay" hart should have same isa. Set HWCAP based on 242 * common capabilities of every "okay" hart, in case they don't 243 * have. 244 */ 245 if (elf_hwcap) 246 elf_hwcap &= this_hwcap; 247 else 248 elf_hwcap = this_hwcap; 249 250 if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) 251 bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); 252 else 253 bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); 254 255 /* 256 * Check for the performance of misaligned accesses. 257 */ > 258 cpu = hartid_to_cpuid_map(hartid); 259 if (cpu < 0) 260 continue; 261 262 if (!of_property_read_string(node, "riscv,misaligned-access-performance", 263 &misaligned)) { 264 if (strcmp(misaligned, "emulated") == 0) 265 per_cpu(misaligned_access_speed, cpu) = 266 RISCV_HWPROBE_MISALIGNED_EMULATED; 267 268 if (strcmp(misaligned, "slow") == 0) 269 per_cpu(misaligned_access_speed, cpu) = 270 RISCV_HWPROBE_MISALIGNED_SLOW; 271 272 if (strcmp(misaligned, "fast") == 0) 273 per_cpu(misaligned_access_speed, cpu) = 274 RISCV_HWPROBE_MISALIGNED_FAST; 275 } 276 } 277 278 /* We don't support systems with F but without D, so mask those out 279 * here. 
*/ 280 if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) { 281 pr_info("This kernel does not support systems with F but not D\n"); 282 elf_hwcap &= ~COMPAT_HWCAP_ISA_F; 283 } 284 285 memset(print_str, 0, sizeof(print_str)); 286 for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) 287 if (riscv_isa[0] & BIT_MASK(i)) 288 print_str[j++] = (char)('a' + i); 289 pr_info("riscv: base ISA extensions %s\n", print_str); 290 291 memset(print_str, 0, sizeof(print_str)); 292 for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) 293 if (elf_hwcap & BIT_MASK(i)) 294 print_str[j++] = (char)('a' + i); 295 pr_info("riscv: ELF capabilities %s\n", print_str); 296 297 for_each_set_bit(i, riscv_isa, RISCV_ISA_EXT_MAX) { 298 j = riscv_isa_ext2key(i); 299 if (j >= 0) 300 static_branch_enable(&riscv_isa_ext_keys[j]); 301 } 302 } 303
On Mon, Feb 06, 2023 at 12:14:54PM -0800, Evan Green wrote: > This allows userspace to select various routines to use based on the > performance of misaligned access on the target hardware. > > Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com> > Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> > Signed-off-by: Evan Green <evan@rivosinc.com> > > --- > > Changes in v2: > - Fixed logic error in if(of_property_read_string...) that caused crash > - Include cpufeature.h in cpufeature.h to avoid undeclared variable > warning. > - Added a _MASK define > - Fix random checkpatch complaints > > Documentation/riscv/hwprobe.rst | 13 +++++++++++ > arch/riscv/include/asm/cpufeature.h | 2 ++ > arch/riscv/include/asm/hwprobe.h | 2 +- > arch/riscv/include/asm/smp.h | 9 ++++++++ > arch/riscv/include/uapi/asm/hwprobe.h | 6 ++++++ > arch/riscv/kernel/cpufeature.c | 31 +++++++++++++++++++++++++-- > arch/riscv/kernel/sys_riscv.c | 23 ++++++++++++++++++++ > 7 files changed, 83 insertions(+), 3 deletions(-) > > diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst > index ce186967861f..0dc75e83e127 100644 > --- a/Documentation/riscv/hwprobe.rst > +++ b/Documentation/riscv/hwprobe.rst > @@ -51,3 +51,16 @@ The following keys are defined: > not minNum/maxNum") of the RISC-V ISA manual. > * :RISCV_HWPROBE_IMA_C:: The C extension is supported, as defined by > version 2.2 of the RISC-V ISA manual. > +* :RISCV_HWPROBE_KEY_PERF_0:: A bitmask that contains performance information This doesn't match what's defined? > + about the selected set of processors. > + * :RISCV_HWPROBE_MISALIGNED_UNKNOWN:: The performance of misaligned > + accesses is unknown. > + * :RISCV_HWPROBE_MISALIGNED_EMULATED:: Misaligned accesses are emulated via > + software, either in or below the kernel. These accesses are always > + extremely slow. 
> + * :RISCV_HWPROBE_MISALIGNED_SLOW:: Misaligned accesses are supported in > + hardware, but are slower than the cooresponding aligned accesses > + sequences. > + * :RISCV_HWPROBE_MISALIGNED_FAST:: Misaligned accesses are supported in > + hardware and are faster than the cooresponding aligned accesses > + sequences. > diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h > index 3831b638ecab..6c1759091e44 100644 > --- a/arch/riscv/include/asm/smp.h > +++ b/arch/riscv/include/asm/smp.h > @@ -26,6 +26,15 @@ struct riscv_ipi_ops { > */ > extern unsigned long __cpuid_to_hartid_map[NR_CPUS]; > #define cpuid_to_hartid_map(cpu) __cpuid_to_hartid_map[cpu] > +static inline long hartid_to_cpuid_map(unsigned long hartid) > +{ > + long i; > + > + for (i = 0; i < NR_CPUS; ++i) I'm never (or not yet?) sure about these things. Should this be for_each_possible_cpu()? > + if (cpuid_to_hartid_map(i) == hartid) > + return i; > + return -1; > +} > > /* print IPI stats */ > void show_ipi_stats(struct seq_file *p, int prec); > diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h > index ce39d6e74103..5d55e2da2b1f 100644 > --- a/arch/riscv/include/uapi/asm/hwprobe.h > +++ b/arch/riscv/include/uapi/asm/hwprobe.h > @@ -25,5 +25,11 @@ struct riscv_hwprobe { > #define RISCV_HWPROBE_KEY_IMA_EXT_0 4 > #define RISCV_HWPROBE_IMA_FD (1 << 0) > #define RISCV_HWPROBE_IMA_C (1 << 1) > +#define RISCV_HWPROBE_KEY_CPUPERF_0 5 > +#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) > +#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) > +#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0) > +#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0) > +#define RISCV_HWPROBE_MISALIGNED_MASK (3 << 0) Why is it UNKNOWN rather than UNSUPPORTED? I thought I saw Palmer saying that there is no requirement to support misaligned accesses any more. 
Plenty of old DTs are going to lack this property so would be UNKNOWN, and I *assume* that the user of the syscall is gonna conflate the two, but the rationale interests me. > /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ > #endif > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > index 93e45560af30..12af6f7a2f53 100644 > --- a/arch/riscv/kernel/cpufeature.c > +++ b/arch/riscv/kernel/cpufeature.c > @@ -14,8 +14,10 @@ > #include <linux/of.h> > #include <asm/alternative.h> > #include <asm/cacheflush.h> > +#include <asm/cpufeature.h> > #include <asm/errata_list.h> > #include <asm/hwcap.h> > +#include <asm/hwprobe.h> > #include <asm/patch.h> > #include <asm/pgtable.h> > #include <asm/processor.h> > @@ -32,6 +34,9 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; > DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX); > EXPORT_SYMBOL(riscv_isa_ext_keys); > > +/* Performance information */ > +DEFINE_PER_CPU(long, misaligned_access_speed); > + > /** > * riscv_isa_extension_base() - Get base extension word > * > @@ -89,11 +94,11 @@ static bool riscv_isa_extension_check(int id) > void __init riscv_fill_hwcap(void) > { > struct device_node *node; > - const char *isa; > + const char *isa, *misaligned; > char print_str[NUM_ALPHA_EXTS + 1]; > int i, j, rc; > unsigned long isa2hwcap[26] = {0}; > - unsigned long hartid; > + unsigned long hartid, cpu; > > isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I; > isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M; > @@ -246,6 +251,28 @@ void __init riscv_fill_hwcap(void) > bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); > else > bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); > + > + /* > + * Check for the performance of misaligned accesses. 
> + */ > + cpu = hartid_to_cpuid_map(hartid); > + if (cpu < 0) > + continue; > + > + if (!of_property_read_string(node, "riscv,misaligned-access-performance", > + &misaligned)) { > + if (strcmp(misaligned, "emulated") == 0) > + per_cpu(misaligned_access_speed, cpu) = > + RISCV_HWPROBE_MISALIGNED_EMULATED; > + > + if (strcmp(misaligned, "slow") == 0) > + per_cpu(misaligned_access_speed, cpu) = > + RISCV_HWPROBE_MISALIGNED_SLOW; > + > + if (strcmp(misaligned, "fast") == 0) > + per_cpu(misaligned_access_speed, cpu) = > + RISCV_HWPROBE_MISALIGNED_FAST; > + } > } > > /* We don't support systems with F but without D, so mask those out > diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c > index 74e0d72c877d..73d937c54f4e 100644 > --- a/arch/riscv/kernel/sys_riscv.c > +++ b/arch/riscv/kernel/sys_riscv.c > @@ -133,6 +133,25 @@ static long hwprobe_mid(struct riscv_hwprobe __user *pair, size_t key, > return set_hwprobe(pair, id); > } > > +static long hwprobe_misaligned(cpumask_t *cpus) > +{ > + long cpu, perf = -1; > + > + for_each_cpu(cpu, cpus) { > + long this_perf = per_cpu(misaligned_access_speed, cpu); > + > + if (perf == -1) > + perf = this_perf; > + > + if (perf != this_perf) > + perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN; Is there any reason to continue in the loop if this condition is met? > + } > + > + if (perf == -1) > + return RISCV_HWPROBE_MISALIGNED_UNKNOWN; > + return perf; heh, nitpicking the maintainer's use of whitespace... newline before return please :) Cheers, Conor. 
> +} > + > static > long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count, > long cpu_count, unsigned long __user *cpus_user, > @@ -205,6 +224,10 @@ long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count, > } > break; > > + case RISCV_HWPROBE_KEY_CPUPERF_0: > + ret = set_hwprobe(pairs, hwprobe_misaligned(&cpus)); > + break; > + > /* > * For forward compatibility, unknown keys don't fail the whole > * call, but get their element key set to -1 and value set to 0 > -- > 2.25.1 >
On Wed, Feb 15, 2023 at 1:57 PM Conor Dooley <conor@kernel.org> wrote: > > On Mon, Feb 06, 2023 at 12:14:54PM -0800, Evan Green wrote: > > This allows userspace to select various routines to use based on the > > performance of misaligned access on the target hardware. > > > > Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com> > > Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> > > Signed-off-by: Evan Green <evan@rivosinc.com> > > > > --- > > > > Changes in v2: > > - Fixed logic error in if(of_property_read_string...) that caused crash > > - Include cpufeature.h in cpufeature.h to avoid undeclared variable > > warning. > > - Added a _MASK define > > - Fix random checkpatch complaints > > > > Documentation/riscv/hwprobe.rst | 13 +++++++++++ > > arch/riscv/include/asm/cpufeature.h | 2 ++ > > arch/riscv/include/asm/hwprobe.h | 2 +- > > arch/riscv/include/asm/smp.h | 9 ++++++++ > > arch/riscv/include/uapi/asm/hwprobe.h | 6 ++++++ > > arch/riscv/kernel/cpufeature.c | 31 +++++++++++++++++++++++++-- > > arch/riscv/kernel/sys_riscv.c | 23 ++++++++++++++++++++ > > 7 files changed, 83 insertions(+), 3 deletions(-) > > > > diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst > > index ce186967861f..0dc75e83e127 100644 > > --- a/Documentation/riscv/hwprobe.rst > > +++ b/Documentation/riscv/hwprobe.rst > > @@ -51,3 +51,16 @@ The following keys are defined: > > not minNum/maxNum") of the RISC-V ISA manual. > > * :RISCV_HWPROBE_IMA_C:: The C extension is supported, as defined by > > version 2.2 of the RISC-V ISA manual. > > +* :RISCV_HWPROBE_KEY_PERF_0:: A bitmask that contains performance information > > This doesn't match what's defined? > > > + about the selected set of processors. > > + * :RISCV_HWPROBE_MISALIGNED_UNKNOWN:: The performance of misaligned > > + accesses is unknown. > > + * :RISCV_HWPROBE_MISALIGNED_EMULATED:: Misaligned accesses are emulated via > > + software, either in or below the kernel. 
These accesses are always > > + extremely slow. > > + * :RISCV_HWPROBE_MISALIGNED_SLOW:: Misaligned accesses are supported in > > + hardware, but are slower than the cooresponding aligned accesses > > + sequences. > > + * :RISCV_HWPROBE_MISALIGNED_FAST:: Misaligned accesses are supported in > > + hardware and are faster than the cooresponding aligned accesses > > + sequences. > > > diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h > > index 3831b638ecab..6c1759091e44 100644 > > --- a/arch/riscv/include/asm/smp.h > > +++ b/arch/riscv/include/asm/smp.h > > @@ -26,6 +26,15 @@ struct riscv_ipi_ops { > > */ > > extern unsigned long __cpuid_to_hartid_map[NR_CPUS]; > > #define cpuid_to_hartid_map(cpu) __cpuid_to_hartid_map[cpu] > > +static inline long hartid_to_cpuid_map(unsigned long hartid) > > +{ > > + long i; > > + > > + for (i = 0; i < NR_CPUS; ++i) > > I'm never (or not yet?) sure about these things. > Should this be for_each_possible_cpu()? Me neither. I believe it's the same, as for_each_possible_cpu() iterates over a CPU mask of all 1s, and the size of struct cpumask is set by NR_CPUS. Some architectures appear to have an init_cpu_possible() function to further restrict the set, though riscv does not. It's probably better to use for_each_possible_cpu() though in case a call to init_cpu_possible() ever does get added. 
> > > + if (cpuid_to_hartid_map(i) == hartid) > > + return i; > > + return -1; > > +} > > > > /* print IPI stats */ > > void show_ipi_stats(struct seq_file *p, int prec); > > diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h > > index ce39d6e74103..5d55e2da2b1f 100644 > > --- a/arch/riscv/include/uapi/asm/hwprobe.h > > +++ b/arch/riscv/include/uapi/asm/hwprobe.h > > @@ -25,5 +25,11 @@ struct riscv_hwprobe { > > #define RISCV_HWPROBE_KEY_IMA_EXT_0 4 > > #define RISCV_HWPROBE_IMA_FD (1 << 0) > > #define RISCV_HWPROBE_IMA_C (1 << 1) > > +#define RISCV_HWPROBE_KEY_CPUPERF_0 5 > > +#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) > > +#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) > > +#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0) > > +#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0) > > +#define RISCV_HWPROBE_MISALIGNED_MASK (3 << 0) > > Why is it UNKNOWN rather than UNSUPPORTED? > I thought I saw Palmer saying that there is no requirement to support > misaligned accesses any more. > Plenty of old DTs are going to lack this property so would be UNKNOWN, > and I *assume* that the user of the syscall is gonna conflate the two, > but the rationale interests me. Palmer had mentioned on the DT bindings patch that historically it was required but emulated. So because old binaries assumed it was there, the default value for DTs without this property needs to imply "supported, but no idea how fast it is". But you bring up an interesting point: should the bindings and these defines have a value that indicates no support at all for unaligned accesses? We could always add the value to the bindings later, but maybe we should leave space in this field now. -Evan
diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst index ce186967861f..0dc75e83e127 100644 --- a/Documentation/riscv/hwprobe.rst +++ b/Documentation/riscv/hwprobe.rst @@ -51,3 +51,16 @@ The following keys are defined: not minNum/maxNum") of the RISC-V ISA manual. * :RISCV_HWPROBE_IMA_C:: The C extension is supported, as defined by version 2.2 of the RISC-V ISA manual. +* :RISCV_HWPROBE_KEY_CPUPERF_0:: A bitmask that contains performance information + about the selected set of processors. + * :RISCV_HWPROBE_MISALIGNED_UNKNOWN:: The performance of misaligned + accesses is unknown. + * :RISCV_HWPROBE_MISALIGNED_EMULATED:: Misaligned accesses are emulated via + software, either in or below the kernel. These accesses are always + extremely slow. + * :RISCV_HWPROBE_MISALIGNED_SLOW:: Misaligned accesses are supported in + hardware, but are slower than the corresponding aligned access + sequences. + * :RISCV_HWPROBE_MISALIGNED_FAST:: Misaligned accesses are supported in + hardware and are faster than the corresponding aligned access + sequences. 
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 66c251d98290..ac51a9e6387a 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -18,4 +18,6 @@ struct riscv_cpuinfo { DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); +DECLARE_PER_CPU(long, misaligned_access_speed); + #endif diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 7e52f1e1fe10..4e45e33015bc 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,6 +8,6 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 4 +#define RISCV_HWPROBE_MAX_KEY 5 #endif diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index 3831b638ecab..6c1759091e44 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -26,6 +26,15 @@ struct riscv_ipi_ops { */ extern unsigned long __cpuid_to_hartid_map[NR_CPUS]; #define cpuid_to_hartid_map(cpu) __cpuid_to_hartid_map[cpu] +static inline long hartid_to_cpuid_map(unsigned long hartid) +{ + long i; + + for (i = 0; i < NR_CPUS; ++i) + if (cpuid_to_hartid_map(i) == hartid) + return i; + return -1; +} /* print IPI stats */ void show_ipi_stats(struct seq_file *p, int prec); diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index ce39d6e74103..5d55e2da2b1f 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -25,5 +25,11 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_KEY_IMA_EXT_0 4 #define RISCV_HWPROBE_IMA_FD (1 << 0) #define RISCV_HWPROBE_IMA_C (1 << 1) +#define RISCV_HWPROBE_KEY_CPUPERF_0 5 +#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) +#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) +#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0) +#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0) +#define RISCV_HWPROBE_MISALIGNED_MASK (3 << 0) /* Increase RISCV_HWPROBE_MAX_KEY when adding items. 
*/ #endif diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 93e45560af30..12af6f7a2f53 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -14,8 +14,10 @@ #include <linux/of.h> #include <asm/alternative.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/errata_list.h> #include <asm/hwcap.h> +#include <asm/hwprobe.h> #include <asm/patch.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -32,6 +34,9 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX); EXPORT_SYMBOL(riscv_isa_ext_keys); +/* Performance information */ +DEFINE_PER_CPU(long, misaligned_access_speed); + /** * riscv_isa_extension_base() - Get base extension word * @@ -89,11 +94,11 @@ static bool riscv_isa_extension_check(int id) void __init riscv_fill_hwcap(void) { struct device_node *node; - const char *isa; + const char *isa, *misaligned; char print_str[NUM_ALPHA_EXTS + 1]; int i, j, rc; unsigned long isa2hwcap[26] = {0}; - unsigned long hartid; + unsigned long hartid, cpu; isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I; isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M; @@ -246,6 +251,28 @@ void __init riscv_fill_hwcap(void) bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); else bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); + + /* + * Check for the performance of misaligned accesses. 
+ */ + cpu = hartid_to_cpuid_map(hartid); + if (cpu < 0) + continue; + + if (!of_property_read_string(node, "riscv,misaligned-access-performance", + &misaligned)) { + if (strcmp(misaligned, "emulated") == 0) + per_cpu(misaligned_access_speed, cpu) = + RISCV_HWPROBE_MISALIGNED_EMULATED; + + if (strcmp(misaligned, "slow") == 0) + per_cpu(misaligned_access_speed, cpu) = + RISCV_HWPROBE_MISALIGNED_SLOW; + + if (strcmp(misaligned, "fast") == 0) + per_cpu(misaligned_access_speed, cpu) = + RISCV_HWPROBE_MISALIGNED_FAST; + } } /* We don't support systems with F but without D, so mask those out diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 74e0d72c877d..73d937c54f4e 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -133,6 +133,25 @@ static long hwprobe_mid(struct riscv_hwprobe __user *pair, size_t key, return set_hwprobe(pair, id); } +static long hwprobe_misaligned(cpumask_t *cpus) +{ + long cpu, perf = -1; + + for_each_cpu(cpu, cpus) { + long this_perf = per_cpu(misaligned_access_speed, cpu); + + if (perf == -1) + perf = this_perf; + + if (perf != this_perf) + perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + } + + if (perf == -1) + return RISCV_HWPROBE_MISALIGNED_UNKNOWN; + return perf; +} + static long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count, long cpu_count, unsigned long __user *cpus_user, @@ -205,6 +224,10 @@ long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count, } break; + case RISCV_HWPROBE_KEY_CPUPERF_0: + ret = set_hwprobe(pairs, hwprobe_misaligned(&cpus)); + break; + /* * For forward compatibility, unknown keys don't fail the whole * call, but get their element key set to -1 and value set to 0