Message ID | 1461843824-19853-1-git-send-email-zengzhaoxiu@163.com (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
Hi, [auto build test ERROR on v4.6-rc5] [cannot apply to next-20160428] [if your patch is applied to the wrong git tree, please drop us a note to help improving the system] url: https://github.com/0day-ci/linux/commits/zengzhaoxiu-163-com/lib-GCD-add-binary-GCD-algorithm/20160428-195527 config: mips-allyesconfig (attached as .config) reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree make.cross ARCH=mips All error/warnings (new ones prefixed by >>): In file included from arch/mips/include/asm/bitops.h:21:0, from include/linux/bitops.h:36, from include/linux/kernel.h:10, from include/asm-generic/bug.h:13, from arch/mips/include/asm/bug.h:41, from include/linux/bug.h:4, from include/linux/page-flags.h:9, from kernel/bounds.c:9: >> arch/mips/include/asm/cpu-features.h:205:28: warning: "cpu_data" is not defined [-Wundef] # define cpu_has_mips32r6 (cpu_data[0].isa_level & MIPS_CPU_ISA_M32R6) ^ >> arch/mips/include/asm/cpu-features.h:241:5: note: in expansion of macro 'cpu_has_mips32r6' cpu_has_mips32r6 | cpu_has_mips64r1 | \ ^ >> arch/mips/include/asm/cpu-features.h:289:25: note: in expansion of macro 'cpu_has_mips_r' #define cpu_has_clo_clz cpu_has_mips_r ^ >> arch/mips/include/asm/cpu-features.h:291:6: note: in expansion of macro 'cpu_has_clo_clz' #if !cpu_has_clo_clz ^ >> arch/mips/include/asm/cpu-features.h:205:36: error: token "[" is not valid in preprocessor expressions # define cpu_has_mips32r6 (cpu_data[0].isa_level & MIPS_CPU_ISA_M32R6) ^ >> arch/mips/include/asm/cpu-features.h:241:5: note: in expansion of macro 'cpu_has_mips32r6' cpu_has_mips32r6 | cpu_has_mips64r1 | \ ^ >> arch/mips/include/asm/cpu-features.h:289:25: note: in expansion of macro 'cpu_has_mips_r' #define cpu_has_clo_clz cpu_has_mips_r ^ >> arch/mips/include/asm/cpu-features.h:291:6: note: in expansion of macro 'cpu_has_clo_clz' #if !cpu_has_clo_clz ^ make[2]: *** [kernel/bounds.s] Error 1 make[2]: Target '__build' not remade because of errors. make[1]: *** [prepare0] Error 2 make[1]: Target 'prepare' not remade because of errors. make: *** [sub-make] Error 2 vim +205 arch/mips/include/asm/cpu-features.h 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 199 # define cpu_has_mips32r1 (cpu_data[0].isa_level & MIPS_CPU_ISA_M32R1) 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 200 #endif 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 201 #ifndef cpu_has_mips32r2 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 202 # define cpu_has_mips32r2 (cpu_data[0].isa_level & MIPS_CPU_ISA_M32R2) 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 203 #endif 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 204 #ifndef cpu_has_mips32r6 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 @205 # define cpu_has_mips32r6 (cpu_data[0].isa_level & MIPS_CPU_ISA_M32R6) 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 206 #endif 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 207 #ifndef cpu_has_mips64r1 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 208 # define cpu_has_mips64r1 (cpu_data[0].isa_level & MIPS_CPU_ISA_M64R1) 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 209 #endif 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 210 #ifndef cpu_has_mips64r2 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 211 # define cpu_has_mips64r2 (cpu_data[0].isa_level & MIPS_CPU_ISA_M64R2) 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 212 #endif 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 213 #ifndef cpu_has_mips64r6 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 214 # define cpu_has_mips64r6 (cpu_data[0].isa_level & MIPS_CPU_ISA_M64R6) 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 215 #endif 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 216 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 217 /* 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 218 * Shortcuts ... 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 219 */ 08a07904e arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 220 #define cpu_has_mips_2_3_4_5 (cpu_has_mips_2 | cpu_has_mips_3_4_5) 08a07904e arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 221 #define cpu_has_mips_3_4_5 (cpu_has_mips_3 | cpu_has_mips_4_5) 08a07904e arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 222 #define cpu_has_mips_4_5 (cpu_has_mips_4 | cpu_has_mips_5) 08a07904e arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 223 08a07904e arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 224 #define cpu_has_mips_2_3_4_5_r (cpu_has_mips_2 | cpu_has_mips_3_4_5_r) 08a07904e arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 225 #define cpu_has_mips_3_4_5_r (cpu_has_mips_3 | cpu_has_mips_4_5_r) 08a07904e arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 226 #define cpu_has_mips_4_5_r (cpu_has_mips_4 | cpu_has_mips_5_r) 08a07904e arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 227 #define cpu_has_mips_5_r (cpu_has_mips_5 | cpu_has_mips_r) 08a07904e arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 228 2d83fea78 arch/mips/include/asm/cpu-features.h Maciej W. Rozycki 2015-04-03 229 #define cpu_has_mips_3_4_5_64_r2_r6 \ 2d83fea78 arch/mips/include/asm/cpu-features.h Maciej W. Rozycki 2015-04-03 230 (cpu_has_mips_3 | cpu_has_mips_4_5_64_r2_r6) 2d83fea78 arch/mips/include/asm/cpu-features.h Maciej W. Rozycki 2015-04-03 231 #define cpu_has_mips_4_5_64_r2_r6 \ 2d83fea78 arch/mips/include/asm/cpu-features.h Maciej W. Rozycki 2015-04-03 232 (cpu_has_mips_4_5 | cpu_has_mips64r1 | \ 2d83fea78 arch/mips/include/asm/cpu-features.h Maciej W. Rozycki 2015-04-03 233 cpu_has_mips_r2 | cpu_has_mips_r6) 08a07904e arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 234 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 235 #define cpu_has_mips32 (cpu_has_mips32r1 | cpu_has_mips32r2 | cpu_has_mips32r6) 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 236 #define cpu_has_mips64 (cpu_has_mips64r1 | cpu_has_mips64r2 | cpu_has_mips64r6) 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 237 #define cpu_has_mips_r1 (cpu_has_mips32r1 | cpu_has_mips64r1) 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 238 #define cpu_has_mips_r2 (cpu_has_mips32r2 | cpu_has_mips64r2) 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 239 #define cpu_has_mips_r6 (cpu_has_mips32r6 | cpu_has_mips64r6) c46b302b9 arch/mips/include/asm/cpu-features.h Ralf Baechle 2008-10-28 240 #define cpu_has_mips_r (cpu_has_mips32r1 | cpu_has_mips32r2 | \ 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 @241 cpu_has_mips32r6 | cpu_has_mips64r1 | \ 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 242 cpu_has_mips64r2 | cpu_has_mips64r6) 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 243 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 244 /* MIPSR2 and MIPSR6 have a lot of similarities */ 34c56fc1c arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 245 #define cpu_has_mips_r2_r6 (cpu_has_mips_r2 | cpu_has_mips_r6) 0401572a9 include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 246 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 247 /* 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 248 * cpu_has_mips_r2_exec_hazard - return if IHB is required on current processor 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 249 * 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 250 * Returns non-zero value if the current processor implementation requires 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 251 * an IHB instruction to deal with an instruction hazard as per MIPS R2 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 252 * architecture specification, zero otherwise. 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 253 */ 41f0e4d04 arch/mips/include/asm/cpu-features.h David Daney 2009-05-12 254 #ifndef cpu_has_mips_r2_exec_hazard 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 255 #define cpu_has_mips_r2_exec_hazard \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 256 ({ \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 257 int __res; \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 258 \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 259 switch (current_cpu_type()) { \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 260 case CPU_M14KC: \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 261 case CPU_74K: \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 262 case CPU_1074K: \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 263 case CPU_PROAPTIV: \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 264 case CPU_P5600: \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 265 case CPU_M5150: \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 266 case CPU_QEMU_GENERIC: \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 267 case CPU_CAVIUM_OCTEON: \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 268 case CPU_CAVIUM_OCTEON_PLUS: \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 269 case CPU_CAVIUM_OCTEON2: \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 270 case CPU_CAVIUM_OCTEON3: \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 271 __res = 0; \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 272 break; \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 273 \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 274 default: \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 275 __res = 1; \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 276 } \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 277 \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 278 __res; \ 9cdf30bd3 arch/mips/include/asm/cpu-features.h Ralf Baechle 2015-03-25 279 }) 41f0e4d04 arch/mips/include/asm/cpu-features.h David Daney 2009-05-12 280 #endif 41f0e4d04 arch/mips/include/asm/cpu-features.h David Daney 2009-05-12 281 47740eb88 arch/mips/include/asm/cpu-features.h Ralf Baechle 2009-04-19 282 /* 47740eb88 arch/mips/include/asm/cpu-features.h Ralf Baechle 2009-04-19 283 * MIPS32, MIPS64, VR5500, IDT32332, IDT32334 and maybe a few other becee6b8c arch/mips/include/asm/cpu-features.h Maciej W. Rozycki 2013-09-22 284 * pre-MIPS32/MIPS64 processors have CLO, CLZ. The IDT RC64574 is 64-bit and 417a5eb02 arch/mips/include/asm/cpu-features.h Ralf Baechle 2010-08-05 285 * has CLO and CLZ but not DCLO nor DCLZ. For 64-bit kernels 47740eb88 arch/mips/include/asm/cpu-features.h Ralf Baechle 2009-04-19 286 * cpu_has_clo_clz also indicates the availability of DCLO and DCLZ. 47740eb88 arch/mips/include/asm/cpu-features.h Ralf Baechle 2009-04-19 287 */ 47740eb88 arch/mips/include/asm/cpu-features.h Ralf Baechle 2009-04-19 288 #ifndef cpu_has_clo_clz 47740eb88 arch/mips/include/asm/cpu-features.h Ralf Baechle 2009-04-19 @289 #define cpu_has_clo_clz cpu_has_mips_r 47740eb88 arch/mips/include/asm/cpu-features.h Ralf Baechle 2009-04-19 290 #endif 35e1a24e8 arch/mips/include/asm/cpu-features.h Zhaoxiu Zeng 2016-04-28 @291 #if !cpu_has_clo_clz 35e1a24e8 arch/mips/include/asm/cpu-features.h Zhaoxiu Zeng 2016-04-28 292 #define CONFIG_CPU_NO_EFFICIENT_FFS 1 35e1a24e8 arch/mips/include/asm/cpu-features.h Zhaoxiu Zeng 2016-04-28 293 #endif 47740eb88 arch/mips/include/asm/cpu-features.h Ralf Baechle 2009-04-19 294 :::::: The code at line 205 was first introduced by commit :::::: 34c56fc1c167facc375d927687df0a3891d164ac MIPS: asm: cpu: Add MIPSR6 ISA definitions :::::: TO: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com> :::::: CC: Markos Chandras <markos.chandras@imgtec.com> --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
On Apr 28, 2016, at 7:43 AM, zengzhaoxiu@163.com wrote: > + * This implements the binary GCD algorithm. (Often attributed to Stein, > + * but as Knuth has noted, appears a first-century Chinese math text.) Should this be "appears in a"? Josh -- To unsubscribe from this list: send the line "unsubscribe linux-parisc" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2016-04-28 at 19:43 +0800, zengzhaoxiu@163.com wrote: > From: Zhaoxiu Zeng <zhaoxiu.zeng@gmail.com> > > Because some architectures (alpha, armv6, etc.) don't provide > hardware division, the mod operation is slow! Binary GCD algorithm > uses simple arithmetic operations, it replaces division with > arithmetic shifts, comparisons, and subtractions. > > I have compiled successfully with x86_64_defconfig and > i386_defconfig. What's the reason for wanting to optimize this and thus have to maintain (and test) two separate code paths, which is a significant expense? As far as I can see, gcd() is mosly used in finding optimal clocks for operations, which is usually done at start of day and not time critical. James -- To unsubscribe from this list: send the line "unsubscribe linux-parisc" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Apr 28, 2016 at 6:48 PM, George Spelvin <linux@horizon.com> wrote: > Another few comments: > > 1. Would ARCH_HAS_FAST_FFS involve fewer changes than CPU_NO_EFFICIENT_FFS? No, as you want to _disable_ ARCH_HAS_FAST_FFS / _enable_ CPU_NO_EFFICIENT_FFS as soon as you're enabling support for a CPU that doesn't support it. Logical OR is easier in both the Kconfig and C preprocessor languages than logical NAND. E.g. in Kconfig, a CPU core not supporting it can just select CPU_NO_EFFICIENT_FFS. Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds -- To unsubscribe from this list: send the line "unsubscribe linux-parisc" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Apr 28, 2016 at 07:51:06PM +0200, Geert Uytterhoeven wrote: > On Thu, Apr 28, 2016 at 6:48 PM, George Spelvin <linux@horizon.com> wrote: > > Another few comments: > > > > 1. Would ARCH_HAS_FAST_FFS involve fewer changes than CPU_NO_EFFICIENT_FFS? > > No, as you want to _disable_ ARCH_HAS_FAST_FFS / _enable_ > CPU_NO_EFFICIENT_FFS as soon as you're enabling support for a > CPU that doesn't support it. > > Logical OR is easier in both the Kconfig and C preprocessor languages > than logical NAND. > > E.g. in Kconfig, a CPU core not supporting it can just select > CPU_NO_EFFICIENT_FFS. How does a CPU lack an efficient ffs/ctz anyway? There are all sorts of ways to implement it without a native insn, some of which are almost or just as fast as the native insn on cpus that have the latter. On anything with a fast multiply, the de Bruijn sequence approach is near-optimal, and otherwise one of the binary-search type approaches (possibly branchless) can be used. If the compiler doesn't generate an appropriate one for __builtin_ctz, that's arguably a compiler bug. Rich -- To unsubscribe from this list: send the line "unsubscribe linux-parisc" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Apr 28, 2016 at 7:58 PM, Rich Felker <dalias@libc.org> wrote: > On Thu, Apr 28, 2016 at 07:51:06PM +0200, Geert Uytterhoeven wrote: >> On Thu, Apr 28, 2016 at 6:48 PM, George Spelvin <linux@horizon.com> wrote: >> > Another few comments: >> > >> > 1. Would ARCH_HAS_FAST_FFS involve fewer changes than CPU_NO_EFFICIENT_FFS? >> >> No, as you want to _disable_ ARCH_HAS_FAST_FFS / _enable_ >> CPU_NO_EFFICIENT_FFS as soon as you're enabling support for a >> CPU that doesn't support it. >> >> Logical OR is easier in both the Kconfig and C preprocessor languages >> than logical NAND. >> >> E.g. in Kconfig, a CPU core not supporting it can just select >> CPU_NO_EFFICIENT_FFS. > > How does a CPU lack an efficient ffs/ctz anyway? There are all sorts > of ways to implement it without a native insn, some of which are > almost or just as fast as the native insn on cpus that have the > latter. On anything with a fast multiply, the de Bruijn sequence > approach is near-optimal, and otherwise one of the binary-search type > approaches (possibly branchless) can be used. If the compiler doesn't > generate an appropriate one for __builtin_ctz, that's arguably a > compiler bug. m68k-linux-gcc 4.6.3 generates: jsr __ctzsi2 Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds -- To unsubscribe from this list: send the line "unsubscribe linux-parisc" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> How does a CPU lack an efficient ffs/ctz anyway? There are all sorts > of ways to implement it without a native insn, some of which are > almost or just as fast as the native insn on cpus that have the > latter. On anything with a fast multiply, the de Bruijn sequence > approach is near-optimal, and otherwise one of the binary-search type > approaches (possibly branchless) can be used. If the compiler doesn't > generate an appropriate one for __builtin_ctz, that's arguably a > compiler bug. What's wanted here is something faster than any of those. Yes, there's a simple constant-time branch-free implementation: unsigned inline __attribute__((const)) hweight32(uint32_t x) { x -= (x >> 1) & 0x55555555; x = ((x >> 2) & 0x33333333) + (x & 0x33333333); x += x >> 4; x &= 0x0f0f0f0f; x += x >> 8; x += x >> 16; return x & 63; } unsigned inline __attribute__((const)) __ffs32(uint32_t x) { return hweight(~x & (x-1)); } but if you work it through, that's about 19 instructions; a few more on platforms without 32-bit immediates. The shift itself makes an even 20, and there are a lot of sequential dependencies (I count a 17-op chain including the shift) limiting execution time. The de Bruijn hack reduces the length but adds a memory access for the table lookup. (http://supertech.csail.mit.edu/papers/debruijn.pdf) In the GCD code, the number to normalize is basically random, so the normalization loop shifts an average of 1 bit. One bit half the time, a second bit 1/4 of the time, etc. (The posted code in the FAST_FFS case omits one guaranteed shift at the end of the loop because the normalization code is constant-time.) So "fast __ffs" basically means faster than *one* iteration of "while (!(x & 1)) x >>= 1;". In this case "fast" means cheaper than *one* unpredictable branch, which is a very small handful of instructions. -- To unsubscribe from this list: send the line "unsubscribe linux-parisc" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi,
[auto build test ERROR on v4.6-rc5]
[cannot apply to next-20160519]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/zengzhaoxiu-163-com/lib-GCD-add-binary-GCD-algorithm/20160428-195527
config: mips-jz4740 (attached as .config)
compiler: mips-linux-gnu-gcc (Debian 5.3.1-8) 5.3.1 20160205
reproduce:
wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=mips
Note: the linux-review/zengzhaoxiu-163-com/lib-GCD-add-binary-GCD-algorithm/20160428-195527 HEAD 35e1a24e8fc3a5308b053ed3c744f02ec2a76820 builds fine.
It only hurts bisectibility.
All errors (new ones prefixed by >>):
In file included from arch/mips/include/asm/bitops.h:21:0,
from include/linux/bitops.h:36,
from include/linux/kernel.h:10,
from include/asm-generic/bug.h:13,
from arch/mips/include/asm/bug.h:41,
from include/linux/bug.h:4,
from include/linux/page-flags.h:9,
from kernel/bounds.c:9:
arch/mips/include/asm/cpu-features.h:205:28: warning: "cpu_data" is not defined [-Wundef]
# define cpu_has_mips32r6 (cpu_data[0].isa_level & MIPS_CPU_ISA_M32R6)
^
arch/mips/include/asm/cpu-features.h:241:5: note: in expansion of macro 'cpu_has_mips32r6'
cpu_has_mips32r6 | cpu_has_mips64r1 | \
^
arch/mips/include/asm/cpu-features.h:289:25: note: in expansion of macro 'cpu_has_mips_r'
#define cpu_has_clo_clz cpu_has_mips_r
^
arch/mips/include/asm/cpu-features.h:291:6: note: in expansion of macro 'cpu_has_clo_clz'
#if !cpu_has_clo_clz
^
>> arch/mips/include/asm/cpu-features.h:205:36: error: token "[" is not valid in preprocessor expressions
# define cpu_has_mips32r6 (cpu_data[0].isa_level & MIPS_CPU_ISA_M32R6)
^
arch/mips/include/asm/cpu-features.h:241:5: note: in expansion of macro 'cpu_has_mips32r6'
cpu_has_mips32r6 | cpu_has_mips64r1 | \
^
arch/mips/include/asm/cpu-features.h:289:25: note: in expansion of macro 'cpu_has_mips_r'
#define cpu_has_clo_clz cpu_has_mips_r
^
arch/mips/include/asm/cpu-features.h:291:6: note: in expansion of macro 'cpu_has_clo_clz'
#if !cpu_has_clo_clz
^
make[2]: *** [kernel/bounds.s] Error 1
make[2]: Target '__build' not remade because of errors.
make[1]: *** [prepare0] Error 2
make[1]: Target 'prepare' not remade because of errors.
make: *** [sub-make] Error 2
vim +205 arch/mips/include/asm/cpu-features.h
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 199 # define cpu_has_mips32r1 (cpu_data[0].isa_level & MIPS_CPU_ISA_M32R1)
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 200 #endif
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 201 #ifndef cpu_has_mips32r2
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 202 # define cpu_has_mips32r2 (cpu_data[0].isa_level & MIPS_CPU_ISA_M32R2)
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 203 #endif
34c56fc1 arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 204 #ifndef cpu_has_mips32r6
34c56fc1 arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 @205 # define cpu_has_mips32r6 (cpu_data[0].isa_level & MIPS_CPU_ISA_M32R6)
34c56fc1 arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 206 #endif
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 207 #ifndef cpu_has_mips64r1
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 208 # define cpu_has_mips64r1 (cpu_data[0].isa_level & MIPS_CPU_ISA_M64R1)
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 209 #endif
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 210 #ifndef cpu_has_mips64r2
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 211 # define cpu_has_mips64r2 (cpu_data[0].isa_level & MIPS_CPU_ISA_M64R2)
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 212 #endif
34c56fc1 arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 213 #ifndef cpu_has_mips64r6
34c56fc1 arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 214 # define cpu_has_mips64r6 (cpu_data[0].isa_level & MIPS_CPU_ISA_M64R6)
34c56fc1 arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 215 #endif
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 216
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 217 /*
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 218 * Shortcuts ...
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 219 */
08a07904 arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 220 #define cpu_has_mips_2_3_4_5 (cpu_has_mips_2 | cpu_has_mips_3_4_5)
08a07904 arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 221 #define cpu_has_mips_3_4_5 (cpu_has_mips_3 | cpu_has_mips_4_5)
08a07904 arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 222 #define cpu_has_mips_4_5 (cpu_has_mips_4 | cpu_has_mips_5)
08a07904 arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 223
08a07904 arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 224 #define cpu_has_mips_2_3_4_5_r (cpu_has_mips_2 | cpu_has_mips_3_4_5_r)
08a07904 arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 225 #define cpu_has_mips_3_4_5_r (cpu_has_mips_3 | cpu_has_mips_4_5_r)
08a07904 arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 226 #define cpu_has_mips_4_5_r (cpu_has_mips_4 | cpu_has_mips_5_r)
08a07904 arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 227 #define cpu_has_mips_5_r (cpu_has_mips_5 | cpu_has_mips_r)
08a07904 arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 228
2d83fea7 arch/mips/include/asm/cpu-features.h Maciej W. Rozycki 2015-04-03 229 #define cpu_has_mips_3_4_5_64_r2_r6 \
2d83fea7 arch/mips/include/asm/cpu-features.h Maciej W. Rozycki 2015-04-03 230 (cpu_has_mips_3 | cpu_has_mips_4_5_64_r2_r6)
2d83fea7 arch/mips/include/asm/cpu-features.h Maciej W. Rozycki 2015-04-03 231 #define cpu_has_mips_4_5_64_r2_r6 \
2d83fea7 arch/mips/include/asm/cpu-features.h Maciej W. Rozycki 2015-04-03 232 (cpu_has_mips_4_5 | cpu_has_mips64r1 | \
2d83fea7 arch/mips/include/asm/cpu-features.h Maciej W. Rozycki 2015-04-03 233 cpu_has_mips_r2 | cpu_has_mips_r6)
08a07904 arch/mips/include/asm/cpu-features.h Ralf Baechle 2014-04-19 234
34c56fc1 arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 235 #define cpu_has_mips32 (cpu_has_mips32r1 | cpu_has_mips32r2 | cpu_has_mips32r6)
34c56fc1 arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 236 #define cpu_has_mips64 (cpu_has_mips64r1 | cpu_has_mips64r2 | cpu_has_mips64r6)
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 237 #define cpu_has_mips_r1 (cpu_has_mips32r1 | cpu_has_mips64r1)
0401572a include/asm-mips/cpu-features.h Ralf Baechle 2005-12-09 238 #define cpu_has_mips_r2 (cpu_has_mips32r2 | cpu_has_mips64r2)
34c56fc1 arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 239 #define cpu_has_mips_r6 (cpu_has_mips32r6 | cpu_has_mips64r6)
c46b302b arch/mips/include/asm/cpu-features.h Ralf Baechle 2008-10-28 240 #define cpu_has_mips_r (cpu_has_mips32r1 | cpu_has_mips32r2 | \
34c56fc1 arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 @241 cpu_has_mips32r6 | cpu_has_mips64r1 | \
34c56fc1 arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 242 cpu_has_mips64r2 | cpu_has_mips64r6)
34c56fc1 arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 243
34c56fc1 arch/mips/include/asm/cpu-features.h Leonid Yegoshin 2014-11-13 244 /* MIPSR2 and MIPSR6 have a lot of similarities */
:::::: The code at line 205 was first introduced by commit
:::::: 34c56fc1c167facc375d927687df0a3891d164ac MIPS: asm: cpu: Add MIPSR6 ISA definitions
:::::: TO: Leonid Yegoshin <Leonid.Yegoshin@imgtec.com>
:::::: CC: Markos Chandras <markos.chandras@imgtec.com>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
diff --git a/arch/Kconfig b/arch/Kconfig index 81869a5..275f17d 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -638,4 +638,7 @@ config COMPAT_OLD_SIGACTION config ARCH_NO_COHERENT_DMA_MMAP bool +config CPU_NO_EFFICIENT_FFS + def_bool n + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 9d8a858..44e6f05 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -27,6 +27,7 @@ config ALPHA select MODULES_USE_ELF_RELA select ODD_RT_SIGACTION select OLD_SIGSUSPEND + select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67 help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 5534766..cb569b6 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -421,18 +421,21 @@ config CPU_32v3 select CPU_USE_DOMAINS if MMU select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS config CPU_32v4 bool select CPU_USE_DOMAINS if MMU select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS config CPU_32v4T bool select CPU_USE_DOMAINS if MMU select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS config CPU_32v5 bool diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 986ea84..aa232de 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -20,6 +20,7 @@ config H8300 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO select HAVE_ARCH_KGDB + select CPU_NO_EFFICIENT_FFS config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index c82b292..3cc8498 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -17,6 +17,7 @@ config M32R select ARCH_USES_GETTIMEOFFSET select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW + select CPU_NO_EFFICIENT_FFS config SBUS bool diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 0dfcf12..0b6efe8 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -40,6 +40,7 @@ config M68000 select CPU_HAS_NO_MULDIV64 select CPU_HAS_NO_UNALIGNED select GENERIC_CSUM + select CPU_NO_EFFICIENT_FFS help The Freescale (was Motorola) 68000 CPU is the first generation of the well known M68K family of processors. The CPU core as well as @@ -51,6 +52,7 @@ config MCPU32 bool select CPU_HAS_NO_BITFIELDS select CPU_HAS_NO_UNALIGNED + select CPU_NO_EFFICIENT_FFS help The Freescale (was then Motorola) CPU32 is a CPU core that is based on the 68020 processor. For the most part it is used in @@ -130,6 +132,7 @@ config M5206 depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5206 processor support. @@ -138,6 +141,7 @@ config M5206e depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5206e processor support. @@ -163,6 +167,7 @@ config M5249 depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5249 processor support. @@ -171,6 +176,7 @@ config M525x depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Freescale (Motorola) Coldfire 5251/5253 processor support. @@ -189,6 +195,7 @@ config M5272 depends on !MMU select COLDFIRE_SW_A7 select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5272 processor support. @@ -217,6 +224,7 @@ config M5307 select COLDFIRE_SW_A7 select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5307 processor support. @@ -242,6 +250,7 @@ config M5407 select COLDFIRE_SW_A7 select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Motorola ColdFire 5407 processor support. @@ -251,6 +260,7 @@ config M547x select MMU_COLDFIRE if MMU select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Freescale ColdFire 5470/5471/5472/5473/5474/5475 processor support. @@ -260,6 +270,7 @@ config M548x select M54xx select HAVE_CACHE_CB select HAVE_MBAR + select CPU_NO_EFFICIENT_FFS help Freescale ColdFire 5480/5481/5482/5483/5484/5485 processor support. diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index a0fa88d..2ac2de6 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -29,6 +29,7 @@ config METAG select OF select OF_EARLY_FLATTREE select SPARSE_IRQ + select CPU_NO_EFFICIENT_FFS config STACKTRACE_SUPPORT def_bool y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 3d793b5..f17c3a4 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -32,6 +32,7 @@ config MICROBLAZE select OF_EARLY_FLATTREE select TRACING_SUPPORT select VIRT_TO_BUS + select CPU_NO_EFFICIENT_FFS config SWAP def_bool n diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index eeec8c8..fd4ae7d 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -288,6 +288,9 @@ #ifndef cpu_has_clo_clz #define cpu_has_clo_clz cpu_has_mips_r #endif +#if !cpu_has_clo_clz +#define CONFIG_CPU_NO_EFFICIENT_FFS 1 +#endif /* * MIPS32 R2, MIPS64 R2, Loongson 3A and Octeon have WSBH. diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index 4375554..f10bd2c 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -16,6 +16,7 @@ config NIOS2 select SOC_BUS select SPARSE_IRQ select USB_ARCH_HAS_HCD if USB_SUPPORT + select CPU_NO_EFFICIENT_FFS config GENERIC_CSUM def_bool y diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index e118c02..142cb05 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -25,6 +25,7 @@ config OPENRISC select MODULES_USE_ELF_RELA select HAVE_DEBUG_STACKOVERFLOW select OR1K_PIC + select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1 config MMU def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 88cfaa8..3d498a6 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -32,6 +32,7 @@ config PARISC select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_SECCOMP_FILTER select ARCH_NO_COHERENT_DMA_MMAP + select CPU_NO_EFFICIENT_FFS help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/score/Kconfig b/arch/score/Kconfig index 366e1b5..507d631 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -14,6 +14,7 @@ config SCORE select VIRT_TO_BUS select MODULES_USE_ELF_REL select CLONE_BACKWARDS + select CPU_NO_EFFICIENT_FFS choice prompt "System type" diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 7ed20fc..56cf5e5 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -44,6 +44,7 @@ config SUPERH select OLD_SIGSUSPEND select OLD_SIGACTION select HAVE_ARCH_AUDITSYSCALL + select CPU_NO_EFFICIENT_FFS help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 57ffaf2..ca675ed 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -42,6 +42,7 @@ config SPARC select ODD_RT_SIGACTION select OLD_SIGSUSPEND select ARCH_HAS_SG_CHAIN + select CPU_NO_EFFICIENT_FFS config SPARC32 def_bool !64BIT diff --git a/lib/gcd.c b/lib/gcd.c index 3657f12..eba7d4e 100644 --- a/lib/gcd.c +++ b/lib/gcd.c @@ -2,20 +2,68 @@ #include <linux/gcd.h> #include <linux/export.h> -/* Greatest common divisor */ +/* + * This implements the binary GCD algorithm. (Often attributed to Stein, + * but as Knuth has noted, appears a first-century Chinese math text.) + * + * This is faster than the division-based algorithm even on x86, which + * has decent hardware division. + */ + +#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) + +/* If __ffs is available, the even/odd algorithm benchmarks slower. */ unsigned long gcd(unsigned long a, unsigned long b) { - unsigned long r; + unsigned long r = a | b; + + if (!a || !b) + return r; - if (a < b) - swap(a, b); + b >>= __ffs(b); - if (!b) - return a; - while ((r = a % b) != 0) { - a = b; - b = r; + for (;;) { + a >>= __ffs(a); + if (a == b) + return a << __ffs(r); + if (a < b) + swap(a, b); + a -= b; } +} + +#else + +/* If normalization is done by loops, the even/odd algorithm is a win. */ +unsigned long gcd(unsigned long a, unsigned long b) +{ + unsigned long r = a | b; + + if (!a || !b) + return r; + + /* Isolate lsbit of r */ + r &= -r; + + while (!(a & r)) + a >>= 1; + while (!(b & r)) + b >>= 1; + + while (a != b) { + if (a < b) + swap(a, b); + a -= b; + + a >>= 1; + if (a & r) + a += b; + do a >>= 1; while (!(a & r)); + } + return b; } + +#endif + EXPORT_SYMBOL_GPL(gcd);