@@ -102,7 +102,7 @@
/* preserve low part of n for reminder computation */ \
__r = __n; \
/* determine number of bits to represent __b */ \
- __p = 1 << __div64_fls(__b); \
+ __p = 1 << (fls(__b) - 1); \
/* compute __m = ((__p << 64) + __b - 1) / __b */ \
__m = (~0ULL / __b) * __p; \
__m += (((~0ULL % __b + 1) * __p) + __b - 1) / __b; \
@@ -150,8 +150,8 @@
__p /= (__m & -__m); \
__m /= (__m & -__m); \
} else { \
- __p >>= __div64_fls(__bits); \
- __m >>= __div64_fls(__bits); \
+ __p >>= fls(__bits) - 1; \
+ __m >>= fls(__bits) - 1; \
} \
/* No correction needed. */ \
__c = 0; \
@@ -217,18 +217,6 @@
__r; \
})
-/* our own fls implementation to make sure constant propagation is fine */
-#define __div64_fls(bits) \
-({ \
- unsigned int __left = (bits), __nr = 0; \
- if (__left & 0xffff0000) __nr += 16, __left >>= 16; \
- if (__left & 0x0000ff00) __nr += 8, __left >>= 8; \
- if (__left & 0x000000f0) __nr += 4, __left >>= 4; \
- if (__left & 0x0000000c) __nr += 2, __left >>= 2; \
- if (__left & 0x00000002) __nr += 1; \
- __nr; \
-})
-
#endif /* GCC version */
#endif /* BITS_PER_LONG */
Following the improvements to Xen's bitops, fls() does constant propagation in
all cases.  Use it, and drop the local open-coded helper.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Julien Grall <julien@xen.org>
CC: Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>
CC: Bertrand Marquis <bertrand.marquis@arm.com>
CC: Michal Orzel <michal.orzel@amd.com>

ARM32 gets a very minor code generation improvement:

  xen.git/xen$ ../scripts/bloat-o-meter xen-syms-arm32-{before,after}
  add/remove: 0/0 grow/shrink: 0/6 up/down: 0/-48 (-48)
  Function                                     old     new   delta
  wallclock_time                               288     280      -8
  printk_start_of_line                         560     552      -8
  domain_vtimer_init                           472     464      -8
  do_settime                                   376     368      -8
  burn_credits                                 760     752      -8
  __printk_ratelimit                           424     416      -8

But it's only an improvement of a couple of operations, with no real change in
code structure.  I expect that the constant propagation being done through
__builtin_clz(), rather than pure C, is giving the optimiser a bit more
information to work with.

This file also has a __GNUC__ < 4 case which seems ripe for removing...
--- xen/arch/arm/include/asm/div64.h | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) base-commit: b8cdfac2be38c98dd3ad0e911a3f7f787f5bcf6b prerequisite-patch-id: 57ffe02b03d27a12f20d9e08fa21eed01c8c6299 prerequisite-patch-id: 56393fef18638a92eae127f36ffddb655fc7b9f4 prerequisite-patch-id: 4f963f44331104dc00663f8ff22bd306ef04f301 prerequisite-patch-id: 81a434352bbb36e17d3b7a45f489974fc4603ecb prerequisite-patch-id: f2f2a00eee52f668b3f557fb6d357ec3bf00ac92 prerequisite-patch-id: 67b871715259e60fbf7db917233dbdecce6891da prerequisite-patch-id: f8562e07c91fa42b1501efa759734a7874b9d909 prerequisite-patch-id: a5f304a67525412f0669a298a5f66285b56c3a58 prerequisite-patch-id: 6b5b1dc3f6760888a15c11cc658c52ba6fd3f33d prerequisite-patch-id: 29eb6b854e9df37f5e8ed212215baab0ac7fbe87 prerequisite-patch-id: d87fe52c264dc5a33883a04b615043fbefd94f92 prerequisite-patch-id: 26a2978b861386fda945f1e60e9153cf0bdd24f3 prerequisite-patch-id: ab50b5247a29b4fbbd7207a558647dd3c57d5175 prerequisite-patch-id: 0f2a6cfa7d77c6f05f23c3aada161d02a9fc7660 prerequisite-patch-id: 7153c7bb3a45877fd84286dd9915046fa0a76056 prerequisite-patch-id: 74830838bac94ed1e036a8173cf3210a314b35d8 prerequisite-patch-id: 74a6e5ffb9f477afb61e73ed80a40c9359bc77a2 prerequisite-patch-id: 795f6e9425cc6a953166b530ae68df466a7a3c2b prerequisite-patch-id: e37b1bc5dd69e7e68abf0e6c004431537f70175f prerequisite-patch-id: 2e510b0a05df30c68bec8baf8b411a71e5f14d74 prerequisite-patch-id: e0397c86b545a1d65f2e6b2049c282b926c40c64 prerequisite-patch-id: 44606527ccbdf980a4c2401394f728f9c2011b8a prerequisite-patch-id: 65b83839f7a477b9fa8e8913380e8eac2ac1ca0e