@@ -455,44 +455,35 @@ set_default_state(struct radeon_device *rdev)
radeon_ring_write(ring, sq_stack_resource_mgmt_2);
}
-#define I2F_MAX_BITS 15
-#define I2F_MAX_INPUT ((1 << I2F_MAX_BITS) - 1)
-#define I2F_SHIFT (24 - I2F_MAX_BITS)
+/* 23 bits of float fractional data */
+#define I2F_FRAC_BITS 23
+#define I2F_MASK ((1 << I2F_FRAC_BITS) - 1)
/*
* Converts unsigned integer into 32-bit IEEE floating point representation.
- * Conversion is not universal and only works for the range from 0
- * to 2^I2F_MAX_BITS-1. Currently we only use it with inputs between
- * 0 and 16384 (inclusive), so I2F_MAX_BITS=15 is enough. If necessary,
- * I2F_MAX_BITS can be increased, but that will add to the loop iterations
- * and slow us down. Conversion is done by shifting the input and counting
- * down until the first 1 reaches bit position 23. The resulting counter
- * and the shifted input are, respectively, the exponent and the fraction.
- * The sign is always zero.
+ * Will be exact from 0 to 2^24. Above that, we round towards zero
+ * as the fractional bits will not fit in a float. (It would be better to
+ * round towards even as the fpu does, but that is slower.)
*/
-static uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t x)
{
- u32 result, i, exponent, fraction;
+ uint32_t msb, exponent, fraction;
- WARN_ON_ONCE(input > I2F_MAX_INPUT);
+ /* Zero is special */
+ if (!x) return 0;
- if ((input & I2F_MAX_INPUT) == 0)
- result = 0;
- else {
- exponent = 126 + I2F_MAX_BITS;
- fraction = (input & I2F_MAX_INPUT) << I2F_SHIFT;
+ /* Get location of the most significant bit */
+ msb = __fls(x);
- for (i = 0; i < I2F_MAX_BITS; i++) {
- if (fraction & 0x800000)
- break;
- else {
- fraction = fraction << 1;
- exponent = exponent - 1;
- }
- }
- result = exponent << 23 | (fraction & 0x7fffff);
- }
- return result;
+ /*
+ * Use a rotate instead of a shift because that works both leftwards
+ * and rightwards due to the mod(32) behaviour. This means we don't
+ * need to check to see if we are above 2^24 or not.
+ */
+ fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK;
+ exponent = (127 + msb) << I2F_FRAC_BITS;
+
+ return fraction + exponent;
}
int r600_blit_init(struct radeon_device *rdev)
We use __fls() to find the most significant bit. Using that, the loop can be avoided. A second trick is to use the behaviour of the rotate instructions to expand the range of the unsigned int to float conversion to the full 32 bits in a branchless way. The routine is now exact up to 2^24. Above that, we truncate which is equivalent to rounding towards zero. Signed-off-by: Steven Fuerst <svfuerst@gmail.com> --- drivers/gpu/drm/radeon/r600_blit_kms.c | 51 +++++++++++++------------------- 1 file changed, 21 insertions(+), 30 deletions(-)