Message ID | 20230531065458.2082-1-zhiwei_liu@linux.alibaba.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | fpu: Add conversions between bfloat16 and [u]int8 | expand |
On 2023/6/1 1:47, Richard Henderson wrote: > On 5/30/23 23:54, LIU Zhiwei wrote: >> We missed these functions when upstreaming the bfloat16 support. >> >> Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> > > They look ok, so far as it goes. What will they be used for? The T-Head Xuantie CPUs' custom extension needs these interfaces. It uses a custom CSR (not yet upstream) to switch between fp16 and bfloat16. All fp16 instructions (Zfh) can process the bfloat16 type. This feature is also supported in its custom matrix extension [1] and vector extension. As a side note, RISC-V port support for custom extensions should cover at least these aspects: * ISA decoding (ready, Philipp Tomsich) * CSR (WIP, Andes?) * Disassembly (under review, Christopher) * Errata (not started) * Split TB flags, like ARM, for custom extensions (in the wild for the Xuantie CPUs) 1. https://github.com/T-head-Semi/riscv-matrix-extension-spec/releases/tag/v0.1.0 > > > r~
Hi Richard, Can you pick it to your tree? Thanks, Zhiwei On 2023/5/31 14:54, LIU Zhiwei wrote: > We missed these functions when upstreaming the bfloat16 support. > > Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> > --- > fpu/softfloat.c | 58 +++++++++++++++++++++++++++++++++++++++++ > include/fpu/softfloat.h | 12 +++++++++ > 2 files changed, 70 insertions(+) > > diff --git a/fpu/softfloat.c b/fpu/softfloat.c > index 108f9cb224..576b026f4e 100644 > --- a/fpu/softfloat.c > +++ b/fpu/softfloat.c > @@ -3113,6 +3113,15 @@ int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale, > return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); > } > > +int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, > + float_status *s) > +{ > + FloatParts64 p; > + > + bfloat16_unpack_canonical(&p, a, s); > + return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s); > +} > + > int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, > float_status *s) > { > @@ -3379,6 +3388,11 @@ int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s) > return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s); > } > > +int8_t bfloat16_to_int8(bfloat16 a, float_status *s) > +{ > + return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s); > +} > + > int16_t bfloat16_to_int16(bfloat16 a, float_status *s) > { > return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); > @@ -3394,6 +3408,11 @@ int64_t bfloat16_to_int64(bfloat16 a, float_status *s) > return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); > } > > +int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s) > +{ > + return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s); > +} > + > int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s) > { > return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s); > @@ -3503,6 +3522,15 @@ uint64_t float64_to_uint64_scalbn(float64 a, 
FloatRoundMode rmode, int scale, > return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); > } > > +uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode, > + int scale, float_status *s) > +{ > + FloatParts64 p; > + > + bfloat16_unpack_canonical(&p, a, s); > + return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s); > +} > + > uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode, > int scale, float_status *s) > { > @@ -3728,6 +3756,11 @@ Int128 float128_to_uint128_round_to_zero(float128 a, float_status *s) > return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s); > } > > +uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s) > +{ > + return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s); > +} > + > uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s) > { > return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); > @@ -3743,6 +3776,11 @@ uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s) > return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); > } > > +uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s) > +{ > + return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s); > +} > + > uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s) > { > return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s); > @@ -3898,6 +3936,11 @@ bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status) > return int64_to_bfloat16_scalbn(a, scale, status); > } > > +bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status) > +{ > + return int64_to_bfloat16_scalbn(a, scale, status); > +} > + > bfloat16 int64_to_bfloat16(int64_t a, float_status *status) > { > return int64_to_bfloat16_scalbn(a, 0, status); > @@ -3913,6 +3956,11 @@ bfloat16 int16_to_bfloat16(int16_t a, float_status *status) > return int64_to_bfloat16_scalbn(a, 0, status); > } > > +bfloat16 int8_to_bfloat16(int8_t a, float_status *status) > +{ > + return 
int64_to_bfloat16_scalbn(a, 0, status); > +} > + > float128 int128_to_float128(Int128 a, float_status *status) > { > FloatParts128 p = { }; > @@ -4108,6 +4156,11 @@ bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status) > return uint64_to_bfloat16_scalbn(a, scale, status); > } > > +bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status) > +{ > + return uint64_to_bfloat16_scalbn(a, scale, status); > +} > + > bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status) > { > return uint64_to_bfloat16_scalbn(a, 0, status); > @@ -4123,6 +4176,11 @@ bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status) > return uint64_to_bfloat16_scalbn(a, 0, status); > } > > +bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status) > +{ > + return uint64_to_bfloat16_scalbn(a, 0, status); > +} > + > float128 uint64_to_float128(uint64_t a, float_status *status) > { > FloatParts128 p; > diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h > index 3dcf20e3a2..6d02f619d0 100644 > --- a/include/fpu/softfloat.h > +++ b/include/fpu/softfloat.h > @@ -366,6 +366,8 @@ float32 bfloat16_to_float32(bfloat16, float_status *status); > bfloat16 float64_to_bfloat16(float64 a, float_status *status); > float64 bfloat16_to_float64(bfloat16 a, float_status *status); > > +int8_t bfloat16_to_int8_scalbn(bfloat16, FloatRoundMode, > + int, float_status *status); > int16_t bfloat16_to_int16_scalbn(bfloat16, FloatRoundMode, > int, float_status *status); > int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode, > @@ -373,14 +375,18 @@ int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode, > int64_t bfloat16_to_int64_scalbn(bfloat16, FloatRoundMode, > int, float_status *status); > > +int8_t bfloat16_to_int8(bfloat16, float_status *status); > int16_t bfloat16_to_int16(bfloat16, float_status *status); > int32_t bfloat16_to_int32(bfloat16, float_status *status); > int64_t bfloat16_to_int64(bfloat16, float_status *status); > > +int8_t 
bfloat16_to_int8_round_to_zero(bfloat16, float_status *status); > int16_t bfloat16_to_int16_round_to_zero(bfloat16, float_status *status); > int32_t bfloat16_to_int32_round_to_zero(bfloat16, float_status *status); > int64_t bfloat16_to_int64_round_to_zero(bfloat16, float_status *status); > > +uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode, > + int, float_status *status); > uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode, > int, float_status *status); > uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode, > @@ -388,24 +394,30 @@ uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode, > uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode, > int, float_status *status); > > +uint8_t bfloat16_to_uint8(bfloat16 a, float_status *status); > uint16_t bfloat16_to_uint16(bfloat16 a, float_status *status); > uint32_t bfloat16_to_uint32(bfloat16 a, float_status *status); > uint64_t bfloat16_to_uint64(bfloat16 a, float_status *status); > > +uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *status); > uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *status); > uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *status); > uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *status); > > +bfloat16 int8_to_bfloat16_scalbn(int8_t a, int, float_status *status); > bfloat16 int16_to_bfloat16_scalbn(int16_t a, int, float_status *status); > bfloat16 int32_to_bfloat16_scalbn(int32_t a, int, float_status *status); > bfloat16 int64_to_bfloat16_scalbn(int64_t a, int, float_status *status); > +bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int, float_status *status); > bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int, float_status *status); > bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int, float_status *status); > bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int, float_status *status); > > +bfloat16 int8_to_bfloat16(int8_t a, float_status *status); > bfloat16 
int16_to_bfloat16(int16_t a, float_status *status); > bfloat16 int32_to_bfloat16(int32_t a, float_status *status); > bfloat16 int64_to_bfloat16(int64_t a, float_status *status); > +bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status); > bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status); > bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status); > bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status);
On 9/13/23 02:54, LIU Zhiwei wrote: > Hi Richard, > > Can you pick it to your tree? Sure. Queued to tcg-next. r~
diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 108f9cb224..576b026f4e 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -3113,6 +3113,15 @@ int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale, return parts_float_to_sint(&p, rmode, scale, INT64_MIN, INT64_MAX, s); } +int8_t bfloat16_to_int8_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, + float_status *s) +{ + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_sint(&p, rmode, scale, INT8_MIN, INT8_MAX, s); +} + int16_t bfloat16_to_int16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, float_status *s) { @@ -3379,6 +3388,11 @@ int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *s) return floatx80_to_int64_scalbn(a, float_round_to_zero, 0, s); } +int8_t bfloat16_to_int8(bfloat16 a, float_status *s) +{ + return bfloat16_to_int8_scalbn(a, s->float_rounding_mode, 0, s); +} + int16_t bfloat16_to_int16(bfloat16 a, float_status *s) { return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s); @@ -3394,6 +3408,11 @@ int64_t bfloat16_to_int64(bfloat16 a, float_status *s) return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s); } +int8_t bfloat16_to_int8_round_to_zero(bfloat16 a, float_status *s) +{ + return bfloat16_to_int8_scalbn(a, float_round_to_zero, 0, s); +} + int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s) { return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s); @@ -3503,6 +3522,15 @@ uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale, return parts_float_to_uint(&p, rmode, scale, UINT64_MAX, s); } +uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode rmode, + int scale, float_status *s) +{ + FloatParts64 p; + + bfloat16_unpack_canonical(&p, a, s); + return parts_float_to_uint(&p, rmode, scale, UINT8_MAX, s); +} + uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode rmode, int scale, float_status *s) { @@ -3728,6 +3756,11 @@ Int128 
float128_to_uint128_round_to_zero(float128 a, float_status *s) return float128_to_uint128_scalbn(a, float_round_to_zero, 0, s); } +uint8_t bfloat16_to_uint8(bfloat16 a, float_status *s) +{ + return bfloat16_to_uint8_scalbn(a, s->float_rounding_mode, 0, s); +} + uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s) { return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s); @@ -3743,6 +3776,11 @@ uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s) return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s); } +uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *s) +{ + return bfloat16_to_uint8_scalbn(a, float_round_to_zero, 0, s); +} + uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s) { return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s); @@ -3898,6 +3936,11 @@ bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status) return int64_to_bfloat16_scalbn(a, scale, status); } +bfloat16 int8_to_bfloat16_scalbn(int8_t a, int scale, float_status *status) +{ + return int64_to_bfloat16_scalbn(a, scale, status); +} + bfloat16 int64_to_bfloat16(int64_t a, float_status *status) { return int64_to_bfloat16_scalbn(a, 0, status); @@ -3913,6 +3956,11 @@ bfloat16 int16_to_bfloat16(int16_t a, float_status *status) return int64_to_bfloat16_scalbn(a, 0, status); } +bfloat16 int8_to_bfloat16(int8_t a, float_status *status) +{ + return int64_to_bfloat16_scalbn(a, 0, status); +} + float128 int128_to_float128(Int128 a, float_status *status) { FloatParts128 p = { }; @@ -4108,6 +4156,11 @@ bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status) return uint64_to_bfloat16_scalbn(a, scale, status); } +bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int scale, float_status *status) +{ + return uint64_to_bfloat16_scalbn(a, scale, status); +} + bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status) { return uint64_to_bfloat16_scalbn(a, 0, status); @@ -4123,6 +4176,11 @@ bfloat16 
uint16_to_bfloat16(uint16_t a, float_status *status) return uint64_to_bfloat16_scalbn(a, 0, status); } +bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status) +{ + return uint64_to_bfloat16_scalbn(a, 0, status); +} + float128 uint64_to_float128(uint64_t a, float_status *status) { FloatParts128 p; diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index 3dcf20e3a2..6d02f619d0 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -366,6 +366,8 @@ float32 bfloat16_to_float32(bfloat16, float_status *status); bfloat16 float64_to_bfloat16(float64 a, float_status *status); float64 bfloat16_to_float64(bfloat16 a, float_status *status); +int8_t bfloat16_to_int8_scalbn(bfloat16, FloatRoundMode, + int, float_status *status); int16_t bfloat16_to_int16_scalbn(bfloat16, FloatRoundMode, int, float_status *status); int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode, @@ -373,14 +375,18 @@ int32_t bfloat16_to_int32_scalbn(bfloat16, FloatRoundMode, int64_t bfloat16_to_int64_scalbn(bfloat16, FloatRoundMode, int, float_status *status); +int8_t bfloat16_to_int8(bfloat16, float_status *status); int16_t bfloat16_to_int16(bfloat16, float_status *status); int32_t bfloat16_to_int32(bfloat16, float_status *status); int64_t bfloat16_to_int64(bfloat16, float_status *status); +int8_t bfloat16_to_int8_round_to_zero(bfloat16, float_status *status); int16_t bfloat16_to_int16_round_to_zero(bfloat16, float_status *status); int32_t bfloat16_to_int32_round_to_zero(bfloat16, float_status *status); int64_t bfloat16_to_int64_round_to_zero(bfloat16, float_status *status); +uint8_t bfloat16_to_uint8_scalbn(bfloat16 a, FloatRoundMode, + int, float_status *status); uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, FloatRoundMode, int, float_status *status); uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode, @@ -388,24 +394,30 @@ uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, FloatRoundMode, uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, FloatRoundMode, int, 
float_status *status); +uint8_t bfloat16_to_uint8(bfloat16 a, float_status *status); uint16_t bfloat16_to_uint16(bfloat16 a, float_status *status); uint32_t bfloat16_to_uint32(bfloat16 a, float_status *status); uint64_t bfloat16_to_uint64(bfloat16 a, float_status *status); +uint8_t bfloat16_to_uint8_round_to_zero(bfloat16 a, float_status *status); uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *status); uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *status); uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *status); +bfloat16 int8_to_bfloat16_scalbn(int8_t a, int, float_status *status); bfloat16 int16_to_bfloat16_scalbn(int16_t a, int, float_status *status); bfloat16 int32_to_bfloat16_scalbn(int32_t a, int, float_status *status); bfloat16 int64_to_bfloat16_scalbn(int64_t a, int, float_status *status); +bfloat16 uint8_to_bfloat16_scalbn(uint8_t a, int, float_status *status); bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int, float_status *status); bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int, float_status *status); bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int, float_status *status); +bfloat16 int8_to_bfloat16(int8_t a, float_status *status); bfloat16 int16_to_bfloat16(int16_t a, float_status *status); bfloat16 int32_to_bfloat16(int32_t a, float_status *status); bfloat16 int64_to_bfloat16(int64_t a, float_status *status); +bfloat16 uint8_to_bfloat16(uint8_t a, float_status *status); bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status); bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status); bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status);
We missed these functions when upstreaming the bfloat16 support. Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com> --- fpu/softfloat.c | 58 +++++++++++++++++++++++++++++++++++++++++ include/fpu/softfloat.h | 12 +++++++++ 2 files changed, 70 insertions(+)