Message ID | 20200722091641.8834-42-frank.chang@sifive.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | target/riscv: support vector extension v0.9 | expand |
On 7/22/20 2:16 AM, frank.chang@sifive.com wrote:
> From: Frank Chang <frank.chang@sifive.com>
>
> NaN-boxed the scalar floating-point register based on RVV 0.9's rules.
>
> Signed-off-by: Frank Chang <frank.chang@sifive.com>
> ---
>  target/riscv/insn32.decode              |  4 +--
>  target/riscv/insn_trans/trans_rvv.inc.c | 45 ++++++++++++++++---------
>  2 files changed, 31 insertions(+), 18 deletions(-)
>
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index ef53df7c73..4be1b88e2d 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -583,8 +583,8 @@ vid_v 010100 . 00000 10001 010 ..... 1010111 @r1_vm
>  vmv_x_s 010000 1 ..... 00000 010 ..... 1010111 @r2rd
>  vmv_s_x 010000 1 00000 ..... 110 ..... 1010111 @r2
>  vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r
> -vfmv_f_s 001100 1 ..... 00000 001 ..... 1010111 @r2rd
> -vfmv_s_f 001101 1 00000 ..... 101 ..... 1010111 @r2
> +vfmv_f_s 010000 1 ..... 00000 001 ..... 1010111 @r2rd
> +vfmv_s_f 010000 1 00000 ..... 101 ..... 1010111 @r2
>  vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm
>  vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm
>  vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm
> diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
> index 54c08ea1f8..56cd7444f2 100644
> --- a/target/riscv/insn_trans/trans_rvv.inc.c
> +++ b/target/riscv/insn_trans/trans_rvv.inc.c
> @@ -3326,14 +3326,22 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
>  /* Floating-Point Scalar Move Instructions */
>  static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a)
>  {
> -    if (!s->vill && has_ext(s, RVF) &&
> -        (s->mstatus_fs != 0) && (s->sew != 0)) {
> -        unsigned int len = 8 << s->sew;
> -
> -        vec_element_loadi(s, cpu_fpr[a->rd], a->rs2, 0);
> -        if (len < 64) {
> -            tcg_gen_ori_i64(cpu_fpr[a->rd], cpu_fpr[a->rd],
> -                            MAKE_64BIT_MASK(len, 64 - len));
> +    if (require_rvv(s) &&
> +        vext_check_isa_ill(s) &&
> +        has_ext(s, RVF) &&
> +        (s->mstatus_fs != 0) &&
> +        (s->sew != 0)) {
> +        unsigned int ofs = (8 << s->sew);
> +        unsigned int len = 64 - ofs;
> +        TCGv_i64 t_nan;
> +
> +        vec_element_loadi(s, cpu_fpr[a->rd], a->rs2, 0, false);
> +        /* NaN-box f[rd] as necessary for SEW */
> +        if (len) {
> +            t_nan = tcg_const_i64(UINT64_MAX);
> +            tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rd],
> +                                t_nan, ofs, len);
> +            tcg_temp_free_i64(t_nan);
>          }

It would be better to use the gen_nanbox_[sh] functions here.

>          t1 = tcg_temp_new_i64();
>          if (s->sew == MO_64 && !has_ext(s, RVD)) {
> +            /* SEW > FLEN, f[rs1] is NaN-boxed to SEW bits */
>              tcg_gen_ori_i64(t1, cpu_fpr[a->rs1], MAKE_64BIT_MASK(32, 32));

With my nanbox patch set, RVF-only cpus will *always* have values nanboxed to
64-bits. So there should be no need to special case this, only provide a
comment noting that RVF and RVD can be treated equally.

> +        } else if ((s->sew < MO_64 && has_ext(s, RVD)) ||
> +                   (s->sew < MO_32)) {
> +            /* SEW < FLEN */
> +            TCGv_i32 sew = tcg_const_i32(1 << (s->sew + 3));
> +            gen_helper_narrower_nanbox_fpr(t1, cpu_fpr[a->rs1], sew, cpu_env);

Use gen_check_nanbox_[sh].


r~
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index ef53df7c73..4be1b88e2d 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -583,8 +583,8 @@ vid_v 010100 . 00000 10001 010 ..... 1010111 @r1_vm
 vmv_x_s 010000 1 ..... 00000 010 ..... 1010111 @r2rd
 vmv_s_x 010000 1 00000 ..... 110 ..... 1010111 @r2
 vext_x_v 001100 1 ..... ..... 010 ..... 1010111 @r
-vfmv_f_s 001100 1 ..... 00000 001 ..... 1010111 @r2rd
-vfmv_s_f 001101 1 00000 ..... 101 ..... 1010111 @r2
+vfmv_f_s 010000 1 ..... 00000 001 ..... 1010111 @r2rd
+vfmv_s_f 010000 1 00000 ..... 101 ..... 1010111 @r2
 vslideup_vx 001110 . ..... ..... 100 ..... 1010111 @r_vm
 vslideup_vi 001110 . ..... ..... 011 ..... 1010111 @r_vm
 vslide1up_vx 001110 . ..... ..... 110 ..... 1010111 @r_vm
diff --git a/target/riscv/insn_trans/trans_rvv.inc.c b/target/riscv/insn_trans/trans_rvv.inc.c
index 54c08ea1f8..56cd7444f2 100644
--- a/target/riscv/insn_trans/trans_rvv.inc.c
+++ b/target/riscv/insn_trans/trans_rvv.inc.c
@@ -3326,14 +3326,22 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
 /* Floating-Point Scalar Move Instructions */
 static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a)
 {
-    if (!s->vill && has_ext(s, RVF) &&
-        (s->mstatus_fs != 0) && (s->sew != 0)) {
-        unsigned int len = 8 << s->sew;
-
-        vec_element_loadi(s, cpu_fpr[a->rd], a->rs2, 0);
-        if (len < 64) {
-            tcg_gen_ori_i64(cpu_fpr[a->rd], cpu_fpr[a->rd],
-                            MAKE_64BIT_MASK(len, 64 - len));
+    if (require_rvv(s) &&
+        vext_check_isa_ill(s) &&
+        has_ext(s, RVF) &&
+        (s->mstatus_fs != 0) &&
+        (s->sew != 0)) {
+        unsigned int ofs = (8 << s->sew);
+        unsigned int len = 64 - ofs;
+        TCGv_i64 t_nan;
+
+        vec_element_loadi(s, cpu_fpr[a->rd], a->rs2, 0, false);
+        /* NaN-box f[rd] as necessary for SEW */
+        if (len) {
+            t_nan = tcg_const_i64(UINT64_MAX);
+            tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rd],
+                                t_nan, ofs, len);
+            tcg_temp_free_i64(t_nan);
         }
 
         mark_fs_dirty(s);
@@ -3345,22 +3353,27 @@ static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a)
 /* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2=0) */
 static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
 {
-    if (!s->vill && has_ext(s, RVF) && (s->sew != 0)) {
-        TCGv_i64 t1;
+    if (require_rvv(s) &&
+        vext_check_isa_ill(s) &&
+        has_ext(s, RVF) &&
+        (s->sew != 0)) {
         /* The instructions ignore LMUL and vector register group. */
-        uint32_t vlmax = s->vlen >> 3;
+        TCGv_i64 t1;
+        TCGLabel *over = gen_new_label();
 
         /* if vl == 0, skip vector register write back */
-        TCGLabel *over = gen_new_label();
         tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over);
 
-        /* zeroed all elements */
-        tcg_gen_gvec_dup_imm(SEW64, vreg_ofs(s, a->rd), vlmax, vlmax, 0);
-
-        /* NaN-box f[rs1] as necessary for SEW */
         t1 = tcg_temp_new_i64();
         if (s->sew == MO_64 && !has_ext(s, RVD)) {
+            /* SEW > FLEN, f[rs1] is NaN-boxed to SEW bits */
             tcg_gen_ori_i64(t1, cpu_fpr[a->rs1], MAKE_64BIT_MASK(32, 32));
+        } else if ((s->sew < MO_64 && has_ext(s, RVD)) ||
+                   (s->sew < MO_32)) {
+            /* SEW < FLEN */
+            TCGv_i32 sew = tcg_const_i32(1 << (s->sew + 3));
+            gen_helper_narrower_nanbox_fpr(t1, cpu_fpr[a->rs1], sew, cpu_env);
+            tcg_temp_free_i32(sew);
         } else {
             tcg_gen_mov_i64(t1, cpu_fpr[a->rs1]);
         }