Message ID | 20241020155324.35273-8-pbonzini@redhat.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | target/i386: miscellaneous flags improvements | expand |
On 10/20/24 08:53, Paolo Bonzini wrote:
> Most uses of CC_OP_DYNAMIC are for CMP/JB/JE or similar sequences.
> We can optimize many of them to avoid computation of the flags.
> This eliminates both TCG ops to set up the new cc_op, and helper
> instructions because evaluating just ZF is much cheaper.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  target/i386/helper.h        |  1 +
>  target/i386/tcg/cc_helper.c | 20 ++++++++++++++++++++
>  target/i386/tcg/translate.c | 10 +++++++---
>  3 files changed, 28 insertions(+), 3 deletions(-)
>
> diff --git a/target/i386/helper.h b/target/i386/helper.h
> index eeb8df56eaa..3f67098f11f 100644
> --- a/target/i386/helper.h
> +++ b/target/i386/helper.h
> @@ -1,5 +1,6 @@
>  DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
>  DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
> +DEF_HELPER_FLAGS_3(cc_compute_nz, TCG_CALL_NO_RWG_SE, tl, tl, tl, int)
>
>  DEF_HELPER_3(write_eflags, void, env, tl, i32)
>  DEF_HELPER_1(read_eflags, tl, env)
> diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c
> index 40583c04cf9..c24e6a14c07 100644
> --- a/target/i386/tcg/cc_helper.c
> +++ b/target/i386/tcg/cc_helper.c
> @@ -95,6 +95,26 @@ static target_ulong compute_all_adcox(target_ulong dst, target_ulong src1,
>      return (src1 & ~(CC_C | CC_O)) | (dst * CC_C) | (src2 * CC_O);
>  }
>
> +target_ulong helper_cc_compute_nz(target_ulong dst, target_ulong src1,
> +                                  int op)
> +{
> +    target_ulong mask;
> +
> +    if (CC_OP_HAS_EFLAGS(op)) {
> +        return ~src1 & CC_Z;
> +    } else {
> +        MemOp size = cc_op_size(op);
> +
> +        if (size == MO_TL) {
> +            /* Avoid shift count overflow when computing the mask below. */
> +            return dst;
> +        }
> +
> +        mask = (1ull << (8 << size)) - 1;

FWIW, MAKE_64BIT_MASK(0, 8 << size) does not have the overflow problem.

r~
diff --git a/target/i386/helper.h b/target/i386/helper.h
index eeb8df56eaa..3f67098f11f 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -1,5 +1,6 @@
 DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
 DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
+DEF_HELPER_FLAGS_3(cc_compute_nz, TCG_CALL_NO_RWG_SE, tl, tl, tl, int)
 
 DEF_HELPER_3(write_eflags, void, env, tl, i32)
 DEF_HELPER_1(read_eflags, tl, env)
diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c
index 40583c04cf9..c24e6a14c07 100644
--- a/target/i386/tcg/cc_helper.c
+++ b/target/i386/tcg/cc_helper.c
@@ -95,6 +95,26 @@ static target_ulong compute_all_adcox(target_ulong dst, target_ulong src1,
     return (src1 & ~(CC_C | CC_O)) | (dst * CC_C) | (src2 * CC_O);
 }
 
+target_ulong helper_cc_compute_nz(target_ulong dst, target_ulong src1,
+                                  int op)
+{
+    target_ulong mask;
+
+    if (CC_OP_HAS_EFLAGS(op)) {
+        return ~src1 & CC_Z;
+    } else {
+        MemOp size = cc_op_size(op);
+
+        if (size == MO_TL) {
+            /* Avoid shift count overflow when computing the mask below. */
+            return dst;
+        }
+
+        mask = (1ull << (8 << size)) - 1;
+        return dst & mask;
+    }
+}
+
 target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
                                    target_ulong src2, int op)
 {
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 1a9a2fe709e..5e326ab1aff 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -1008,15 +1008,19 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
 static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
 {
     switch (s->cc_op) {
-    case CC_OP_DYNAMIC:
-        gen_compute_eflags(s);
-        /* FALLTHRU */
     case CC_OP_EFLAGS:
     case CC_OP_ADCX:
     case CC_OP_ADOX:
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src,
                              .imm = CC_Z };
+    case CC_OP_DYNAMIC:
+        gen_update_cc_op(s);
+        if (!reg) {
+            reg = tcg_temp_new();
+        }
+        gen_helper_cc_compute_nz(reg, cpu_cc_dst, cpu_cc_src, cpu_cc_op);
+        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = reg, .imm = 0 };
     default:
         {
             MemOp size = cc_op_size(s->cc_op);
Most uses of CC_OP_DYNAMIC are for CMP/JB/JE or similar sequences.
We can optimize many of them to avoid computation of the flags.
This eliminates both TCG ops to set up the new cc_op, and helper
instructions because evaluating just ZF is much cheaper.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/helper.h        |  1 +
 target/i386/tcg/cc_helper.c | 20 ++++++++++++++++++++
 target/i386/tcg/translate.c | 10 +++++++---
 3 files changed, 28 insertions(+), 3 deletions(-)