diff mbox series

[1/5] target/i386: special case ADC/SBB x,0 and SBB x,x

Message ID 20250403092251.54441-2-pbonzini@redhat.com (mailing list archive)
State New
Headers show
Series target/i386: TCG changes | expand

Commit Message

Paolo Bonzini April 3, 2025, 9:22 a.m. UTC
Avoid the three-operand CC_OP_ADC and CC_OP_SBB in these relatively
common cases.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/translate.c | 20 ++++++++++++
 target/i386/tcg/emit.c.inc  | 65 ++++++++++++++++++++++++++++++++++---
 2 files changed, 80 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index a8935f487aa..aee33428989 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -1183,6 +1183,26 @@  static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
     return cc;
 }
 
+static void gen_neg_setcc(DisasContext *s, int b, TCGv reg)
+{
+    CCPrepare cc = gen_prepare_cc(s, b, reg);
+
+    if (cc.no_setcond) {
+        if (cc.cond == TCG_COND_EQ) {
+            tcg_gen_addi_tl(reg, cc.reg, -1);
+        } else {
+            tcg_gen_neg_tl(reg, cc.reg);
+        }
+        return;
+    }
+
+    if (cc.use_reg2) {
+        tcg_gen_negsetcond_tl(cc.cond, reg, cc.reg, cc.reg2);
+    } else {
+        tcg_gen_negsetcondi_tl(cc.cond, reg, cc.reg, cc.imm);
+    }
+}
+
 static void gen_setcc(DisasContext *s, int b, TCGv reg)
 {
     CCPrepare cc = gen_prepare_cc(s, b, reg);
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 0fa1664a24f..76cd7f00308 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1170,11 +1170,28 @@  static void gen_AAS(DisasContext *s, X86DecodedInsn *decode)
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
+static void gen_ADD(DisasContext *s, X86DecodedInsn *decode);
 static void gen_ADC(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
-    TCGv c_in = tcg_temp_new();
+    TCGv c_in;
 
+    /*
+     * Try to avoid CC_OP_ADC.  The definition of ADD and ADC is different
+     * for AF and OF: CC_OP_ADC would make the second source argument 0 and
+     * the incoming carry would not be taken into account; whereas with ADD
+     * the second source argument is the incoming carry (c_in).  However it does
+     * not matter here:
+     * - for AF, only bit 4 matters and it's zero for both 0 and c_in
+     * - for OF, only the sign bit matters and it's zero for both 0 and c_in
+     */
+    if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) {
+        gen_compute_eflags_c(s, s->T1);
+        gen_ADD(s, decode);
+        return;
+    }
+
+    c_in = tcg_temp_new();
     gen_compute_eflags_c(s, c_in);
     if (s->prefix & PREFIX_LOCK) {
         tcg_gen_add_tl(s->T0, c_in, s->T1);
@@ -3830,22 +3847,60 @@  static void gen_SARX(DisasContext *s, X86DecodedInsn *decode)
     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
 }
 
+static void gen_SUB(DisasContext *s, X86DecodedInsn *decode);
 static void gen_SBB(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     TCGv c_in = tcg_temp_new();
 
+    /*
+     * Try to avoid CC_OP_SBB.  The definition of SUB and SBB is different
+     * for AF and OF: CC_OP_SBB would make the second source argument 0 and
+     * the incoming carry would not be taken into account; whereas with SUB
+     * the second source argument is the incoming carry (c_in).  However it does
+     * not matter here:
+     * - for AF, only bit 4 matters and it's zero for both 0 and c_in
+     * - for OF, only the sign bit matters and it's zero for both 0 and c_in
+     */
+    if (decode->e.op2 == X86_TYPE_I && decode->immediate == 0) {
+        gen_compute_eflags_c(s, s->T1);
+        gen_SUB(s, decode);
+        return;
+    }
+
     gen_compute_eflags_c(s, c_in);
+
+    /*
+     * Here, src1 is changed from T0 to 0, and src2 is changed from T1 to c_in
+     * (and T0 = T1).  AF and OF are unaffected because:
+     * - for AF, only bit 4 of src1^src2 matters, and it's zero for both
+     *   T0^T1 and 0^c_in
+     * - for OF, the sign bit of both T0^T1 and 0^c_in is zero, so there can
+     *   be no overflow.
+     */
+    if (decode->e.op2 != X86_TYPE_I && !decode->op[0].has_ea && decode->op[0].n == decode->op[2].n) {
+        if (s->cc_op == CC_OP_DYNAMIC) {
+            tcg_gen_neg_tl(s->T0, c_in);
+        } else {
+            /*
+             * Do not negate c_in because it will often be dead and only the
+             * instruction generated by negsetcond will survive.
+             */
+            gen_neg_setcc(s, JCC_B << 1, s->T0);
+        }
+        tcg_gen_movi_tl(s->cc_srcT, 0);
+        decode->cc_src = c_in;
+        decode->cc_dst = s->T0;
+        decode->cc_op = CC_OP_SUBB + ot;
+        return;
+    }
+
     if (s->prefix & PREFIX_LOCK) {
         tcg_gen_add_tl(s->T0, s->T1, c_in);
         tcg_gen_neg_tl(s->T0, s->T0);
         tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T0,
                                     s->mem_index, ot | MO_LE);
     } else {
-        /*
-         * TODO: SBB reg, reg could use gen_prepare_eflags_c followed by
-         * negsetcond, and CC_OP_SUBB as the cc_op.
-         */
         tcg_gen_sub_tl(s->T0, s->T0, s->T1);
         tcg_gen_sub_tl(s->T0, s->T0, c_in);
     }